diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6f06927
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+059285.pdf
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..63821a7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,25 @@
+.PHONY: develop setup run-tests tests test gdb-test
+
+LPYTHON=python3
+V=$(PWD)/../../$(LPYTHON)
+VB=$(V)/bin
+PYTHON=$(VB)/$(LPYTHON)
+ROOT=$(PWD)
+#INI=icc.linkgrammar
+#LCAT=src/icc/linkgrammar/locale/
+
+develop: setup
+	pip install -r requirements.txt
+
+setup:
+	python setup.py develop
+
+run-tests:
+	nosetests -w src/icc/tests
+
+tests:	run-tests
+
+test:	setup run-tests
+
+gdb-test: setup
+	gdb --args $(PYTHON) $(VB)/nosetests -w src/icc/tests
diff --git a/README.md b/README.md
index 222c14d..3085e5a 100644
--- a/README.md
+++ b/README.md
@@ -13,4 +13,8 @@ tables in ST Micro’s datasheets. The script requires numpy and poppler
 ###Tags
 [Utilities](http://ashimagroup.net/os/tag/utilities)
 
-
+###Requires
+- Debian/Ubuntu: `apt-get install python-dev poppler-utils`
+- Fedora/RHEL: `yum install python-devel poppler-utils`
+- [numpy](http://www.numpy.org/)
+- [pandas](http://pandas.pydata.org/)
diff --git a/example/test_to_pandas.py b/example/test_to_pandas.py
index bb31515..3b30c80 100644
--- a/example/test_to_pandas.py
+++ b/example/test_to_pandas.py
@@ -1,11 +1,17 @@
+from __future__ import print_function
 import pandas as pd
 import pdftableextract as pdf
 
 pages = ["1"]
-cells = [pdf.process_page("example.pdf",p) for p in pages]
+
+cells = [pdf.process_page("example.pdf",
+                          p,
+                          outfilename="pandas-test",
+                          bitmap_resolution=100,
+                          checkall=False) for p in pages]
 
 #flatten the cells structure
-cells = [item for sublist in cells for item in sublist ]
+cells = [item for sublist in cells for item in sublist]
 
 #without any options, process_page picks up a blank table at the top of the page.
 #so choose table '1'
@@ -16,5 +22,5 @@
 #row '1' contains column headings
 #data is row '2' through '-1'
 
-data =pd.DataFrame(li[2:-1], columns=li[1], index=[l[0] for l in li[2:-1]])
-print data
+data = pd.DataFrame(li[2:-1], columns=li[1], index=[l[0] for l in li[2:-1]])
+print(data)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e917532
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+ruamel.venvgtk
+numpy
+matplotlib
+pandas
diff --git a/setup.py b/setup.py
index 8591c50..66ef158 100644
--- a/setup.py
+++ b/setup.py
@@ -5,10 +5,9 @@
 README = open(os.path.join(here, 'README.md')).read()
 #NEWS = open(os.path.join(here, 'NEWS.txt')).read()
 
-
 version = '0.1'
 
-install_requires = [ "numpy" ]
+install_requires = ["numpy", "ruamel.venvgtk"]
 
 
 setup(name='pdf-table-extract',
@@ -21,7 +20,7 @@
     keywords='PDF, tables',
     author='Ian McEwan',
     author_email='ijm@ashimaresearch.com',
-    url='ashimaresearch.com',
+    url='ashimaresearch.com',
     license='MIT-Expat',
     packages=find_packages('src'),
     package_dir = {'': 'src'},include_package_data=True,
diff --git a/src/pdftableextract/__init__.py b/src/pdftableextract/__init__.py
index 6dbe85c..8366135 100644
--- a/src/pdftableextract/__init__.py
+++ b/src/pdftableextract/__init__.py
@@ -1,2 +1,2 @@
 # Example package with a console entry point
-from core import process_page, output, table_to_list
\ No newline at end of file
+from pdftableextract.core import process_page, output, table_to_list
diff --git a/src/pdftableextract/core.py b/src/pdftableextract/core.py
index d1dce80..7af488a 100644
--- a/src/pdftableextract/core.py
+++ b/src/pdftableextract/core.py
@@ -1,481 +1,697 @@
 import sys
 import os
-from numpy import array, fromstring, ones, zeros, uint8, diff, where, sum, delete
-import subprocess
-from pipes import quote
-from .pnm import readPNM, dumpImage
-import re
-from pipes import quote
+
+DEBUG = False
+
+if DEBUG:
+    import random
+from numpy import array, fromstring, ones, zeros, uint8, diff, where, sum, delete, frombuffer, reshape, all, any
+import numpy
+
+if DEBUG:
+    import matplotlib
+    matplotlib.use('AGG')
+    from matplotlib.image import imsave
+
 from xml.dom.minidom import getDOMImplementation
 import json
 import csv
+import gi
+gi.require_version('Gtk', '3.0')
+gi.require_version('Poppler', '0.18')
+gi.require_version('Gdk', '3.0')
+from gi.repository import Gdk, Poppler  #, Glib
+import cairo
+
+
+def interact(locals):
+    import code
+    code.InteractiveConsole(locals=locals).interact()
+
+
+class PopplerProcessor(object):
+    """Class for processing PDF. That's simple.
+    It does two things:
+    1. renders a page as a greyscale bitmap (a numpy array), and
+    2. gets the text inside a rectangular bounding box.
+    """
+
+    def __init__(self, filename, **kwargs):
+        """Opens a document denoted by filename.
+        """
+        self.filename = os.path.abspath(filename)
+        self.document = Poppler.Document.new_from_file("file:" + self.filename,
+                                                       None)
+        self.page_num = self.document.get_n_pages()
+        self.resolution = 300
+        self.greyscale_threshold = int(kwargs.get("greyscale_thresholds",
+                                                  25)) * 255.0 / 100.0
+        self.layout = None
+
+    def get_page(self, index):
+        """Load page `index` (0-based), caching its text, attributes and layout.
+        Raises IndexError when `index` is outside the document.
+        """
+        if index < 0 or index >= self.page_num:
+            raise IndexError("page number is out of bounds")
+        page = self.document.get_page(index)
+        self.layout = None  # forget the layout cached for any previous page
+        self.text = page.get_text()
+        self.attributes = page.get_text_attributes()
+        found = page.get_text_layout()  # (success flag, list of rectangles)
+        if found[0]:
+            self.layout = found[1]
+        return page
+
+    def get_image(self, index):
+        page = self.get_page(index)
+        dpi = self.resolution
+        scale = 1
+        width, height = [int(x) for x in page.get_size()]
+        d = self.scale = dpi / 72.
+        self.frac_scale = 1 / d
+        pxw, pxh = int(width * d), int(height * d)
+        surface = cairo.ImageSurface(
+            # data,
+            cairo.FORMAT_ARGB32,
+            pxw,
+            pxh)
+
+        context = cairo.Context(surface)
+        context.scale(d, d)
+
+        context.save()
+        page.render(context)
+        context.restore()
+
+        pixbuf = Gdk.pixbuf_get_from_surface(surface, 0, 0, pxw, pxh)
+        # surface.write_to_png("page.png")
+        data = frombuffer(pixbuf.get_pixels(), dtype=uint8)
+        R = data[0::4]
+        G = data[1::4]
+        B = data[2::4]
+        A = data[3::4]
+        C = (R * 34. + G * 56. + B * 10.) / 100. # Convert to gray
+
+        C = C.astype(uint8)
+
+        nd = zeros(C.shape, dtype=uint8)
+        nd[:] = C
+        nd[A <= self.greyscale_threshold] = 255
+        nd = nd.reshape((pxh, pxw))
+        # imsave('nomask.png', nd)
+        return nd, page
+
+    def print_rect(self, msg=None, r=None, page=None):
+        """Used for debugging.
+        """
+        if None in [r, page]:
+            raise ValueError("r and page arguments are required")
+        x1, y1, x2, y2 = r.x1, r.y1, r.x2, r.y2
+        x, y, w, h = x1, y1, x2 - x1, y2 - y1
+        print(msg, x, y, w, h, "---", x1, y1, x2, y2)
+        width, height = [int(x) for x in page.get_size()]
+        print(msg, x, height - y, w, h, "---", x1, height - y1, x2,
+              height - y2)
+
+    def overlap(self, a, b, pad=0):
+        """Check whether rectangles a and b overlap.
+
+        Arguments:
+        - `a`, `b` : The rectangles (objects with x1, y1, x2, y2);
+        - `pad` : Extra margin added around `a` before testing (0 = exact).
+        """
+        return (a.x1 - pad < b.x2 and a.x2 + pad > b.x1 and a.y1 - pad < b.y2 and a.y2 + pad > b.y1)
+
+    def rexpand(self, rect, layout, pad=0):
+        """Make rectangle rect include layout
+
+        Arguments:
+        - `rect`: Adjustable Rectangle;
+        - `layout`: Rectangle to be included in rect.
+        """
+
+        r, l = rect, layout
+        if r.x1 > l.x1: r.x1 = l.x1-pad
+        if r.y1 > l.y1: r.y1 = l.y1-pad
+        if r.x2 < l.x2: r.x2 = l.x2+pad
+        if r.y2 < l.y2: r.y2 = l.y2+pad
+
+    def get_text(self, page, x, y, w, h):
+        """Return (text, bounds) for the glyphs overlapping a pixel-space box.
+
+        x, y, w, h are bitmap pixels; they are scaled by self.frac_scale
+        (set by get_image) before being compared with the cached layout.
+        bounds is a Poppler.Rectangle grown around every matching glyph.
+        `page` is unused. Raises RuntimeError when no layout is cached.
+        """
+        if self.layout is None:
+            raise RuntimeError("page is not chosen")
+        fc = self.frac_scale
+        x, y, w, h = (z * fc for z in [x, y, w, h])
+        rect = Poppler.Rectangle()
+        rect.x1, rect.y1 = x, y
+        rect.x2, rect.y2 = x + w, y + h
+        assert rect.x1 <= rect.x2
+        assert rect.y1 <= rect.y2
+        # FIXME: page.get_text_for_area(rect) did not work, so glyphs are matched by hand.
+        r = Poppler.Rectangle()
+        r.x1 = r.y1 = 1e10
+        r.x2 = r.y2 = -1e10
+        chars = []
+        for k, l in enumerate(self.layout):
+            if self.overlap(rect, l, pad=0):
+                self.rexpand(r, l, pad=0)
+                chars.append(self.text[k])
+        return "".join(chars), r
+
+    def get_rectangles_for_page(self, page):
+        """Return (x1, y1, x2, y2) tuples for every letter of the cached page.
+        Used for debugging.
+
+        Arguments:
+        - `page`: unused; the layout cached by get_page() is reported.
+
+        Raises RuntimeError when no page layout has been cached.
+        """
+        if self.layout is None:
+            raise RuntimeError("page is not chosen")
+
+        return [(r.x1, r.y1, r.x2, r.y2) for r in self.layout]
+
+
+def colinterp(a, x):
+    """Linearly interpolate between the rows of color table `a` at x in [0, 1]."""
+    l = len(a) - 1
+    i = min(l - 1, max(0, int(x * l)))  # clamp so row i + 1 exists, even at x == 1
+    (u, v) = a[i:i + 2, :]
+    return u - (u - v) * (1.0 if x * l >= l else (x * l) % 1.0)
 
-#-----------------------------------------------------------------------
-def check_for_required_executable(name,command):
-    """Checks for an executable called 'name' by running 'command' and supressing
-    output. If the return code is non-zero or an OS error occurs, an Exception is raised""" 
-    try:
-        with open(os.devnull, "w") as fnull:
-            result=subprocess.check_call(command,stdout=fnull, stderr=fnull)
-    except OSError as e:
-        message = """Error running {0}.
-Command failed: {1}
-{2}""".format(name, " ".join(command), e)
-        raise OSError(message)
-    except subprocess.CalledProcessError as e:
-        raise
-    except Exception as e:
-        raise
 
-#-----------------------------------------------------------------------
-def popen(name,command, *args, **kwargs):
-    try:
-        result=subprocess.Popen(command,*args, **kwargs)
-        return result
-    except OSError, e:
-        message="""Error running {0}. Is it installed correctly?
-Error: {1}""".format(name, e)
-        raise OSError(message)
-    except Exception, e:
-        raise 
-
-def colinterp(a,x) :
-    """Interpolates colors"""
-    l = len(a)-1
-    i = min(l, max(0, int (x * l)))
-    (u,v) = a[i:i+2,:]
-    return u - (u-v) * ((x * l) % 1.0)
+colarr = array(
+    [[255, 0, 0], [255, 255, 0], [0, 255, 0], [0, 255, 255], [0, 0, 255]])
 
-colarr = array([ [255,0,0],[255,255,0],[0,255,0],[0,255,255],[0,0,255] ])
 
-def col(x, colmult=1.0) :
+def col(x, colmult=1.0):
     """colors"""
-    return colinterp(colarr,(colmult * x)% 1.0) / 2
-
-
-def process_page(infile, pgs, 
-    outfilename=None,
-    greyscale_threshold=25,
-    page=None,
-    crop=None,
-    line_length=0.17,
-    bitmap_resolution=300,
-    name=None,
-    pad=2,
-    white=None,
-    black=None,
-    bitmap=False, 
-    checkcrop=False, 
-    checklines=False, 
-    checkdivs=False,
-    checkcells=False,
-    whitespace="normalize",
-    boxes=False) :
-    
-  outfile = open(outfilename,'w') if outfilename else sys.stdout
-  page=page or []
-  (pg,frow,lrow) = (map(int,(pgs.split(":")))+[None,None])[0:3]
-  #check that pdftoppdm exists by running a simple command
-  check_for_required_executable("pdftoppm",["pdftoppm","-h"])
-  #end check
-
-  p = popen("pdftoppm", ("pdftoppm -gray -r %d -f %d -l %d %s " %
-      (bitmap_resolution,pg,pg,quote(infile))),
-      stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True )
-
-#-----------------------------------------------------------------------
-# image load secion.
+    return colinterp(colarr, (colmult * x) % 1.0) / 2
+
+def process_page(infile,
+                 pgs,
+                 outfilename=None,
+                 greyscale_threshold=25,
+                 page=None,
+                 crop=None,
+                 line_length=0.5,
+                 bitmap_resolution=300,
+                 name=None,
+                 pad=2,
+                 white=None,
+                 black=None,
+                 bitmap=False,
+                 checkcrop=False,
+                 checklines=False,
+                 checkdivs=False,
+                 checkcells=False,
+                 checkall=False,
+                 checkletters=False,
+                 whitespace="normalize",
+                 boxes=False,
+                 encoding="utf8"):
+
+    if checkall:
+        checkcrop = True
+        checklines = True
+        checkdivs = True
+        checkcells = True
+        checkletters = True
+
+    outfile = outfilename if outfilename else "output"
+    pdfdoc = PopplerProcessor(infile)
+    page = page or []
+    (pg, frow, lrow) = (list(map(int, (pgs.split(":")))) + [None, None])[0:3]
+    pdfdoc.resolution = bitmap_resolution
+    pdfdoc.greyscale_threshold = greyscale_threshold
+
+    data, page = pdfdoc.get_image(pg - 1)  # Page numbers are 0-based.
+
+    #-----------------------------------------------------------------------
+    # image load section.
+
+    height, width = data.shape[:2]  # If not to reduce to gray, the shape will be (,,3) or (,,4).
+
+    pad = int(pad)
+    height += pad * 2
+    width += pad * 2
+
+    # Re-embed the image inside a white border of `pad` pixels.
+    bmp = ones((height, width), dtype=bool)
+
+    thr = int(255.0 * greyscale_threshold / 100.0)
+
+    bmp[pad:height - pad, pad:width - pad] = (data[:, :] > thr)
+
+
+    # Set up debugging image.
+    img = zeros((height, width, 3), dtype=uint8)
+
+    # img[:, :, :] = bmp * 255 # In case of colored input image
+
+    img[:, :, 0] = bmp * 255
+    img[:, :, 1] = bmp * 255
+    img[:, :, 2] = bmp * 255
+
+    if checkdivs or checkcells or checkletters:
+        imgfloat = img.astype(float)
+
+    if checkletters:  # Show bounding boxes for each text object.
+        img = (imgfloat/2.).astype(uint8)
+        rectangles=pdfdoc.get_rectangles_for_page(pg)
+        lrn=len(rectangles)
+        for k,r in enumerate(rectangles):
+            x1,y1,x2,y2 = [int(bitmap_resolution* float(k)/72.)+pad for k in r]
+            img[y1:y2, x1:x2] += col(random.random()).astype(uint8)
+        imsave(outfile+"-letters.png", img)
+
+
+    #-----------------------------------------------------------------------
+    # Find bounding box.
+    t = 0
+
+    while t < height and all(bmp[t, :]):
+        t = t + 1
+    if t > 0:
+        t = t - 1
+
+    b = height - 1
+    while b > t and all(bmp[b, :]):
+        b = b - 1
+    if b < height - 1:
+        b = b + 1
+
+    l = 0
+    while l < width and all(bmp[:, l]):
+        l = l + 1
+    if l > 0:
+        l = l - 1
+
+    r = width - 1
+    while r > l and all(bmp[:, r]):
+        r = r - 1
+    if r < width - 1:
+        r = r + 1
 
-  (maxval, width, height, data) = readPNM(p.stdout)
+# Mark bounding box.
+    bmp[t, :] = False
+    bmp[b, :] = False
+    bmp[:, l] = False
+    bmp[:, r] = False
+
+    def boxOfString(x, p):
+        """Parse "left:top:right:bottom[:page]" into four pixel ints plus a page (default p)."""
+        s = x.split(":")
+        if len(s) < 4: raise ValueError("boxes have format left:top:right:bottom[:page]")
+        # Truncate to int: callers use these values as numpy slice bounds, which reject floats.
+        return [int(bitmap_resolution * float(v) + pad) for v in s[0:4]] + [p if len(s) < 5 else int(s[4])]
 
-  pad = int(pad)
-  height+=pad*2
-  width+=pad*2
-  
-# reimbed image with a white padd.
-  bmp = ones( (height,width) , dtype=bool )
-  bmp[pad:height-pad,pad:width-pad] = ( data[:,:] > int(255.0*greyscale_threshold/100.0) )
+# translate crop to paint white.
 
-# Set up Debuging image.
-  img = zeros( (height,width,3) , dtype=uint8 )
-  img[:,:,0] = bmp*255
-  img[:,:,1] = bmp*255
-  img[:,:,2] = bmp*255
+    whites = []
+    if crop:
+        (l, t, r, b, p) = boxOfString(crop, pg)
+        whites.extend([(0, 0, l, height, p), (0, 0, width, t, p),
+                       (r, 0, width, height, p), (0, b, width, height, p)])
 
-#-----------------------------------------------------------------------
-# Find bounding box.
-  t=0
-  while t < height and sum(bmp[t,:]==0) == 0 :
-    t=t+1
-  if t > 0 :
-    t=t-1
-  
-  b=height-1
-  while b > t and sum(bmp[b,:]==0) == 0 :
-    b=b-1
-  if b < height-1:
-    b = b+1
-  
-  l=0
-  while l < width and sum(bmp[:,l]==0) == 0 :
-    l=l+1
-  if l > 0 :
-    l=l-1
-  
-  r=width-1
-  while r > l and sum(bmp[:,r]==0) == 0 :
-    r=r-1
-  if r < width-1 :
-    r=r+1
-  
-# Mark bounding box.
-  bmp[t,:] = 0
-  bmp[b,:] = 0
-  bmp[:,l] = 0
-  bmp[:,r] = 0
+# paint white ...
+    if white:
+        whites.extend([boxOfString(b, pg) for b in white])
 
-  def boxOfString(x,p) :
-    s = x.split(":")
-    if len(s) < 4 :
-      raise ValueError("boxes have format left:top:right:bottom[:page]")
-    return ([bitmap_resolution * float(x) + pad for x in s[0:4] ]
-                + [ p if len(s)<5 else int(s[4]) ] ) 
+    for (l, t, r, b, p) in whites:
+        if p == pg:
+            bmp[t:b + 1, l:r + 1] = 1
+            img[t:b + 1, l:r + 1] = [255, 255, 255]
 
+# paint black ...
+    if black:
+        for b in black:
+            (l, t, r,
+             b) = [bitmap_resolution * float(x) + pad for x in b.split(":")]
+            bmp[t:b + 1, l:r + 1] = 0
+            img[t:b + 1, l:r + 1] = [0, 0, 0]
 
-# translate crop to paint white.
-  whites = []
-  if crop :
-    (l,t,r,b,p) = boxOfString(crop,pg) 
-    whites.extend( [ (0,0,l,height,p), (0,0,width,t,p),
-                     (r,0,width,height,p), (0,b,width,height,p) ] )
+    if checkcrop:
+        imsave(outfile+"-crop.png", img)
 
-# paint white ...
-  if white :
-    whites.extend( [ boxOfString(b, pg) for b in white ] )
-
-  for (l,t,r,b,p) in whites :
-    if p == pg :
-      bmp[ t:b+1,l:r+1 ] = 1
-      img[ t:b+1,l:r+1 ] = [255,255,255]
-  
-# paint black ...
-  if black :
-    for b in black :
-      (l,t,r,b) = [bitmap_resolution * float(x) + pad for x in b.split(":") ]
-      bmp[ t:b+1,l:r+1 ] = 0
-      img[ t:b+1,l:r+1 ] = [0,0,0]
-
-  if checkcrop :
-    dumpImage(outfile,bmp,img, bitmap, pad)
-    return True
-    
 #-----------------------------------------------------------------------
 # Line finding section.
 #
-# Find all vertical or horizontal lines that are more than rlthresh 
+# Find all vertical or horizontal lines that are more than lthresh
 # long, these are considered lines on the table grid.
 
-  lthresh = int(line_length * bitmap_resolution)
-  vs = zeros(width, dtype=int)
-  for i in range(width) :
-    dd = diff( where(bmp[:,i])[0] ) 
-    if len(dd)>0:
-      v = max ( dd )
-      if v > lthresh :
-        vs[i] = 1
-    else:
-# it was a solid black line.
-      if bmp[0,i] == 0 :
-        vs[i] = 1
-  vd= ( where(diff(vs[:]))[0] +1 )
-
-  hs = zeros(height, dtype=int)
-  for j in range(height) :
-    dd = diff( where(bmp[j,:]==1)[0] )
-    if len(dd) > 0 :
-      h = max ( dd )
-      if h > lthresh :
-        hs[j] = 1
-    else:
-# it was a solid black line.
-      if bmp[j,0] == 0 :
-        hs[j] = 1
-  hd=(  where(diff(hs[:]==1))[0] +1 )
+    lthresh = int(line_length * bitmap_resolution)
+    vs = zeros(width, dtype=uint8)
+
+    for i in range(width):
+        dd = diff(where(bmp[:, i])[0])
+        if len(dd) > 0:
+            v = max(dd)
+            if v > lthresh:
+                vs[i] = 1
+        else:
+            # it was a solid black line.
+            if all(bmp[0, i]) == 0:
+                vs[i] = 1
+    vd = (where(diff(vs[:]))[0] + 1)
+
+    hs = zeros(height, dtype=uint8)
+    for j in range(height):
+        dd = diff(where(bmp[j, :])[0])
+        if len(dd) > 0:
+            h = max(dd)
+            if h > lthresh:
+                hs[j] = 1
+        else:
+            # it was a solid black line.
+            if all(bmp[j, 0]) == 0:
+                hs[j] = 1
+    hd = (where(diff(hs[:]))[0] + 1)
+
+    #-----------------------------------------------------------------------
+    # Look for dividers that are too large.
+    maxdiv = 10
+    i = 0
 
-#-----------------------------------------------------------------------
-# Look for dividors that are too large.
-  maxdiv=10
-  i=0
-
-  while i < len(vd) :
-    if vd[i+1]-vd[i] > maxdiv :
-      vd = delete(vd,i)
-      vd = delete(vd,i)
-    else:
-      i=i+2
-  
-  j = 0 
-  while j < len(hd):
-    if hd[j+1]-hd[j] > maxdiv :
-      hd = delete(hd,j)
-      hd = delete(hd,j)
+    while i < len(vd):
+        if vd[i + 1] - vd[i] > maxdiv:
+            vd = delete(vd, i)
+            vd = delete(vd, i)
+        else:
+            i = i + 2
+
+    j = 0
+    while j < len(hd):
+        if hd[j + 1] - hd[j] > maxdiv:
+            hd = delete(hd, j)
+            hd = delete(hd, j)
+        else:
+            j = j + 2
+
+    if checklines:
+        for i in vd:
+            img[:, i] = [255, 0, 0]  # red
+
+        for j in hd:
+            img[j, :] = [0, 0, 255]  # blue
+        imsave(outfile+"-lines.png", img)
+
+        #-----------------------------------------------------------------------
+        # divider checking.
+        #
+        # At this point vd holds the x coordinates of the vertical and
+        # hd holds the y coordinates of the horizontal divider transitions for
+        # each vertical and horizontal line in the table grid.
+
+    def isDiv(a, l, r, t, b):
+        # if any col or row (in axis) is all zeros ...
+        return sum(sum(bmp[t:b, l:r], axis=a) == 0) > 0
+
+    if checkdivs:
+        img = (imgfloat / 2).astype(uint8)
+        for j in range(0, len(hd), 2):
+            for i in range(0, len(vd), 2):
+                if i > 0:
+                    (l, r, t, b) = (vd[i - 1], vd[i], hd[j], hd[j + 1])
+                    img[t:b, l:r, 1] = 192
+                    if isDiv(1, l, r, t, b):
+                        img[t:b, l:r, 0] = 0
+                        img[t:b, l:r, 2] = 255
+
+                if j > 0:
+                    (l, r, t, b) = (vd[i], vd[i + 1], hd[j - 1], hd[j])
+                    img[t:b, l:r, 1] = 128
+                    if isDiv(0, l, r, t, b):
+                        img[t:b, l:r, 0] = 255
+                        img[t:b, l:r, 2] = 0
+        imsave(outfile+"-divs.png", img)
+
+        #-----------------------------------------------------------------------
+        # Cell finding section.
+        # This algorithm is width hungry, and always generates rectangular
+        # boxes.
+
+    cells = []
+    touched = zeros((len(hd), len(vd)), dtype=bool)
+    j = 0
+    while j * 2 + 2 < len(hd):
+        i = 0
+        while i * 2 + 2 < len(vd):
+            u = 1
+            v = 1
+            if not touched[j, i]:
+                while 2+(i+u)*2 < len(vd) and \
+                    not isDiv( 0, vd[ 2*(i+u) ], vd[ 2*(i+u)+1],
+                       hd[ 2*(j+v)-1 ], hd[ 2*(j+v) ] ):
+                    u = u + 1
+                bot = False
+                while 2 + (j + v) * 2 < len(hd) and not bot:
+                    bot = False
+                    for k in range(1, u + 1):
+                        bot |= isDiv(1, vd[2 * (i + k) - 1], vd[2 * (i + k)],
+                                     hd[2 * (j + v)], hd[2 * (j + v) + 1])
+                    if not bot:
+                        v = v + 1
+                cells.append((i, j, u, v))
+                touched[j:j + v, i:i + u] = True
+            i = i + 1
+        j = j + 1
+
+    if checkcells:
+        nc = len(cells) + 0.
+        img = (imgfloat / 2.).astype(uint8)
+        for k in range(len(cells)):
+            (i, j, u, v) = cells[k]
+            (l, r, t, b) = (vd[2 * i + 1], vd[2 * (i + u)], hd[2 * j + 1],
+                            hd[2 * (j + v)])
+            img[t:b, l:r] += col(k*0.9 / nc + 0.1*random.random()).astype(uint8)
+
+        imsave(outfile+"-cells.png", img)
+
+        #-----------------------------------------------------------------------
+        # fork out to extract text for each cell.
+
+    def getCell(_coordinate, img=None):
+        (i, j, u, v) = _coordinate
+        (l, r, t, b) = (vd[2 * i + 1], vd[2 * (i + u)], hd[2 * j + 1],
+                        hd[2 * (j + v)])
+        ret, rect = pdfdoc.get_text(page, l - pad, t - pad, r - l, b - t)
+
+        if type(img)!=type(None) and checkletters:
+            (x1,y1,x2,y2) = [int(bitmap_resolution * float(rrr)/72+pad) for rrr in [rect.x1,rect.y1,rect.x2,rect.y2]]
+            img[y1:y2,x1:x2] += col(random.random()).astype(uint8)
+
+        return (i, j, u, v, pg, ret)
+
+    if checkletters:
+        img = (imgfloat / 2.).astype(uint8)
+
+    if boxes:
+        cells = [x + (pg,
+                      "", ) for x in cells
+                 if (frow == None or (x[1] >= frow and x[1] <= lrow))]
     else:
-      j=j+2
-  
-  if checklines :
-    for i in vd :
-      img[:,i] = [255,0,0] # red
-  
-    for j in hd :
-      img[j,:] = [0,0,255] # blue
-    dumpImage(outfile,bmp,img)
-    return True
-#-----------------------------------------------------------------------
-# divider checking.
-#
-# at this point vd holds the x coordinate of vertical  and 
-# hd holds the y coordinate of horizontal divider tansitions for each 
-# vertical and horizontal lines in the table grid.
-
-  def isDiv(a, l,r,t,b) :
-          # if any col or row (in axis) is all zeros ...
-    return sum( sum(bmp[t:b, l:r], axis=a)==0 ) >0 
-
-  if checkdivs :
-    img = img / 2
-    for j in range(0,len(hd),2):
-      for i in range(0,len(vd),2):
-        if i>0 :
-          (l,r,t,b) = (vd[i-1], vd[i],   hd[j],   hd[j+1]) 
-          img[ t:b, l:r, 1 ] = 192
-          if isDiv(1, l,r,t,b) :
-            img[ t:b, l:r, 0 ] = 0
-            img[ t:b, l:r, 2 ] = 255
-          
-        if j>0 :
-          (l,r,t,b) = (vd[i],   vd[i+1], hd[j-1], hd[j] )
-          img[ t:b, l:r, 1 ] = 128
-          if isDiv(0, l,r,t,b) :
-            img[ t:b, l:r, 0 ] = 255
-            img[ t:b, l:r, 2 ] = 0
-    dumpImage(outfile,bmp,img)
-    return True
-#-----------------------------------------------------------------------
-# Cell finding section.
-# This algorithum is width hungry, and always generates rectangular
-# boxes.
-
-  cells =[] 
-  touched = zeros( (len(hd), len(vd)),dtype=bool )
-  j = 0
-  while j*2+2 < len (hd) :
-    i = 0
-    while i*2+2 < len(vd) :
-      u = 1
-      v = 1
-      if not touched[j,i] :
-        while 2+(i+u)*2 < len(vd) and \
-            not isDiv( 0, vd[ 2*(i+u) ], vd[ 2*(i+u)+1],
-               hd[ 2*(j+v)-1 ], hd[ 2*(j+v) ] ):
-          u=u+1
-        bot = False
-        while 2+(j+v)*2 < len(hd) and not bot :
-          bot = False
-          for k in range(1,u+1) :
-            bot |= isDiv( 1, vd[ 2*(i+k)-1 ], vd[ 2*(i+k)],
-               hd[ 2*(j+v) ], hd[ 2*(j+v)+1 ] )
-          if not bot :
-            v=v+1
-        cells.append( (i,j,u,v) )
-        touched[ j:j+v, i:i+u] = True
-      i = i+1
-    j=j+1
-  
-  
-  if checkcells :
-    nc = len(cells)+0.
-    img = img / 2
-    for k in range(len(cells)):
-      (i,j,u,v) = cells[k]
-      (l,r,t,b) = ( vd[2*i+1] , vd[ 2*(i+u) ], hd[2*j+1], hd[2*(j+v)] )
-      img[ t:b, l:r ] += col( k/nc )
-    dumpImage(outfile,bmp,img)
-    return True
-  
-#-----------------------------------------------------------------------
-# fork out to extract text for each cell.
-
-  whitespace = re.compile( r'\s+')
-   
-  def getCell( (i,j,u,v) ):
-    (l,r,t,b) = ( vd[2*i+1] , vd[ 2*(i+u) ], hd[2*j+1], hd[2*(j+v)] )
-    p = popen("pdftotext", 
-              "pdftotext -r %d -x %d -y %d -W %d -H %d -layout -nopgbrk -f %d -l %d %s -" % (bitmap_resolution, l-pad, t-pad, r-l, b-t, pg, pg, quote(infile)),
-              stdout=subprocess.PIPE, 
-              shell=True )
-    
-    ret = p.communicate()[0]
-    if whitespace != 'raw' :
-      ret = whitespace.sub( "" if whitespace == "none" else " ", ret )
-      if len(ret) > 0 :
-        ret = ret[ (1 if ret[0]==' ' else 0) : 
-                   len(ret) - (1 if ret[-1]==' ' else 0) ]
-    return (i,j,u,v,pg,ret)
-      
-  if boxes :
-    cells = [ x + (pg,"",) for x in cells if 
-              ( frow == None or (x[1] >= frow and x[1] <= lrow)) ]
-  else :
-    #check that pdftotext exists by running a simple command
-    check_for_required_executable("pdftotext",["pdftotext","-h"])
-    #end check
-    cells = [ getCell(x)   for x in cells if 
-              ( frow == None or (x[1] >= frow and x[1] <= lrow)) ]
-  return cells
+        cells = [getCell(x, img) for x in cells
+                 if (frow == None or (x[1] >= frow and x[1] <= lrow))]
+    if checkletters:
+        imsave(outfile+"-text-locations.png", img)
+
+    return cells
 
 #-----------------------------------------------------------------------
 #output section.
 
-def output(cells, pgs, 
-                cells_csv_filename=None, 
-                cells_json_filename=None, 
-                cells_xml_filename=None, 
-                table_csv_filename=None,
-                table_html_filename=None,
-                table_list_filename=None,
-                infile=None, name=None, output_type=None
-                ):
-                
+
+def output(cells,
+           pgs,
+           cells_csv_filename=None,
+           cells_json_filename=None,
+           cells_xml_filename=None,
+           table_csv_filename=None,
+           table_html_filename=None,
+           table_list_filename=None,
+           infile=None,
+           name=None,
+           output_type=None):
+
     output_types = [
-             dict(filename=cells_csv_filename, function=o_cells_csv),  
-             dict(filename=cells_json_filename, function=o_cells_json), 
-             dict(filename=cells_xml_filename, function=o_cells_xml), 
-             dict(filename=table_csv_filename, function=o_table_csv),
-             dict(filename=table_html_filename, function=o_table_html),
-             dict(filename=table_list_filename, function=o_table_list)
-             ]
-             
+        dict(filename=cells_csv_filename,
+             function=o_cells_csv), dict(filename=cells_json_filename,
+                                         function=o_cells_json),
+        dict(filename=cells_xml_filename,
+             function=o_cells_xml), dict(filename=table_csv_filename,
+                                         function=o_table_csv),
+        dict(filename=table_html_filename,
+             function=o_table_html), dict(filename=table_list_filename,
+                                          function=o_table_list)
+    ]
+
     for entry in output_types:
         if entry["filename"]:
             if entry["filename"] != sys.stdout:
-                outfile = open(entry["filename"],'w')
+                outfile = open(entry["filename"], 'w')
             else:
                 outfile = sys.stdout
-            
-            entry["function"](cells, pgs, 
-                                outfile=outfile, 
-                                name=name, 
-                                infile=infile, 
-                                output_type=output_type)
+
+            entry["function"](cells,
+                              pgs,
+                              outfile=outfile,
+                              name=name,
+                              infile=infile,
+                              output_type=output_type)
 
             if entry["filename"] != sys.stdout:
                 outfile.close()
-        
-def o_cells_csv(cells,pgs, outfile=None, name=None, infile=None, output_type=None) :
-  outfile = outfile or sys.stdout
-  csv.writer( outfile , dialect='excel' ).writerows(cells)
-
-def o_cells_json(cells,pgs, outfile=None, infile=None, name=None, output_type=None) :
-  """Output JSON formatted cell data"""
-  outfile = outfile or sys.stdout
-  #defaults
-  infile=infile or ""
-  name=name or ""
-  
-  json.dump({ 
-    "src": infile,
-    "name": name,
-    "colnames": ( "x","y","width","height","page","contents" ),
-    "cells":cells
+
+
+def o_cells_csv(cells, pgs, outfile=None, name=None, infile=None,
+                output_type=None):
+    """Write every entry of ``cells`` as one row of Excel-dialect CSV.
+
+    Falsy ``outfile`` means stdout; the remaining arguments are unused.
+    """
+    sink = outfile if outfile else sys.stdout
+    csv.writer(sink, dialect='excel').writerows(cells)
+
+
+def o_cells_json(cells, pgs, outfile=None, infile=None, name=None,
+                 output_type=None):
+    """Serialise the cell list as a single JSON object.
+
+    Keys written: "src" (the ``infile`` value), "name", "colnames" (six
+    fixed column labels) and "cells" (the cell data as given). Falsy
+    ``infile``/``name`` become ""; a falsy ``outfile`` means standard
+    output. ``pgs`` and ``output_type`` are not used.
+    """
+    if not outfile:
+        outfile = sys.stdout
+    column_labels = ("x", "y", "width", "height", "page", "contents")
+    json.dump({
+        "src": infile if infile else "",
+        "name": name if name else "",
+        "colnames": column_labels,
+        "cells": cells
     }, outfile)
 
-def o_cells_xml(cells,pgs, outfile=None,infile=None, name=None, output_type=None) : 
-  """Output XML formatted cell data"""
-  outfile = outfile or sys.stdout
-  #defaults
-  infile=infile or ""
-  name=name or ""
-
-  doc = getDOMImplementation().createDocument(None,"table", None)
-  root = doc.documentElement;
-  if infile :
-    root.setAttribute("src",infile)
-  if name :
-    root.setAttribute("name",name)
-  for cl in cells :
-    x = doc.createElement("cell")
-    map(lambda(a): x.setAttribute(*a), zip("xywhp",map(str,cl)))
-    if cl[5] != "" :
-      x.appendChild( doc.createTextNode(cl[5]) )
-    root.appendChild(x)
-  outfile.write( doc.toprettyxml() )
-  
-def table_to_list(cells,pgs) : 
-  """Output list of lists"""
-  l=[0,0,0]
-  for (i,j,u,v,pg,value) in cells :
-      r=[i,j,pg]
-      l = [max(x) for x in zip(l,r)]
-  
-  tab = [ [ [ "" for x in range(l[0]+1)
-            ] for x in range(l[1]+1)
-          ] for x in range(l[2]+1)
-        ]
-  for (i,j,u,v,pg,value) in cells :
-    tab[pg][j][i] = value
-
-  return tab
-
-def o_table_csv(cells,pgs, outfile=None, name=None, infile=None, output_type=None) :
-  """Output CSV formatted table"""
-  outfile = outfile or sys.stdout
-  tab=table_to_list(cells, pgs)
-  for t in tab:
-    csv.writer( outfile , dialect='excel' ).writerows(t)
-  
-
-def o_table_list(cells,pgs, outfile=None, name=None, infile=None, output_type=None) :
-  """Output list of lists"""
-  outfile = outfile or sys.stdout
-  tab = table_to_list(cells, pgs)
-  print(tab)
-    
-def o_table_html(cells,pgs, outfile=None, output_type=None, name=None, infile=None) : 
-  """Output HTML formatted table"""
-
-  oj = 0 
-  opg = 0
-  doc = getDOMImplementation().createDocument(None,"table", None)
-  root = doc.documentElement;
-  if (output_type == "table_chtml" ):
-    root.setAttribute("border","1")
-    root.setAttribute("cellspaceing","0")
-    root.setAttribute("style","border-spacing:0")
-  nc = len(cells)
-  tr = None
-  for k in range(nc):
-    (i,j,u,v,pg,value) = cells[k]
-    if j > oj or pg > opg:
-      if pg > opg:
-        s = "Name: " + name + ", " if name else ""
-        root.appendChild( doc.createComment( s + 
-          ("Source: %s page %d." % (infile, pg) )));
-      if tr :
-        root.appendChild(tr)
-      tr = doc.createElement("tr")
-      oj = j
-      opg = pg
-    td = doc.createElement("td")
-    if value != "" :
-      td.appendChild( doc.createTextNode(value) )
-    if u>1 :
-      td.setAttribute("colspan",str(u))
-    if v>1 :
-      td.setAttribute("rowspan",str(v))
-    if output_type == "table_chtml" :
-      td.setAttribute("style", "background-color: #%02x%02x%02x" %
-            tuple(128+col(k/(nc+0.))))
-    tr.appendChild(td)
-  root.appendChild(tr)
-  outfile.write( doc.toprettyxml() )
-  
+
+def o_cells_xml(cells, pgs, outfile=None, infile=None, name=None,
+                output_type=None):
+    """Output XML formatted cell data.
+
+    Writes a <table> document with one <cell> element per entry of
+    ``cells``. The first five fields of a cell become the attributes
+    x, y, w, h and p; the sixth, when non-empty, becomes its text.
+    Falsy ``outfile`` means standard output. ``pgs`` and
+    ``output_type`` are not used.
+    """
+    outfile = outfile or sys.stdout
+    doc = getDOMImplementation().createDocument(None, "table", None)
+    root = doc.documentElement
+    # A falsy infile or name simply leaves that attribute off <table>.
+    if infile:
+        root.setAttribute("src", infile)
+    if name:
+        root.setAttribute("name", name)
+    for cl in cells:
+        x = doc.createElement("cell")
+        # Explicit loop: a bare map() is lazy on Python 3 and would never
+        # call setAttribute, leaving every <cell> without attributes.
+        for attr, val in zip("xywhp", cl):
+            x.setAttribute(attr, str(val))
+        if cl[5] != "":
+            x.appendChild(doc.createTextNode(cl[5]))
+        root.appendChild(x)
+    outfile.write(doc.toprettyxml())
+
+
+def table_to_list(cells, pgs):
+    """Arrange cells into nested lists indexed as tab[page][row][col].
+
+    Sized by the largest indices in ``cells``; unset slots hold "".
+    """
+    top = [0, 0, 0]  # largest column, row and page index seen so far
+    for (column, row, _u, _v, page, _text) in cells:
+        top = [max(top[0], column), max(top[1], row), max(top[2], page)]
+    n_cols, n_rows, n_pages = (m + 1 for m in top)
+    tab = [[[""] * n_cols for _ in range(n_rows)] for _ in range(n_pages)]
+    for (column, row, _u, _v, page, text) in cells:
+        tab[page][row][column] = text
+    return tab
+
+
+def o_table_csv(cells, pgs, outfile=None, name=None, infile=None,
+                output_type=None):
+    """Output CSV formatted table.
+
+    Lays the cells out with table_to_list() and writes every page's
+    rows back to back in the Excel dialect (stdout if no outfile).
+    """
+    pages = table_to_list(cells, pgs)
+    writer = csv.writer(outfile or sys.stdout, dialect='excel')
+    for page_rows in pages:
+        writer.writerows(page_rows)
+
+
+def o_table_list(cells, pgs, outfile=None, name=None, infile=None,
+                 output_type=None):
+    """Output the table as the text form of a nested list.
+
+    Writes str() of table_to_list()'s result plus a newline to
+    ``outfile`` (standard output when falsy), so the text reaches the
+    requested destination rather than always going to the console.
+    """
+    outfile = outfile or sys.stdout
+    outfile.write(str(table_to_list(cells, pgs)) + "\n")
+
+
+def o_table_html(cells, pgs, outfile=None, output_type=None, name=None,
+                 infile=None):
+    """Output HTML formatted table.
+
+    One <tr> per table row, one <td> per cell (colspan/rowspan set when
+    above 1); a source comment is added whenever the page number rises.
+    "table_chtml" adds a border and per-cell background colours. Falsy
+    ``outfile`` means stdout; empty ``cells`` yields an empty <table>.
+    """
+    outfile = outfile or sys.stdout
+    doc = getDOMImplementation().createDocument(None, "table", None)
+    root = doc.documentElement
+    if output_type == "table_chtml":
+        root.setAttribute("border", "1")
+        root.setAttribute("cellspacing", "0")  # was misspelt "cellspaceing"
+        root.setAttribute("style", "border-spacing:0")
+    oj = opg = 0  # row index / page number of the <tr> being filled
+    nc = len(cells)
+    tr = None
+    for k, (i, j, u, v, pg, value) in enumerate(cells):
+        if tr is None or j > oj or pg > opg:  # first cell always opens a row
+            if pg > opg:
+                s = "Name: " + name + ", " if name else ""
+                root.appendChild(doc.createComment(
+                    s + "Source: %s page %d." % (infile, pg)))
+            if tr is not None:
+                root.appendChild(tr)
+            tr = doc.createElement("tr")
+            oj, opg = j, pg
+        td = doc.createElement("td")
+        if value != "":
+            td.appendChild(doc.createTextNode(value))
+        if u > 1:
+            td.setAttribute("colspan", str(u))
+        if v > 1:
+            td.setAttribute("rowspan", str(v))
+        if output_type == "table_chtml":
+            td.setAttribute("style", "background-color: #%02x%02x%02x" %
+                            tuple(128 + col(k / (nc + 0.))))
+        tr.appendChild(td)
+    if tr is not None:
+        root.appendChild(tr)
+    outfile.write(doc.toprettyxml())
diff --git a/src/pdftableextract/extracttab.py b/src/pdftableextract/extracttab.py
deleted file mode 100644
index ab6c74d..0000000
--- a/src/pdftableextract/extracttab.py
+++ /dev/null
@@ -1,297 +0,0 @@
-# Description : PDF Table Extraction Utility
-#      Author : Ian McEwan, Ashima Research.
-#  Maintainer : ijm
-#     Lastmod : 20130402 (ijm)
-#     License : Copyright (C) 2011 Ashima Research. All rights reserved.
-#               Distributed under the MIT Expat License. See LICENSE file.
-#               https://github.com/ashima/pdf-table-extract
-
-import sys, argparse, subprocess, re, csv, json
-from numpy import *
-from pipes import quote
-from xml.dom.minidom import getDOMImplementation
-
-# Proccessing function.
-
-def process_page(pgs) :
-  (pg,frow,lrow) = (map(int,(pgs.split(":")))+[None,None])[0:3]
-
-  p = subprocess.Popen( ("pdftoppm -gray -r %d -f %d -l %d %s " %
-      (args.r,pg,pg,quote(args.infile))),
-      stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=True )
-
-#-----------------------------------------------------------------------
-# image load secion.
-
-  (maxval, width, height, data) = readPNM(p.stdout)
-
-  pad = int(args.pad)
-  height+=pad*2
-  width+=pad*2
-  
-# reimbed image with a white padd.
-  bmp = ones( (height,width) , dtype=bool )
-  bmp[pad:height-pad,pad:width-pad] = ( data[:,:] > int(255.0*args.g/100.0) )
-
-# Set up Debuging image.
-  img = zeros( (height,width,3) , dtype=uint8 )
-  img[:,:,0] = bmp*255
-  img[:,:,1] = bmp*255
-  img[:,:,2] = bmp*255
-
-#-----------------------------------------------------------------------
-# Find bounding box.
-
-  t=0
-  while t < height and sum(bmp[t,:]==0) == 0 :
-    t=t+1
-  if t > 0 :
-    t=t-1
-  
-  b=height-1
-  while b > t and sum(bmp[b,:]==0) == 0 :
-    b=b-1
-  if b < height-1:
-    b = b+1
-  
-  l=0
-  while l < width and sum(bmp[:,l]==0) == 0 :
-    l=l+1
-  if l > 0 :
-    l=l-1
-  
-  r=width-1
-  while r > l and sum(bmp[:,r]==0) == 0 :
-    r=r-1
-  if r < width-1 :
-    r=r+1
-  
-# Mark bounding box.
-  bmp[t,:] = 0
-  bmp[b,:] = 0
-  bmp[:,l] = 0
-  bmp[:,r] = 0
-
-  def boxOfString(x,p) :
-    s = x.split(":")
-    if len(s) < 4 :
-      raise Exception("boxes have format left:top:right:bottom[:page]")
-    return ([args.r * float(x) + args.pad for x in s[0:4] ]
-                + [ p if len(s)<5 else int(s[4]) ] ) 
-
-
-# translate crop to paint white.
-  whites = []
-  if args.crop :
-    (l,t,r,b,p) = boxOfString(args.crop,pg) 
-    whites.extend( [ (0,0,l,height,p), (0,0,width,t,p),
-                     (r,0,width,height,p), (0,b,width,height,p) ] )
-
-# paint white ...
-  if args.white :
-    whites.extend( [ boxOfString(b, pg) for b in args.white ] )
-
-  for (l,t,r,b,p) in whites :
-    if p == pg :
-      bmp[ t:b+1,l:r+1 ] = 1
-      img[ t:b+1,l:r+1 ] = [255,255,255]
-  
-# paint black ...
-  if args.black :
-    for b in args.black :
-      (l,t,r,b) = [args.r * float(x) + args.pad for x in b.split(":") ]
-      bmp[ t:b+1,l:r+1 ] = 0
-      img[ t:b+1,l:r+1 ] = [0,0,0]
-
-  if args.checkcrop :
-    dumpImage(args,bmp,img)
-    sys.exit(0)
-    
-  
-#-----------------------------------------------------------------------
-# Line finding section.
-#
-# Find all verticle or horizontal lines that are more than rlthresh 
-# long, these are considered lines on the table grid.
-
-  lthresh = int(args.l * args.r)
-  vs = zeros(width, dtype=int)
-  for i in range(width) :
-    dd = diff( where(bmp[:,i])[0] ) 
-    if len(dd)>0:
-      v = max ( dd )
-      if v > lthresh :
-        vs[i] = 1
-    else:
-# it was a solid black line.
-      if bmp[0,i] == 0 :
-        vs[i] = 1
-  vd= ( where(diff(vs[:]))[0] +1 )
-
-  hs = zeros(height, dtype=int)
-  for j in range(height) :
-    dd = diff( where(bmp[j,:]==1)[0] )
-    if len(dd) > 0 :
-      h = max ( dd )
-      if h > lthresh :
-        hs[j] = 1
-    else:
-# it was a solid black line.
-      if bmp[j,0] == 0 :
-        hs[j] = 1
-  hd=(  where(diff(hs[:]==1))[0] +1 )
-
-#-----------------------------------------------------------------------
-# Look for dividors that are too large.
-
-  maxdiv=10
-  i=0
-
-  while i < len(vd) :
-    if vd[i+1]-vd[i] > maxdiv :
-      vd = delete(vd,i)
-      vd = delete(vd,i)
-    else:
-      i=i+2
-  
-  j = 0 
-  while j < len(hd):
-    if hd[j+1]-hd[j] > maxdiv :
-      hd = delete(hd,j)
-      hd = delete(hd,j)
-    else:
-      j=j+2
-  
-  if args.checklines :
-    for i in vd :
-      img[:,i] = [255,0,0] # red
-  
-    for j in hd :
-      img[j,:] = [0,0,255] # blue
-    dumpImage(args,bmp,img)
-    sys.exit(0)
-  
-#-----------------------------------------------------------------------
-# divider checking.
-#
-# at this point vd holds the x coordinate of vertical  and 
-# hd holds the y coordinate of horizontal divider tansitions for each 
-# vertical and horizontal lines in the table grid.
-
-  def isDiv(a, l,r,t,b) :
-          # if any col or row (in axis) is all zeros ...
-    return sum( sum(bmp[t:b, l:r], axis=a)==0 ) >0 
-
-  if args.checkdivs :
-    img = img / 2
-    for j in range(0,len(hd),2):
-      for i in range(0,len(vd),2):
-        if i>0 :
-          (l,r,t,b) = (vd[i-1], vd[i],   hd[j],   hd[j+1]) 
-          img[ t:b, l:r, 1 ] = 192
-          if isDiv(1, l,r,t,b) :
-            img[ t:b, l:r, 0 ] = 0
-            img[ t:b, l:r, 2 ] = 255
-          
-        if j>0 :
-          (l,r,t,b) = (vd[i],   vd[i+1], hd[j-1], hd[j] )
-          img[ t:b, l:r, 1 ] = 128
-          if isDiv(0, l,r,t,b) :
-            img[ t:b, l:r, 0 ] = 255
-            img[ t:b, l:r, 2 ] = 0
-  
-    dumpImage(args,bmp,img)
-    sys.exit(0)
-  
-#-----------------------------------------------------------------------
-# Cell finding section.
-# This algorithum is width hungry, and always generates rectangular
-# boxes.
-
-  cells =[] 
-  touched = zeros( (len(hd), len(vd)),dtype=bool )
-  j = 0
-  while j*2+2 < len (hd) :
-    i = 0
-    while i*2+2 < len(vd) :
-      u = 1
-      v = 1
-      if not touched[j,i] :
-        while 2+(i+u)*2 < len(vd) and \
-            not isDiv( 0, vd[ 2*(i+u) ], vd[ 2*(i+u)+1],
-               hd[ 2*(j+v)-1 ], hd[ 2*(j+v) ] ):
-          u=u+1
-        bot = False
-        while 2+(j+v)*2 < len(hd) and not bot :
-          bot = False
-          for k in range(1,u+1) :
-            bot |= isDiv( 1, vd[ 2*(i+k)-1 ], vd[ 2*(i+k)],
-               hd[ 2*(j+v) ], hd[ 2*(j+v)+1 ] )
-          if not bot :
-            v=v+1
-        cells.append( (i,j,u,v) )
-        touched[ j:j+v, i:i+u] = True
-      i = i+1
-    j=j+1
-  
-  
-  if args.checkcells :
-    nc = len(cells)+0.
-    img = img / 2
-    for k in range(len(cells)):
-      (i,j,u,v) = cells[k]
-      (l,r,t,b) = ( vd[2*i+1] , vd[ 2*(i+u) ], hd[2*j+1], hd[2*(j+v)] )
-      img[ t:b, l:r ] += col( k/nc )
-    dumpImage(args,bmp,img)
-    sys.exit(0)
-  
-  
-#-----------------------------------------------------------------------
-# fork out to extract text for each cell.
-
-  whitespace = re.compile( r'\s+')
-   
-  def getCell( (i,j,u,v) ):
-    (l,r,t,b) = ( vd[2*i+1] , vd[ 2*(i+u) ], hd[2*j+1], hd[2*(j+v)] )
-    p = subprocess.Popen(
-    ("pdftotext -r %d -x %d -y %d -W %d -H %d -layout -nopgbrk -f %d -l %d %s -"
-         % (args.r, l-pad, t-pad, r-l, b-t, pg, pg, quote(args.infile) ) ),
-        stdout=subprocess.PIPE, shell=True )
-    
-    ret = p.communicate()[0]
-    if args.w != 'raw' :
-      ret = whitespace.sub( "" if args.w == "none" else " ", ret )
-      if len(ret) > 0 :
-        ret = ret[ (1 if ret[0]==' ' else 0) : 
-                   len(ret) - (1 if ret[-1]==' ' else 0) ]
-    return (i,j,u,v,pg,ret)
-
-  #if args.boxes :
-  #  cells = [ x + (pg,"",) for x in cells ]
-  #else :
-  #  cells = map(getCell, cells)
-  
-  if args.boxes :
-    cells = [ x + (pg,"",) for x in cells if 
-              ( frow == None or (x[1] >= frow and x[1] <= lrow)) ]
-  else :
-    cells = [ getCell(x)   for x in cells if 
-              ( frow == None or (x[1] >= frow and x[1] <= lrow)) ]
-  return cells
-
-
-#-----------------------------------------------------------------------
-# main
-
-def main_script():
-    args = procargs()
-
-    cells = []
-    for pgs in args.page :
-      cells.extend(process_page(pgs))
-
-    { "cells_csv" : o_cells_csv,   "cells_json" : o_cells_json,
-      "cells_xml" : o_cells_xml,   "table_csv"  : o_table_csv,
-      "table_html": o_table_html,  "table_chtml": o_table_html,
-      } [ args.t ](cells,args.page)
-
diff --git a/src/pdftableextract/pnm.py b/src/pdftableextract/pnm.py
index cbb05dd..befce66 100644
--- a/src/pdftableextract/pnm.py
+++ b/src/pdftableextract/pnm.py
@@ -1,59 +1,68 @@
+from __future__ import print_function
 from numpy import array, fromstring, uint8, reshape, ones
+
 #-----------------------------------------------------------------------
 # PNM stuff.
 
+
 def noncomment(fd):
-  """Read lines from the filehandle until a non-comment line is found. 
+    """Read lines from the filehandle until a non-comment line is found.
   Comments start with #"""
-  while True:
-    x = fd.readline() 
-    if x.startswith('#') :
-      continue
-    else:
-      return x
+    while True:
+        x = fd.readline()
+        if x.startswith(b'#'):  # bytes prefix, so fd must yield bytes
+            continue
+        else:
+            return x  # an empty read (end of input) is returned as-is
+
 
 def readPNM(fd):
-  """Reads the PNM file from the filehandle"""
-  t = noncomment(fd)
-  s = noncomment(fd)
-  m = noncomment(fd) if not (t.startswith('P1') or t.startswith('P4')) else '1'
-  data = fd.read()
-  ls = len(s.split())
-  if ls != 2 :
-    name = "<pipe>" if fd.name=="<fdopen>" else "Filename = {0}".format(fd.name)
-    raise IOError("Expected 2 elements from parsing PNM file, got {0}: {1}".format(ls, name))
-  xs, ys = s.split()
-  width = int(xs)
-  height = int(ys)
-  m = int(m)
-
-  if m != 255 :
-    print "Just want 8 bit pgms for now!"
-  
-  d = fromstring(data,dtype=uint8)
-  d = reshape(d, (height,width) )
-  return (m,width,height, d)
-
-def writePNM(fd,img):
-  """Writes a PNM file to a filehandle given the img data as a numpy array"""
-  s = img.shape
-  m = 255
-  if img.dtype == bool :
-    img = img + uint8(0) 
-    t = "P5"
-    m = 1
-  elif len(s) == 2 :
-    t = "P5"
-  else:
-    t = "P6"
-    
-  fd.write( "%s\n%d %d\n%d\n" % (t, s[1],s[0],m) )
-  fd.write( uint8(img).tostring() )
-
-
-def dumpImage(outfile,bmp,img,bitmap=False, pad=2) :
+    """Read a PNM image from ``fd``; return (maxval, width, height, pixels)."""
+    t = noncomment(fd)  # first header line: the magic number
+    s = noncomment(fd)  # second header line: width and height
+    m = noncomment(fd) if not (t.startswith(b'P1') or
+                               t.startswith(b'P4')) else b'1'  # P1/P4: no maxval line read
+    data = fd.read()  # the rest of the stream is taken as pixel data
+    ls = len(s.split())
+    if ls != 2:
+        name = "<pipe>" if fd.name == "<fdopen>" else "Filename = {0}".format(
+            fd.name)
+        raise IOError(
+            "Expected 2 elements from parsing PNM file, got {0}: {1}".format(
+                ls, name))
+    xs, ys = s.split()
+    width = int(xs)
+    height = int(ys)
+    m = int(m)
+    # A maxval other than 255 only triggers the message below; parsing goes on.
+    if m != 255:
+        print("Just want 8 bit pgms for now!")
+    # NOTE(review): NumPy deprecates binary-mode fromstring (see frombuffer).
+    d = fromstring(data, dtype=uint8)
+    d = reshape(d, (height, width))  # one uint8 per pixel, row-major
+    return (m, width, height, d)
+
+
+def writePNM(fd, img):
+    """Write ``img`` (a numpy array) to ``fd`` as a binary PNM image.
+
+    Boolean arrays become P5 with maxval 1, other 2-D arrays P5 with
+    maxval 255, anything else P6 with maxval 255. The header is
+    followed by the array's raw bytes after conversion to uint8.
+    """
+    rows, cols = img.shape[0], img.shape[1]
+    if img.dtype == bool:
+        magic, maxval = b"P5", 1
+        img = img + uint8(0)  # promote bools to 0/1 integers
+    else:
+        magic, maxval = (b"P5" if len(img.shape) == 2 else b"P6"), 255
+    fd.write(b"%s\n%d %d\n%d\n" % (magic, cols, rows, maxval))
+    fd.write(img.astype(uint8).tobytes())
+
+
+def dumpImage(outfile, bmp, img, bitmap=False, pad=2):
     """Dumps the numpy array in image into the filename and closes the outfile"""
     oi = bmp if bitmap else img
-    (height,width) = bmp.shape
-    writePNM(outfile, oi[pad:height-pad, pad:width-pad])
+    (height, width) = bmp.shape
+    writePNM(outfile, oi[pad:height - pad, pad:width - pad])
     outfile.close()
diff --git a/src/pdftableextract/scripts.py b/src/pdftableextract/scripts.py
index 68a7b2e..97a2ee2 100644
--- a/src/pdftableextract/scripts.py
+++ b/src/pdftableextract/scripts.py
@@ -2,8 +2,8 @@
 import sys
 import logging
 import subprocess
-from .core import process_page, output
-import core
+from pdftableextract.core import process_page, output
+import pdftableextract.core
 
 #-----------------------------------------------------------------------
 
@@ -25,10 +25,10 @@ def procargs() :
   p.add_argument("-name", help="name to add to XML tag, or HTML comments")
   p.add_argument("-pad", help="imitial image pading (pixels)", type=int,
      default=2 )
-  p.add_argument("-white",action="append", 
+  p.add_argument("-white",action="append",
     help="paint white to the bitmap as left:top:right:bottom in length units."
          "Done before painting black" )
-  p.add_argument("-black",action="append", 
+  p.add_argument("-black",action="append",
     help="paint black to the bitmap as left:top:right:bottom in length units."
          "Done after poainting white" )
   p.add_argument("-bitmap", action="store_true",
@@ -67,14 +67,10 @@ def main():
         raise
     sys.exit("I/O Error running pdf-table-extract: {0}".format(e))
   except OSError as e:
-    print("An OS Error occurred running pdf-table-extract: Is `pdftoppm` installed and available?")
+    print("An OS Error occurred running pdf-table-extract")
     if args.traceback:
         raise
     sys.exit("OS Error: {0}".format(e))
-  except subprocess.CalledProcessError as e:
-    if args.traceback:
-        raise
-    sys.exit("Error while checking a subprocess call: {0}".format(e))
   except Exception as e:
     if args.traceback:
         raise
@@ -85,9 +81,9 @@ def imain(args):
     if args.checkcrop or args.checklines or args.checkdivs or args.checkcells:
         for pgs in args.page :
             success = process_page(args.infile, pgs,
-                bitmap=args.bitmap, 
-                checkcrop=args.checkcrop, 
-                checklines=args.checklines, 
+                bitmap=args.bitmap,
+                checkcrop=args.checkcrop,
+                checklines=args.checklines,
                 checkdivs=args.checkdivs,
                 checkcells=args.checkcells,
                 whitespace=args.whitespace,
@@ -105,9 +101,9 @@ def imain(args):
     else:
         for pgs in args.page :
             cells.extend(process_page(args.infile, pgs,
-                bitmap=args.bitmap, 
-                checkcrop=args.checkcrop, 
-                checklines=args.checklines, 
+                bitmap=args.bitmap,
+                checkcrop=args.checkcrop,
+                checklines=args.checklines,
                 checkdivs=args.checkdivs,
                 checkcells=args.checkcells,
                 whitespace=args.whitespace,
@@ -127,6 +123,3 @@ def imain(args):
                 args.outfile = sys.stdout
             filenames["{0}_filename".format(args.t)] = args.outfile
             output(cells, args.page, name=args.name, infile=args.infile, output_type=args.t, **filenames)
-
-
-