From d37b5157c3f30429a9dd5ff6537848d0d183c465 Mon Sep 17 00:00:00 2001 From: Charles Huang Date: Tue, 2 Aug 2016 07:45:27 +0100 Subject: [PATCH 1/2] python3 compatibility --- src/pdftableextract/__init__.py | 2 +- src/pdftableextract/core.py | 10 +++++----- src/pdftableextract/extracttab.py | 6 +++--- src/pdftableextract/pnm.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/pdftableextract/__init__.py b/src/pdftableextract/__init__.py index 6dbe85c..3cb3913 100644 --- a/src/pdftableextract/__init__.py +++ b/src/pdftableextract/__init__.py @@ -1,2 +1,2 @@ # Example package with a console entry point -from core import process_page, output, table_to_list \ No newline at end of file +from .core import process_page, output, table_to_list diff --git a/src/pdftableextract/core.py b/src/pdftableextract/core.py index d1dce80..ffeeac9 100644 --- a/src/pdftableextract/core.py +++ b/src/pdftableextract/core.py @@ -32,11 +32,11 @@ def popen(name,command, *args, **kwargs): try: result=subprocess.Popen(command,*args, **kwargs) return result - except OSError, e: + except OSError as e: message="""Error running {0}. Is it installed correctly? 
Error: {1}""".format(name, e) raise OSError(message) - except Exception, e: + except Exception as e: raise def colinterp(a,x) : @@ -307,7 +307,7 @@ def isDiv(a, l,r,t,b) : whitespace = re.compile( r'\s+') - def getCell( (i,j,u,v) ): + def getCell(i,j,u,v): (l,r,t,b) = ( vd[2*i+1] , vd[ 2*(i+u) ], hd[2*j+1], hd[2*(j+v)] ) p = popen("pdftotext", "pdftotext -r %d -x %d -y %d -W %d -H %d -layout -nopgbrk -f %d -l %d %s -" % (bitmap_resolution, l-pad, t-pad, r-l, b-t, pg, pg, quote(infile)), @@ -329,7 +329,7 @@ def getCell( (i,j,u,v) ): #check that pdftotext exists by running a simple command check_for_required_executable("pdftotext",["pdftotext","-h"]) #end check - cells = [ getCell(x) for x in cells if + cells = [ getCell(*x) for x in cells if ( frow == None or (x[1] >= frow and x[1] <= lrow)) ] return cells @@ -404,7 +404,7 @@ def o_cells_xml(cells,pgs, outfile=None,infile=None, name=None, output_type=None root.setAttribute("name",name) for cl in cells : x = doc.createElement("cell") - map(lambda(a): x.setAttribute(*a), zip("xywhp",map(str,cl))) + for a in zip("xywhp",map(str,cl)): x.setAttribute(*a) if cl[5] != "" : x.appendChild( doc.createTextNode(cl[5]) ) root.appendChild(x) diff --git a/src/pdftableextract/extracttab.py b/src/pdftableextract/extracttab.py index ab6c74d..a35a75f 100644 --- a/src/pdftableextract/extracttab.py +++ b/src/pdftableextract/extracttab.py @@ -251,7 +251,7 @@ def isDiv(a, l,r,t,b) : whitespace = re.compile( r'\s+') - def getCell( (i,j,u,v) ): + def getCell(i,j,u,v): (l,r,t,b) = ( vd[2*i+1] , vd[ 2*(i+u) ], hd[2*j+1], hd[2*(j+v)] ) p = subprocess.Popen( ("pdftotext -r %d -x %d -y %d -W %d -H %d -layout -nopgbrk -f %d -l %d %s -" @@ -269,13 +269,13 @@ def getCell( (i,j,u,v) ): #if args.boxes : # cells = [ x + (pg,"",) for x in cells ] #else : - # cells = map(getCell, cells) + # cells = [getCell(*cell) for cell in cells] if args.boxes : cells = [ x + (pg,"",) for x in cells if ( frow == None or (x[1] >= frow and x[1] <= lrow)) ] else : 
- cells = [ getCell(x) for x in cells if + cells = [ getCell(*x) for x in cells if ( frow == None or (x[1] >= frow and x[1] <= lrow)) ] return cells diff --git a/src/pdftableextract/pnm.py b/src/pdftableextract/pnm.py index cbb05dd..2ab7879 100644 --- a/src/pdftableextract/pnm.py +++ b/src/pdftableextract/pnm.py @@ -28,7 +28,7 @@ def readPNM(fd): m = int(m) if m != 255 : - print "Just want 8 bit pgms for now!" + print("Just want 8 bit pgms for now!") d = fromstring(data,dtype=uint8) d = reshape(d, (height,width) ) From 3347e53b69d538bf3e7d24dbb613a53a0504db6a Mon Sep 17 00:00:00 2001 From: tbirdss <58036517+tbirdss@users.noreply.github.com> Date: Sat, 13 Jun 2020 01:06:52 +0530 Subject: [PATCH 2/2] Update __init__.py --- src/pdftableextract/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdftableextract/__init__.py b/src/pdftableextract/__init__.py index 6dbe85c..3cb3913 100644 --- a/src/pdftableextract/__init__.py +++ b/src/pdftableextract/__init__.py @@ -1,2 +1,2 @@ # Example package with a console entry point -from core import process_page, output, table_to_list \ No newline at end of file +from .core import process_page, output, table_to_list