diff --git a/src/pdftableextract/core.py b/src/pdftableextract/core.py index d1dce80..cb35889 100644 --- a/src/pdftableextract/core.py +++ b/src/pdftableextract/core.py @@ -305,8 +305,6 @@ def isDiv(a, l,r,t,b) : #----------------------------------------------------------------------- # fork out to extract text for each cell. - whitespace = re.compile( r'\s+') - def getCell( (i,j,u,v) ): (l,r,t,b) = ( vd[2*i+1] , vd[ 2*(i+u) ], hd[2*j+1], hd[2*(j+v)] ) p = popen("pdftotext", @@ -316,7 +314,7 @@ def getCell( (i,j,u,v) ): ret = p.communicate()[0] if whitespace != 'raw' : - ret = whitespace.sub( "" if whitespace == "none" else " ", ret ) + ret = re.compile( r'\s+').sub( "" if whitespace == "none" else " ", ret ) if len(ret) > 0 : ret = ret[ (1 if ret[0]==' ' else 0) : len(ret) - (1 if ret[-1]==' ' else 0) ]