diff --git a/README.rst b/README.rst index 6a82944..1f7d25b 100644 --- a/README.rst +++ b/README.rst @@ -111,7 +111,7 @@ will happen in sub-directories under this directory. The ``folders`` section defines your filing directories and the keywords associated with them. In this example, we have three filing directories -(finances, travl, receipts), and some associated keywords for each +(finances, travel, receipts), and some associated keywords for each filing directory. For example, if your OCR'ed PDF contains the phrase "american express" (in any upper/lower case), it will be filed into ``docs/filed/finances`` @@ -250,7 +250,7 @@ PyPDFOCR is available in PyPI, so you can just run: pip install pypdfocr -Please note that some of the 3rd-party libraries required by PyPDFOCR wiill +Please note that some of the 3rd-party libraries required by PyPDFOCR will require some build tools, especially on a default Ubuntu system. If you run into any issues using pip install, you may want to install the following packages on Ubuntu and try again: diff --git a/docs/conf.py b/docs/conf.py index 078ff35..6593a74 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -313,7 +313,7 @@ # The format is a list of tuples containing the path and title. #epub_pre_files = [] -# HTML files shat should be inserted after the pages created by sphinx. +# HTML files that should be inserted after the pages created by sphinx. # The format is a list of tuples containing the path and title. #epub_post_files = [] diff --git a/pypdfocr/pypdfocr_filer_evernote.py b/pypdfocr/pypdfocr_filer_evernote.py index 80ec115..2e5970b 100644 --- a/pypdfocr/pypdfocr_filer_evernote.py +++ b/pypdfocr/pypdfocr_filer_evernote.py @@ -120,8 +120,8 @@ def _connect_to_evernote(self, dictUserInfo): """ Establish a connection to evernote and authenticate. - :param dictUserInfo: Dict of user info like user/passwrod. 
For now, just the dev token - :returns success: Return wheter connection succeeded + :param dictUserInfo: Dict of user info like user/password. For now, just the dev token + :returns success: Return whether connection succeeded :rtype bool: """ print("Authenticating to Evernote") diff --git a/pypdfocr/pypdfocr_preprocess.py b/pypdfocr/pypdfocr_preprocess.py index e942cc3..96493a9 100644 --- a/pypdfocr/pypdfocr_preprocess.py +++ b/pypdfocr/pypdfocr_preprocess.py @@ -84,7 +84,7 @@ def _run_preprocess(self, in_filename): '-blur 1x1', #'-selective-blur 4x4+5%', '-adaptive-sharpen 0x2', - '-negate -define morphology:compose=darken -morphology Thinning Rectangle:1x30+0+0 -negate ', # Removes vertical lines >=60 pixes, reduces widht of >30 (oherwise tesseract < 3.03 completely ignores text close to vertical lines in a table) + '-negate -define morphology:compose=darken -morphology Thinning Rectangle:1x30+0+0 -negate ', # Removes vertical lines >=60 pixels, reduces width of >30 (otherwise tesseract < 3.03 completely ignores text close to vertical lines in a table) '"%s"' % (out_filename) ] logging.info("Preprocessing image %s for better OCR" % in_filename)