OCR fix #6

Open · wants to merge 16 commits into master
184 changes: 120 additions & 64 deletions LocateText.py
100755 → 100644
@@ -1,79 +1,135 @@
#!/usr/bin/python
# vim: set ts=2 expandtab:
"""
Module: LocateText
Desc:
Author: John O'Neil
Email: [email protected]
DATE: Saturday, Sept 14th 2013

Segment raw manga scan and output image
with text areas outlined in red.

"""
#import clean_page as clean

import image_io as imgio
import connected_components as cc
import run_length_smoothing as rls
import clean_page as clean
import ocr
import segmentation as seg
import furigana
import arg
import defaults

import numpy as np
import cv2
import argparse
import sys
import os
import scipy.ndimage
import datetime
import time

def process_image(infile, outfile, path, anno_dir_exists=False):

  t0 = time.perf_counter()

  # get correct output paths
  path = imgio.normalize_path(path)
  img_out, txt_out = imgio.get_output_directory(path, infile, path, os.path.splitext(infile)[0] + '.txt', outfile)
  html_path = os.path.split(img_out)[0]

  img = cv2.imread(path + infile)
  img_copy = img.copy()
  gray = clean.grayscale(img)

  binary_threshold = arg.integer_value('binary_threshold', default_value=defaults.BINARY_THRESHOLD)
  if arg.boolean_value('verbose'):
    print('Binarizing with threshold value of ' + str(binary_threshold))
  inv_binary = cv2.bitwise_not(clean.binarize(gray, threshold=binary_threshold))

  # segment the page; the text estimate lives in channel 2 of the result
  segmented_image = seg.segment_image(gray)
  segmented_image = segmented_image[:, :, 2]

  components = cc.get_connected_components(segmented_image)

  # draw bounding boxes around detected text and save the annotated image
  cc.draw_bounding_boxes(img, components, color=(255, 0, 0), line_size=2)
  imgio.save_image(img, img_out)

  if arg.boolean_value('verbose'):
    t1 = time.perf_counter()
    print(f"Image processing done in {t1 - t0:.2f} seconds")

  # without --html there is nothing further to generate
  if not arg.boolean_value('html'):
    return anno_dir_exists

  texts = ocr.ocr_on_bounding_boxes(img_copy, components)

  if arg.boolean_value('verbose'):
    t2 = time.perf_counter()
    print(f"OCR done in {t2 - t1:.2f} seconds")

  if arg.boolean_value('debug'):
    imgio.save_text(texts, txt_out)

  html_out = imgio.generate_html(os.path.split(img_out)[1], components, texts)
  if not anno_dir_exists:
    # copy the annotorious viewer assets next to the generated html only once
    imgio.copytree_('./annotorious/', imgio.normalize_path(html_path) + 'annotorious/')
    anno_dir_exists = True

  imgio.save_webpage(html_out, os.path.splitext(img_out)[0] + '.html')

  return anno_dir_exists

def main():

  # Working commands
  # python LocateText.py -i <infile> -o <outfile>
  # python LocateText.py -p <inpath> -o <outpath>
  # python LocateText.py -p <inpath> --default_directory

  parser = argparse.ArgumentParser(description='Generate text file containing OCR\'d text.')
  group = parser.add_mutually_exclusive_group(required=True)
  group.add_argument('-i', '--infile', help='Input image to annotate.', dest='infile', type=str)
  group.add_argument('-p', '--inpath', help='Path to directory containing image(s).', dest='inpath', type=str)
  parser.add_argument('-o', '--output', help='Output file or filepath.', dest='outfile')
  parser.add_argument('-s', '--scheme', help='Output naming scheme. Appended to input filename.', dest='scheme', type=str, default='_text')
  parser.add_argument('-v', '--verbose', help='Verbose operation. Print status messages during processing.', action="store_true")
  #parser.add_argument('-d','--debug', help='Overlay input image into output.', action="store_true")
  parser.add_argument('--html', help='Display output in an html file.', action="store_true")
  parser.add_argument('--furigana', help='Attempt to suppress furigana characters which interfere with OCR.', action="store_true")
  parser.add_argument('--sigma', help='Std dev of gaussian preprocessing filter.', type=float, default=None)
  parser.add_argument('--binary_threshold', help='Binarization threshold value from 0 to 255.', type=int, default=defaults.BINARY_THRESHOLD)
  parser.add_argument('--additional_filtering', help='Attempt to filter false text positives by histogram processing.', action="store_true")
  parser.add_argument('--default_directory', help='Store output in predefined folders.', action="store_true")
  arg.value = parser.parse_args()

  infile = arg.string_value('infile')
  inpath = arg.string_value('inpath')
  scheme = arg.string_value('scheme')
  outfile = arg.string_value('outfile')

  # single-file mode
  if infile:
    if not os.path.isfile(infile):
      print('Please provide a regular existing input file. Use -h option for help.')
      sys.exit(-1)
    if arg.boolean_value('verbose'):
      print('File exists. Performing OCR . . .')
    path_, infile_ = os.path.split(infile)
    process_image(infile_, outfile, path_)
    sys.exit(0)

  # directory mode: validate the input path before listing it
  if not os.path.isdir(inpath):
    print('Please provide an existing input directory. Use -h option for help.')
    sys.exit(-1)

  infiles = os.listdir(inpath)
  if not infiles:
    print('Directory is empty. Place images in the desired folder. Use -h option for help.')
    sys.exit(-1)

  if arg.boolean_value('verbose'):
    print('Non-empty directory. Attempting to perform ocr on all files . . .')

  anno_dir_exists = False
  for infile_ in infiles:
    try:
      outfile_ = outfile
      anno_dir_exists = process_image(infile_, outfile_, inpath, anno_dir_exists)
    except AttributeError:
      # cv2.imread returns None for non-images; skip them rather than abort the batch
      if arg.boolean_value('verbose'):
        print('Input file "', infile_, '" is not an image', sep='')

if __name__ == '__main__':
main()
74 changes: 30 additions & 44 deletions README.md
@@ -8,6 +8,21 @@ Overview
--------
This repository holds some experiments I did in summer 2013 during a sudden interest in text detection in images. It uses some standard techniques (run length smoothing, connected component analysis) and some experimental stuff. Overall, I was able to get in the neighborhood of where I wanted to be, but the results are very processing intensive and not terribly reliable.

Usage
-----
Provide an input file or path using the `-i` and `-p` arguments, respectively. When an input path is provided, all files in the directory are processed. A corresponding output file or path may be supplied with the `-o` argument; if no output argument is given, output is saved to the default file/directory.
```
python LocateText.py -i <infile> -o <outfile>
python LocateText.py -p <inpath> -o <outpath>
python LocateText.py -p <inpath> --default_directory
```
Other useful arguments:
```
--html : Display output in an html file.
--default_directory : Store output in predefined folders. This is recommended when processing a directory of images, since otherwise input files may be overwritten.
--additional_filtering : Attempt to filter false text positives by histogram processing.
```
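
For example, to process every image in a directory into the predefined output folders, with HTML annotation and progress messages (`./scans/` is just a placeholder path):
```
python LocateText.py -p ./scans/ --default_directory --html --verbose
```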

State
-----
I haven't bothered to form this into a python library. It's just a series of scripts each trying out various things, such as:
@@ -19,17 +34,17 @@ I haven't bothered to form this into a python library. It's just a series of scr
Text Location Example
---------------------
Here's an example run of a page from Weekly Young Magazine #31 2013. The input image is as follows (jpg).
![Input image](doc/194.jpg)

An initial estimate of text locations can be found by the `LocateText.py` script:

```
python LocateText.py -i 194.jpg
```

With the results as follows (estimated text marked with red boxes):

![Locate text output](doc/194_text_areas_no_filter.jpg)

Note that in the output above you see several of the implementation deficiencies. For example, there are several small false positives scattered around, and some major false positives on the girl's sleeve and eyes in panels 2 and 3.
Also note that many large areas of text were not detected (false negatives). Despite how pleased I was with the results (and I was more pleased than you could possibly believe), significant improvements are needed.
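
The `--additional_filtering` argument attempts to prune false positives like these via histogram processing; a run on the same page might look like this (the output filename is just an example):
```
python LocateText.py -i 194.jpg --additional_filtering -o 194_text_areas_filtered.png
```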
Expand All @@ -39,63 +54,33 @@ Text Segmentation Example
To more easily separate text from background you can also segment the image, with text areas and non text being separated into different (RGB) color channels. This easily allows you to remove estimated text from image entirely or vice-versa.
Use the command:
```
python segmentation.py 194.jpg -o 194_segmentation.png
```
The results follow:

![Segmentation image](doc/194_segmentation.png)
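
Since `LocateText.py` reads the text estimate out of channel 2 of the segmented image (`segmented_image[:,:,2]`), a small Python sketch along these lines could split text from background (file names are illustrative):
```python
import cv2

# OpenCV loads images in BGR order, so channel 2 holds the text estimate
seg = cv2.imread('194_segmentation.png')
text_only = seg[:, :, 2]          # keep only the estimated text pixels
no_text = seg.copy()
no_text[:, :, 2] = 0              # zero the text channel to erase estimated text
cv2.imwrite('194_text_only.png', text_only)
cv2.imwrite('194_no_text.png', no_text)
```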

OCR and Html Generation
-----------------------
I did take the time to run simple OCR on some of the located text regions. This uses pytesseract, the Python wrapper for Google's Tesseract OCR engine. Unlike the original version of the `ocr.py` script, text is assumed to be vertically aligned.
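
A minimal sketch of the kind of call involved (the real crop-and-recognize loop lives in `ocr.py`; the bounding box below is made up, and `--psm 5` is Tesseract's page-segmentation mode for a single uniform block of vertical text):
```python
import cv2
import pytesseract

img = cv2.imread('194.jpg')
x, y, w, h = 100, 50, 60, 300                # hypothetical box from connected-component analysis
region = img[y:y + h, x:x + w]
# requires the jpn_vert trained data to be installed
text = pytesseract.image_to_string(region, lang='jpn_vert', config='--psm 5')
print(text)
```
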
I also embedded those results in an HTML output, allowing "readers" to hover over the Japanese text, revealing the OCR output, which can be edited/copied/pasted. This is enabled by the optional argument `--html`. Some examples can be seen below:

![OCR output success](doc/194_html_ocr_success.png)
![OCR output failure](doc/194_html_ocr_failure.png)

Even with the `jpn_vert` trained data, results are still unreliable. Most of the wider bounding boxes return successful results; errors mostly occur when a bounding box contains only a single line of vertically aligned text.

Dependencies
-----------------------
You should be able to install most of the dependencies via pip, or through your operating system's package manager (e.g. Homebrew for Mac OS X, http://brew.sh/).
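
For instance, a plausible one-shot install of the Python-level dependencies (PyPI package names; untested, adjust versions as needed):
```
pip install numpy scipy opencv-python pytesseract
```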

### Python 3+

https://www.python.org/

Install as per OS instructions.

### Pip

http://pip.readthedocs.org/en/latest/index.html

Install as per OS instructions.

### Numpy

http://www.numpy.org/
@@ -141,8 +126,9 @@ Install as per OS instructions, this should also include the python bindings.
https://code.google.com/p/tesseract-ocr/

Install as per OS instructions, then use pip to install the python bindings.
Don't forget to include your target language's trained data sets and to set the `tesseract_cmd` path (see the `ocr.py` script).

```
pip install pytesseract
```
Then edit line 7 of the `ocr.py` script to point at your Tesseract binary:
```
pytesseract.pytesseract.tesseract_cmd = <path>
```
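
On Debian/Ubuntu, for example, the Japanese trained data (including the vertical variant) is typically packaged as follows, though package names vary by distribution:
```
sudo apt-get install tesseract-ocr tesseract-ocr-jpn tesseract-ocr-jpn-vert
```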