added the function to detect the orientation

Signed-off-by: Peter Staar <[email protected]>
DS4SD · Dec 7, 2024 · 1fb52e9 · 1fb52e9
1 parent 3914d2d
commit 1fb52e9
Show file tree

Hide file tree

Showing 2 changed files with 99 additions and 2 deletions.
diff --git a/docling_parse/extract_text_from_bbox.py b/docling_parse/extract_text_from_bbox.py
@@ -9,6 +9,7 @@
     create_pil_image_of_page_v2,
     draw_bbox_on_page_v2,
     filter_columns_v2,
+    get_orientation_bbox_v2,
 )
 
 # Configure logging
@@ -55,6 +56,17 @@ def parse_args():
         help="bounding box as str x0,y0,x1,y1",
     )
 
+    # Category (`original` or `sanitized`) used when rendering the page image
+    parser.add_argument(
+        "-c",
+        "--category",
+        type=str,
+        choices=["original", "sanitized"],
+        required=False,
+        default="sanitized",
+        help="category [`original`, `sanitized`]",
+    )
+
     # Parse the command-line arguments
     args = parser.parse_args()
 
@@ -66,12 +78,13 @@ def parse_args():
         args.input_pdf,
         int(args.page),
         list(map(float, args.bbox.split(","))),
+        args.category,
     )
 
 
 def main():
 
-    log_level, pdf_file, page_num, bbox = parse_args()
+    log_level, pdf_file, page_num, bbox, category = parse_args()
 
     parser = pdf_parser_v2(log_level)
 
@@ -117,12 +130,17 @@ def main():
     logging.info("#-cells: " + str(len(sanitized_cells["data"])))
     logging.info(f"selected cells: \n\n{table}\n\n")
 
-    img = create_pil_image_of_page_v2(doc["pages"][0])
+    img = create_pil_image_of_page_v2(doc["pages"][0], category=category)
     # img.show()
 
     img = draw_bbox_on_page_v2(img, page, list(map(int, bbox)))
     img.show()
 
+    orientation = get_orientation_bbox_v2(
+        data=sanitized_cells["data"], header=sanitized_cells["header"], bbox=bbox
+    )
+    logging.info(f"orientation: {orientation}")
+
 
 if __name__ == "__main__":
     main()
diff --git a/docling_parse/utils.py b/docling_parse/utils.py
@@ -1,4 +1,5 @@
 import logging
+from enum import Enum
 from typing import Dict, List, Optional, Tuple, Union
 
 from PIL import Image, ImageColor, ImageDraw, ImageFont
@@ -157,6 +158,84 @@ def create_pil_image_of_page_v1(
     return img
 
 
+class BBoxDirection(Enum):
+    """Orientation of the content of a bounding box, as one of four directions.
+
+    The value of every member is its own name as a string. The declaration
+    order is kept stable because code iterating over the enum sees the members
+    in this order.
+    """
+
+    Bottom2Top = "Bottom2Top"
+    Right2Left = "Right2Left"
+    Top2Bottom = "Top2Bottom"
+    Left2Right = "Left2Right"
+
+    def rotation_to_bottom2top(direction: "BBoxDirection"):
+        """Return the rotation angle associated with `direction`.
+
+        The first parameter takes the place of `self`, so both
+        `member.rotation_to_bottom2top()` and
+        `BBoxDirection.rotation_to_bottom2top(member)` are valid calls.
+
+        Returns:
+            0.0 for Bottom2Top, 90.0 for Right2Left, 180.0 for Top2Bottom and
+            -90.0 for Left2Right (presumably degrees; the sign convention is
+            not visible here). None if `direction` equals none of the members.
+        """
+        # Built inside the method: a plain attribute in an Enum body would be
+        # turned into an extra enum member.
+        angle_by_member = (
+            (BBoxDirection.Bottom2Top, 0.0),
+            (BBoxDirection.Right2Left, 90.0),
+            (BBoxDirection.Top2Bottom, 180.0),
+            (BBoxDirection.Left2Right, -90.0),
+        )
+
+        for member, angle in angle_by_member:
+            if direction == member:
+                return angle
+
+        return None
+
+
+def get_orientation_bbox_v2(
+    data: List[Tuple], header: list[str], bbox: Tuple[float, float, float, float]
+) -> BBoxDirection:
+    """Return the dominant orientation of the cells that lie inside `bbox`.
+
+    Every row whose (x0, y0, x1, y1) box is fully contained in `bbox` votes
+    for one direction. The direction is chosen by comparing the points
+    (r_x0, r_y0) and (r_x2, r_y2) of the row (presumably opposite corners of
+    the cell's rotated rectangle; confirm against the parser output). Each
+    vote is weighted by the length of the row's text, and the accumulated
+    weight per direction is logged at INFO level.
+
+    Args:
+        data: rows whose fields are addressed by the positions in `header`.
+        header: column names; must contain x0, y0, x1, y1, r_x0..r_x3,
+            r_y0..r_y3 and text.
+        bbox: (x0, y0, x1, y1) limits compared against the row's x0/y0/x1/y1.
+
+    Returns:
+        The direction with the largest accumulated text length. Ties are
+        resolved in declaration order of `BBoxDirection`, so Bottom2Top is
+        returned when no row contributes.
+
+    Raises:
+        ValueError: if one of the required column names is missing in `header`.
+    """
+
+    # Resolve all columns up front. r_x1/r_y1/r_x3/r_y3 are not used below but
+    # are still looked up so that an incomplete header is rejected.
+    required = (
+        "x0", "y0", "x1", "y1",
+        "r_x0", "r_y0", "r_x1", "r_y1",
+        "r_x2", "r_y2", "r_x3", "r_y3",
+        "text",
+    )
+    col = {name: header.index(name) for name in required}
+
+    x0, y0, x1, y1 = col["x0"], col["y0"], col["x1"], col["y1"]
+    r_x0, r_y0 = col["r_x0"], col["r_y0"]
+    r_x2, r_y2 = col["r_x2"], col["r_y2"]
+    ti = col["text"]
+
+    hist = {direction: 0 for direction in BBoxDirection}
+
+    for row in data:
+
+        inside = (
+            bbox[0] <= row[x0]
+            and row[x1] <= bbox[2]
+            and bbox[1] <= row[y0]
+            and row[y1] <= bbox[3]
+        )
+        if not inside:
+            continue
+
+        # Rows where the two points share an x or y coordinate match none of
+        # the branches and therefore do not vote.
+        if row[r_x0] < row[r_x2] and row[r_y0] < row[r_y2]:
+            hist[BBoxDirection.Bottom2Top] += len(row[ti])
+
+        elif row[r_x2] < row[r_x0] and row[r_y0] < row[r_y2]:
+            hist[BBoxDirection.Right2Left] += len(row[ti])
+
+        elif row[r_x2] < row[r_x0] and row[r_y2] < row[r_y0]:
+            hist[BBoxDirection.Top2Bottom] += len(row[ti])
+
+        elif row[r_x0] < row[r_x2] and row[r_y2] < row[r_y0]:
+            # Fixed: this quadrant used to be added to Right2Left a second
+            # time, so Left2Right could never be detected.
+            hist[BBoxDirection.Left2Right] += len(row[ti])
+
+    for key, val in hist.items():
+        logging.info(f"{key}: {val}")
+
+    # max() returns the first maximal key in iteration order, i.e. Bottom2Top
+    # when all counts are zero.
+    return max(hist, key=lambda direction: hist[direction])
+
+
 def filter_columns_v2(data: List[Tuple], header: list[str], new_header: list[str]):
 
     new_data = []