Skip to content

Commit

Permalink
added the function to detect the orientation
Browse files Browse the repository at this point in the history
Signed-off-by: Peter Staar <[email protected]>
  • Loading branch information
PeterStaar-IBM committed Dec 7, 2024
1 parent 3914d2d commit 1fb52e9
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 2 deletions.
22 changes: 20 additions & 2 deletions docling_parse/extract_text_from_bbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
create_pil_image_of_page_v2,
draw_bbox_on_page_v2,
filter_columns_v2,
get_orientation_bbox_v2,
)

# Configure logging
Expand Down Expand Up @@ -55,6 +56,17 @@ def parse_args():
help="bounding box as str x0,y0,x1,y1",
)

# Category (`original` or `sanitized`) used when rendering the page image
parser.add_argument(
"-c",
"--category",
type=str,
choices=["original", "sanitized"],
required=False,
default="sanitized",
help="category [`original`, `sanitized`]",
)

# Parse the command-line arguments
args = parser.parse_args()

Expand All @@ -66,12 +78,13 @@ def parse_args():
args.input_pdf,
int(args.page),
list(map(float, args.bbox.split(","))),
args.category,
)


def main():

log_level, pdf_file, page_num, bbox = parse_args()
log_level, pdf_file, page_num, bbox, category = parse_args()

parser = pdf_parser_v2(log_level)

Expand Down Expand Up @@ -117,12 +130,17 @@ def main():
logging.info("#-cells: " + str(len(sanitized_cells["data"])))
logging.info(f"selected cells: \n\n{table}\n\n")

img = create_pil_image_of_page_v2(doc["pages"][0])
img = create_pil_image_of_page_v2(doc["pages"][0], category=category)
# img.show()

img = draw_bbox_on_page_v2(img, page, list(map(int, bbox)))
img.show()

orientation = get_orientation_bbox_v2(
data=sanitized_cells["data"], header=sanitized_cells["header"], bbox=bbox
)
logging.info(f"orientation: {orientation}")


if __name__ == "__main__":
main()
79 changes: 79 additions & 0 deletions docling_parse/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from enum import Enum
from typing import Dict, List, Optional, Tuple, Union

from PIL import Image, ImageColor, ImageDraw, ImageFont
Expand Down Expand Up @@ -157,6 +158,84 @@ def create_pil_image_of_page_v1(
return img


class BBoxDirection(Enum):
    """Direction labels for the text inside a bounding box.

    Every member's value is the same string as its name.
    """

    Bottom2Top = "Bottom2Top"
    Right2Left = "Right2Left"
    Top2Bottom = "Top2Bottom"
    Left2Right = "Left2Right"

    def rotation_to_bottom2top(direction: "BBoxDirection"):
        """Return the rotation angle associated with `direction`.

        The mapping is Bottom2Top -> 0.0, Right2Left -> 90.0,
        Top2Bottom -> 180.0 and Left2Right -> -90.0 (the values look like
        degrees -- confirm the sign convention with the callers).

        The function takes no `self`: call it through the class with an
        explicit argument, or on a member, which is then bound as
        `direction`. Any value that equals none of the four members yields
        `None`.
        """
        # Ordered pairs instead of a dict so that the comparison stays `==`.
        angle_by_direction = (
            (BBoxDirection.Bottom2Top, 0.0),
            (BBoxDirection.Right2Left, 90.0),
            (BBoxDirection.Top2Bottom, 180.0),
            (BBoxDirection.Left2Right, -90.0),
        )

        for candidate, angle in angle_by_direction:
            if direction == candidate:
                return angle

        return None


def get_orientation_bbox_v2(
    data: List[Tuple], header: list[str], bbox: Tuple[float, float, float, float]
) -> BBoxDirection:
    """Return the dominant text direction of the cells lying inside `bbox`.

    Every row of `data` whose `(x0, y0, x1, y1)` box is fully contained in
    `bbox` votes for one direction, weighted by the length of its text. The
    vote is derived from the relative position of the row's `(r_x0, r_y0)`
    and `(r_x2, r_y2)` points (presumably two opposite corners of the rotated
    cell rectangle -- confirm against the parser output).

    Args:
        data: cell rows; each row is indexed with the positions found in
            `header`.
        header: column names of `data`. Must contain `x0`, `y0`, `x1`, `y1`,
            `r_x0`, `r_y0`, `r_x2`, `r_y2` and `text`.
        bbox: query box given as `(x0, y0, x1, y1)`.

    Returns:
        The direction with the largest accumulated text length. Ties go to
        the direction declared first in `BBoxDirection`; when no cell
        contributes, `BBoxDirection.Bottom2Top` is returned.

    Raises:
        ValueError: if one of the required columns is missing from `header`.
    """
    x0 = header.index("x0")
    y0 = header.index("y0")

    x1 = header.index("x1")
    y1 = header.index("y1")

    # Only the points 0 and 2 of the `r_*` columns are needed below.
    r_x0 = header.index("r_x0")
    r_y0 = header.index("r_y0")

    r_x2 = header.index("r_x2")
    r_y2 = header.index("r_y2")

    ti = header.index("text")

    # Accumulated text length per direction, in enum declaration order.
    hist = {direction: 0 for direction in BBoxDirection}

    for row in data:

        inside = (
            bbox[0] <= row[x0]
            and row[x1] <= bbox[2]
            and bbox[1] <= row[y0]
            and row[y1] <= bbox[3]
        )
        if not inside:
            continue

        weight = len(row[ti])

        # Rows where the two points share an x or y coordinate match none of
        # the strict comparisons and therefore do not vote.
        if row[r_x0] < row[r_x2] and row[r_y0] < row[r_y2]:
            hist[BBoxDirection.Bottom2Top] += weight

        elif row[r_x2] < row[r_x0] and row[r_y0] < row[r_y2]:
            hist[BBoxDirection.Right2Left] += weight

        elif row[r_x2] < row[r_x0] and row[r_y2] < row[r_y0]:
            hist[BBoxDirection.Top2Bottom] += weight

        elif row[r_x0] < row[r_x2] and row[r_y2] < row[r_y0]:
            # This quadrant used to be credited to Right2Left, which made
            # Left2Right impossible to detect.
            hist[BBoxDirection.Left2Right] += weight

    max_dir = BBoxDirection.Bottom2Top
    max_val = 0
    for key, val in hist.items():
        logging.info(f"{key}: {val}")
        # Strict comparison: the earliest direction wins a tie.
        if val > max_val:
            max_val = val
            max_dir = key

    return max_dir


def filter_columns_v2(data: List[Tuple], header: list[str], new_header: list[str]):

new_data = []
Expand Down

0 comments on commit 1fb52e9

Please sign in to comment.