Skip to content

Commit

Permalink
make typing py3.9-compatible
Browse files Browse the repository at this point in the history
Signed-off-by: Panos Vagenas <[email protected]>
  • Loading branch information
vagenas committed Sep 11, 2024
1 parent 7b45aa3 commit 1ccaeae
Showing 1 changed file with 9 additions and 9 deletions.
18 changes: 9 additions & 9 deletions docling_core/chunker/hierarchical_chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

import logging
from enum import Enum
-from typing import Any, Iterator
+from typing import Any, Iterator, Optional, Union

import pandas as pd
from pydantic import BaseModel, PositiveInt
Expand Down Expand Up @@ -53,20 +53,20 @@ class _NodeName(str, Enum):
}

@classmethod
-def _norm(cls, text: str | None) -> str | None:
+def _norm(cls, text: Optional[str]) -> Optional[str]:
return text.lower() if text is not None else None

@classmethod
-def _convert_table_to_dataframe(cls, table: Table) -> pd.DataFrame | None:
+def _convert_table_to_dataframe(cls, table: Table) -> Optional[pd.DataFrame]:
if table.data:
table_content = [[cell.text for cell in row] for row in table.data]
return pd.DataFrame(table_content)
else:
return None

@classmethod
-def _triplet_serialize(cls, table) -> str | None:
-output_text: str | None = None
+def _triplet_serialize(cls, table) -> Optional[str]:
+output_text: Optional[str] = None
table_df = cls._convert_table_to_dataframe(table)
if table_df is not None and table_df.shape[0] > 1 and table_df.shape[1] > 1:
rows = [item.strip() for item in table_df.iloc[:, 0].to_list()]
Expand All @@ -87,15 +87,15 @@ def _create_path(cls, pos: int, path_prefix: str = "main-text") -> str:
return f"$.{path_prefix}[{pos}]"

class _MainTextItemNode(BaseModel):
-parent: int | None = None
+parent: Optional[int] = None
children: list[int] = []

class _TitleInfo(BaseModel):
text: str
path_in_doc: str

class _GlobalContext(BaseModel):
-title: _HC._TitleInfo | None = None
+title: Optional[_HC._TitleInfo] = None

class _DocContext(BaseModel):
dmap: dict[int, _HC._MainTextItemNode] # main text element context
Expand Down Expand Up @@ -276,13 +276,13 @@ def _build_chunk(
idx: int,
delim: str,
rec: bool = False,
-) -> Chunk | None:
+) -> Optional[Chunk]:
texts = self._build_chunk_impl(doc=doc, doc_map=doc_map, idx=idx, rec=rec)
concat = delim.join([t.text for t in texts if t.text])
assert doc.main_text is not None
if len(concat) >= self.min_chunk_len:
orig_item = doc.main_text[idx]
-item: BaseText | Table
+item: Union[BaseText, Table]
if isinstance(orig_item, Ref):
if _HC._norm(orig_item.obj_type) == _HC._NodeType.TABLE and doc.tables:
pos = int(orig_item.ref.split("/")[2])
Expand Down

0 comments on commit 1ccaeae

Please sign in to comment.