Skip to content

Commit 8b66e61

Browse files
committed
chore: add safe initialization of PatentUsptoDocumentBackend
Signed-off-by: Cesar Berrospi Ramis <[email protected]>
1 parent e33361c commit 8b66e61

File tree

2 files changed

+13
-8
lines changed

2 files changed

+13
-8
lines changed

docling/backend/xml/__init__.py

Whitespace-only changes.

docling/backend/xml/uspto_backend.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -67,17 +67,22 @@ def __init__(
6767
self.patent_content: str = ""
6868
self.parser: Optional[PatentUspto] = None
6969

70-
if isinstance(self.path_or_stream, BytesIO):
71-
while line := self.path_or_stream.readline().decode("utf-8"):
72-
if line.startswith("<!DOCTYPE") or line == "PATN\n":
73-
self._set_parser(line)
74-
self.patent_content += line
75-
elif isinstance(self.path_or_stream, Path):
76-
with open(self.path_or_stream, encoding="utf-8") as file_obj:
77-
while line := file_obj.readline():
70+
try:
71+
if isinstance(self.path_or_stream, BytesIO):
72+
while line := self.path_or_stream.readline().decode("utf-8"):
7873
if line.startswith("<!DOCTYPE") or line == "PATN\n":
7974
self._set_parser(line)
8075
self.patent_content += line
76+
elif isinstance(self.path_or_stream, Path):
77+
with open(self.path_or_stream, encoding="utf-8") as file_obj:
78+
while line := file_obj.readline():
79+
if line.startswith("<!DOCTYPE") or line == "PATN\n":
80+
self._set_parser(line)
81+
self.patent_content += line
82+
except Exception as exc:
83+
raise RuntimeError(
84+
f"Could not initialize USPTO backend for file with hash {self.document_hash}."
85+
) from exc
8186

8287
def _set_parser(self, doctype: str) -> None:
8388
doctype_line = doctype.lower()

0 commit comments

Comments (0)