From 48b41d9325363eb67736b40d2b398dde3a50c22d Mon Sep 17 00:00:00 2001 From: Daniel von Atzigen Date: Tue, 29 Oct 2024 12:56:19 +0100 Subject: [PATCH] Fix temporary and final output file collision --- ocr/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ocr/__init__.py b/ocr/__init__.py index cd468c3..8f939bb 100644 --- a/ocr/__init__.py +++ b/ocr/__init__.py @@ -59,7 +59,7 @@ def process_pdf( confidence_threshold: float, use_aggressive_strategy: bool, ): - tmp_out_path = os.path.join(tmp_dir, f"output.pdf") + tmp_out_path = os.path.join(tmp_dir, f"output.incremental.pdf") in_doc = fitz.open(in_path) out_doc = fitz.open(in_path) @@ -93,9 +93,9 @@ def process_pdf( out_doc.save(tmp_out_path, incremental=True, encryption=PDF_ENCRYPT_KEEP) out_doc.close() + in_doc.close() out_doc = fitz.open(tmp_out_path) out_doc.save(out_path, garbage=3, deflate=True) - in_doc.close() out_doc.close() # Verify that we can read the written document, and that it still has the same number of pages. Some corrupt input