kcroker · v-- · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026
diff --git a/.python-version b/.python-version
@@ -1 +1 @@
-3.10
+3.11
diff --git a/dpsprep/dpsprep.py b/dpsprep/dpsprep.py
@@ -159,7 +159,7 @@ def dpsprep(  # noqa: C901, PLR0912, PLR0913, PLR0915
         for task in tasks:
             try:
                 task.get(timeout=25)
-            except multiprocessing.TimeoutError:  # noqa: PERF203
+            except multiprocessing.TimeoutError:
                 pool_is_working = True
 
     pool.join()
@@ -179,12 +179,16 @@ def dpsprep(  # noqa: C901, PLR0912, PLR0913, PLR0915
     if no_text:
         combine_pdfs_on_fs_without_text(workdir, outline, len(document.pages))
 
-        if ocr_options is None:
+        ocr_success = False
+
+        if ocr_options:
+            loguru.logger.info('Performing OCR.')
+            ocr_success = perform_ocr(workdir, ocr_options)
+        else:
             loguru.logger.info('Skipping the text layer.')
+
+        if not ocr_success:
             shutil.copy(workdir.combined_pdf_without_text_path, workdir.combined_pdf_path)
-        else:
-            loguru.logger.info('Performing OCR.')
-            perform_ocr(workdir, ocr_options)
     else:
         combine_pdfs_on_fs_with_text(workdir, outline)
 

diff --git a/dpsprep/ocrmypdf.py b/dpsprep/ocrmypdf.py
@@ -1,63 +1,34 @@
-import argparse
-import shutil
-from typing import Any
-
-import loguru
-
-from .workdir import WorkingDirectory
-
 # We use OCRmyPDF in a non-canonical way: only optimize the file without performing any OCR.
 # The optimization procedure provides good results and preserves the text layer and outline.
 # The code here is based on
-# https://github.com/ocrmypdf/OCRmyPDF/blob/fb006ef39f7f8842dec1976bebe4bcd5ca2e8df8/src/ocrmypdf/optimize.py#L724
+#   https://github.com/ocrmypdf/OCRmyPDF/blob/fb006ef39f7f8842dec1976bebe4bcd5ca2e8df8/src/ocrmypdf/optimize.py#L724
+# with some simplifications for OCRmyPDF 17: we build its own OcrOptions instead of emulating the options namespace.
 
+import shutil
+from typing import Any
 
-class OptimizeOptions(argparse.Namespace):
-    """Emulate ocrmypdf's options."""
-
-    input_file: str
-    jobs: int
-    optimize: int
-    jpeg_quality: int
-    png_quality: int
-    jbig2_page_group_size: int
-    jbig2_lossy: bool
-    jbig2_threshold: float
-    quiet: bool
-    progress_bar: bool
+import loguru
 
-    def __init__(
-        self, input_file: str, jobs: int, optimize_: int, jpeg_quality: int, png_quality: int,
-    ) -> None:
-        self.input_file = input_file
-        self.jobs = jobs
-        self.optimize = optimize_
-        self.jpeg_quality = jpeg_quality
-        self.png_quality = png_quality
-        self.jbig2_page_group_size = 0  # When 0, this should be adjusted inside OCRmyPDF's "optimize" function
-        self.jbig2_lossy = False
-        self.jbig2_threshold = 0.85  # This seems to be the default
-        # Changing the two verbosity options seems to have no effect in this concrete case
-        self.quiet = True
-        self.progress_bar = False
+from .workdir import WorkingDirectory
 
 
 def optimize_pdf(workdir: WorkingDirectory, optlevel: int, quality: int | None, pool_size: int) -> bool:
     try:
         # ObjectStreamMode is actually from pikepdf, but I did not want to include that as a dependency
+        from ocrmypdf._options import OcrOptions
         from ocrmypdf.optimize import ObjectStreamMode, PdfContext, optimize
         from ocrmypdf.pdfinfo import PdfInfo
     except ImportError:
         loguru.logger.warning('Cannot detect OCRmyPDF. No optimizations will be performed on the output file.')
-        shutil.copy(workdir.combined_pdf_path, workdir.optimized_pdf_path)
         return False
 
-    options = OptimizeOptions(
-        input_file=str(workdir.combined_pdf_path),
+    options = OcrOptions(
+        input_file=workdir.combined_pdf_without_text_path,
+        output_file=workdir.combined_pdf_path,
         jobs=pool_size,  # These correspond to CPU cores rather than threads, but it seems better to use the available pool size parameter
-        optimize_=optlevel,
+        optimize=optlevel,
         # When 0, these should be adjusted inside OCRmyPDF's "optimize" function
-        jpeg_quality=quality or 0,
+        jpg_quality=quality or 0,
         png_quality=quality or 0
     )
 
@@ -83,12 +54,11 @@ def perform_ocr(workdir: WorkingDirectory, options: dict[str, Any]) -> bool:
         from ocrmypdf import api
     except ImportError:
         loguru.logger.warning('Cannot detect OCRmyPDF. No OCR will be performed on the output file.')
-        shutil.copy(workdir.combined_pdf_without_text_path, workdir.combined_pdf_path)
         return False
 
     try:
         api.ocr(
-            input_file=workdir.combined_pdf_without_text_path,
+            input_file_or_options=workdir.combined_pdf_without_text_path,
             output_file=workdir.combined_pdf_path,
             **options,
         )

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,8 +1,8 @@
 [project]
 name = "dpsprep"
-version = "2.4.0"
+version = "2.4.1"
 description = "A DjVu to PDF converter with a focus on small output size and the ability to preserve document outlines and text layers"
-requires-python = ">=3.10,<4.0"
+requires-python = ">=3.11, <4.0"
 authors = [
   { name = "Kevin Arthur Schiff Croker" },
   { name = "Ianis Vasilev", email = "ianis@ivasilev.net" }
@@ -20,6 +20,11 @@ dependencies = [
 [project.urls]
 Repository = "https://github.com/kcroker/dpsprep.git"
 
+[project.optional-dependencies]
+compress = [
+  "ocrmypdf (>=17.3.0)"
+]
+
 [project.scripts]
 dpsprep = "dpsprep:dpsprep"
 
@@ -33,13 +38,8 @@ dev = [
   "types-pillow (>=10.2.0.20240822)",
 ]
 
-[optional-dependencies]
-compress = [
-  "ocrmypdf (>=15.4.4)"
-]
-
 [build-system]
-requires = ["uv_build (>=0.10.5,<0.11.0)"]
+requires = ["uv_build (>=0.10.5, <0.11.0)"]
 build-backend = "uv_build"
 
 [tool.uv.build-backend]