diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..246a14d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,130 @@ +## v2.4.2 (2026-02-24) + +* Fix issue where only the main process has its logger configured + +## v2.4.1 (2026-02-24) + +* Fix compatibility issues with the new OCRmyPDF API +* Remove support for Python 3.10 + +## v2.4.0 (2026-02-24) + +* Migrate to `uv` from `pyenv` + `poetry` +* Update dependencies + +## v2.3.1 (2025-10-28) + +* Fix mixed-up email format + +## v2.3.0 (2025-10-28) + +* Remove support for Python 3.9 +* Migrate to standardized `pyproject.toml` +* Update dependencies + +## v2.2.15 (2025-07-02) + +* Add support for installation via `pipx` + +## v2.2.14 (2025-05-27) + +* Improve installation notes +* Bump djvulibre-python version + +## v2.2.13 (2025-02-12) + +* Fail-safe quality settings for non-JPEG images + +## v2.2.12 (2025-01-27) + +* Update pytest_image_diff and fix newly broken tests + +## v2.2.11 (2025-01-26) + +* Update dependencies + +## v2.2.10 (2024-10-25) + +* Improve interface with OCRmyPDF +* Fix CI build + +## v2.2.9 (2024-10-25) + +* Improve type hints +* Update dependencies + +## v2.2.8 (2024-10-18) + +* Support single characters in the text layer + +## v2.2.7 (2024-08-27) + +* Improve tab and newline handling + +## v2.2.6 (2024-08-05) + +* Fix accidental whitespace removal from text blocks + +## v2.2.5 (2024-07-20) + +* Re-add ability to force the image mode (RGB/Grayscale/Monochrome) + +## v2.2.4 (2024-02-24) + +* Update dependencies + +## v2.2.3 (2023-12-09) + +* Fix CI build +* Ignore invalid UTF-8 sequences +* Ignore unrecognized page titles in the outline (#23) + +## v2.2.2 (2023-10-29) + +* Update dependencies + +## v2.2.1 (2023-11-06) + +* Handle invalid PDF pages +* Fix exception in text layer processing (#20) + +## v2.2.0 (2023-10-28) + +* Add options for disabling the text layer and for directly running OCR + +## v2.1.5 (2023-10-27) + +* Fix inverted colors in images (#16) + +## v2.1.4 
(2023-10-06) + +* Fix typo in logging code + +## v2.1.3 (2023-10-06) + +* Improve logging + +## v2.1.2 (2023-10-02) + +* Accidental version bump + +## v2.1.1 (2023-10-02) + +* Remove debug code + +## v2.1.0 (2023-10-02) + +* Add support for OCRmyPDF + +## v2.0.2 (2023-08-03) + +* Update some other dependencies +* Replace `python-djvulibre` with `djvulibre-python` + +## v2.0.1 (2023-06-22) + +* Minor improvements in packaging + +## v2.0.0 (2023-05-04) + +* Fully rewrite diff --git a/dpsprep/__main__.py b/dpsprep/__main__.py deleted file mode 100644 index 8610a82..0000000 --- a/dpsprep/__main__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .dpsprep import dpsprep - -dpsprep() diff --git a/dpsprep/dpsprep.py b/dpsprep/dpsprep.py index bdaced3..c71e688 100644 --- a/dpsprep/dpsprep.py +++ b/dpsprep/dpsprep.py @@ -17,7 +17,8 @@ from .workdir import WorkingDirectory -def process_page_bg(workdir: WorkingDirectory, mode: ImageMode, quality: int | None, i: int) -> None: +def process_page_bg(workdir: WorkingDirectory, mode: ImageMode, quality: int | None, i: int, *, verbose: bool) -> None: + configure_loguru(verbose=verbose) page_number = i + 1 if workdir.get_page_pdf_path(i).exists(): @@ -47,10 +48,13 @@ def process_page_bg(workdir: WorkingDirectory, mode: ImageMode, quality: int | N loguru.logger.debug(f'Image data with size {human_readable_size(pdf_size)} from page {page_number} processed in {time() - start_time:.2f}s and written to working directory.') -def process_text(workdir: WorkingDirectory) -> None: +def process_text(workdir: WorkingDirectory, *, verbose: bool) -> None: + configure_loguru(verbose=verbose) + if workdir.text_layer_pdf_path.exists(): loguru.logger.info('Text data already processed.') return + loguru.logger.debug('Processing text data.') start_time = time() @@ -144,11 +148,11 @@ def dpsprep( # noqa: C901, PLR0912, PLR0913, PLR0915 tasks = list[multiprocessing.pool.AsyncResult]() if not no_text: - tasks.append(pool.apply_async(func=process_text, 
args=[workdir])) + tasks.append(pool.apply_async(func=process_text, args=[workdir], kwds={'verbose': verbose})) for i in range(len(document.pages)): # Cannot pass the page object itself because it does not support serialization for IPC - tasks.append(pool.apply_async(func=process_page_bg, args=[workdir, mode, quality, i])) + tasks.append(pool.apply_async(func=process_page_bg, args=[workdir, mode, quality, i], kwds={'verbose': verbose})) pool.close() pool_is_working = True diff --git a/pyproject.toml b/pyproject.toml index cfdde44..e8e5532 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpsprep" -version = "2.4.1" +version = "2.4.2" description = "A DjVu to PDF converter with a focus on small output size and the ability to preserve document outlines and text layers" requires-python = ">=3.11, <4.0" authors = [ diff --git a/uv.lock b/uv.lock index b979416..e28ef43 100644 --- a/uv.lock +++ b/uv.lock @@ -284,7 +284,7 @@ sdist = { url = "https://files.pythonhosted.org/packages/ac/b9/90f437786f185d101 [[package]] name = "dpsprep" -version = "2.4.1" +version = "2.4.2" source = { editable = "." } dependencies = [ { name = "click" },