From 22bd9e09161403f8e6155dd3d8e058ccc4835815 Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Mon, 11 Dec 2023 14:13:32 -0800 Subject: [PATCH 1/3] Squashed commit of the following: commit f360421862ef9a7231f5ce15f61802bc9d0ea242 Author: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Mon Dec 11 13:40:41 2023 -0800 Fixes Python 3.8 compatibility commit 9b1539d4410af011d133a68f7b37dbb45b1c1322 Author: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Mon Dec 11 13:32:54 2023 -0800 Implements a workaround for Gotenberg and its handling of non-latin filenames --- CHANGELOG.md | 8 ++++++++ src/gotenberg_client/_base.py | 16 ++++++++++++++++ tests/test_misc_stuff.py | 24 ++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4f1341..c0d39e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- Implemented an internal workaround for older Gotenberg versions and their handling of non-latin filenames. 
+ - When detected, the files will be copied into a temporary directory and the filename cleaned + - Gotenberg 8.0.0 will start implementing something similar once released + ## [0.4.0] - 2023-12-04 ### Changed diff --git a/src/gotenberg_client/_base.py b/src/gotenberg_client/_base.py index bf223bf..2c46707 100644 --- a/src/gotenberg_client/_base.py +++ b/src/gotenberg_client/_base.py @@ -4,6 +4,7 @@ import logging from contextlib import ExitStack from pathlib import Path +from tempfile import TemporaryDirectory from types import TracebackType from typing import Dict from typing import Optional @@ -102,8 +103,23 @@ def _add_file_map(self, filepath: Path, name: Optional[str] = None) -> None: """ if name is None: name = filepath.name + if name in self._file_map: # pragma: no cover logger.warning(f"{name} has already been provided, overwriting anyway") + + try: + name.encode("utf8").decode("ascii") + except UnicodeDecodeError: + logger.warning(f"filename {name} includes non-ascii characters, compensating for Gotenberg") + tmp_dir = self._stack.enter_context(TemporaryDirectory()) + # Filename can be fixed, the directory is random + new_path = Path(tmp_dir) / Path(name).with_name(f"clean-filename-copy{filepath.suffix}") + logger.warning(f"New path {new_path}") + new_path.write_bytes(filepath.read_bytes()) + filepath = new_path + name = new_path.name + logger.warning(f"New name {name}") + self._file_map[name] = filepath def pdf_format(self, pdf_format: PdfAFormat) -> Self: diff --git a/tests/test_misc_stuff.py b/tests/test_misc_stuff.py index f6e9cb4..b71e5dd 100644 --- a/tests/test_misc_stuff.py +++ b/tests/test_misc_stuff.py @@ -1,4 +1,7 @@ +import shutil +import tempfile import uuid +from pathlib import Path from httpx import codes @@ -43,3 +46,24 @@ def test_output_filename( assert resp.headers["Content-Type"] == "application/pdf" assert "Content-Disposition" in resp.headers assert f"{filename}.pdf" in resp.headers["Content-Disposition"] + + def 
test_libre_office_convert_cyrillic(self, client: GotenbergClient): + """ + Gotenberg versions before 8.0.0 could not internally handle filenames with + non-ASCII characters. This replicates such a thing against 1 endpoint to + verify the workaround inside this library + """ + test_file = SAMPLE_DIR / "sample.odt" + + with tempfile.TemporaryDirectory() as temp_dir: + copy = shutil.copy( + test_file, + Path(temp_dir) / "Карточка партнера Тауберг Альфа.odt", # noqa: RUF001 + ) + + with client.libre_office.to_pdf() as route: + resp = call_run_with_server_error_handling(route.convert(copy)) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" From 0727d7f3413e80b249b66c316400fb81b400165b Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Mon, 11 Dec 2023 14:24:53 -0800 Subject: [PATCH 2/3] Squashed commit of the following: commit 766418ff15f16bda15fe76727f271e4f03834638 Author: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Mon Dec 11 14:16:07 2023 -0800 Adds a docker inspect for the image to know what version was pulled --- .docker/docker-compose.ci-test-edge.yml | 6 +++--- .github/workflows/ci.yml | 3 +++ CHANGELOG.md | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.docker/docker-compose.ci-test-edge.yml b/.docker/docker-compose.ci-test-edge.yml index 810c15e..ecdee63 100644 --- a/.docker/docker-compose.ci-test-edge.yml +++ b/.docker/docker-compose.ci-test-edge.yml @@ -4,10 +4,10 @@ version: "3" services: - gotenberg-client-test-server: + gotenberg-client-test-edge-server: image: docker.io/gotenberg/gotenberg:edge - hostname: gotenberg-client-test-server - container_name: gotenberg-client-test-server + hostname: gotenberg-client-test-edge-server + container_name: gotenberg-client-test-edge-server network_mode: host restart: unless-stopped command: diff --git a/.github/workflows/ci.yml 
b/.github/workflows/ci.yml index 4942634..aa3ceab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,6 +64,7 @@ jobs: docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml up --detach echo "Wait for container to be started" sleep 5 + docker inspect gotenberg-client-test-server - name: Install poppler-utils run: | @@ -113,6 +114,7 @@ jobs: docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test-edge.yml up --detach echo "Wait for container to be started" sleep 5 + docker inspect gotenberg-client-test-edge-server - name: Install poppler-utils run: | @@ -131,6 +133,7 @@ jobs: pip install --upgrade hatch - name: Run tests + continue-on-error: true run: hatch run cov - name: Stop containers diff --git a/CHANGELOG.md b/CHANGELOG.md index c0d39e1..ebfc9d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Implemented an internal workaround for older Gotenberg versions and their handling of non-latin filenames. - When detected, the files will be copied into a temporary directory and the filename cleaned - Gotenberg 8.0.0 will start implementing something similar once released +- The pulled Gotenberg image is now inspected, allowing local re-creation of failures against specific digests +- The `:edge` tag testing is now allowed to fail ## [0.4.0] - 2023-12-04 From 7f7141c98247bf90926320c0915f6a4756a78ca5 Mon Sep 17 00:00:00 2001 From: Trenton Holmes <797416+stumpylog@users.noreply.github.com> Date: Mon, 11 Dec 2023 15:03:41 -0800 Subject: [PATCH 3/3] Sets the version to 0.4.1 --- CHANGELOG.md | 2 +- src/gotenberg_client/__about__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebfc9d7..e37b036 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unreleased] +## [0.4.1] - 2023-12-11 ### Fixed diff --git a/src/gotenberg_client/__about__.py b/src/gotenberg_client/__about__.py index ece3781..87873dc 100644 --- a/src/gotenberg_client/__about__.py +++ b/src/gotenberg_client/__about__.py @@ -1,4 +1,4 @@ # SPDX-FileCopyrightText: 2023-present Trenton H # # SPDX-License-Identifier: MPL-2.0 -__version__ = "0.4.0" +__version__ = "0.4.1"