diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b5f8d6f..fa4f1a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,20 +58,11 @@ jobs: strategy: fail-fast: false matrix: - # No pikepdf wheels for pypy3.8 - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', 'pypy3.9', 'pypy3.10'] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', 'pypy3.9', 'pypy3.10'] steps: - uses: actions/checkout@v4 - - - name: Start containers - run: | - docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml pull --quiet - docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml up --detach - echo "Wait for container to be started" - sleep 5 - docker inspect gotenberg-client-test-server - name: Install poppler-utils run: | @@ -83,6 +74,11 @@ jobs: with: python-version: ${{ matrix.python-version }} cache: 'pip' + - + name: Pull Docker images + run: | + docker compose --file tests/docker/docker-compose.ci-test-edge.yml pull --quiet + docker compose --file tests/docker/docker-compose.ci-test.yml pull --quiet - name: Install Hatch run: | @@ -90,27 +86,26 @@ jobs: hatch --version uv --version - - name: Show environment - run: | - hatch test --show --python ${{ matrix.python-version }} + name: Show environment + run: | + hatch test --show --python ${{ matrix.python-version }} - name: Run tests run: | - hatch test --cover --python ${{ matrix.python-version }} - ls -ahl . 
+ hatch test --cover --junitxml=junit.xml -o junit_family=legacy --python ${{ matrix.python-version }} - name: Upload coverage to Codecov - if: matrix.python-version == '3.10' - uses: codecov/codecov-action@v4 + if: matrix.python-version == '3.11' + uses: codecov/codecov-action@v5 with: - # not required for public repos, but intermittently fails otherwise token: ${{ secrets.CODECOV_TOKEN }} - - name: Stop containers - if: always() - run: | - docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml logs - docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test.yml down + name: Upload test results to Codecov + if: ${{ !cancelled() }} + uses: codecov/test-results-action@v1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + flags: ${{ matrix.python-version }} test-edge: name: Test Gotenberg :edge @@ -119,17 +114,11 @@ jobs: contents: read needs: - lint + env: + GOTENBERG_CLIENT_EDGE_TEST: 1 steps: - uses: actions/checkout@v4 - - - name: Start containers - run: | - docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test-edge.yml pull --quiet - docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test-edge.yml up --detach - echo "Wait for container to be started" - sleep 5 - docker inspect gotenberg-client-test-edge-server - name: Install poppler-utils run: | @@ -154,13 +143,6 @@ jobs: name: Run tests run: | hatch test --cover --python 3.11 - ls -ahl . 
- - - name: Stop containers - if: always() - run: | - docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test-edge.yml logs - docker compose --file ${GITHUB_WORKSPACE}/.docker/docker-compose.ci-test-edge.yml down build: name: Build @@ -297,4 +279,4 @@ jobs: path: dist - name: Publish build to PyPI - uses: pypa/gh-action-pypi-publish@v1.10.2 + uses: pypa/gh-action-pypi-publish@v1.12.2 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 11d1ffb..17624aa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,7 +30,7 @@ repos: - id: detect-private-key # See https://github.com/prettier/prettier/issues/15742 for the fork reason - repo: https://github.com/rbubley/mirrors-prettier - rev: "v3.3.3" + rev: "v3.4.2" hooks: - id: prettier types_or: @@ -45,13 +45,13 @@ repos: - id: codespell # Python hooks - repo: https://github.com/astral-sh/ruff-pre-commit - rev: 'v0.6.9' + rev: 'v0.8.2' hooks: # Run the linter. - id: ruff # Run the formatter. - id: ruff-format - repo: https://github.com/tox-dev/pyproject-fmt - rev: "2.2.4" + rev: "v2.5.0" hooks: - id: pyproject-fmt diff --git a/CHANGELOG.md b/CHANGELOG.md index 5399e05..50ddee3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [0.8.0] - 2024-12-11 + +### Breaking Change + +- Dropped support for Python 3.8 ([#43](https://github.com/stumpylog/gotenberg-client/pull/43)) + +### Added + +- Official support and testing for Python 3.13 ([#25](https://github.com/stumpylog/gotenberg-client/pull/25)) +- Support for setting PDF metadata ([#42](https://github.com/stumpylog/gotenberg-client/pull/42)) + - Initial work by @spechtx in ([#40](https://github.com/stumpylog/gotenberg-client/pull/40)) +- Integrated Codecov test analytics ([#44](https://github.com/stumpylog/gotenberg-client/pull/44)) + +### Changed + +- Use `pytest-docker` to manage Docker image services ([#36](https://github.com/stumpylog/gotenberg-client/pull/36)) +- Bump pypa/gh-action-pypi-publish from 1.10.2 to 1.12.2 by @dependabot ([#41](https://github.com/stumpylog/gotenberg-client/pull/41)) +- Bump codecov/codecov-action from 4 to 5 by @dependabot ([#41](https://github.com/stumpylog/gotenberg-client/pull/41)) + ## [0.7.0] - 2024-10-08 ### Fixed diff --git a/README.md b/README.md index ebef672..1534d70 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,35 @@ with GotenbergClient("http://localhost:3000") as client: response.to_file(Path("my-world.pdf")) ``` +Adding metadata to a PDF: + +This example shows how to add metadata to your generated PDF. 
All metadata fields are optional and include: + +- Document info (title, author, subject, keywords) +- Dates (creation, modification) +- Technical details (pdf version, creator, producer) +- PDF standards (trapped status, marked status) + +```python +from gotenberg_client import GotenbergClient +from datetime import datetime + +with GotenbergClient("http://localhost:3000") as client: + with client.chromium.html_to_pdf() as route: + response = (route + .index("my-index.html") + .metadata( + title="My Document", + author="John Doe", + subject="Example PDF", + keywords=["sample", "document", "test"], + creation_date=datetime.now(), + trapped=False + ) + .run()) + response.to_file(Path("my-index.pdf")) +``` + To ensure the proper clean up of all used resources, both the client and the route(s) should be used as context manager. If for some reason you cannot, you should `.close` the client and any routes: diff --git a/docs/roadmap.md b/docs/roadmap.md index 56fce67..b2e8fff 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -1,12 +1,3 @@ # Roadmap -## Custom Response Class - -Currently, the response returned is a basic httpx.Response. It could be useful to abstract this in some manner, especially for responses which return a zip file. - -- Ability to iterate through the zip file contents in some way -- Ability to write the response to some given output location? - -## Missing Routes/Options - -- Missing the configuration of the units of page size, currently it is always inches +While I'm sure there's something out there, nothing is currently on the roadmap. 
diff --git a/docs/routes.md b/docs/routes.md index e9f75f4..8963003 100644 --- a/docs/routes.md +++ b/docs/routes.md @@ -95,11 +95,45 @@ These options are not yet implemented | `pdfa` | `.pdf_format()` | `PdfAFormat` | | | `pdfua` | | N/A | | -#### Metadata +#### PDF Metadata Support [Gotenberg Documentation](https://gotenberg.dev/docs/routes#metadata-chromium) -These options are not yet implemented +Add metadata to your PDFs: + +```python +from gotenberg_client import GotenbergClient +from datetime import datetime + +with GotenbergClient("http://localhost:3000") as client: + with client.chromium.html_to_pdf() as route: + response = (route + .index("my-index.html") + .metadata( + title="My Document", + author="John Doe", + creation_date=datetime.now(), + keywords=["sample", "document"], + subject="Sample PDF Generation", + trapped=False + ) + .run()) +``` + +Supported metadata fields: + +- `title`: Document title +- `author`: Document author +- `subject`: Document subject +- `keywords`: List of keywords +- `creator`: Creating application +- `creation_date`: Creation datetime +- `modification_date`: Last modification datetime +- `producer`: PDF producer +- `trapped`: Trapping status (a `bool`, or a `TrappedStatus` member: `TRUE`, `FALSE`, `UNKNOWN`) +- `pdf_copyright`: Copyright information +- `marked`: PDF marked status +- `pdf_version`: PDF version number ## LibreOffice diff --git a/pyproject.toml b/pyproject.toml index 0d83511..97bd434 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ license = "MPL-2.0" authors = [ { name = "Trenton H", email = "rda0128ou@mozmail.com" }, ] -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ "Development Status :: 4 - Beta", "Environment :: Web Environment", @@ -25,18 +25,17 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", 
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] dynamic = [ "version" ] dependencies = [ - "httpx[http2]~=0.24; python_version<'3.9'", - "httpx[http2]~=0.27; python_version>='3.9'", + "httpx[http2]~=0.28", "typing-extensions; python_version<'3.11'", ] @@ -64,7 +63,7 @@ installer = "uv" [tool.hatch.envs.hatch-static-analysis] # https://hatch.pypa.io/latest/config/internal/static-analysis/ -dependencies = [ "ruff ~= 0.6" ] +dependencies = [ "ruff ~= 0.8" ] config-path = "none" [tool.hatch.envs.hatch-test] @@ -74,7 +73,6 @@ randomize = true dependencies = [ "coverage-enable-subprocess == 1.0", "coverage[toml] ~= 7.6", - "pytest < 8.0; python_version < '3.9'", "pytest ~= 8.3; python_version >= '3.9'", "pytest-mock ~= 3.14", "pytest-randomly ~= 3.15", @@ -83,10 +81,10 @@ dependencies = [ ] extra-dependencies = [ "pytest-sugar", - "pytest-httpx == 0.30.0; python_version >= '3.9'", - "pytest-httpx ~= 0.22; python_version < '3.9'", + "pytest-httpx ~= 0.35", "pikepdf", "python-magic", + "pytest-docker ~= 3.1", ] extra-args = [ "--maxprocesses=8", "--pythonwarnings=all" ] @@ -109,7 +107,7 @@ cov-report = [ ] [[tool.hatch.envs.hatch-test.matrix]] -python = [ "3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.9", "pypy3.10" ] +python = [ "3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.9", "pypy3.10" ] # # Custom Environments @@ -117,11 +115,11 @@ python = [ "3.8", "3.9", "3.10", "3.11", "3.12", "pypy3.9", "pypy3.10" ] [tool.hatch.envs.typing] detached = true dependencies = [ - "mypy ~= 1.11", + "mypy ~= 1.13", "httpx", "pytest", "pikepdf", - "pytest-httpx == 0.30.0", + "pytest-httpx ~= 0.35", ] [tool.hatch.envs.typing.scripts] @@ -168,7 +166,7 @@ deploy = [ # [tool.ruff] -target-version = "py38" +target-version = "py39" line-length = 120 # https://docs.astral.sh/ruff/settings/ @@ 
-244,6 +242,7 @@ lint.ignore = [ ] # Tests can use magic values, assertions, and relative imports lint.per-file-ignores."tests/**/*" = [ "PLR2004", "S101", "TID252" ] +lint.per-file-ignores."tests/utils.py" = [ "S603" ] # No relative imports lint.flake8-tidy-imports.ban-relative-imports = "all" # One import per line @@ -251,6 +250,9 @@ lint.isort.force-single-line = true # Recognize us please lint.isort.known-first-party = [ "gotenberg_client" ] +[tool.pyproject-fmt] +max_supported_python = "3.13" + [tool.pytest.ini_options] minversion = "7.0" testpaths = [ "tests" ] diff --git a/src/gotenberg_client/__about__.py b/src/gotenberg_client/__about__.py index 7404394..9c90f17 100644 --- a/src/gotenberg_client/__about__.py +++ b/src/gotenberg_client/__about__.py @@ -1,4 +1,4 @@ # SPDX-FileCopyrightText: 2023-present Trenton H # # SPDX-License-Identifier: MPL-2.0 -__version__ = "0.7.0" +__version__ = "0.8.0" diff --git a/src/gotenberg_client/__init__.py b/src/gotenberg_client/__init__.py index d1f211d..3229a0f 100644 --- a/src/gotenberg_client/__init__.py +++ b/src/gotenberg_client/__init__.py @@ -4,15 +4,19 @@ from gotenberg_client._client import GotenbergClient from gotenberg_client._errors import BaseClientError from gotenberg_client._errors import CannotExtractHereError +from gotenberg_client._errors import InvalidKeywordError +from gotenberg_client._errors import InvalidPdfRevisionError from gotenberg_client._errors import MaxRetriesExceededError from gotenberg_client.responses import SingleFileResponse from gotenberg_client.responses import ZipFileResponse __all__ = [ - "GotenbergClient", - "SingleFileResponse", - "ZipFileResponse", "BaseClientError", "CannotExtractHereError", + "GotenbergClient", + "InvalidKeywordError", + "InvalidPdfRevisionError", "MaxRetriesExceededError", + "SingleFileResponse", + "ZipFileResponse", ] diff --git a/src/gotenberg_client/_base.py b/src/gotenberg_client/_base.py index 1f2131e..9c93062 100644 --- a/src/gotenberg_client/_base.py +++ 
b/src/gotenberg_client/_base.py @@ -6,10 +6,7 @@ from pathlib import Path from time import sleep from types import TracebackType -from typing import Dict from typing import Optional -from typing import Tuple -from typing import Type from httpx import Client from httpx import HTTPStatusError @@ -71,13 +68,13 @@ def __init__(self, client: Client, api_route: str) -> None: self._route = api_route self._stack = ExitStack() # These are the options that will be set to Gotenberg. Things like PDF/A - self._form_data: Dict[str, str] = {} + self._form_data: dict[str, str] = {} # These are the names of files, mapping to their Path - self._file_map: Dict[str, Path] = {} + self._file_map: dict[str, Path] = {} # Additional in memory resources, mapping the referenced name to the content and an optional mimetype - self._in_memory_resources: Dict[str, Tuple[str, Optional[str]]] = {} + self._in_memory_resources: dict[str, tuple[str, Optional[str]]] = {} # Any header that will also be sent - self._headers: Dict[str, str] = {} + self._headers: dict[str, str] = {} def __enter__(self) -> Self: self.reset() @@ -85,7 +82,7 @@ def __enter__(self) -> Self: def __exit__( self, - exc_type: Optional[Type[BaseException]], + exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType], ) -> None: @@ -111,6 +108,7 @@ def _base_run(self) -> Response: Executes the configured route against the server and returns the resulting Response. 
""" + resp = self._client.post( url=self._route, headers=self._headers, diff --git a/src/gotenberg_client/_client.py b/src/gotenberg_client/_client.py index 1c53259..c1fe470 100644 --- a/src/gotenberg_client/_client.py +++ b/src/gotenberg_client/_client.py @@ -3,9 +3,7 @@ # SPDX-License-Identifier: MPL-2.0 import logging from types import TracebackType -from typing import Dict from typing import Optional -from typing import Type from httpx import Client @@ -65,7 +63,7 @@ def __init__( self.merge = MergeApi(self._client) self.health = HealthCheckApi(self._client) - def add_headers(self, header: Dict[str, str]) -> None: + def add_headers(self, header: dict[str, str]) -> None: """ Update the httpx Client headers with the given values. @@ -110,7 +108,7 @@ def set_error_webhook_http_method(self, method: HttpMethodsType = "PUT") -> None """ self.add_headers({"Gotenberg-Webhook-Error-Method": method}) - def set_webhook_extra_headers(self, extra_headers: Dict[str, str]) -> None: + def set_webhook_extra_headers(self, extra_headers: dict[str, str]) -> None: """ Set additional HTTP headers for Gotenberg to use when calling webhooks. 
@@ -138,7 +136,7 @@ def close(self) -> None: def __exit__( self, - exc_type: Optional[Type[BaseException]], + exc_type: Optional[type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType], ) -> None: diff --git a/src/gotenberg_client/_convert/chromium.py b/src/gotenberg_client/_convert/chromium.py index 4e176f3..b0de523 100644 --- a/src/gotenberg_client/_convert/chromium.py +++ b/src/gotenberg_client/_convert/chromium.py @@ -3,10 +3,8 @@ # SPDX-License-Identifier: MPL-2.0 import logging from pathlib import Path -from typing import List from typing import Literal from typing import Optional -from typing import Tuple from httpx import Client @@ -17,6 +15,7 @@ from gotenberg_client._convert.common import EmulatedMediaMixin from gotenberg_client._convert.common import HeaderFooterMixin from gotenberg_client._convert.common import InvalidStatusCodesMixin +from gotenberg_client._convert.common import MetadataMixin from gotenberg_client._convert.common import PageOrientMixin from gotenberg_client._convert.common import PagePropertiesMixin from gotenberg_client._convert.common import PerformanceModeMixin @@ -82,7 +81,7 @@ def string_resource(self, resource: str, name: str, mime_type: Optional[str] = N self._add_in_memory_file(resource, name=name, mime_type=mime_type) return self - def resources(self, resources: List[Path]) -> Self: + def resources(self, resources: list[Path]) -> Self: """ Adds multiple resource files for the index HTML file to reference. @@ -94,7 +93,7 @@ def resources(self, resources: List[Path]) -> Self: def string_resources( self, - resources: List[Tuple[str, str, Optional[str]]], + resources: list[tuple[str, str, Optional[str]]], ) -> Self: """ Process string resources. 
@@ -125,6 +124,7 @@ class HtmlRoute( HeaderFooterMixin, RenderControlMixin, PageOrientMixin, + MetadataMixin, _RouteWithResources, _FileBasedRoute, ): @@ -141,6 +141,7 @@ class UrlRoute( EmulatedMediaMixin, CustomHTTPHeaderMixin, PageOrientMixin, + MetadataMixin, BaseSingleFileResponseRoute, ): """ @@ -183,7 +184,7 @@ def _get_all_resources(self) -> ForceMultipartDict: return FORCE_MULTIPART -class MarkdownRoute(PagePropertiesMixin, HeaderFooterMixin, _RouteWithResources, _FileBasedRoute): +class MarkdownRoute(PagePropertiesMixin, HeaderFooterMixin, MetadataMixin, _RouteWithResources, _FileBasedRoute): """ Represents the Gotenberg route for converting Markdown files to a PDF. @@ -212,7 +213,7 @@ def markdown_file(self, markdown_file: Path) -> Self: return self - def markdown_files(self, markdown_files: List[Path]) -> Self: + def markdown_files(self, markdown_files: list[Path]) -> Self: """ Adds multiple Markdown files to be converted. diff --git a/src/gotenberg_client/_convert/common.py b/src/gotenberg_client/_convert/common.py index 5f89381..27671ff 100644 --- a/src/gotenberg_client/_convert/common.py +++ b/src/gotenberg_client/_convert/common.py @@ -3,12 +3,17 @@ # SPDX-License-Identifier: MPL-2.0 import json import logging +from collections.abc import Iterable +from datetime import datetime from pathlib import Path -from typing import Dict -from typing import Iterable +from typing import Final +from typing import Optional +from typing import Union from warnings import warn from gotenberg_client._base import BaseSingleFileResponseRoute +from gotenberg_client._errors import InvalidKeywordError +from gotenberg_client._errors import InvalidPdfRevisionError from gotenberg_client._types import PageScaleType from gotenberg_client._types import Self from gotenberg_client._types import WaitTimeType @@ -16,6 +21,7 @@ from gotenberg_client.options import PageMarginsType from gotenberg_client.options import PageOrientation from gotenberg_client.options import PageSize +from 
gotenberg_client.options import TrappedStatus logger = logging.getLogger() @@ -187,7 +193,7 @@ def user_agent(self, agent: str) -> Self: self._form_data.update({"userAgent": agent}) # type: ignore[attr-defined,misc] return self - def headers(self, headers: Dict[str, str]) -> Self: + def headers(self, headers: dict[str, str]) -> Self: json_str = json.dumps(headers) self._form_data.update({"extraHttpHeaders": json_str}) # type: ignore[attr-defined,misc] return self @@ -233,3 +239,143 @@ def skip_network_idle(self) -> Self: def use_network_idle(self) -> Self: self._form_data.update({"skipNetworkIdleEvent": "false"}) # type: ignore[attr-defined,misc] return self + + +class MetadataMixin: + """ + Mixin for PDF metadata support. + + This mixin provides functionality to set PDF metadata for documents processed through + the Gotenberg API (https://gotenberg.dev/docs/routes#metadata-chromium). + + Important Notes: + - Gotenberg will use the current date/time for creation_date and modification_date, + even if custom dates are provided. + - Gotenberg will use its own pdf_version, even if a custom version is provided. 
+ + Example: + from gotenberg_client import GotenbergClient + from datetime import datetime + from zoneinfo import ZoneInfo + from pathlib import Path + + with GotenbergClient('http://localhost:3000') as client: + with client.chromium.url_to_pdf() as route: + + response = ( + route.url('https://hello.world') + .metadata( + author='John Doe', + pdf_copyright='© 2024 My Company', + creation_date = datetime.now(tz=ZoneInfo("Europe/Berlin")), + creator='My Application', + keywords=['keyword', 'example'], + marked=True, + modification_date=datetime.now(tz=ZoneInfo("Europe/Berlin")), + pdf_version=1.7, + producer='PDF Producer', + subject='My Subject', + title='My Title', + trapped=True, + ).run() + ) + + response.to_file(Path('my-world.pdf')) + """ + + MIN_PDF_VERSION: Final[float] = 1.0 + MAX_PDF_VERSION: Final[float] = 2.0 + + def metadata( + self, + author: Optional[str] = None, + pdf_copyright: Optional[str] = None, + creation_date: Optional[datetime] = None, + creator: Optional[str] = None, + keywords: Optional[list[str]] = None, + marked: Optional[bool] = None, + modification_date: Optional[datetime] = None, + pdf_version: Optional[float] = None, + producer: Optional[str] = None, + subject: Optional[str] = None, + title: Optional[str] = None, + trapped: Optional[Union[bool, TrappedStatus]] = None, + ) -> Self: + """ + Sets PDF metadata for the document. 
+ + Args: + author: Document author name + pdf_copyright: Copyright information + creation_date: Document creation date (Note: Gotenberg will override this) + creator: Name of the creating application + keywords: List of keywords/tags for the document + marked: Whether the PDF is marked for structure + modification_date: Last modification date (Note: Gotenberg will override this) + pdf_version: PDF version number (Note: Gotenberg will override this) + producer: Name of the PDF producer + subject: Document subject/description + title: Document title + trapped: Trapping status (bool, or a TrappedStatus member: TRUE, FALSE, UNKNOWN) + + Returns: + Self for method chaining + + Raises: + InvalidPdfRevisionError: If the provided PDF revision is outside the valid range + InvalidKeywordError: If any metadata keyword values are not allowed + TypeError: If any metadata values have incorrect types + """ + + # Validate metadata values + if pdf_version is not None and not (self.MIN_PDF_VERSION <= pdf_version <= self.MAX_PDF_VERSION): + msg = "PDF version must be between 1.0 and 2.0" + raise InvalidPdfRevisionError(msg) + + if trapped is not None and isinstance(trapped, bool): + trapped = TrappedStatus.TRUE if trapped else TrappedStatus.FALSE + + if keywords is not None: + if not all(isinstance(k, str) for k in keywords): + raise InvalidKeywordError("All keywords must be strings") # noqa: EM101, TRY003 + if any("," in k for k in keywords): + raise InvalidKeywordError("Keywords cannot contain commas") # noqa: EM101, TRY003 + + # Get existing metadata if any + existing_metadata: dict[str, Union[str, bool, float]] = {} + if "metadata" in self._form_data: # type: ignore[attr-defined,misc] + existing_metadata = json.loads(self._form_data["metadata"]) # type: ignore[attr-defined,misc] + + # Convert validated metadata to dictionary + metadata: dict[str, Union[str, bool, float]] = {} + + if author: + metadata["Author"] = author + if pdf_copyright: + metadata["Copyright"] = pdf_copyright + if 
creation_date: + metadata["CreationDate"] = creation_date.isoformat() + if creator: + metadata["Creator"] = creator + if keywords: + metadata["Keywords"] = ", ".join(keywords) + if marked is not None: + metadata["Marked"] = marked + if modification_date: + metadata["ModDate"] = modification_date.isoformat() + if pdf_version: + metadata["PDFVersion"] = pdf_version + if producer: + metadata["Producer"] = producer + if subject: + metadata["Subject"] = subject + if title: + metadata["Title"] = title + if trapped is not None: + metadata["Trapped"] = trapped.value + + # Merge existing and new metadata + if metadata: + self._form_data.update({"metadata": json.dumps({**existing_metadata, **metadata})}) # type: ignore[attr-defined,misc] + + return self diff --git a/src/gotenberg_client/_convert/libre_office.py b/src/gotenberg_client/_convert/libre_office.py index f1779e1..aaf9560 100644 --- a/src/gotenberg_client/_convert/libre_office.py +++ b/src/gotenberg_client/_convert/libre_office.py @@ -2,13 +2,13 @@ # # SPDX-License-Identifier: MPL-2.0 from pathlib import Path -from typing import List from typing import Union from httpx import Client from gotenberg_client._base import BaseApi from gotenberg_client._base import BaseSingleFileResponseRoute +from gotenberg_client._convert.common import MetadataMixin from gotenberg_client._convert.common import PageOrientMixin from gotenberg_client._convert.common import PageRangeMixin from gotenberg_client._types import Self @@ -17,7 +17,7 @@ from gotenberg_client.responses import ZipFileResponse -class LibreOfficeConvertRoute(PageOrientMixin, PageRangeMixin, BaseSingleFileResponseRoute): +class LibreOfficeConvertRoute(PageOrientMixin, PageRangeMixin, MetadataMixin, BaseSingleFileResponseRoute): """ Represents the Gotenberg route for converting documents to PDF using LibreOffice. 
@@ -53,7 +53,7 @@ def convert(self, input_file_path: Path) -> Self: self._result_is_zip = True return self - def convert_files(self, file_paths: List[Path]) -> Self: + def convert_files(self, file_paths: list[Path]) -> Self: """ Adds all provided files for conversion to individual PDFs. diff --git a/src/gotenberg_client/_convert/pdfa.py b/src/gotenberg_client/_convert/pdfa.py index deee746..1c383d7 100644 --- a/src/gotenberg_client/_convert/pdfa.py +++ b/src/gotenberg_client/_convert/pdfa.py @@ -2,14 +2,14 @@ # # SPDX-License-Identifier: MPL-2.0 from pathlib import Path -from typing import List from gotenberg_client._base import BaseApi from gotenberg_client._base import BaseSingleFileResponseRoute +from gotenberg_client._convert.common import MetadataMixin from gotenberg_client._types import Self -class PdfAConvertRoute(BaseSingleFileResponseRoute): +class PdfAConvertRoute(MetadataMixin, BaseSingleFileResponseRoute): """ Represents the Gotenberg route for converting PDFs to PDF/A format. @@ -34,7 +34,7 @@ def convert(self, file_path: Path) -> Self: self._add_file_map(file_path) return self - def convert_files(self, file_paths: List[Path]) -> Self: + def convert_files(self, file_paths: list[Path]) -> Self: """ Converts multiple PDF files to the provided PDF/A format. 
diff --git a/src/gotenberg_client/_errors.py b/src/gotenberg_client/_errors.py index aa02269..2dc6d68 100644 --- a/src/gotenberg_client/_errors.py +++ b/src/gotenberg_client/_errors.py @@ -23,3 +23,11 @@ def __init__(self, *, response: Response) -> None: class CannotExtractHereError(BaseClientError): pass + + +class InvalidPdfRevisionError(BaseClientError): + pass + + +class InvalidKeywordError(BaseClientError): + pass diff --git a/src/gotenberg_client/_merge.py b/src/gotenberg_client/_merge.py index d08ddf5..aee43cd 100644 --- a/src/gotenberg_client/_merge.py +++ b/src/gotenberg_client/_merge.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: MPL-2.0 from pathlib import Path from typing import Final -from typing import List from httpx import Client @@ -38,7 +37,7 @@ def __init__(self, client: Client, api_route: str) -> None: super().__init__(client, api_route) self._next = 1 - def merge(self, files: List[Path]) -> Self: + def merge(self, files: list[Path]) -> Self: """ Add the given files to the merge operation. diff --git a/src/gotenberg_client/_utils.py b/src/gotenberg_client/_utils.py index 97cfb2e..e671c0f 100644 --- a/src/gotenberg_client/_utils.py +++ b/src/gotenberg_client/_utils.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: MPL-2.0 from importlib.util import find_spec from pathlib import Path -from typing import Dict from typing import Final from typing import Optional from typing import Union @@ -12,12 +11,12 @@ # See https://github.com/psf/requests/issues/1081#issuecomment-428504128 -class ForceMultipartDict(Dict): +class ForceMultipartDict(dict): def __bool__(self) -> bool: return True -def optional_to_form(value: Optional[FormFieldType], name: str) -> Dict[str, str]: +def optional_to_form(value: Optional[FormFieldType], name: str) -> dict[str, str]: """ Converts an optional value to a form data field with the given name, handling None values gracefully. 
diff --git a/src/gotenberg_client/options.py b/src/gotenberg_client/options.py index 1e95668..1fa4f2f 100644 --- a/src/gotenberg_client/options.py +++ b/src/gotenberg_client/options.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: MPL-2.0 import dataclasses import enum -from typing import Dict from typing import Final from typing import Optional @@ -28,7 +27,7 @@ class PdfAFormat(enum.Enum): A2b = enum.auto() A3b = enum.auto() - def to_form(self) -> Dict[str, str]: + def to_form(self) -> dict[str, str]: """ Converts this PdfAFormat enum value to a dictionary suitable for form data. @@ -38,7 +37,7 @@ def to_form(self) -> Dict[str, str]: If the format is not supported (e.g., A1a), raises an Exception. """ - format_mapping: Final[Dict[PdfAFormat, str]] = { + format_mapping: Final[dict[PdfAFormat, str]] = { PdfAFormat.A1a: "PDF/A-1a", # Include deprecated format with warning PdfAFormat.A2b: "PDF/A-2b", PdfAFormat.A3b: "PDF/A-3b", @@ -67,7 +66,7 @@ class PageOrientation(enum.Enum): Landscape = enum.auto() Portrait = enum.auto() - def to_form(self) -> Dict[str, str]: + def to_form(self) -> dict[str, str]: """ Converts this PageOrientation enum value to a dictionary suitable for form data. @@ -76,7 +75,7 @@ def to_form(self) -> Dict[str, str]: and the corresponding Gotenberg value ("landscape" or "portrait") as the value. """ - orientation_mapping: Final[Dict[PageOrientation, Dict[str, str]]] = { + orientation_mapping: Final[dict[PageOrientation, dict[str, str]]] = { PageOrientation.Landscape: {"landscape": "true"}, PageOrientation.Portrait: {"landscape": "false"}, } @@ -97,7 +96,7 @@ class PageSize: width: Optional[PageSizeType] = None height: Optional[PageSizeType] = None - def to_form(self) -> Dict[str, str]: + def to_form(self) -> dict[str, str]: """ Converts this PageSize object to a dictionary suitable for form data. 
@@ -160,7 +159,7 @@ class MarginType: value: MarginSizeType unit: MarginUnitType = MarginUnitType.Undefined - def to_form(self, name: str) -> Dict[str, str]: + def to_form(self, name: str) -> dict[str, str]: """ Converts this MarginType object to a dictionary suitable for form data. @@ -193,7 +192,7 @@ class PageMarginsType: left: Optional[MarginType] = None right: Optional[MarginType] = None - def to_form(self) -> Dict[str, str]: + def to_form(self) -> dict[str, str]: """ Converts this PageMarginsType object to a dictionary suitable for form data. @@ -225,7 +224,7 @@ class EmulatedMediaType(str, enum.Enum): Print = enum.auto() Screen = enum.auto() - def to_form(self) -> Dict[str, str]: + def to_form(self) -> dict[str, str]: """ Converts this EmulatedMediaType enum value to a dictionary suitable for form data. @@ -235,3 +234,12 @@ def to_form(self) -> Dict[str, str]: """ return {"emulatedMediaType": self.name.lower()} + + +@enum.unique +class TrappedStatus(str, enum.Enum): + """Enum for valid trapped status values.""" + + TRUE = "True" + FALSE = "False" + UNKNOWN = "Unknown" diff --git a/tests/conftest.py b/tests/conftest.py index 9cf8c88..e143876 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,25 +4,84 @@ import logging import os import shutil +from collections.abc import Generator from pathlib import Path -from typing import Generator from typing import Union +import httpx import pytest from gotenberg_client import GotenbergClient from gotenberg_client import SingleFileResponse from gotenberg_client import ZipFileResponse +logger = logging.getLogger("gotenberg-client.tests") + + +def is_responsive(url): + try: + response = httpx.get(url) + except httpx.HTTPError: + logger.exception("Error connecting to service") + return False + else: + return response.status_code == httpx.codes.OK + + +@pytest.fixture(scope="session") +def docker_compose_file() -> Path: + if "GOTENBERG_CLIENT_EDGE_TEST" in os.environ: + return Path(__file__).parent / "docker" / 
"docker-compose.ci-test-edge.yml" + else: + return Path(__file__).parent / "docker" / "docker-compose.ci-test.yml" + + +@pytest.fixture(scope="session") +def gotenberg_service_name() -> str: + if "GOTENBERG_CLIENT_EDGE_TEST" in os.environ: + return "gotenberg-client-test-edge-server" + else: + return "gotenberg-client-test-server" + @pytest.fixture(scope="session") -def gotenberg_host() -> str: - return os.getenv("GOTENBERG_URL", "http://localhost:3000") +def webserver_service_name() -> str: + if "GOTENBERG_CLIENT_EDGE_TEST" in os.environ: + return "nginx-webserver-edge" + else: + return "nginx-webserver" @pytest.fixture(scope="session") -def web_server_host() -> str: - return os.getenv("WEBSERVER_HOST", "http://localhost:8888") +def webserver_docker_internal_url(webserver_service_name: str) -> str: + """ + The URL by which Gotenberg can access the webserver + """ + return f"http://{webserver_service_name}" + + +@pytest.fixture(scope="session") +def gotenberg_host(docker_services, docker_ip: str, gotenberg_service_name: str) -> str: + url = f"http://{docker_ip}:{docker_services.port_for(gotenberg_service_name, 3000)}" + + docker_services.wait_until_responsive( + timeout=30.0, + pause=1, + check=lambda: is_responsive(f"{url}/version"), + ) + return url + + +@pytest.fixture(scope="session") +def web_server_host(docker_services, docker_ip: str, webserver_service_name: str) -> str: + url = f"http://{docker_ip}:{docker_services.port_for(webserver_service_name, 80)}" + + docker_services.wait_until_responsive( + timeout=30.0, + pause=1, + check=lambda: is_responsive(url), + ) + return url @pytest.fixture(scope="session") diff --git a/.docker/content/favicon.ico b/tests/docker/content/favicon.ico similarity index 100% rename from .docker/content/favicon.ico rename to tests/docker/content/favicon.ico diff --git a/.docker/content/index.html b/tests/docker/content/index.html similarity index 100% rename from .docker/content/index.html rename to tests/docker/content/index.html 
diff --git a/.docker/docker-compose.ci-test-edge.yml b/tests/docker/docker-compose.ci-test-edge.yml similarity index 60% rename from .docker/docker-compose.ci-test-edge.yml rename to tests/docker/docker-compose.ci-test-edge.yml index 5494b3a..942a3b4 100644 --- a/.docker/docker-compose.ci-test-edge.yml +++ b/tests/docker/docker-compose.ci-test-edge.yml @@ -1,25 +1,24 @@ # docker-compose file for running testing with gotenberg container # Can be used locally or by the CI to start the necessary container with the # correct networking for the tests - -version: "3" +networks: + gotenberg-test-edge-net: services: gotenberg-client-test-edge-server: image: docker.io/gotenberg/gotenberg:edge - hostname: gotenberg-client-test-edge-server - container_name: gotenberg-client-test-edge-server - network_mode: host - restart: unless-stopped + networks: + - gotenberg-test-edge-net + ports: + - "3000/tcp" command: - "gotenberg" - "--log-level=info" - "--log-format=text" - nginx-webserver: + nginx-webserver-edge: image: docker.io/nginx:1-alpine - hostname: nginx-webserver - container_name: nginx-webserver + networks: + - gotenberg-test-edge-net ports: - - "8888:80" - restart: unless-stopped + - "80/tcp" volumes: - ./content:/usr/share/nginx/html:ro diff --git a/.docker/docker-compose.ci-test.yml b/tests/docker/docker-compose.ci-test.yml similarity index 56% rename from .docker/docker-compose.ci-test.yml rename to tests/docker/docker-compose.ci-test.yml index 484b98c..17d2aa5 100644 --- a/.docker/docker-compose.ci-test.yml +++ b/tests/docker/docker-compose.ci-test.yml @@ -1,27 +1,24 @@ # docker-compose file for running testing with gotenberg container # Can be used locally or by the CI to start the necessary container with the # correct networking for the tests - -version: "3" +networks: + gotenberg-test-net: services: gotenberg-client-test-server: image: docker.io/gotenberg/gotenberg:8.11.0 - hostname: gotenberg-client-test-server - container_name: gotenberg-client-test-server - 
network_mode: host - restart: unless-stopped + networks: + - gotenberg-test-net + ports: + - "3000/tcp" command: - "gotenberg" - - "--log-level=warn" + - "--log-level=info" - "--log-format=text" nginx-webserver: image: docker.io/nginx:1-alpine - hostname: nginx-webserver - container_name: nginx-webserver + networks: + - gotenberg-test-net ports: - - "8888:80" - restart: unless-stopped - environment: - NGINX_ENTRYPOINT_QUIET_LOGS: 1 + - "80/tcp" volumes: - ./content:/usr/share/nginx/html:ro diff --git a/tests/test_convert_chromium_html.py b/tests/test_convert_chromium_html.py index ed3d1d9..07d1a6a 100644 --- a/tests/test_convert_chromium_html.py +++ b/tests/test_convert_chromium_html.py @@ -127,8 +127,8 @@ def test_convert_page_size(self, client: GotenbergClient, sample_directory: Path _ = route.index(test_file).size(A4).run() request = httpx_mock.get_request() - verify_stream_contains("paperWidth", "8.27", request.stream) - verify_stream_contains("paperHeight", "11.7", request.stream) + verify_stream_contains(request, "paperWidth", "8.27") + verify_stream_contains(request, "paperHeight", "11.7") def test_convert_margin(self, client: GotenbergClient, sample_directory: Path, httpx_mock: HTTPXMock): httpx_mock.add_response(method="POST") @@ -149,10 +149,10 @@ def test_convert_margin(self, client: GotenbergClient, sample_directory: Path, h ) request = httpx_mock.get_request() - verify_stream_contains("marginTop", "1cm", request.stream) - verify_stream_contains("marginBottom", "2pc", request.stream) - verify_stream_contains("marginLeft", "3mm", request.stream) - verify_stream_contains("marginRight", "4", request.stream) + verify_stream_contains(request, "marginTop", "1cm") + verify_stream_contains(request, "marginBottom", "2pc") + verify_stream_contains(request, "marginLeft", "3mm") + verify_stream_contains(request, "marginRight", "4") def test_convert_render_control(self, client: GotenbergClient, sample_directory: Path, httpx_mock: HTTPXMock): 
httpx_mock.add_response(method="POST") @@ -161,8 +161,7 @@ def test_convert_render_control(self, client: GotenbergClient, sample_directory: with client.chromium.html_to_pdf() as route: _ = route.index(test_file).render_wait(500.0).run() - request = httpx_mock.get_request() - verify_stream_contains("waitDelay", "500.0", request.stream) + verify_stream_contains(httpx_mock.get_request(), "waitDelay", "500.0") @pytest.mark.parametrize( ("orientation"), @@ -181,9 +180,8 @@ def test_convert_orientation( with client.chromium.html_to_pdf() as route: _ = route.index(test_file).orient(orientation).run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "landscape", "true" if orientation == PageOrientation.Landscape else "false", - request.stream, ) diff --git a/tests/test_convert_chromium_screenshots.py b/tests/test_convert_chromium_screenshots.py index 4e3619a..4a28389 100644 --- a/tests/test_convert_chromium_screenshots.py +++ b/tests/test_convert_chromium_screenshots.py @@ -10,10 +10,11 @@ from gotenberg_client import GotenbergClient +@pytest.mark.usefixtures("web_server_host") class TestChromiumScreenshots: - def test_basic_screenshot(self, client: GotenbergClient, web_server_host: str): + def test_basic_screenshot(self, client: GotenbergClient, webserver_docker_internal_url: str): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).run_with_retry() + resp = route.url(webserver_docker_internal_url).run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers @@ -26,83 +27,83 @@ def test_basic_screenshot(self, client: GotenbergClient, web_server_host: str): def test_screenshot_formats( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, image_format: Literal["png", "webp", "jpeg"], ): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).output_format(image_format).run_with_retry() + resp = 
route.url(webserver_docker_internal_url).output_format(image_format).run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == f"image/{image_format}" - def test_screenshot_quality_valid(self, client: GotenbergClient, web_server_host: str): + def test_screenshot_quality_valid(self, client: GotenbergClient, webserver_docker_internal_url: str): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).quality(80).run_with_retry() + resp = route.url(webserver_docker_internal_url).quality(80).run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == "image/png" - def test_screenshot_quality_too_low(self, client: GotenbergClient, web_server_host: str): + def test_screenshot_quality_too_low(self, client: GotenbergClient, webserver_docker_internal_url: str): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).quality(-10).run_with_retry() + resp = route.url(webserver_docker_internal_url).quality(-10).run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == "image/png" - def test_screenshot_quality_too_high(self, client: GotenbergClient, web_server_host: str): + def test_screenshot_quality_too_high(self, client: GotenbergClient, webserver_docker_internal_url: str): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).quality(101).run_with_retry() + resp = route.url(webserver_docker_internal_url).quality(101).run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == "image/png" - def test_screenshot_optimize_speed(self, client: GotenbergClient, web_server_host: str): + def test_screenshot_optimize_speed(self, client: GotenbergClient, webserver_docker_internal_url: str): with 
client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).optimize_speed().run_with_retry() + resp = route.url(webserver_docker_internal_url).optimize_speed().run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == "image/png" - def test_screenshot_optimize_quality(self, client: GotenbergClient, web_server_host: str): + def test_screenshot_optimize_quality(self, client: GotenbergClient, webserver_docker_internal_url: str): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).optimize_size().run_with_retry() + resp = route.url(webserver_docker_internal_url).optimize_size().run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == "image/png" - def test_network_idle_on(self, client: GotenbergClient, web_server_host: str): + def test_network_idle_on(self, client: GotenbergClient, webserver_docker_internal_url: str): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).skip_network_idle().run_with_retry() + resp = route.url(webserver_docker_internal_url).skip_network_idle().run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == "image/png" - def test_network_idle_off(self, client: GotenbergClient, web_server_host: str): + def test_network_idle_off(self, client: GotenbergClient, webserver_docker_internal_url: str): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).use_network_idle().run_with_retry() + resp = route.url(webserver_docker_internal_url).use_network_idle().run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == "image/png" - def test_status_codes(self, client: GotenbergClient, web_server_host: str): + def test_status_codes(self, client: GotenbergClient, 
webserver_docker_internal_url: str): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).fail_on_status_codes([499, 599]).run_with_retry() + resp = route.url(webserver_docker_internal_url).fail_on_status_codes([499, 599]).run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == "image/png" - def test_status_codes_empty(self, client: GotenbergClient, web_server_host: str): + def test_status_codes_empty(self, client: GotenbergClient, webserver_docker_internal_url: str): with client.chromium.screenshot_url() as route: - resp = route.url(web_server_host).fail_on_status_codes([]).run_with_retry() + resp = route.url(webserver_docker_internal_url).fail_on_status_codes([]).run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers diff --git a/tests/test_convert_chromium_url.py b/tests/test_convert_chromium_url.py index e9d56d9..1204ed6 100644 --- a/tests/test_convert_chromium_url.py +++ b/tests/test_convert_chromium_url.py @@ -12,16 +12,18 @@ from tests.utils import verify_stream_contains +@pytest.mark.usefixtures("webserver_docker_internal_url") class TestConvertChromiumUrlRoute: - def test_basic_convert(self, client: GotenbergClient, web_server_host: str): + def test_basic_convert(self, client: GotenbergClient, webserver_docker_internal_url: str): with client.chromium.url_to_pdf() as route: - resp = route.url(web_server_host).run_with_retry() + resp = route.url(webserver_docker_internal_url).run_with_retry() assert resp.status_code == codes.OK assert "Content-Type" in resp.headers assert resp.headers["Content-Type"] == "application/pdf" +@pytest.mark.usefixtures("webserver_docker_internal_url") class TestConvertChromiumUrlMocked: @pytest.mark.parametrize( ("emulation"), @@ -30,20 +32,19 @@ class TestConvertChromiumUrlMocked: def test_convert_orientation( self, client: GotenbergClient, - web_server_host: str, + 
webserver_docker_internal_url: str, httpx_mock: HTTPXMock, emulation: EmulatedMediaType, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - _ = route.url(web_server_host).media_type(emulation).run() + _ = route.url(webserver_docker_internal_url).media_type(emulation).run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "emulatedMediaType", "screen" if emulation == EmulatedMediaType.Screen else "print", - request.stream, ) @pytest.mark.parametrize( @@ -53,22 +54,21 @@ def test_convert_orientation( def test_convert_css_or_not_size( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, httpx_mock: HTTPXMock, method: str, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - route.url(web_server_host) + route.url(webserver_docker_internal_url) getattr(route, method)() _ = route.run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "preferCssPageSize", "true" if method == "prefer_css_page_size" else "false", - request.stream, ) @pytest.mark.parametrize( @@ -78,22 +78,21 @@ def test_convert_css_or_not_size( def test_convert_background_graphics_or_not( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, httpx_mock: HTTPXMock, method: str, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - route.url(web_server_host) + route.url(webserver_docker_internal_url) getattr(route, method)() _ = route.run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "printBackground", "true" if method == "background_graphics" else "false", - request.stream, ) @pytest.mark.parametrize( @@ -103,22 +102,21 @@ def test_convert_background_graphics_or_not( def test_convert_hide_background_or_not( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, 
httpx_mock: HTTPXMock, method: str, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - route.url(web_server_host) + route.url(webserver_docker_internal_url) getattr(route, method)() _ = route.run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "omitBackground", "true" if method == "hide_background" else "false", - request.stream, ) @pytest.mark.parametrize( @@ -128,119 +126,113 @@ def test_convert_hide_background_or_not( def test_convert_fail_exceptions( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, httpx_mock: HTTPXMock, method: str, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - route.url(web_server_host) + route.url(webserver_docker_internal_url) getattr(route, method)() _ = route.run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "failOnConsoleExceptions", "true" if method == "fail_on_exceptions" else "false", - request.stream, ) def test_convert_scale( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, httpx_mock: HTTPXMock, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - _ = route.url(web_server_host).scale(1.5).run() + _ = route.url(webserver_docker_internal_url).scale(1.5).run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "scale", "1.5", - request.stream, ) def test_convert_page_ranges( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, httpx_mock: HTTPXMock, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - _ = route.url(web_server_host).page_ranges("1-5").run() + _ = route.url(webserver_docker_internal_url).page_ranges("1-5").run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "nativePageRanges", 
"1-5", - request.stream, ) def test_convert_url_render_wait( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, httpx_mock: HTTPXMock, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - _ = route.url(web_server_host).render_wait(500).run() + _ = route.url(webserver_docker_internal_url).render_wait(500).run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "waitDelay", "500", - request.stream, ) def test_convert_url_render_expression( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, httpx_mock: HTTPXMock, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - _ = route.url(web_server_host).render_expr("wait while false;").run() + _ = route.url(webserver_docker_internal_url).render_expr("wait while false;").run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "waitForExpression", "wait while false;", - request.stream, ) @pytest.mark.filterwarnings("ignore::DeprecationWarning") def test_convert_url_user_agent( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, httpx_mock: HTTPXMock, ): httpx_mock.add_response(method="POST") with client.chromium.url_to_pdf() as route: - _ = route.url(web_server_host).user_agent("Firefox").run() + _ = route.url(webserver_docker_internal_url).user_agent("Firefox").run() - request = httpx_mock.get_request() verify_stream_contains( + httpx_mock.get_request(), "userAgent", "Firefox", - request.stream, ) def test_convert_url_headers( self, client: GotenbergClient, - web_server_host: str, + webserver_docker_internal_url: str, httpx_mock: HTTPXMock, ): httpx_mock.add_response(method="POST") @@ -248,11 +240,9 @@ def test_convert_url_headers( headers = {"X-Auth-Token": "Secure"} with client.chromium.url_to_pdf() as route: - _ = 
route.url(web_server_host).headers(headers).run() - - request = httpx_mock.get_request() + _ = route.url(webserver_docker_internal_url).headers(headers).run() verify_stream_contains( + httpx_mock.get_request(), "extraHttpHeaders", json.dumps(headers), - request.stream, ) diff --git a/tests/test_metadata.py b/tests/test_metadata.py new file mode 100644 index 0000000..af79ebc --- /dev/null +++ b/tests/test_metadata.py @@ -0,0 +1,183 @@ +from datetime import datetime +from datetime import timedelta +from datetime import timezone +from pathlib import Path + +import pikepdf +import pytest +from httpx import codes + +from gotenberg_client import GotenbergClient +from gotenberg_client import InvalidKeywordError +from gotenberg_client import InvalidPdfRevisionError +from gotenberg_client._convert.common import MetadataMixin +from gotenberg_client.options import TrappedStatus + + +class TestPdfMetadata: + def test_metadata_basic( + self, + client: GotenbergClient, + tmp_path: Path, + webserver_docker_internal_url: str, + ): + """Test basic metadata setting.""" + + author = "Gotenberg Test" + copyright_info = "Copyright Me at Me, Inc" + creation_date = datetime(2006, 9, 18, 16, 27, 50, tzinfo=timezone(timedelta(hours=-4))) + creator = "Gotenberg Some Version" + keywords = ["Test", "Something"] + marked = True + mod_date = datetime(2006, 9, 18, 16, 27, 50, tzinfo=timezone(timedelta(hours=-5))) + pdf_version = 1.5 + producer = "Gotenberg Client" + subject = "A Test File" + title = "An override title" + trapped = TrappedStatus.TRUE + + with client.chromium.url_to_pdf() as route: + resp = ( + route.url(webserver_docker_internal_url) + .metadata( + author=author, + pdf_copyright=copyright_info, + creation_date=creation_date, + creator=creator, + keywords=keywords, + marked=marked, + modification_date=mod_date, + pdf_version=pdf_version, + producer=producer, + subject=subject, + title=title, + trapped=trapped, + ) + .run_with_retry() + ) + + assert resp.status_code == codes.OK + 
assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + + output = tmp_path / "test_metadata_basic.pdf" + resp.to_file(output) + + with pikepdf.Pdf.open(output) as pdf: + assert "/Author" in pdf.docinfo + assert pdf.docinfo["/Author"] == author + + assert "/Creator" in pdf.docinfo + assert pdf.docinfo["/Creator"] == creator + + assert "/Keywords" in pdf.docinfo + assert pdf.docinfo["/Keywords"] == ", ".join(keywords) + + assert "/Producer" in pdf.docinfo + assert pdf.docinfo["/Producer"] == producer + + assert "/Subject" in pdf.docinfo + assert pdf.docinfo["/Subject"] == subject + + assert "/Title" in pdf.docinfo + assert pdf.docinfo["/Title"] == title + + assert "/Trapped" in pdf.docinfo + assert pdf.docinfo["/Trapped"] == "/True" + + # TODO(stumpylog): Investigate why certain fields seems to not be possible to set + + def test_metadata_trapped_bool(self, client: GotenbergClient, tmp_path: Path, webserver_docker_internal_url: str): + with client.chromium.url_to_pdf() as route: + resp = ( + route.url(webserver_docker_internal_url) + .metadata( + trapped=True, + ) + .run_with_retry() + ) + + assert resp.status_code == codes.OK + assert "Content-Type" in resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + + output = tmp_path / "test_metadata_trapped_bool.pdf" + resp.to_file(output) + + with pikepdf.Pdf.open(output) as pdf: + assert "/Trapped" in pdf.docinfo + assert pdf.docinfo["/Trapped"] == "/True" + + def test_metadata_merging( + self, + client: GotenbergClient, + tmp_path: Path, + webserver_docker_internal_url: str, + ): + inital_title = "Initial Title" + new_title = "An New Title" + trapped = TrappedStatus.UNKNOWN + + with client.chromium.url_to_pdf() as route: + resp = ( + route.url(webserver_docker_internal_url) + .metadata( + title=inital_title, + trapped=trapped, + ) + .metadata(title=new_title) + .run_with_retry() + ) + + assert resp.status_code == codes.OK + assert "Content-Type" in 
resp.headers + assert resp.headers["Content-Type"] == "application/pdf" + + output = tmp_path / "test_metadata_merging.pdf" + resp.to_file(output) + + with pikepdf.Pdf.open(output) as pdf: + assert "/Title" in pdf.docinfo + assert pdf.docinfo["/Title"] == new_title + + assert "/Trapped" in pdf.docinfo + assert pdf.docinfo["/Trapped"] == "/Unknown" + + @pytest.mark.parametrize( + ("base_value", "delta"), + [(MetadataMixin.MIN_PDF_VERSION, -0.5), (MetadataMixin.MAX_PDF_VERSION, 0.5)], + ) + def test_metadata_invalid_pdf_revision( + self, + client: GotenbergClient, + webserver_docker_internal_url: str, + base_value: float, + delta: float, + ): + with client.chromium.url_to_pdf() as route, pytest.raises(InvalidPdfRevisionError): + _ = ( + route.url(webserver_docker_internal_url) + .metadata( + pdf_version=base_value + delta, + ) + .run_with_retry() + ) + + @pytest.mark.parametrize( + ("keywords"), + [["Test, Something"], ["Test", 1]], + ) + def test_metadata_invalid_pdf_keyword( + self, + client: GotenbergClient, + webserver_docker_internal_url: str, + keywords: list[str], + ): + with client.chromium.url_to_pdf() as route, pytest.raises(InvalidKeywordError): + _ = ( + route.url(webserver_docker_internal_url) + .metadata( + keywords=keywords, + ) + .run_with_retry() + ) diff --git a/tests/utils.py b/tests/utils.py index 53f6e45..54bb19a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -6,24 +6,17 @@ import tempfile from pathlib import Path -from httpx._multipart import DataField -from httpx._multipart import FileField -from httpx._multipart import MultipartStream +def verify_stream_contains(request, key: str, value: str) -> None: + content_type = request.headers["Content-Type"] + assert "multipart/form-data" in content_type -def verify_stream_contains(key: str, value: str, stream: MultipartStream) -> None: - for item in stream.fields: - if isinstance(item, FileField): - continue - elif isinstance(item, DataField) and item.name == key: - actual_value = item.value - if 
isinstance(actual_value, bytes): - actual_value = actual_value.decode("utf-8") - assert actual_value == value, f"Key '{actual_value}' /= {value}" - return + boundary = content_type.split("boundary=")[1] - msg = f'Key "{key}" with value "{value}" not found in stream' - raise AssertionError(msg) + parts = request.content.split(f"--{boundary}".encode()) + + form_field_found = any(f'name="{key}"'.encode() in part and value.encode() in part for part in parts) + assert form_field_found, f'Key "{key}" with value "{value}" not found in stream' def extract_text(pdf_path: Path) -> str: @@ -36,7 +29,7 @@ def extract_text(pdf_path: Path) -> str: with tempfile.NamedTemporaryFile( mode="w+", ) as tmp: - subprocess.run( # noqa: S603 + subprocess.run( [ pdf_to_text, "-q",