From e24a97046764e4ab971b0a38e19eb07a08ee8557 Mon Sep 17 00:00:00 2001 From: Benjamin Webb Date: Wed, 19 Feb 2025 23:01:38 -0500 Subject: [PATCH 1/3] Add support for data integrity header --- pygeoapi/api/__init__.py | 56 ++++++++++++++++++++++++- pygeoapi/django_/views.py | 3 +- pygeoapi/flask_app.py | 3 +- pygeoapi/starlette_app.py | 3 +- tests/api/test_itemtypes.py | 84 ++++++++++++++++++++++++++++++++++++- 5 files changed, 144 insertions(+), 5 deletions(-) diff --git a/pygeoapi/api/__init__.py b/pygeoapi/api/__init__.py index 44f01f540..1b3cfad03 100644 --- a/pygeoapi/api/__init__.py +++ b/pygeoapi/api/__init__.py @@ -45,6 +45,7 @@ from datetime import datetime from functools import partial from gzip import compress +import hashlib from http import HTTPStatus import logging import re @@ -99,6 +100,10 @@ (F_NETCDF, 'application/x-netcdf'), )) +#: Digest integrity methods supported +DIGEST_METHODS = ['sha1', 'sha256', 'sha384', 'sha512', + 'sha3-256', 'sha3-384', 'sha3-512'] + #: Locale used for system responses (e.g. exceptions) SYSTEM_LOCALE = l10n.Locale('en', 'US') @@ -165,6 +170,32 @@ def apply_gzip(headers: dict, content: Union[str, bytes]) -> Union[str, bytes]: return content +def apply_integrity(headers: dict, content: Union[str, bytes]): + """ + Apply content header integrity hash to header.
+ """ + + try: + hash_method = headers.pop('Want-Digest') + except KeyError: + LOGGER.debug('No digest requested') + return + + try: + LOGGER.debug(f'Hashing with {hash_method}') + hash_func = hashlib.new(hash_method) + + charset = CHARSET[0] + content_bytes = (content if isinstance(content, bytes) + else content.encode(charset)) + + hash_func.update(content_bytes) + headers['Digest'] = f'{hash_method}={hash_func.hexdigest()}' + + except ValueError: + raise ValueError(f'Unsupported hash method: {hash_method}') + + class APIRequest: """ Transforms an incoming server-specific Request into an object @@ -234,6 +265,9 @@ def __init__(self, request, supported_locales): # Determine format self._format = self._get_format(request.headers) + # Determine digest + self._digest = self._get_digest(request.headers) + # Get received headers self._headers = self.get_request_headers(request.headers) @@ -347,6 +381,19 @@ def _get_format(self, headers) -> Union[str, None]: return format_ or None + def _get_digest(self, headers) -> Union[str, None]: + """ + Get `Request` digest type from query parameters or headers. + + :param headers: Dict of Request headers + :returns: digest method or None if not found/specified + """ + h = headers.get('Want-Digest', headers.get('want-digest', '')).strip() # noqa + # basic support for complex types (i.e. with "q=0.x") + for hash_method in (t.split(';')[0].strip().lower() for t in h.split(',') if t): # noqa + if hash_method in DIGEST_METHODS: + return hash_method + @property def data(self) -> bytes: """Returns the additional data send with the Request (bytes)""" @@ -463,6 +510,7 @@ def is_valid(self, additional_formats=None) -> bool: def get_response_headers(self, force_lang: l10n.Locale = None, force_type: str = None, force_encoding: str = None, + force_digest: str = None, **custom_headers) -> dict: """ Prepares and returns a dictionary with Response object headers. 
@@ -487,6 +535,7 @@ def get_response_headers(self, force_lang: l10n.Locale = None, :param force_lang: An optional Content-Language header override. :param force_type: An optional Content-Type header override. :param force_encoding: An optional Content-Encoding header override. + :param force_digest: An optional Want-Digest header override. :returns: A header dict """ @@ -506,6 +555,11 @@ def get_response_headers(self, force_lang: l10n.Locale = None, elif F_GZIP in self._headers.get('Accept-Encoding', ''): headers['Content-Encoding'] = F_GZIP + if force_digest: + headers['Want-Digest'] = force_digest + elif self._digest: + headers['Want-Digest'] = self._digest + return headers def get_request_headers(self, headers) -> dict: @@ -518,7 +572,7 @@ def get_request_headers(self, headers) -> dict: :returns: A header dict """ - headers_ = {item[0]: item[1] for item in headers.items()} + headers_ = {item[0].title(): item[1] for item in headers.items()} return headers_ diff --git a/pygeoapi/django_/views.py b/pygeoapi/django_/views.py index 682ef51ce..15b83703c 100644 --- a/pygeoapi/django_/views.py +++ b/pygeoapi/django_/views.py @@ -40,7 +40,7 @@ from django.conf import settings from django.http import HttpRequest, HttpResponse -from pygeoapi.api import API, APIRequest, apply_gzip +from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity import pygeoapi.api as core_api import pygeoapi.api.coverages as coverages_api import pygeoapi.api.environmental_data_retrieval as edr_api @@ -550,6 +550,7 @@ def execute_from_django(api_function, request: HttpRequest, *args, else: headers, status, content = api_function(api_, api_request, *args) + apply_integrity(headers, content) content = apply_gzip(headers, content) # Convert API payload to a django response diff --git a/pygeoapi/flask_app.py b/pygeoapi/flask_app.py index 744862d4f..00d8804fc 100644 --- a/pygeoapi/flask_app.py +++ b/pygeoapi/flask_app.py @@ -37,7 +37,7 @@ from flask import (Flask, Blueprint, make_response, 
request, send_from_directory, Response, Request) -from pygeoapi.api import API, APIRequest, apply_gzip +from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity import pygeoapi.api as core_api import pygeoapi.api.coverages as coverages_api import pygeoapi.api.environmental_data_retrieval as edr_api @@ -146,6 +146,7 @@ def execute_from_flask(api_function, request: Request, *args, headers, status, content = actual_api.get_format_exception(api_request) else: headers, status, content = api_function(actual_api, api_request, *args) + apply_integrity(headers, content) content = apply_gzip(headers, content) response = make_response(content, status) diff --git a/pygeoapi/starlette_app.py b/pygeoapi/starlette_app.py index 984e62302..0e9ef1f64 100644 --- a/pygeoapi/starlette_app.py +++ b/pygeoapi/starlette_app.py @@ -49,7 +49,7 @@ ) import uvicorn -from pygeoapi.api import API, APIRequest, apply_gzip +from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity import pygeoapi.api as core_api import pygeoapi.api.coverages as coverages_api import pygeoapi.api.environmental_data_retrieval as edr_api @@ -133,6 +133,7 @@ async def execute_from_starlette(api_function, request: Request, *args, headers, status, content = await loop.run_in_executor( None, call_api_threadsafe, loop, api_function, actual_api, api_request, *args) + apply_integrity(headers, content) # NOTE: that gzip currently doesn't work in starlette # https://github.com/geopython/pygeoapi/issues/1591 content = apply_gzip(headers, content) diff --git a/tests/api/test_itemtypes.py b/tests/api/test_itemtypes.py index cc41b39a7..502cfadf6 100644 --- a/tests/api/test_itemtypes.py +++ b/tests/api/test_itemtypes.py @@ -41,7 +41,7 @@ from shapely.geometry import Point from pygeoapi.api import (API, FORMAT_TYPES, F_GZIP, F_HTML, F_JSONLD, - apply_gzip) + apply_gzip, apply_integrity) from pygeoapi.api.itemtypes import ( get_collection_queryables, get_collection_item, get_collection_items, 
manage_collection_item) @@ -405,6 +405,88 @@ def test_collection_items_gzip_csv(config, api_, openapi): assert rsp_csv == rsp_csv_ +def test_collection_no_digest(api_): + req_digest = mock_api_request() + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert 'Digest' not in rsp_digest_headers + + req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA100') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert 'Digest' not in rsp_digest_headers + + +def test_collection_with_digest(api_): + req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA100,sha1') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Digest'] == 'sha1=0d4818c86215ba031044b27e28cb3170936e8c53' # noqa + + req_digest = mock_api_request(HTTP_WANT_DIGEST='sha256') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Digest'] == 'sha256=f24c899027516b64c13734caf12a5506c8137f8520ab1b08b936e8e14f43faa4' # noqa + + req_digest = mock_api_request(HTTP_WANT_DIGEST='sha384') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Digest'] == 'sha384=2e875167e36a9d70a11bef48d290dd439741514f28e19680a4eb049f2aeaca96092280dce1458c6072650a678840ee83' # noqa 
+ + req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA512') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Digest'] == 'sha512=a57169dd6a947237df9ab8640cf6bedd57e54cb854cc8843f4aac08c30d4e2c402af8b637b8823f6953b90d61f8fc37db95a68cce9ee0d7b9cc9186fcbf5978a' # noqa + + req_digest = mock_api_request(HTTP_WANT_DIGEST='sha3-256') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Digest'] == 'sha3-256=52bd7167f2c74131287e313dc0e6959502626a44069e6b3ab9059aa00cf15c22' # noqa + + req_digest = mock_api_request(HTTP_WANT_DIGEST='sha3-384') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Digest'] == 'sha3-384=335b5d9c02c174325b8d9f039ca1acd6783d1d457d1105a091b31baeca023c5896665d5fd7417fbc7ee946231e7ba990' # noqa + + req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA3-512') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Digest'] == 'sha3-512=79f736ddfbc8faca1623c6eb365e48e422aa30d1ebb51cc5aa0b046b1966d8256f2cc1399d3669069d965f56a5148522d05e7d63b78b7b76282034f8e77fb8c2' # noqa + + +def test_collection_with_digest_and_gzip(api_): + req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA1,sha256', + HTTP_ACCEPT_ENCODING=F_GZIP) + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + 
apply_integrity(rsp_digest_headers, rsp_digest) + apply_gzip(rsp_digest_headers, rsp_digest) + + assert rsp_digest_headers['Content-Type'] == \ + 'application/json; charset=utf-8' + assert rsp_digest_headers['Digest'] == \ + 'sha1=0d4818c86215ba031044b27e28cb3170936e8c53' + assert rsp_digest_headers['Content-Encoding'] == F_GZIP + + def test_get_collection_items_crs(config, api_): # Invalid CRS query parameter From 6cefc1f26efee2e98c0a4732d9621190de889840 Mon Sep 17 00:00:00 2001 From: Benjamin Webb Date: Sun, 23 Feb 2025 15:18:45 -0500 Subject: [PATCH 2/3] Support MDN headers --- pygeoapi/api/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pygeoapi/api/__init__.py b/pygeoapi/api/__init__.py index 1b3cfad03..cbb19e190 100644 --- a/pygeoapi/api/__init__.py +++ b/pygeoapi/api/__init__.py @@ -176,7 +176,7 @@ def apply_integrity(headers: dict, content: Union[str, bytes]): """ try: - hash_method = headers.pop('Want-Digest') + hash_method = headers.pop('Want-Content-Digest') except KeyError: LOGGER.debug('No digest requested') return @@ -190,7 +190,7 @@ def apply_integrity(headers: dict, content: Union[str, bytes]): else content.encode(charset)) hash_func.update(content_bytes) - headers['Digest'] = f'{hash_method}={hash_func.hexdigest()}' + headers['Content-Digest'] = f'{hash_method}={hash_func.hexdigest()}' except ValueError: raise ValueError(f'Unsupported hash method: {hash_method}') @@ -388,7 +388,7 @@ def _get_digest(self, headers) -> Union[str, None]: :param headers: Dict of Request headers :returns: digest method or None if not found/specified """ - h = headers.get('Want-Digest', headers.get('want-digest', '')).strip() # noqa + h = headers.get('Want-Content-Digest', headers.get('want-content-digest', '')).strip() # noqa # basic support for complex types (i.e. 
with "q=0.x") for hash_method in (t.split(';')[0].strip().lower() for t in h.split(',') if t): # noqa if hash_method in DIGEST_METHODS: @@ -556,9 +556,9 @@ def get_response_headers(self, force_lang: l10n.Locale = None, headers['Content-Encoding'] = F_GZIP if force_digest: - headers['Want-Digest'] = force_digest + headers['Want-Content-Digest'] = force_digest elif self._digest: - headers['Want-Digest'] = self._digest + headers['Want-Content-Digest'] = self._digest return headers From 01ec5526f1269c852842f994d5ec122505460a5e Mon Sep 17 00:00:00 2001 From: Benjamin Webb Date: Thu, 17 Apr 2025 17:34:51 -0400 Subject: [PATCH 3/3] Align Digest with IETF spec Implements testing for https://datatracker.ietf.org/doc/rfc9530/ --- tests/api/test_itemtypes.py | 38 ++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/api/test_itemtypes.py b/tests/api/test_itemtypes.py index 28dac05db..11a68efb2 100644 --- a/tests/api/test_itemtypes.py +++ b/tests/api/test_itemtypes.py @@ -421,69 +421,69 @@ def test_collection_no_digest(api_): api_, req_digest, 'obs', '371') apply_integrity(rsp_digest_headers, rsp_digest) assert rsp_digest_headers['Content-Type'] == 'application/json' - assert 'Digest' not in rsp_digest_headers + assert 'Content-Digest' not in rsp_digest_headers - req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA100') + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA100') rsp_digest_headers, _, rsp_digest = get_collection_item( api_, req_digest, 'obs', '371') apply_integrity(rsp_digest_headers, rsp_digest) assert rsp_digest_headers['Content-Type'] == 'application/json' - assert 'Digest' not in rsp_digest_headers + assert 'Content-Digest' not in rsp_digest_headers def test_collection_with_digest(api_): - req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA100,sha1') + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA100,sha1') rsp_digest_headers, _, rsp_digest = get_collection_item( api_, req_digest, 
'obs', '371') apply_integrity(rsp_digest_headers, rsp_digest) assert rsp_digest_headers['Content-Type'] == 'application/json' - assert rsp_digest_headers['Digest'] == 'sha1=0d4818c86215ba031044b27e28cb3170936e8c53' # noqa + assert rsp_digest_headers['Content-Digest'] == 'sha1=0d4818c86215ba031044b27e28cb3170936e8c53' # noqa - req_digest = mock_api_request(HTTP_WANT_DIGEST='sha256') + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='sha256') rsp_digest_headers, _, rsp_digest = get_collection_item( api_, req_digest, 'obs', '371') apply_integrity(rsp_digest_headers, rsp_digest) assert rsp_digest_headers['Content-Type'] == 'application/json' - assert rsp_digest_headers['Digest'] == 'sha256=f24c899027516b64c13734caf12a5506c8137f8520ab1b08b936e8e14f43faa4' # noqa + assert rsp_digest_headers['Content-Digest'] == 'sha256=f24c899027516b64c13734caf12a5506c8137f8520ab1b08b936e8e14f43faa4' # noqa - req_digest = mock_api_request(HTTP_WANT_DIGEST='sha384') + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='sha384') rsp_digest_headers, _, rsp_digest = get_collection_item( api_, req_digest, 'obs', '371') apply_integrity(rsp_digest_headers, rsp_digest) assert rsp_digest_headers['Content-Type'] == 'application/json' - assert rsp_digest_headers['Digest'] == 'sha384=2e875167e36a9d70a11bef48d290dd439741514f28e19680a4eb049f2aeaca96092280dce1458c6072650a678840ee83' # noqa + assert rsp_digest_headers['Content-Digest'] == 'sha384=2e875167e36a9d70a11bef48d290dd439741514f28e19680a4eb049f2aeaca96092280dce1458c6072650a678840ee83' # noqa - req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA512') + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA512') rsp_digest_headers, _, rsp_digest = get_collection_item( api_, req_digest, 'obs', '371') apply_integrity(rsp_digest_headers, rsp_digest) assert rsp_digest_headers['Content-Type'] == 'application/json' - assert rsp_digest_headers['Digest'] == 
'sha512=a57169dd6a947237df9ab8640cf6bedd57e54cb854cc8843f4aac08c30d4e2c402af8b637b8823f6953b90d61f8fc37db95a68cce9ee0d7b9cc9186fcbf5978a' # noqa + assert rsp_digest_headers['Content-Digest'] == 'sha512=a57169dd6a947237df9ab8640cf6bedd57e54cb854cc8843f4aac08c30d4e2c402af8b637b8823f6953b90d61f8fc37db95a68cce9ee0d7b9cc9186fcbf5978a' # noqa - req_digest = mock_api_request(HTTP_WANT_DIGEST='sha3-256') + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='sha3-256') rsp_digest_headers, _, rsp_digest = get_collection_item( api_, req_digest, 'obs', '371') apply_integrity(rsp_digest_headers, rsp_digest) assert rsp_digest_headers['Content-Type'] == 'application/json' - assert rsp_digest_headers['Digest'] == 'sha3-256=52bd7167f2c74131287e313dc0e6959502626a44069e6b3ab9059aa00cf15c22' # noqa + assert rsp_digest_headers['Content-Digest'] == 'sha3-256=52bd7167f2c74131287e313dc0e6959502626a44069e6b3ab9059aa00cf15c22' # noqa - req_digest = mock_api_request(HTTP_WANT_DIGEST='sha3-384') + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='sha3-384') rsp_digest_headers, _, rsp_digest = get_collection_item( api_, req_digest, 'obs', '371') apply_integrity(rsp_digest_headers, rsp_digest) assert rsp_digest_headers['Content-Type'] == 'application/json' - assert rsp_digest_headers['Digest'] == 'sha3-384=335b5d9c02c174325b8d9f039ca1acd6783d1d457d1105a091b31baeca023c5896665d5fd7417fbc7ee946231e7ba990' # noqa + assert rsp_digest_headers['Content-Digest'] == 'sha3-384=335b5d9c02c174325b8d9f039ca1acd6783d1d457d1105a091b31baeca023c5896665d5fd7417fbc7ee946231e7ba990' # noqa - req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA3-512') + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA3-512') rsp_digest_headers, _, rsp_digest = get_collection_item( api_, req_digest, 'obs', '371') apply_integrity(rsp_digest_headers, rsp_digest) assert rsp_digest_headers['Content-Type'] == 'application/json' - assert rsp_digest_headers['Digest'] == 
'sha3-512=79f736ddfbc8faca1623c6eb365e48e422aa30d1ebb51cc5aa0b046b1966d8256f2cc1399d3669069d965f56a5148522d05e7d63b78b7b76282034f8e77fb8c2' # noqa + assert rsp_digest_headers['Content-Digest'] == 'sha3-512=79f736ddfbc8faca1623c6eb365e48e422aa30d1ebb51cc5aa0b046b1966d8256f2cc1399d3669069d965f56a5148522d05e7d63b78b7b76282034f8e77fb8c2' # noqa def test_collection_with_digest_and_gzip(api_): - req_digest = mock_api_request(HTTP_WANT_DIGEST='SHA1,sha256', + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA1,sha256', HTTP_ACCEPT_ENCODING=F_GZIP) rsp_digest_headers, _, rsp_digest = get_collection_item( api_, req_digest, 'obs', '371') @@ -492,7 +492,7 @@ def test_collection_with_digest_and_gzip(api_): assert rsp_digest_headers['Content-Type'] == \ 'application/json; charset=utf-8' - assert rsp_digest_headers['Digest'] == \ + assert rsp_digest_headers['Content-Digest'] == \ 'sha1=0d4818c86215ba031044b27e28cb3170936e8c53' assert rsp_digest_headers['Content-Encoding'] == F_GZIP