diff --git a/pygeoapi/api/__init__.py b/pygeoapi/api/__init__.py index f96220080..0e8b606eb 100644 --- a/pygeoapi/api/__init__.py +++ b/pygeoapi/api/__init__.py @@ -45,6 +45,7 @@ from datetime import datetime from functools import partial from gzip import compress +import hashlib from http import HTTPStatus import logging import re @@ -99,6 +100,10 @@ (F_NETCDF, 'application/x-netcdf'), )) +#: Digest integrity methods supported +DIGEST_METHODS = ['sha1', 'sha256', 'sha384', 'sha512', + 'sha3-256', 'sha3-384', 'sha3-512'] + #: Locale used for system responses (e.g. exceptions) SYSTEM_LOCALE = l10n.Locale('en', 'US') @@ -165,6 +170,32 @@ def apply_gzip(headers: dict, content: Union[str, bytes]) -> Union[str, bytes]: return content +def apply_integrity(headers: dict, content: Union[str, bytes]): + """ + Apply the requested content integrity hash to the response headers. + """ + + try: + hash_method = headers.pop('Want-Content-Digest') + except KeyError: + LOGGER.debug('No digest requested') + return + + try: + LOGGER.debug(f'Hashing with {hash_method}') + hash_func = hashlib.new(hash_method) + + charset = CHARSET[0] + content_bytes = (content if isinstance(content, bytes) + else content.encode(charset)) + + hash_func.update(content_bytes) + headers['Content-Digest'] = f'{hash_method}={hash_func.hexdigest()}' + + except ValueError: + raise ValueError(f'Unsupported hash method: {hash_method}') + + class APIRequest: """ Transforms an incoming server-specific Request into an object @@ -235,6 +266,9 @@ def __init__(self, request, supported_locales): # Determine format self._format = self._get_format(request.headers) + # Determine digest + self._digest = self._get_digest(request.headers) + # Get received headers self._headers = self.get_request_headers(request.headers) @@ -348,6 +382,19 @@ def _get_format(self, headers) -> Union[str, None]: return format_ or None + def _get_digest(self, headers) -> Union[str, None]: + """ + Get `Request` digest method from the Want-Content-Digest header. 
+ + :param headers: Dict of Request headers + :returns: first supported digest method, or None if not found/specified + """ + h = headers.get('Want-Content-Digest', headers.get('want-content-digest', '')).strip() # noqa + # basic support for complex types (i.e. with "q=0.x") + for hash_method in (t.split(';')[0].strip().lower() for t in h.split(',') if t): # noqa + if hash_method in DIGEST_METHODS: + return hash_method + @property def data(self) -> bytes: """Returns the additional data send with the Request (bytes)""" @@ -464,6 +511,7 @@ def is_valid(self, additional_formats=None) -> bool: def get_response_headers(self, force_lang: l10n.Locale = None, force_type: str = None, force_encoding: str = None, + force_digest: str = None, **custom_headers) -> dict: """ Prepares and returns a dictionary with Response object headers. @@ -488,6 +536,7 @@ def get_response_headers(self, force_lang: l10n.Locale = None, :param force_lang: An optional Content-Language header override. :param force_type: An optional Content-Type header override. :param force_encoding: An optional Content-Encoding header override. + :param force_digest: An optional Want-Content-Digest header override. 
:returns: A header dict """ @@ -507,6 +556,11 @@ def get_response_headers(self, force_lang: l10n.Locale = None, elif F_GZIP in self._headers.get('Accept-Encoding', ''): headers['Content-Encoding'] = F_GZIP + if force_digest: + headers['Want-Content-Digest'] = force_digest + elif self._digest: + headers['Want-Content-Digest'] = self._digest + return headers def get_request_headers(self, headers) -> dict: @@ -519,7 +573,7 @@ def get_request_headers(self, headers) -> dict: :returns: A header dict """ - headers_ = {item[0]: item[1] for item in headers.items()} + headers_ = {item[0].title(): item[1] for item in headers.items()} return headers_ diff --git a/pygeoapi/django_/views.py b/pygeoapi/django_/views.py index ac532ce33..f31bcfa6a 100644 --- a/pygeoapi/django_/views.py +++ b/pygeoapi/django_/views.py @@ -40,7 +40,7 @@ from django.conf import settings from django.http import HttpRequest, HttpResponse -from pygeoapi.api import API, APIRequest, apply_gzip +from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity import pygeoapi.api as core_api import pygeoapi.api.coverages as coverages_api import pygeoapi.api.environmental_data_retrieval as edr_api @@ -550,6 +550,7 @@ def execute_from_django(api_function, request: HttpRequest, *args, else: headers, status, content = api_function(api_, api_request, *args) + apply_integrity(headers, content) content = apply_gzip(headers, content) # Convert API payload to a django response diff --git a/pygeoapi/flask_app.py b/pygeoapi/flask_app.py index f42f0c576..bff0190a3 100644 --- a/pygeoapi/flask_app.py +++ b/pygeoapi/flask_app.py @@ -37,7 +37,7 @@ from flask import (Flask, Blueprint, make_response, request, send_from_directory, Response, Request) -from pygeoapi.api import API, APIRequest, apply_gzip +from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity import pygeoapi.api as core_api import pygeoapi.api.coverages as coverages_api import pygeoapi.api.environmental_data_retrieval as edr_api @@ -151,6 
+151,7 @@ def execute_from_flask(api_function, request: Request, *args, headers, status, content = actual_api.get_format_exception(api_request) else: headers, status, content = api_function(actual_api, api_request, *args) + apply_integrity(headers, content) content = apply_gzip(headers, content) response = make_response(content, status) diff --git a/pygeoapi/starlette_app.py b/pygeoapi/starlette_app.py index 9fb30c69a..559be0c58 100644 --- a/pygeoapi/starlette_app.py +++ b/pygeoapi/starlette_app.py @@ -49,7 +49,7 @@ ) import uvicorn -from pygeoapi.api import API, APIRequest, apply_gzip +from pygeoapi.api import API, APIRequest, apply_gzip, apply_integrity import pygeoapi.api as core_api import pygeoapi.api.coverages as coverages_api import pygeoapi.api.environmental_data_retrieval as edr_api @@ -133,6 +133,7 @@ async def execute_from_starlette(api_function, request: Request, *args, headers, status, content = await loop.run_in_executor( None, call_api_threadsafe, loop, api_function, actual_api, api_request, *args) + apply_integrity(headers, content) # NOTE: that gzip currently doesn't work in starlette # https://github.com/geopython/pygeoapi/issues/1591 content = apply_gzip(headers, content) diff --git a/tests/api/test_itemtypes.py b/tests/api/test_itemtypes.py index 7b5d9bb29..11a68efb2 100644 --- a/tests/api/test_itemtypes.py +++ b/tests/api/test_itemtypes.py @@ -41,7 +41,7 @@ from shapely.geometry import Point from pygeoapi.api import (API, FORMAT_TYPES, F_GZIP, F_HTML, F_JSONLD, - apply_gzip) + apply_gzip, apply_integrity) from pygeoapi.api.itemtypes import ( get_collection_queryables, get_collection_item, get_collection_items, manage_collection_item) @@ -415,6 +415,88 @@ def test_collection_items_gzip_csv(config, api_, openapi): assert rsp_csv == rsp_csv_ +def test_collection_no_digest(api_): + req_digest = mock_api_request() + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, 
rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert 'Content-Digest' not in rsp_digest_headers + + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA100') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert 'Content-Digest' not in rsp_digest_headers + + +def test_collection_with_digest(api_): + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA100,sha1') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Content-Digest'] == 'sha1=0d4818c86215ba031044b27e28cb3170936e8c53' # noqa + + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='sha256') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Content-Digest'] == 'sha256=f24c899027516b64c13734caf12a5506c8137f8520ab1b08b936e8e14f43faa4' # noqa + + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='sha384') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Content-Digest'] == 'sha384=2e875167e36a9d70a11bef48d290dd439741514f28e19680a4eb049f2aeaca96092280dce1458c6072650a678840ee83' # noqa + + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA512') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 
'application/json' + assert rsp_digest_headers['Content-Digest'] == 'sha512=a57169dd6a947237df9ab8640cf6bedd57e54cb854cc8843f4aac08c30d4e2c402af8b637b8823f6953b90d61f8fc37db95a68cce9ee0d7b9cc9186fcbf5978a' # noqa + + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='sha3-256') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Content-Digest'] == 'sha3-256=52bd7167f2c74131287e313dc0e6959502626a44069e6b3ab9059aa00cf15c22' # noqa + + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='sha3-384') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Content-Digest'] == 'sha3-384=335b5d9c02c174325b8d9f039ca1acd6783d1d457d1105a091b31baeca023c5896665d5fd7417fbc7ee946231e7ba990' # noqa + + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA3-512') + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + assert rsp_digest_headers['Content-Type'] == 'application/json' + assert rsp_digest_headers['Content-Digest'] == 'sha3-512=79f736ddfbc8faca1623c6eb365e48e422aa30d1ebb51cc5aa0b046b1966d8256f2cc1399d3669069d965f56a5148522d05e7d63b78b7b76282034f8e77fb8c2' # noqa + + +def test_collection_with_digest_and_gzip(api_): + req_digest = mock_api_request(HTTP_WANT_CONTENT_DIGEST='SHA1,sha256', + HTTP_ACCEPT_ENCODING=F_GZIP) + rsp_digest_headers, _, rsp_digest = get_collection_item( + api_, req_digest, 'obs', '371') + apply_integrity(rsp_digest_headers, rsp_digest) + apply_gzip(rsp_digest_headers, rsp_digest) + + assert rsp_digest_headers['Content-Type'] == \ + 'application/json; charset=utf-8' + assert 
rsp_digest_headers['Content-Digest'] == \ + 'sha1=0d4818c86215ba031044b27e28cb3170936e8c53' + assert rsp_digest_headers['Content-Encoding'] == F_GZIP + + def test_get_collection_items_crs(config, api_): # Invalid CRS query parameter