From ff6294bbbb66e1467ff01bc9b2abc257b50354b1 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Tue, 20 Aug 2024 21:43:13 +0200 Subject: [PATCH] Issue #567/#591 Finetune detection of actual temporal dimension name in `load_stac` - move logic to _StacMetadataParser (less logic nesting) - improve test coverage (and add DummyStacDictBuilder utility) --- CHANGELOG.md | 4 +- openeo/metadata.py | 71 +++++++++------- openeo/testing/stac.py | 110 +++++++++++++++++++++++++ tests/rest/test_connection.py | 23 ++---- tests/test_metadata.py | 151 ++++++++++++++-------------------- tests/testing/test_stac.py | 83 +++++++++++++++++++ 6 files changed, 302 insertions(+), 140 deletions(-) create mode 100644 openeo/testing/stac.py create mode 100644 tests/testing/test_stac.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a5b9047c..4fd4ca464 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `load_stac`/`metadata_from_stac`: add support for extracting actual temporal dimension metadata ([#567](https://github.com/Open-EO/openeo-python-client/issues/567)) + ### Changed ### Removed ### Fixed -- apply_dimension with a 'target_dimension' argument was not correctly adjusting datacube metadata on the client side, causing a mismatch. +- `apply_dimension` with a `target_dimension` argument was not correctly adjusting datacube metadata on the client side, causing a mismatch. ## [0.31.0] - 2024-07-26 diff --git a/openeo/metadata.py b/openeo/metadata.py index 11d6236d2..1de8c38b7 100644 --- a/openeo/metadata.py +++ b/openeo/metadata.py @@ -6,6 +6,7 @@ from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Tuple, Union import pystac +import pystac.extensions.datacube import pystac.extensions.eo import pystac.extensions.item_assets @@ -23,6 +24,7 @@ class DimensionAlreadyExistsException(MetadataException): pass +# TODO: make these dimension classes immutable data classes class Dimension: """Base class for dimensions.""" @@ -540,30 +542,6 @@ def metadata_from_stac(url: str) -> CubeMetadata: # TODO move these nested functions and other logic to _StacMetadataParser - def get_temporal_metadata(spec: Union(pystac.Collection,pystac.Item, pystac.Catalog), complain: Callable[[str], None] = warnings.warn) -> TemporalDimension: - # Dimension info is in `cube:dimensions` - # Check if the datacube extension is present - if spec.ext.has("cube"): - return TemporalDimension(**dict(zip(["name","extent"],[(n, d.extent) for (n, d) in spec.ext.cube.dimensions.items() if d.dim_type =="temporal"][0]))) - else: - complain("No cube:dimensions metadata") - return TemporalDimension(name="t", extent=[None, None]) - - def get_temporal_metadata_old(spec: dict, complain: Callable[[str], None] = warnings.warn) -> TemporalDimension: - # Dimension info is in `cube:dimensions` (or 0.4-style `properties/cube:dimensions`) - cube_dimensions = ( - deep_get(spec, "cube:dimensions", default=None) - or deep_get(spec, "properties", "cube:dimensions", default=None) - or {} - ) - if not cube_dimensions: - complain("No cube:dimensions metadata") - for name, info in cube_dimensions.items(): - dim_type = info.get("type") - if dim_type == "temporal": - return TemporalDimension(name=name, extent=info.get("extent")) - return None - def get_band_metadata(eo_bands_location: dict) -> List[Band]: # TODO: return None iso empty list when no metadata? return [ @@ -613,17 +591,17 @@ def is_band_asset(asset: pystac.Asset) -> bool: else: raise ValueError(stac_object) - if _PYSTAC_1_9_EXTENSION_INTERFACE: - temporal_dimension = get_temporal_metadata(stac_object) - else: - temporal_dimension = get_temporal_metadata_old(stac_object.to_dict()) - if temporal_dimension is None: - temporal_dimension = TemporalDimension(name="t", extent=[None, None]) # TODO: conditionally include band dimension when there was actual indication of band metadata? band_dimension = BandDimension(name="bands", bands=bands) - metadata = CubeMetadata(dimensions=[band_dimension, temporal_dimension]) - return metadata + dimensions = [band_dimension] + + # TODO: is it possible to derive the actual name of temporal dimension that the backend will use? + temporal_dimension = _StacMetadataParser().get_temporal_dimension(stac_object) + if temporal_dimension: + dimensions.append(temporal_dimension) + metadata = CubeMetadata(dimensions=dimensions) + return metadata # Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9) # TODO: remove this once support for Python 3.7 and 3.8 is dropped @@ -697,3 +675,32 @@ def get_bands_from_item_assets( if asset_bands: bands.update(asset_bands) return bands + + def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]: + """ + Extract the temporal dimension from a STAC Collection/Item (if any) + """ + # TODO: also extract temporal dimension from assets? + if _PYSTAC_1_9_EXTENSION_INTERFACE: + if stac_obj.ext.has("cube") and hasattr(stac_obj.ext, "cube"): + temporal_dims = [ + (n, d.extent or [None, None]) + for (n, d) in stac_obj.ext.cube.dimensions.items() + if d.dim_type == pystac.extensions.datacube.DimensionType.TEMPORAL + ] + if len(temporal_dims) == 1: + name, extent = temporal_dims[0] + return TemporalDimension(name=name, extent=extent) + else: + if isinstance(stac_obj, pystac.Item): + cube_dimensions = stac_obj.properties.get("cube:dimensions", {}) + elif isinstance(stac_obj, pystac.Collection): + cube_dimensions = stac_obj.extra_fields.get("cube:dimensions", {}) + else: + cube_dimensions = {} + temporal_dims = [ + (n, d.get("extent", [None, None])) for (n, d) in cube_dimensions.items() if d.get("type") == "temporal" + ] + if len(temporal_dims) == 1: + name, extent = temporal_dims[0] + return TemporalDimension(name=name, extent=extent) diff --git a/openeo/testing/stac.py b/openeo/testing/stac.py new file mode 100644 index 000000000..4f0b455a8 --- /dev/null +++ b/openeo/testing/stac.py @@ -0,0 +1,110 @@ +from typing import List, Optional, Union + + +class StacDummyBuilder: + """ + Helper to compactly produce STAC Item/Collection/Catalog/... dicts for test purposes + + .. warning:: + This is an experimental API subject to change. + """ + + _EXT_DATACUBE = "https://stac-extensions.github.io/datacube/v2.2.0/schema.json" + + @classmethod + def item( + cls, + *, + id: str = "item123", + stac_version="1.0.0", + datetime: str = "2024-03-08", + properties: Optional[dict] = None, + cube_dimensions: Optional[dict] = None, + stac_extensions: Optional[List[str]] = None, + **kwargs, + ) -> dict: + """Create a STAC Item represented as dictionary.""" + properties = properties or {} + properties.setdefault("datetime", datetime) + + if cube_dimensions is not None: + properties["cube:dimensions"] = cube_dimensions + stac_extensions = cls._add_stac_extension(stac_extensions, cls._EXT_DATACUBE) + + d = { + "type": "Feature", + "stac_version": stac_version, + "id": id, + "geometry": None, + "properties": properties, + "links": [], + "assets": {}, + **kwargs, + } + + if stac_extensions is not None: + d["stac_extensions"] = stac_extensions + return d + + @classmethod + def _add_stac_extension(cls, stac_extensions: Union[List[str], None], stac_extension: str) -> List[str]: + stac_extensions = list(stac_extensions or []) + if stac_extension not in stac_extensions: + stac_extensions.append(stac_extension) + return stac_extensions + + @classmethod + def collection( + cls, + *, + id: str = "collection123", + description: str = "Collection 123", + stac_version: str = "1.0.0", + stac_extensions: Optional[List[str]] = None, + license: str = "proprietary", + extent: Optional[dict] = None, + cube_dimensions: Optional[dict] = None, + summaries: Optional[dict] = None, + ) -> dict: + """Create a STAC Collection represented as dictionary.""" + if extent is None: + extent = {"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [["2024-01-01", "2024-05-05"]]}} + + d = { + "type": "Collection", + "stac_version": stac_version, + "id": id, + "description": description, + "license": license, + "extent": extent, + "links": [], + } + if cube_dimensions is not None: + d["cube:dimensions"] = cube_dimensions + stac_extensions = cls._add_stac_extension(stac_extensions, cls._EXT_DATACUBE) + if summaries is not None: + d["summaries"] = summaries + if stac_extensions is not None: + d["stac_extensions"] = stac_extensions + return d + + @classmethod + def catalog( + cls, + *, + id: str = "catalog123", + stac_version: str = "1.0.0", + description: str = "Catalog 123", + stac_extensions: Optional[List[str]] = None, + ) -> dict: + """Create a STAC Catalog represented as dictionary.""" + d = { + "type": "Catalog", + "stac_version": stac_version, + "id": id, + "description": description, + "links": [], + } + if stac_extensions is not None: + d["stac_extensions"] = stac_extensions + return d diff --git a/tests/rest/test_connection.py b/tests/rest/test_connection.py index f7c0318bc..78e8679e7 100644 --- a/tests/rest/test_connection.py +++ b/tests/rest/test_connection.py @@ -38,6 +38,7 @@ paginate, ) from openeo.rest.vectorcube import VectorCube +from openeo.testing.stac import StacDummyBuilder from openeo.util import ContextTimer, dict_no_none from .auth.test_cli import auth_config, refresh_token_store @@ -2584,24 +2585,14 @@ def test_load_stac_from_job_empty_result(self, con120, requests_mock): } } - def test_load_stac_reduce_temporal(self, con120, tmp_path): + @pytest.mark.parametrize("temporal_dim", ["t", "datezz"]) + def test_load_stac_reduce_temporal(self, con120, tmp_path, temporal_dim): # TODO: reusable utility to create/generate a STAC resource for testing # (a file, but preferably a URL, but that requires urllib mocking) stac_path = tmp_path / "stac.json" - stac_data = { - "type": "Collection", - "id": "test-collection", - "stac_version": "1.0.0", - "description": "Test collection", - "links": [], - "title": "Test Collection", - "extent": { - "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, - "temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]}, - }, - "license": "proprietary", - "summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]}, - } + stac_data = StacDummyBuilder.collection( + cube_dimensions={temporal_dim: {"type": "temporal", "extent": ["2024-01-01", "2024-04-04"]}} + ) stac_path.write_text(json.dumps(stac_data)) cube = con120.load_stac(str(stac_path)) @@ -2615,7 +2606,7 @@ def test_load_stac_reduce_temporal(self, con120, tmp_path): "process_id": "reduce_dimension", "arguments": { "data": {"from_node": "loadstac1"}, - "dimension": "t", + "dimension": temporal_dim, "reducer": { "process_graph": { "max1": { diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 2238c631a..fce90186b 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -2,7 +2,7 @@ import json import re -from typing import List, Union +from typing import List, Optional, Union import pytest @@ -19,6 +19,7 @@ TemporalDimension, metadata_from_stac, ) +from openeo.testing.stac import StacDummyBuilder def test_metadata_get(): @@ -792,112 +793,30 @@ def filter_bbox(self, bbox): "test_stac, expected", [ ( - { - "type": "Collection", - "id": "test-collection", - "stac_version": "1.0.0", - "description": "Test collection", - "links": [], - "title": "Test Collection", - "extent": { - "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, - "temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]}, - }, - "license": "proprietary", - "summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]}, - }, + StacDummyBuilder.collection(summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}]}), ["B01", "B02"], ), # TODO: test asset handling in collection? ( - { - "type": "Catalog", - "id": "test-catalog", - "stac_version": "1.0.0", - "description": "Test Catalog", - "links": [], - }, + StacDummyBuilder.catalog(), [], ), ( - { - "type": "Feature", - "stac_version": "1.0.0", - "id": "test-item", - "properties": {"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "SCL"}, {"name": "B08"}]}, - "geometry": {"coordinates": [[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]], "type": "Polygon"}, - "links": [], - "assets": {}, - "bbox": [0, 1, 0, 1], - "stac_extensions": [], - }, + StacDummyBuilder.item( + properties={"datetime": "2020-05-22T00:00:00Z", "eo:bands": [{"name": "SCL"}, {"name": "B08"}]} + ), ["SCL", "B08"], ), # TODO: test asset handling in item? ], ) - - -def test_metadata_from_stac(tmp_path, test_stac, expected): +def test_metadata_from_stac_bands(tmp_path, test_stac, expected): path = tmp_path / "stac.json" path.write_text(json.dumps(test_stac)) - metadata = metadata_from_stac(path) + metadata = metadata_from_stac(str(path)) assert metadata.band_names == expected -@pytest.mark.parametrize( - "test_stac_dims, expected_dims", - [ - ( - { - "type": "Collection", - "id": "test-collection", - "stac_version": "1.0.0", - "description": "Test collection", - "links": [], - "title": "Test Collection", - "extent": { - "spatial": {"bbox": [[-180.0, -90.0, 180.0, 90.0]]}, - "temporal": {"interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]]}, - }, - "cube:dimensions": { - "x": { - "axis": "x", - "type": "spatial", - "extent": [180.0,-180.0], - "reference_system": 4326 - }, - "y": { - "axis": "y", - "type": "spatial", - "extent": [-90.0,90.0], - "reference_system": 4326 - }, - "bands": { - "type": "bands", - "values": ["B01","B02",] - }, - "time": { - "type": "temporal", - "extent": ["2020-01-01T00:00:00Z","2020-01-10T00:00:00Z"] - } - }, - "license": "proprietary", - "summaries": {"eo:bands": [{"name": "B01"}, {"name": "B02"}]}, - "stac_extensions": ["https://stac-extensions.github.io/datacube/v2.2.0/schema.json"], - }, - ["bands","time"], - ) - ] -) - -def test_metadata_from_stac_dim_names(tmp_path, test_stac_dims, expected_dims): - path = tmp_path / "stac.json" - path.write_text(json.dumps(test_stac_dims)) - metadata = metadata_from_stac(path) - assert metadata.temporal_dimension.name == expected_dims[1] - assert metadata.band_dimension.name == expected_dims[0] - @pytest.mark.skipif(not _PYSTAC_1_9_EXTENSION_INTERFACE, reason="Requires PySTAC 1.9+ extension interface") @pytest.mark.parametrize("eo_extension_is_declared", [False, True]) @@ -915,7 +834,7 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat path = tmp_path / "stac.json" path.write_text(json.dumps(stac_data)) - metadata = metadata_from_stac(path) + metadata = metadata_from_stac(str(path)) assert sorted(metadata.band_names) == [ "2m_temperature_max", "2m_temperature_min", @@ -928,3 +847,53 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat for m in caplog.messages ) assert warn_count == (0 if eo_extension_is_declared else 1) + + +@pytest.mark.parametrize( + ["stac_dict", "expected"], + [ + ( + StacDummyBuilder.item(), + None, + ), + ( + StacDummyBuilder.item(cube_dimensions={"t": {"type": "temporal", "extent": ["2024-04-04", "2024-06-06"]}}), + ("t", ["2024-04-04", "2024-06-06"]), + ), + ( + StacDummyBuilder.item( + cube_dimensions={"datezz": {"type": "temporal", "extent": ["2024-04-04", "2024-06-06"]}} + ), + ("datezz", ["2024-04-04", "2024-06-06"]), + ), + ( + StacDummyBuilder.collection(), + None, + ), + ( + StacDummyBuilder.collection( + cube_dimensions={"t": {"type": "temporal", "extent": ["2024-04-04", "2024-06-06"]}} + ), + ("t", ["2024-04-04", "2024-06-06"]), + ), + ( + StacDummyBuilder.catalog(), + None, + ), + ( + # Note: a catalog is not supposed to have datacube extension enabled, but we should not choke on that + StacDummyBuilder.catalog(stac_extensions=[StacDummyBuilder._EXT_DATACUBE]), + None, + ), + ], +) +def test_metadata_from_stac_temporal_dimension(tmp_path, stac_dict, expected): + path = tmp_path / "stac.json" + path.write_text(json.dumps(stac_dict)) + metadata = metadata_from_stac(str(path)) + if expected: + dim = metadata.temporal_dimension + assert isinstance(dim, TemporalDimension) + assert (dim.name, dim.extent) == expected + else: + assert not metadata.has_temporal_dimension() diff --git a/tests/testing/test_stac.py b/tests/testing/test_stac.py new file mode 100644 index 000000000..c283b7c04 --- /dev/null +++ b/tests/testing/test_stac.py @@ -0,0 +1,83 @@ +import pystac + +from openeo.testing.stac import StacDummyBuilder + + +class TestDummyStacDictBuilder: + def test_item_default(self): + item = StacDummyBuilder.item() + assert item == { + "type": "Feature", + "stac_version": "1.0.0", + "id": "item123", + "geometry": None, + "properties": {"datetime": "2024-03-08"}, + "links": [], + "assets": {}, + } + # Check if the default item validates + pystac.Item.from_dict(item) + + def test_item_cube_dimensions(self): + assert StacDummyBuilder.item( + cube_dimensions={"t": {"type": "temporal", "extent": ["2024-01-01", "2024-04-04"]}} + ) == { + "type": "Feature", + "stac_version": "1.0.0", + "stac_extensions": ["https://stac-extensions.github.io/datacube/v2.2.0/schema.json"], + "id": "item123", + "geometry": None, + "properties": { + "cube:dimensions": {"t": {"extent": ["2024-01-01", "2024-04-04"], "type": "temporal"}}, + "datetime": "2024-03-08", + }, + "links": [], + "assets": {}, + } + + def test_collection_default(self): + collection = StacDummyBuilder.collection() + assert collection == { + "type": "Collection", + "stac_version": "1.0.0", + "id": "collection123", + "description": "Collection 123", + "license": "proprietary", + "extent": { + "spatial": {"bbox": [[3, 4, 5, 6]]}, + "temporal": {"interval": [["2024-01-01", "2024-05-05"]]}, + }, + "links": [], + } + # Check if the default collection validates + pystac.Collection.from_dict(collection) + + def test_collection_cube_dimensions(self): + assert StacDummyBuilder.collection( + cube_dimensions={"t": {"type": "temporal", "extent": ["2024-01-01", "2024-04-04"]}} + ) == { + "type": "Collection", + "stac_version": "1.0.0", + "stac_extensions": ["https://stac-extensions.github.io/datacube/v2.2.0/schema.json"], + "id": "collection123", + "description": "Collection 123", + "license": "proprietary", + "extent": { + "spatial": {"bbox": [[3, 4, 5, 6]]}, + "temporal": {"interval": [["2024-01-01", "2024-05-05"]]}, + }, + "cube:dimensions": {"t": {"extent": ["2024-01-01", "2024-04-04"], "type": "temporal"}}, + "links": [], + } + + def test_catalog_default(self): + catalog = StacDummyBuilder.catalog() + assert catalog == { + "type": "Catalog", + "stac_version": "1.0.0", + "id": "catalog123", + "description": "Catalog 123", + "links": [], + } + # Check if the default catalog validates + pystac.Catalog.from_dict(catalog)