Skip to content

Commit a963540

Browse files
Issue #527 add basic band metadata to cubes loaded with load_stac (PR #548)
1 parent 75e1626 commit a963540

File tree

4 files changed

+130
-1
lines changed

4 files changed

+130
-1
lines changed

openeo/metadata.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import logging
4+
import pystac
45
import warnings
56
from typing import Any, Callable, List, NamedTuple, Optional, Tuple, Union
67

@@ -522,3 +523,63 @@ def _repr_html_(self):
522523
def __str__(self) -> str:
523524
bands = self.band_names if self.has_band_dimension() else "no bands dimension"
524525
return f"CollectionMetadata({self.extent} - {bands} - {self.dimension_names()})"
526+
527+
528+
def metadata_from_stac(url: str) -> CubeMetadata:
    """
    Reads the band metadata from a static STAC catalog or a STAC API Collection
    and returns it as a :py:class:`CubeMetadata`.

    Bands are looked up under the ``eo:bands`` key, depending on the kind of STAC object:

    - STAC Item: the item properties, or else the summaries of its parent collection (if any).
    - STAC Collection: the collection summaries, extended with bands declared on
      the assets of the collection's items.
    - STAC Catalog: the (non-standard) ``summaries`` extra field, if present.

    :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection
    :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
    :raises ValueError: if the loaded STAC object is not an Item, Collection or Catalog.
    """

    def get_band_metadata(eo_bands_location: dict) -> List[Band]:
        # Translate the "eo:bands" listing (if any) of the given mapping to `Band` objects.
        return [
            Band(name=band["name"], common_name=band.get("common_name"), wavelength_um=band.get("center_wavelength"))
            for band in eo_bands_location.get("eo:bands", [])
        ]

    def is_band_asset(asset: pystac.Asset) -> bool:
        return "eo:bands" in asset.extra_fields

    stac_object = pystac.read_file(href=url)

    if isinstance(stac_object, pystac.Item):
        item = stac_object
        if "eo:bands" in item.properties:
            eo_bands_location = item.properties
        else:
            # Look up the parent collection once instead of calling `get_collection()` repeatedly.
            parent_collection = item.get_collection()
            eo_bands_location = parent_collection.summaries.lists if parent_collection is not None else {}
        bands = get_band_metadata(eo_bands_location)

    elif isinstance(stac_object, pystac.Collection):
        # Note: this check must come before the `pystac.Catalog` one below.
        collection = stac_object
        bands = get_band_metadata(collection.summaries.lists)

        # Summaries is not a required field in a STAC collection, so also check the assets.
        # Track the band names seen so far in a set, to skip duplicates without
        # rebuilding the full list of names for every asset band.
        seen_names = {band.name for band in bands}
        for itm in collection.get_items():
            for asset in itm.get_assets().values():
                if not is_band_asset(asset):
                    continue
                for asset_band in get_band_metadata(asset.extra_fields):
                    if asset_band.name not in seen_names:
                        seen_names.add(asset_band.name)
                        bands.append(asset_band)

    elif isinstance(stac_object, pystac.Catalog):
        catalog = stac_object
        bands = get_band_metadata(catalog.extra_fields.get("summaries", {}))

    else:
        # Explicit failure instead of an `assert`, which is stripped when running with `-O`.
        raise ValueError(f"Unsupported STAC object type {type(stac_object).__name__!r} loaded from {url!r}")

    band_dimension = BandDimension(name="bands", bands=bands)
    return CubeMetadata(dimensions=[band_dimension])

openeo/rest/connection.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,14 @@
2727
from openeo.internal.jupyter import VisualDict, VisualList
2828
from openeo.internal.processes.builder import ProcessBuilderBase
2929
from openeo.internal.warnings import deprecated, legacy_alias
30-
from openeo.metadata import Band, BandDimension, CollectionMetadata, SpatialDimension, TemporalDimension
30+
from openeo.metadata import (
31+
Band,
32+
BandDimension,
33+
CollectionMetadata,
34+
SpatialDimension,
35+
TemporalDimension,
36+
metadata_from_stac,
37+
)
3138
from openeo.rest import (
3239
CapabilitiesException,
3340
OpenEoApiError,
@@ -1361,6 +1368,10 @@ def load_stac(
13611368
prop: build_child_callback(pred, parent_parameters=["value"]) for prop, pred in properties.items()
13621369
}
13631370
cube = self.datacube_from_process(process_id="load_stac", **arguments)
1371+
try:
1372+
cube.metadata = metadata_from_stac(url)
1373+
except Exception:
1374+
_log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True)
13641375
return cube
13651376

13661377
def load_ml_model(self, id: Union[str, BatchJob]) -> MlModel:

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
"numpy>=1.17.0",
7474
"xarray>=0.12.3",
7575
"pandas>0.20.0",
76+
"pystac",
7677
"deprecated>=1.2.12",
7778
'oschmod>=0.3.12; sys_platform == "win32"',
7879
"importlib_resources; python_version<'3.9'",

tests/test_metadata.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from typing import List
44

5+
import json
56
import pytest
67

78
from openeo.metadata import (
@@ -14,6 +15,7 @@
1415
MetadataException,
1516
SpatialDimension,
1617
TemporalDimension,
18+
metadata_from_stac,
1719
)
1820

1921

@@ -782,3 +784,57 @@ def filter_bbox(self, bbox):
782784
assert isinstance(new, MyCubeMetadata)
783785
assert orig.bbox is None
784786
assert new.bbox == (1, 2, 3, 4)
787+
788+
789+
@pytest.mark.parametrize(
    "test_stac, expected",
    [
        # STAC Collection: bands listed in the collection summaries
        (
            {
                "type": "Collection",
                "id": "test-collection",
                "stac_version": "1.0.0",
                "description": "Test collection",
                "links": [],
                "title": "Test Collection",
                "extent": {
                    "spatial": {
                        "bbox": [[-180.0, -90.0, 180.0, 90.0]],
                    },
                    "temporal": {
                        "interval": [["2020-01-01T00:00:00Z", "2020-01-10T00:00:00Z"]],
                    },
                },
                "license": "proprietary",
                "summaries": {
                    "eo:bands": [
                        {"name": "B01"},
                        {"name": "B02"},
                    ],
                },
            },
            ["B01", "B02"],
        ),
        # STAC Catalog without any band information
        (
            {
                "type": "Catalog",
                "id": "test-catalog",
                "stac_version": "1.0.0",
                "description": "Test Catalog",
                "links": [],
            },
            [],
        ),
        # STAC Item: bands listed in the item properties
        (
            {
                "type": "Feature",
                "stac_version": "1.0.0",
                "id": "test-item",
                "properties": {
                    "datetime": "2020-05-22T00:00:00Z",
                    "eo:bands": [
                        {"name": "SCL"},
                        {"name": "B08"},
                    ],
                },
                "geometry": {
                    "coordinates": [[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]]],
                    "type": "Polygon",
                },
                "links": [],
                "assets": {},
                "bbox": [0, 1, 0, 1],
                "stac_extensions": [],
            },
            ["SCL", "B08"],
        ),
    ],
)
def test_metadata_from_stac(tmp_path, test_stac, expected):
    """Band names extracted from a STAC document on disk match the expected listing."""
    # Dump the STAC document to a temporary JSON file and load it back through `metadata_from_stac`.
    stac_file = tmp_path / "stac.json"
    with stac_file.open("w") as f:
        json.dump(test_stac, f)

    cube_metadata = metadata_from_stac(stac_file)

    assert cube_metadata.band_names == expected

0 commit comments

Comments
 (0)