From 0c779418b4b666a6611334b96a2ac794b38bdadf Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 10 Nov 2023 14:05:38 -0500 Subject: [PATCH 1/2] [wip] remove duplicate STAC Item definitions --- .../implementations/CMIP6_UofT/add_CMIP6.py | 2 +- STACpopulator/models.py | 90 ++++--------------- STACpopulator/stac_utils.py | 23 +++-- 3 files changed, 32 insertions(+), 83 deletions(-) diff --git a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py index ae86892..6d0e049 100644 --- a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py +++ b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py @@ -46,7 +46,7 @@ def add_cmip6_prefix(name: str) -> str: return "cmip6:" + name if "datetime" not in name else name -class CMIP6ItemProperties(STACItemProperties, validate_assignment=True): +class CMIP6ItemProperties(STACItemProperties, validate_assignment=True, extra="ignore"): """Data model for CMIP6 Controlled Vocabulary.""" Conventions: str diff --git a/STACpopulator/models.py b/STACpopulator/models.py index f91dab5..65cfa39 100644 --- a/STACpopulator/models.py +++ b/STACpopulator/models.py @@ -1,14 +1,7 @@ -import datetime as dt -from typing import Any, Dict, List, Literal, Optional, Union +import datetime +from typing import List, Literal, Optional -from pydantic import ( - AnyHttpUrl, - AnyUrl, - BaseModel, - Field, - SerializeAsAny, - field_validator, -) +from pydantic import BaseModel, Field class Geometry(BaseModel): @@ -36,71 +29,22 @@ class GeoJSONMultiPolygon(Geometry): coordinates: List[List[List[List[float]]]] -class Asset(BaseModel): - href: AnyHttpUrl - media_type: Optional[str] = None - title: Optional[str] = None - description: Optional[str] = None - roles: Optional[List[str]] = None - - class STACItemProperties(BaseModel): - """Base STAC Item properties data model. In concrete implementations, users would want to define a new - data model that inherits from this base model and extends it with properties tailored to the data they are - ingesting.""" - - start_datetime: Optional[dt.datetime] = None - end_datetime: Optional[dt.datetime] = None - datetime: Optional[dt.datetime] = None - - @field_validator("datetime", mode="before") - @classmethod - def validate_datetime(cls, v: Union[dt.datetime, str], values: Dict[str, Any]) -> dt: - if v == "null": - if not values["start_datetime"] and not values["end_datetime"]: - raise ValueError("start_datetime and end_datetime must be specified when datetime is null") - - -# class Link(BaseModel): -# """ -# https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#link-object -# """ - -# href: str = Field(..., alias="href", min_length=1) -# rel: str = Field(..., alias="rel", min_length=1) -# type: Optional[str] = None -# title: Optional[str] = None -# # Label extension -# label: Optional[str] = Field(None, alias="label:assets") -# model_config = ConfigDict(use_enum_values=True) - -# def resolve(self, base_url: str) -> None: -# """resolve a link to the given base URL""" -# self.href = urljoin(base_url, self.href) - - -# class PaginationLink(Link): -# """ -# https://github.com/radiantearth/stac-api-spec/blob/master/api-spec.md#paging-extension -# """ - -# rel: Literal["next", "previous"] -# method: Literal["GET", "POST"] -# body: Optional[Dict[Any, Any]] = None -# merge: bool = False - + """ + Base STAC Item properties data model. -# Links = RootModel[List[Union[PaginationLink, Link]]] + In concrete implementations, users would want to define a new data model that inherits from this base model + and extends it with properties tailored to the data they are ingesting. + """ + start_datetime: Optional[datetime.datetime] = None + end_datetime: Optional[datetime.datetime] = None + datetime_: Optional[datetime.datetime] = Field(None, alias="datetime") + def __setitem__(self, key, value): + setattr(self, key, value) -class STACItem(BaseModel): - """STAC Item data model.""" + def __getitem__(self, item): + return getattr(self, item) - id: str = Field(..., alias="id", min_length=1) - geometry: Optional[SerializeAsAny[Geometry]] = None - bbox: Optional[List[float]] = None - properties: Optional[SerializeAsAny[STACItemProperties]] = None - assets: Dict[str, Asset] = None - stac_extensions: Optional[List[AnyUrl]] = [] - collection: Optional[str] = None - datetime: Optional[dt.datetime] = None # Not in the spec, but needed by pystac.Item. + def __delitem__(self, item): + return delattr(self, item) diff --git a/STACpopulator/stac_utils.py b/STACpopulator/stac_utils.py index c8c8aaa..21fbe66 100644 --- a/STACpopulator/stac_utils.py +++ b/STACpopulator/stac_utils.py @@ -1,9 +1,9 @@ -import datetime import json import logging import os import re import sys +from dateutil import parser as dt_parser from typing import Any, Literal, MutableMapping import numpy as np @@ -11,7 +11,7 @@ import yaml from colorlog import ColoredFormatter -from STACpopulator.models import STACItem +from STACpopulator.models import Geometry, STACItemProperties LOGGER = logging.getLogger(__name__) LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" @@ -76,7 +76,7 @@ def load_collection_configuration() -> MutableMapping[str, Any]: def collection2literal(collection, property="label"): terms = tuple(getattr(term, property) for term in collection) - return Literal[terms] + return Literal[terms] # type: ignore def ncattrs_to_geometry(attrs: MutableMapping[str, Any]) -> MutableMapping[str, Any]: @@ -157,7 +157,12 @@ def magpie_resource_link(url: str) -> pystac.Link: return link -def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_props_datamodel, item_geometry_model): +def STAC_item_from_metadata( + iid: str, + attrs: MutableMapping[str, Any], + item_props_data_model: STACItemProperties, + item_geometry_model: Geometry, +): """ Create STAC Item from CF JSON metadata. @@ -167,7 +172,7 @@ def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_prop Unique item ID. attrs: dict CF JSON metadata returned by `xncml.Dataset.to_cf_dict`. - item_props_datamodel : pydantic.BaseModel + item_props_data_model : pydantic.BaseModel Data model describing the properties of the STAC item. item_geometry_model : pydantic.BaseModel Data model describing the geometry of the STAC item. @@ -176,16 +181,16 @@ def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_prop cfmeta = attrs["groups"]["CFMetadata"]["attributes"] # Create pydantic STAC item - item = STACItem( + item = pystac.Item( id=iid, geometry=item_geometry_model(**ncattrs_to_geometry(attrs)), bbox=ncattrs_to_bbox(attrs), - properties=item_props_datamodel( - start_datetime=cfmeta["time_coverage_start"], - end_datetime=cfmeta["time_coverage_end"], + properties=item_props_data_model( **attrs["attributes"], ), datetime=None, + start_datetime=dt_parser.parse(cfmeta["time_coverage_start"]), + end_datetime=dt_parser.parse(cfmeta["time_coverage_end"]), ) # Convert pydantic STAC item to a PySTAC Item From 467fc5196da4fbdb8f3b90b1d41f621a4e61b821 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 10 Nov 2023 22:49:58 -0500 Subject: [PATCH 2/2] [wip] add STAC items self-validation procedure --- STACpopulator/models.py | 11 +++++++---- STACpopulator/stac_utils.py | 14 ++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/STACpopulator/models.py b/STACpopulator/models.py index 65cfa39..fd3d754 100644 --- a/STACpopulator/models.py +++ b/STACpopulator/models.py @@ -1,4 +1,4 @@ -import datetime +from datetime import datetime as datetime_type from typing import List, Literal, Optional from pydantic import BaseModel, Field @@ -36,9 +36,9 @@ class STACItemProperties(BaseModel): In concrete implementations, users would want to define a new data model that inherits from this base model and extends it with properties tailored to the data they are ingesting. """ - start_datetime: Optional[datetime.datetime] = None - end_datetime: Optional[datetime.datetime] = None - datetime_: Optional[datetime.datetime] = Field(None, alias="datetime") + start_datetime: Optional[datetime_type] = None + end_datetime: Optional[datetime_type] = None + datetime: Optional[datetime_type] = None def __setitem__(self, key, value): setattr(self, key, value) @@ -48,3 +48,6 @@ def __getitem__(self, item): def __delitem__(self, item): return delattr(self, item) + + def __contains__(self, item): + return hasattr(self, item) diff --git a/STACpopulator/stac_utils.py b/STACpopulator/stac_utils.py index 21fbe66..7e762cb 100644 --- a/STACpopulator/stac_utils.py +++ b/STACpopulator/stac_utils.py @@ -10,6 +10,7 @@ import pystac import yaml from colorlog import ColoredFormatter +from pystac.validation import validate as pystac_validate from STACpopulator.models import Geometry, STACItemProperties @@ -181,21 +182,18 @@ def STAC_item_from_metadata( cfmeta = attrs["groups"]["CFMetadata"]["attributes"] # Create pydantic STAC item + props = item_props_data_model(**attrs["attributes"]) + geom = item_geometry_model(**ncattrs_to_geometry(attrs)) item = pystac.Item( id=iid, - geometry=item_geometry_model(**ncattrs_to_geometry(attrs)), + geometry=json.loads(geom.model_dump_json(by_alias=True)), bbox=ncattrs_to_bbox(attrs), - properties=item_props_data_model( - **attrs["attributes"], - ), + properties=json.loads(props.model_dump_json(by_alias=True)), datetime=None, start_datetime=dt_parser.parse(cfmeta["time_coverage_start"]), end_datetime=dt_parser.parse(cfmeta["time_coverage_end"]), ) - # Convert pydantic STAC item to a PySTAC Item - item = pystac.Item(**json.loads(item.model_dump_json(by_alias=True))) - root = attrs["access_urls"] for name, url in root.items(): @@ -205,7 +203,7 @@ def STAC_item_from_metadata( item.add_asset(name, asset) item.add_link(magpie_resource_link(root["HTTPServer"])) - + item.validate() return item