Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] remove duplicate STAC Item definitions #32

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def add_cmip6_prefix(name: str) -> str:
return "cmip6:" + name if "datetime" not in name else name


class CMIP6ItemProperties(STACItemProperties, validate_assignment=True):
class CMIP6ItemProperties(STACItemProperties, validate_assignment=True, extra="ignore"):
"""Data model for CMIP6 Controlled Vocabulary."""

Conventions: str
Expand Down
91 changes: 19 additions & 72 deletions STACpopulator/models.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@
import datetime as dt
from typing import Any, Dict, List, Literal, Optional, Union
from datetime import datetime as datetime_type
from typing import List, Literal, Optional

from pydantic import (
AnyHttpUrl,
AnyUrl,
BaseModel,
Field,
SerializeAsAny,
field_validator,
)
from pydantic import BaseModel, Field


class Geometry(BaseModel):
Expand Down Expand Up @@ -36,71 +29,25 @@ class GeoJSONMultiPolygon(Geometry):
coordinates: List[List[List[List[float]]]]


class Asset(BaseModel):
href: AnyHttpUrl
media_type: Optional[str] = None
title: Optional[str] = None
description: Optional[str] = None
roles: Optional[List[str]] = None


class STACItemProperties(BaseModel):
"""Base STAC Item properties data model. In concrete implementations, users would want to define a new
data model that inherits from this base model and extends it with properties tailored to the data they are
ingesting."""

start_datetime: Optional[dt.datetime] = None
end_datetime: Optional[dt.datetime] = None
datetime: Optional[dt.datetime] = None

@field_validator("datetime", mode="before")
@classmethod
def validate_datetime(cls, v: Union[dt.datetime, str], values: Dict[str, Any]) -> dt:
if v == "null":
if not values["start_datetime"] and not values["end_datetime"]:
raise ValueError("start_datetime and end_datetime must be specified when datetime is null")


# class Link(BaseModel):
# """
# https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#link-object
# """

# href: str = Field(..., alias="href", min_length=1)
# rel: str = Field(..., alias="rel", min_length=1)
# type: Optional[str] = None
# title: Optional[str] = None
# # Label extension
# label: Optional[str] = Field(None, alias="label:assets")
# model_config = ConfigDict(use_enum_values=True)

# def resolve(self, base_url: str) -> None:
# """resolve a link to the given base URL"""
# self.href = urljoin(base_url, self.href)


# class PaginationLink(Link):
# """
# https://github.com/radiantearth/stac-api-spec/blob/master/api-spec.md#paging-extension
# """

# rel: Literal["next", "previous"]
# method: Literal["GET", "POST"]
# body: Optional[Dict[Any, Any]] = None
# merge: bool = False
"""
Base STAC Item properties data model.

In concrete implementations, users would want to define a new data model that inherits from this base model
and extends it with properties tailored to the data they are ingesting.
"""
start_datetime: Optional[datetime_type] = None
end_datetime: Optional[datetime_type] = None
datetime: Optional[datetime_type] = None

# Links = RootModel[List[Union[PaginationLink, Link]]]
def __setitem__(self, key, value):
setattr(self, key, value)

def __getitem__(self, item):
return getattr(self, item)

class STACItem(BaseModel):
"""STAC Item data model."""
def __delitem__(self, item):
return delattr(self, item)

id: str = Field(..., alias="id", min_length=1)
geometry: Optional[SerializeAsAny[Geometry]] = None
bbox: Optional[List[float]] = None
properties: Optional[SerializeAsAny[STACItemProperties]] = None
assets: Dict[str, Asset] = None
stac_extensions: Optional[List[AnyUrl]] = []
collection: Optional[str] = None
datetime: Optional[dt.datetime] = None # Not in the spec, but needed by pystac.Item.
def __contains__(self, item):
return hasattr(self, item)
35 changes: 19 additions & 16 deletions STACpopulator/stac_utils.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import datetime
import json
import logging
import os
import re
import sys
from dateutil import parser as dt_parser
from typing import Any, Literal, MutableMapping

import numpy as np
import pystac
import yaml
from colorlog import ColoredFormatter
from pystac.validation import validate as pystac_validate

from STACpopulator.models import STACItem
from STACpopulator.models import Geometry, STACItemProperties

LOGGER = logging.getLogger(__name__)
LOG_FORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s"
Expand Down Expand Up @@ -76,7 +77,7 @@ def load_collection_configuration() -> MutableMapping[str, Any]:

def collection2literal(collection, property="label"):
terms = tuple(getattr(term, property) for term in collection)
return Literal[terms]
return Literal[terms] # type: ignore


def ncattrs_to_geometry(attrs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
Expand Down Expand Up @@ -157,7 +158,12 @@ def magpie_resource_link(url: str) -> pystac.Link:
return link


def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_props_datamodel, item_geometry_model):
def STAC_item_from_metadata(
iid: str,
attrs: MutableMapping[str, Any],
item_props_data_model: STACItemProperties,
item_geometry_model: Geometry,
):
"""
Create STAC Item from CF JSON metadata.

Expand All @@ -167,7 +173,7 @@ def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_prop
Unique item ID.
attrs: dict
CF JSON metadata returned by `xncml.Dataset.to_cf_dict`.
item_props_datamodel : pydantic.BaseModel
item_props_data_model : pydantic.BaseModel
Data model describing the properties of the STAC item.
item_geometry_model : pydantic.BaseModel
Data model describing the geometry of the STAC item.
Expand All @@ -176,21 +182,18 @@ def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_prop
cfmeta = attrs["groups"]["CFMetadata"]["attributes"]

# Create pydantic STAC item
item = STACItem(
props = item_props_data_model(**attrs["attributes"])
geom = item_geometry_model(**ncattrs_to_geometry(attrs))
item = pystac.Item(
id=iid,
geometry=item_geometry_model(**ncattrs_to_geometry(attrs)),
geometry=json.loads(geom.model_dump_json(by_alias=True)),
bbox=ncattrs_to_bbox(attrs),
properties=item_props_datamodel(
start_datetime=cfmeta["time_coverage_start"],
end_datetime=cfmeta["time_coverage_end"],
**attrs["attributes"],
),
properties=json.loads(props.model_dump_json(by_alias=True)),
datetime=None,
start_datetime=dt_parser.parse(cfmeta["time_coverage_start"]),
end_datetime=dt_parser.parse(cfmeta["time_coverage_end"]),
)

# Convert pydantic STAC item to a PySTAC Item
item = pystac.Item(**json.loads(item.model_dump_json(by_alias=True)))

root = attrs["access_urls"]

for name, url in root.items():
Expand All @@ -200,7 +203,7 @@ def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_prop
item.add_asset(name, asset)

item.add_link(magpie_resource_link(root["HTTPServer"]))

item.validate()
return item


Expand Down
Loading