Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arch finalization proposal #25

Merged
merged 36 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
71ce3e0
adding numpy types to python types conversion for metadata
dchandan Oct 5, 2023
350b4f4
removing collection2enum
dchandan Oct 5, 2023
2a445f0
black
dchandan Oct 5, 2023
2728ce6
extracting pydantic base models to models.py
dchandan Oct 5, 2023
b47d613
removing cmip6 extension code
dchandan Oct 5, 2023
2f5dc39
Breaking CFJsonItem part 1: extracting STAC item creation
dchandan Oct 6, 2023
3f821ce
Breaking CFJsonItem part 2: extracting datacube extension code
dchandan Oct 6, 2023
3c584cc
updating geometry structure
dchandan Oct 12, 2023
b7a7ed9
moving np datatype conversion to a separate function
dchandan Oct 12, 2023
48598ae
modifications to datacube extension helper functions as per Francis's…
dchandan Oct 12, 2023
94eb521
code cleanup
dchandan Oct 12, 2023
a64a226
change how prefix is applied
dchandan Oct 12, 2023
f22c1a2
PR changes
dchandan Oct 13, 2023
efd9230
fixing output media type and roles output for assets
dchandan Oct 17, 2023
3e88591
adding magpie resource link
dchandan Oct 17, 2023
8d66fba
adding collection resource link for Magpie
dchandan Oct 18, 2023
00a968a
posting items fixes
dchandan Oct 19, 2023
2c3b49d
removing function no longer in use
dchandan Oct 19, 2023
6908d55
implemented updating stac collection and items
dchandan Oct 19, 2023
0c959ea
removing need to pass yml file to app on command line
dchandan Oct 19, 2023
73b2773
code cleanup
dchandan Oct 19, 2023
9e919c2
adding __init__ files
dchandan Oct 19, 2023
c62fb80
fix
dchandan Oct 19, 2023
10db128
more fixes
dchandan Oct 19, 2023
25985db
diagnostics
dchandan Oct 23, 2023
6d675bc
removing unused code
dchandan Oct 23, 2023
65bd5bb
refactoring to allow more flexibility
dchandan Oct 23, 2023
f540dbe
fix datacube extension
dchandan Oct 26, 2023
323c945
pr changes
dchandan Oct 27, 2023
0581c61
reverting to old way to read thredds access links
dchandan Oct 27, 2023
37a26e1
adding ability to get single file from THREDDS loader
dchandan Nov 8, 2023
e55591d
making make_cmip6_item_id a staticmethod
dchandan Nov 8, 2023
f1e28db
wrapping call to make STAC item with a try-except block
dchandan Nov 8, 2023
8bb21e1
fixing commit e55591dd0b7f7db6cd4ee7256512d5693d282145
dchandan Nov 8, 2023
3055afc
more fixes to previous commits
dchandan Nov 8, 2023
3f1d284
making tracking_id optional in CMIP6ItemProperties
dchandan Nov 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
IMP_DIR = /Users/dchandan/DACCS/Codes/stac-populator/implementations
IMP_DIR = STACpopulator/implementations
STAC_HOST = http://localhost:8880/stac

testcmip6:
Expand Down
Empty file.
179 changes: 0 additions & 179 deletions STACpopulator/extensions/cmip6.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import argparse
import json
import logging
import hashlib
from datetime import datetime
from typing import Any, Dict, List, Literal, MutableMapping
from colorlog import ColoredFormatter
import argparse

import pyessv
from pydantic import AnyHttpUrl, BaseModel, Field, FieldValidationInfo, field_validator
from pystac.extensions.datacube import DatacubeExtension
from colorlog import ColoredFormatter
from extensions import DataCubeHelper
from pydantic import AnyHttpUrl, ConfigDict, Field, FieldValidationInfo, field_validator

from STACpopulator import STACpopulatorBase
from STACpopulator.extensions import cmip6
from STACpopulator.input import THREDDSLoader
from STACpopulator.stac_utils import ItemProperties
from STACpopulator.stac_utils import collection2literal, CFJsonItem

from STACpopulator.models import GeoJSONPolygon, STACItemProperties
from STACpopulator.stac_utils import STAC_item_from_metadata, collection2literal

LOGGER = logging.getLogger(__name__)
LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s"
Expand All @@ -33,54 +32,58 @@
Frequency = collection2literal(CV.frequency)
GridLabel = collection2literal(CV.grid_label)
InstitutionID = collection2literal(CV.institution_id)
# Member = collection2literal(CV.member_id) # This is empty
NominalResolution = collection2literal(CV.nominal_resolution)
Realm = collection2literal(CV.realm)
SourceID = collection2literal(CV.source_id)
SourceType = collection2literal(CV.source_type)
SubExperimentID = collection2literal(CV.sub_experiment_id)
TableID = collection2literal(CV.table_id)
# Variable = collection2literal(CV.variable_id) # This is empty


class Properties(ItemProperties, validate_assignment=True):
def add_cmip6_prefix(name: str) -> str:
return "cmip6:" + name if "datetime" not in name else name


class CMIP6ItemProperties(STACItemProperties, validate_assignment=True):
"""Data model for CMIP6 Controlled Vocabulary."""

Conventions: str = Field(..., serialization_alias="cmip6:Conventions")
activity_id: ActivityID = Field(..., serialization_alias="cmip6:activity_id")
creation_date: datetime = Field(..., serialization_alias="cmip6:creation_date")
data_specs_version: str = Field(..., serialization_alias="cmip6:data_specs_version")
experiment: str = Field(..., serialization_alias="cmip6:experiment")
experiment_id: ExperimentID = Field(..., serialization_alias="cmip6:experiment_id")
frequency: Frequency = Field(..., serialization_alias="cmip6:frequency")
further_info_url: AnyHttpUrl = Field(..., serialization_alias="cmip6:further_info_url")
grid_label: GridLabel = Field(..., serialization_alias="cmip6:grid_label")
institution: str = Field(..., serialization_alias="cmip6:institution")
institution_id: InstitutionID = Field(..., serialization_alias="cmip6:institution_id")
nominal_resolution: NominalResolution = Field(..., serialization_alias="cmip6:nominal_resolution")
realm: List[Realm] = Field(..., serialization_alias="cmip6:realm")
source: str = Field(..., serialization_alias="cmip6:source")
source_id: SourceID = Field(..., serialization_alias="cmip6:source_id")
source_type: List[SourceType] = Field(..., serialization_alias="cmip6:source_type")
sub_experiment: str | Literal["none"] = Field(..., serialization_alias="cmip6:sub_experiment")
sub_experiment_id: SubExperimentID | Literal["none"] = Field(..., serialization_alias="cmip6:sub_experiment_id")
table_id: TableID = Field(..., serialization_alias="cmip6:table_id")
variable_id: str = Field(..., serialization_alias="cmip6:variable_id")
variant_label: str = Field(..., serialization_alias="cmip6:variant_label")
initialization_index: int = Field(..., serialization_alias="cmip6:initialization_index")
physics_index: int = Field(..., serialization_alias="cmip6:physics_index")
realization_index: int = Field(..., serialization_alias="cmip6:realization_index")
forcing_index: int = Field(..., serialization_alias="cmip6:forcing_index")
tracking_id: str = Field(..., serialization_alias="cmip6:tracking_id")
version: str = Field("", serialization_alias="cmip6:version")
product: str = Field(..., serialization_alias="cmip6:product")
license: str = Field(..., serialization_alias="cmip6:license")
grid: str = Field(..., serialization_alias="cmip6:grid")
mip_era: str = Field(..., serialization_alias="cmip6:mip_era")
Conventions: str
activity_id: ActivityID
creation_date: datetime
data_specs_version: str
experiment: str
experiment_id: ExperimentID
frequency: Frequency
further_info_url: AnyHttpUrl
grid_label: GridLabel
institution: str
institution_id: InstitutionID
nominal_resolution: NominalResolution
realm: List[Realm]
source: str
source_id: SourceID
source_type: List[SourceType]
sub_experiment: str | Literal["none"]
sub_experiment_id: SubExperimentID | Literal["none"]
table_id: TableID
variable_id: str
variant_label: str
initialization_index: int
physics_index: int
realization_index: int
forcing_index: int
tracking_id: str
version: str = Field("")
product: str
license: str
grid: str
mip_era: str

model_config = ConfigDict(alias_generator=add_cmip6_prefix, populate_by_name=True)

@field_validator("initialization_index", "physics_index", "realization_index", "forcing_index", mode="before")
@classmethod
def first_item(cls, v: list, info: FieldValidationInfo):
def only_item(cls, v: list[int], info: FieldValidationInfo):
"""Pick single item from list."""
assert len(v) == 1, f"{info.field_name} must have one item only."
return v[0]
Expand Down Expand Up @@ -113,10 +116,12 @@ def make_cmip6_item_id(attrs: MutableMapping[str, Any]) -> str:
]
name = "_".join(attrs[k] for k in keys)
return name
return hashlib.md5(name.encode("utf-8")).hexdigest()


class CMIP6populator(STACpopulatorBase):
item_properties_model = CMIP6ItemProperties
item_geometry_model = GeoJSONPolygon

def __init__(self, stac_host: str, thredds_catalog_url: str, config_filename: str) -> None:
"""Constructor

Expand All @@ -129,7 +134,6 @@ def __init__(self, stac_host: str, thredds_catalog_url: str, config_filename: st
"""

data_loader = THREDDSLoader(thredds_catalog_url)
self.props_model = Properties
super().__init__(stac_host, data_loader, config_filename)

def handle_ingestion_error(self, error: str, item_name: str, item_data: MutableMapping[str, Any]):
Expand All @@ -147,23 +151,18 @@ def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any])
"""
iid = make_cmip6_item_id(item_data["attributes"])

obj = CFJsonItem(iid, item_data, self.props_model)

# Add CMIP6 extension
try:
cmip6_ext = cmip6.CMIP6Extension.ext(obj.item, add_if_missing=True)
cmip6_ext.apply(item_data["attributes"])
except:
LOGGER.warning(f"Failed to add CMIP6 extension to item {item_name}")
item = STAC_item_from_metadata(iid, item_data, self.item_properties_model, self.item_geometry_model)

# Add datacube extension
try:
dc_ext = DatacubeExtension.ext(obj.item, add_if_missing=True)
dc_ext.apply(dimensions=obj.dimensions(), variables=obj.variables())
dchelper = DataCubeHelper(item_data)
dc_ext = DatacubeExtension.ext(item, add_if_missing=True)
dc_ext.apply(dimensions=dchelper.dimensions(), variables=dchelper.variables())
except:
LOGGER.warning(f"Failed to add Datacube extension to item {item_name}")

return obj.item.to_dict()
# return json.dumps(item.to_dict())
print(json.dumps(item.to_dict()))

def validate_stac_item_cv(self, data: MutableMapping[str, Any]) -> bool:
# Validation is done at the item creating stage, using the Properties class.
Expand Down
Loading