Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arch finalization proposal #25

Merged
merged 36 commits into from
Nov 8, 2023
Merged
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
71ce3e0
adding numpy types to python types conversion for metadata
dchandan Oct 5, 2023
350b4f4
removing collection2enum
dchandan Oct 5, 2023
2a445f0
black
dchandan Oct 5, 2023
2728ce6
extracting pydantic base models to models.py
dchandan Oct 5, 2023
b47d613
removing cmip6 extension code
dchandan Oct 5, 2023
2f5dc39
Breaking CFJsonItem part 1: extracting STAC item creation
dchandan Oct 6, 2023
3f821ce
Breaking CFJsonItem part 2: extracting datacube extension code
dchandan Oct 6, 2023
3c584cc
updating geometry structure
dchandan Oct 12, 2023
b7a7ed9
moving np datatype conversion to a separate function
dchandan Oct 12, 2023
48598ae
modifications to datacube extension helper functions as per Francis's…
dchandan Oct 12, 2023
94eb521
code cleanup
dchandan Oct 12, 2023
a64a226
change how prefix is applied
dchandan Oct 12, 2023
f22c1a2
PR changes
dchandan Oct 13, 2023
efd9230
fixing output media type and roles output for assets
dchandan Oct 17, 2023
3e88591
adding magpie resource link
dchandan Oct 17, 2023
8d66fba
adding collection resource link for Magpie
dchandan Oct 18, 2023
00a968a
posting items fixes
dchandan Oct 19, 2023
2c3b49d
removing function no longer in use
dchandan Oct 19, 2023
6908d55
implemented updating stac collection and items
dchandan Oct 19, 2023
0c959ea
removing need to pass yml file to app on command line
dchandan Oct 19, 2023
73b2773
code cleanup
dchandan Oct 19, 2023
9e919c2
adding __init__ files
dchandan Oct 19, 2023
c62fb80
fix
dchandan Oct 19, 2023
10db128
more fixes
dchandan Oct 19, 2023
25985db
diagnostics
dchandan Oct 23, 2023
6d675bc
removing unused code
dchandan Oct 23, 2023
65bd5bb
refactoring to allow more flexibility
dchandan Oct 23, 2023
f540dbe
fix datacube extension
dchandan Oct 26, 2023
323c945
pr changes
dchandan Oct 27, 2023
0581c61
reverting to old way to read thredds access links
dchandan Oct 27, 2023
37a26e1
adding ability to get single file from THREDDS loader
dchandan Nov 8, 2023
e55591d
making make_cmip6_item_id a staticmethod
dchandan Nov 8, 2023
f1e28db
wrapping call to make STAC item with a try-except block
dchandan Nov 8, 2023
8bb21e1
fixing commit e55591dd0b7f7db6cd4ee7256512d5693d282145
dchandan Nov 8, 2023
3055afc
more fixes to previous commits
dchandan Nov 8, 2023
3f1d284
making tracking_id optional in CMIP6ItemProperties
dchandan Nov 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
black
dchandan committed Oct 5, 2023
commit 2a445f04b7f4c12dd50401baf8ce4502aa696aff
318 changes: 170 additions & 148 deletions STACpopulator/stac_utils.py
Original file line number Diff line number Diff line change
@@ -135,6 +135,7 @@ class Item(BaseModel):

class CFJsonItem:
"""Return STAC Item from CF JSON metadata, as provided by `xncml.Dataset.to_cf_dict`."""

axis = {"X": "x", "Y": "y", "Z": "z", "T": "t", "longitude": "x", "latitude": "y", "vertical": "z", "time": "t"}

def __init__(self, iid: str, attrs: dict, datamodel=None):
@@ -154,10 +155,11 @@ def __init__(self, iid: str, attrs: dict, datamodel=None):
cfmeta = attrs["groups"]["CFMetadata"]["attributes"]

# Global attributes
gattrs = {"start_datetime": cfmeta["time_coverage_start"],
"end_datetime": cfmeta["time_coverage_end"],
**attrs["attributes"],
}
gattrs = {
"start_datetime": cfmeta["time_coverage_start"],
"end_datetime": cfmeta["time_coverage_end"],
**attrs["attributes"],
}

# Validate using pydantic data model if given
datamodel = datamodel or dict
@@ -179,8 +181,8 @@ class MySTACItem(Item):
# Add assets
if "access_urls" in attrs:
root = attrs["access_urls"]
elif 'THREDDSMetadata' in attrs["groups"]:
root = attrs["groups"]['THREDDSMetadata']['groups']['services']['attributes']
elif "THREDDSMetadata" in attrs["groups"]:
root = attrs["groups"]["THREDDSMetadata"]["groups"]["services"]["attributes"]
else:
root = {}

@@ -244,49 +246,51 @@ def dimensions(self) -> dict:
bbox = self.obj.ncattrs_to_bbox()
for key, criteria in coordinate_criteria.items():
for criterion, expected in criteria.items():
if v['attributes'].get(criterion, None) in expected:
if v["attributes"].get(criterion, None) in expected:
axis = self.axis[key]
type_ = DimensionType.SPATIAL if axis in ['x', 'y', 'z'] else DimensionType.TEMPORAL
type_ = DimensionType.SPATIAL if axis in ["x", "y", "z"] else DimensionType.TEMPORAL

if v['type'] == 'int':
if v["type"] == "int":
extent = [0, int(length)]
else: # Not clear the logic is sound
if key == 'X':
if key == "X":
extent = bbox[0], bbox[2]
elif key == "Y":
extent = bbox[1], bbox[3]
else:
extent = None

dims[name] = Dimension(properties=dict(
axis = axis,
type = type_,
extent = extent,
description=v.get("description", v.get("long_name", criteria["standard_name"]))
dims[name] = Dimension(
properties=dict(
axis=axis,
type=type_,
extent=extent,
description=v.get("description", v.get("long_name", criteria["standard_name"])),
)
)

return dims

def variables(self)->dict:
def variables(self) -> dict:
"""Return Variable objects required for Datacube extension."""
variables = {}

for name, meta in self.attrs["variables"].items():
if name in self.attrs["dimensions"]:
continue

attrs = meta['attributes']
variables[name] = Variable(properties=dict(
attrs = meta["attributes"]
variables[name] = Variable(
properties=dict(
dimensions=meta["shape"],
type = VariableType.AUXILIARY.value if self.is_coordinate(attrs) else
VariableType.DATA.value,
type=VariableType.AUXILIARY.value if self.is_coordinate(attrs) else VariableType.DATA.value,
description=attrs.get("description", attrs.get("long_name")),
unit=attrs.get("units", None)
))
unit=attrs.get("units", None),
)
)
return variables

def is_coordinate(self, attrs: dict)-> bool:
def is_coordinate(self, attrs: dict) -> bool:
"""Return whether variable is a coordinate."""
for key, criteria in coordinate_criteria.items():
for criterion, expected in criteria.items():
@@ -297,128 +301,146 @@ def is_coordinate(self, attrs: dict)-> bool:

# From CF-Xarray
coordinate_criteria = {
'latitude': {'standard_name': ('latitude',),
'units': ('degree_north',
'degree_N',
'degreeN',
'degrees_north',
'degrees_N',
'degreesN'),
'_CoordinateAxisType': ('Lat',),
'long_name': ('latitude',)},
'longitude': {'standard_name': ('longitude',),
'units': ('degree_east',
'degree_E',
'degreeE',
'degrees_east',
'degrees_E',
'degreesE'),
'_CoordinateAxisType': ('Lon',),
'long_name': ('longitude',)},
'Z': {'standard_name': ('model_level_number',
'atmosphere_ln_pressure_coordinate',
'atmosphere_sigma_coordinate',
'atmosphere_hybrid_sigma_pressure_coordinate',
'atmosphere_hybrid_height_coordinate',
'atmosphere_sleve_coordinate',
'ocean_sigma_coordinate',
'ocean_s_coordinate',
'ocean_s_coordinate_g1',
'ocean_s_coordinate_g2',
'ocean_sigma_z_coordinate',
'ocean_double_sigma_coordinate'),
'_CoordinateAxisType': ('GeoZ', 'Height', 'Pressure'),
'axis': ('Z',),
'cartesian_axis': ('Z',),
'grads_dim': ('z',),
'long_name': ('model_level_number',
'atmosphere_ln_pressure_coordinate',
'atmosphere_sigma_coordinate',
'atmosphere_hybrid_sigma_pressure_coordinate',
'atmosphere_hybrid_height_coordinate',
'atmosphere_sleve_coordinate',
'ocean_sigma_coordinate',
'ocean_s_coordinate',
'ocean_s_coordinate_g1',
'ocean_s_coordinate_g2',
'ocean_sigma_z_coordinate',
'ocean_double_sigma_coordinate')},
'vertical': {'standard_name': ('air_pressure',
'height',
'depth',
'geopotential_height',
'altitude',
'height_above_geopotential_datum',
'height_above_reference_ellipsoid',
'height_above_mean_sea_level'),
'positive': ('up', 'down'),
'long_name': ('air_pressure',
'height',
'depth',
'geopotential_height',
'altitude',
'height_above_geopotential_datum',
'height_above_reference_ellipsoid',
'height_above_mean_sea_level')},
'X': {'standard_name': ('projection_x_coordinate',
'grid_longitude',
'projection_x_angular_coordinate'),
'_CoordinateAxisType': ('GeoX',),
'axis': ('X',),
'cartesian_axis': ('X',),
'grads_dim': ('x',),
'long_name': ('projection_x_coordinate',
'grid_longitude',
'projection_x_angular_coordinate',
'cell index along first dimension')},
'Y': {'standard_name': ('projection_y_coordinate',
'grid_latitude',
'projection_y_angular_coordinate'),
'_CoordinateAxisType': ('GeoY',),
'axis': ('Y',),
'cartesian_axis': ('Y',),
'grads_dim': ('y',),
'long_name': ('projection_y_coordinate',
'grid_latitude',
'projection_y_angular_coordinate',
'cell index along second dimension')},
'T': {'standard_name': ('time',),
'_CoordinateAxisType': ('Time',),
'axis': ('T',),
'cartesian_axis': ('T',),
'grads_dim': ('t',),
'long_name': ('time',)},
'time': {'standard_name': ('time',),
'_CoordinateAxisType': ('Time',),
'axis': ('T',),
'cartesian_axis': ('T',),
'grads_dim': ('t',),
'long_name': ('time',)}}


media_types = {"httpserver_service": "application/x-netcdf",
"opendap_service": pystac.MediaType.HTML,
"wcs_service": pystac.MediaType.XML,
"wms_service": pystac.MediaType.XML,
"nccs_service": "application/x-netcdf",
"HTTPServer": "application/x-netcdf",
"OPENDAP": pystac.MediaType.HTML,
"NCML": pystac.MediaType.XML,
"WCS": pystac.MediaType.XML,
"ISO": pystac.MediaType.XML,
"WMS": pystac.MediaType.XML,
"NetcdfSubset": "application/x-netcdf",
}

asset_roles = {"httpserver_service": ["data"],
"opendap_service": ["data"],
"wcs_service": ["data"],
"wms_service": ["visual"],
"nccs_service": ["data"],
"HTTPServer": ["data"],
"OPENDAP": ["data"],
"NCML": ["metadata"],
"WCS": ["data"],
"ISO": ["metadata"],
"WMS": ["visual"],
"NetcdfSubset": ["data"],}
"latitude": {
"standard_name": ("latitude",),
"units": ("degree_north", "degree_N", "degreeN", "degrees_north", "degrees_N", "degreesN"),
"_CoordinateAxisType": ("Lat",),
"long_name": ("latitude",),
},
"longitude": {
"standard_name": ("longitude",),
"units": ("degree_east", "degree_E", "degreeE", "degrees_east", "degrees_E", "degreesE"),
"_CoordinateAxisType": ("Lon",),
"long_name": ("longitude",),
},
"Z": {
"standard_name": (
"model_level_number",
"atmosphere_ln_pressure_coordinate",
"atmosphere_sigma_coordinate",
"atmosphere_hybrid_sigma_pressure_coordinate",
"atmosphere_hybrid_height_coordinate",
"atmosphere_sleve_coordinate",
"ocean_sigma_coordinate",
"ocean_s_coordinate",
"ocean_s_coordinate_g1",
"ocean_s_coordinate_g2",
"ocean_sigma_z_coordinate",
"ocean_double_sigma_coordinate",
),
"_CoordinateAxisType": ("GeoZ", "Height", "Pressure"),
"axis": ("Z",),
"cartesian_axis": ("Z",),
"grads_dim": ("z",),
"long_name": (
"model_level_number",
"atmosphere_ln_pressure_coordinate",
"atmosphere_sigma_coordinate",
"atmosphere_hybrid_sigma_pressure_coordinate",
"atmosphere_hybrid_height_coordinate",
"atmosphere_sleve_coordinate",
"ocean_sigma_coordinate",
"ocean_s_coordinate",
"ocean_s_coordinate_g1",
"ocean_s_coordinate_g2",
"ocean_sigma_z_coordinate",
"ocean_double_sigma_coordinate",
),
},
"vertical": {
"standard_name": (
"air_pressure",
"height",
"depth",
"geopotential_height",
"altitude",
"height_above_geopotential_datum",
"height_above_reference_ellipsoid",
"height_above_mean_sea_level",
),
"positive": ("up", "down"),
"long_name": (
"air_pressure",
"height",
"depth",
"geopotential_height",
"altitude",
"height_above_geopotential_datum",
"height_above_reference_ellipsoid",
"height_above_mean_sea_level",
),
},
"X": {
"standard_name": ("projection_x_coordinate", "grid_longitude", "projection_x_angular_coordinate"),
"_CoordinateAxisType": ("GeoX",),
"axis": ("X",),
"cartesian_axis": ("X",),
"grads_dim": ("x",),
"long_name": (
"projection_x_coordinate",
"grid_longitude",
"projection_x_angular_coordinate",
"cell index along first dimension",
),
},
"Y": {
"standard_name": ("projection_y_coordinate", "grid_latitude", "projection_y_angular_coordinate"),
"_CoordinateAxisType": ("GeoY",),
"axis": ("Y",),
"cartesian_axis": ("Y",),
"grads_dim": ("y",),
"long_name": (
"projection_y_coordinate",
"grid_latitude",
"projection_y_angular_coordinate",
"cell index along second dimension",
),
},
"T": {
"standard_name": ("time",),
"_CoordinateAxisType": ("Time",),
"axis": ("T",),
"cartesian_axis": ("T",),
"grads_dim": ("t",),
"long_name": ("time",),
},
"time": {
"standard_name": ("time",),
"_CoordinateAxisType": ("Time",),
"axis": ("T",),
"cartesian_axis": ("T",),
"grads_dim": ("t",),
"long_name": ("time",),
},
}


# Lookup table from a data-access service name to the media type associated with it.
# Two key spellings are present: lowercase "*_service" names and mixed-case names
# such as "HTTPServer" / "OPENDAP".
# Values are either the literal "application/x-netcdf" string or a pystac.MediaType member.
# NOTE(review): presumably consulted when assets are added to an item — verify against caller.
# NOTE(review): the keys look THREDDS-specific; consider whether this table belongs
# with the THREDDS implementation — confirm.
media_types = {
"httpserver_service": "application/x-netcdf",
"opendap_service": pystac.MediaType.HTML,
"wcs_service": pystac.MediaType.XML,
"wms_service": pystac.MediaType.XML,
"nccs_service": "application/x-netcdf",
"HTTPServer": "application/x-netcdf",
"OPENDAP": pystac.MediaType.HTML,
"NCML": pystac.MediaType.XML,
"WCS": pystac.MediaType.XML,
"ISO": pystac.MediaType.XML,
"WMS": pystac.MediaType.XML,
"NetcdfSubset": "application/x-netcdf",
}

# Lookup table from a data-access service name to a list of role strings.
# The keys are the same twelve service names used by `media_types`.
# Only three role values occur here: "data", "visual" (the WMS entries) and
# "metadata" (the NCML and ISO entries).
# NOTE(review): presumably consulted when assets are added to an item — verify against caller.
# NOTE(review): each value is a mutable list held by this module-level dict; check that
# callers do not mutate it in place — TODO confirm.
asset_roles = {
"httpserver_service": ["data"],
"opendap_service": ["data"],
"wcs_service": ["data"],
"wms_service": ["visual"],
"nccs_service": ["data"],
"HTTPServer": ["data"],
"OPENDAP": ["data"],
"NCML": ["metadata"],
"WCS": ["data"],
"ISO": ["metadata"],
"WMS": ["visual"],
"NetcdfSubset": ["data"],
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These look like THREDDS-specific metadata. They should most probably be attributes within its implementation.