diff --git a/CHANGES.md b/CHANGES.md index d607b24..3fc3e54 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,11 +3,13 @@ ## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) -- Adding ability to add collection level assets -- Adding ability to add collection level links -- Adding collection links to `CMIP6_UofT` -- Adding an end date to `CMIP6_UofT`'s temporal extent for better rendering in STAC Browser -- Updates to datacube extension helper routines for `CMIP6_UofT`. +* Make sure *bounds* variables are given the auxiliary type attribute. +* Fix for variables that have no attributes. +* Adding ability to add collection level assets +* Adding ability to add collection level links +* Adding collection links to `CMIP6_UofT` +* Adding an end date to `CMIP6_UofT`'s temporal extent for better rendering in STAC Browser +* Updates to datacube extension helper routines for `CMIP6_UofT`. ## [0.6.0](https://github.com/crim-ca/stac-populator/tree/0.6.0) (2024-02-22) diff --git a/STACpopulator/extensions/datacube.py b/STACpopulator/extensions/datacube.py index 2d52bb4..b394416 100644 --- a/STACpopulator/extensions/datacube.py +++ b/STACpopulator/extensions/datacube.py @@ -184,6 +184,7 @@ def dimensions(self) -> dict[str, Dimension]: def variables(self) -> dict[str, Variable]: """Return Variable objects required for Datacube extension.""" variables = {} + bounds = self.bounds() for name, meta in self.attrs["variables"].items(): if name in self.attrs["dimensions"]: @@ -192,44 +193,50 @@ def variables(self) -> dict[str, Variable]: # Some variables like "time_bnds" in some model files do not have any attributes. attrs = meta.get("attributes", {}) - self._infer_variable_units_description(name, attrs) + if name in bounds: + # Bounds are auxiliary variables + dtype = VariableType.AUXILIARY.value + + # We can safely assume that the bounds variable has the same units as the variable it bounds. 
+ if "units" not in attrs: + if (u := self.attrs["variables"][bounds[name]].get("attributes", {}).get("units")) is not None: + attrs["units"] = u + + if "description" not in attrs: + attrs["description"] = f"bounds for the {bounds[name]} coordinate" + + elif self.is_coordinate(attrs): + # Using the CF-xarray heuristics to determine if variable is a coordinate. + dtype = VariableType.AUXILIARY.value + else: + dtype = VariableType.DATA.value variables[name] = Variable( properties=dict( dimensions=meta["shape"], - type=VariableType.AUXILIARY.value if self.is_coordinate(attrs) else VariableType.DATA.value, + type=dtype, description=attrs.get("description", attrs.get("long_name", "")), unit=attrs.get("units", ""), ) ) return variables - def _infer_variable_units_description(self, name, attrs): - """Try to infer the units and description of some simple coordinate variables.""" - if name == "time_bnds": - related_variable = "time" - attrs["description"] = "bounds for the time coordinate" - elif name == "lat_bnds": - related_variable = "lat" - attrs["description"] = "bounds for the latitude coordinate" - elif name == "lon_bnds": - related_variable = "lon" - attrs["description"] = "bounds for the longitude coordinate" - else: - return - - try: - attrs["units"] = self.attrs["variables"][related_variable]["attributes"]["units"] - except KeyError: - pass + def bounds(self): + """Return a dict mapping each bounds variable name to the name of the variable it bounds.""" + out = {} + for name, meta in self.attrs["variables"].items(): + attrs = meta.get("attributes", {}) + if "bounds" in attrs: + out[attrs["bounds"]] = name + return out + def is_coordinate(self, attrs: MutableMapping[str, Any]) -> bool: - """Return whether variable is a coordinate.""" - - if (desc := attrs.get("description", None)) is not None: - if "bounds for" in desc: - return True + """Return whether variable is a coordinate. + - data: a variable indicating some measured value, for example "precipitation", "temperature", etc. 
+ - auxiliary: a variable that contains coordinate data, but isn't a dimension in cube:dimensions. + """ for key, criteria in self.coordinate_criteria.items(): for criterion, expected in criteria.items(): if attrs.get(criterion, None) in expected: diff --git a/tests/data/clt_Amon_EC-Earth3_historical_r2i1p1f1_gr_185001-201412.xml b/tests/data/clt_Amon_EC-Earth3_historical_r2i1p1f1_gr_185001-201412.xml new file mode 100644 index 0000000..9e63c91 --- /dev/null +++ b/tests/data/clt_Amon_EC-Earth3_historical_r2i1p1f1_gr_185001-201412.xml @@ -0,0 +1,133 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/test_cmip6_datacube.py b/tests/test_cmip6_datacube.py index 9adafa0..511a378 100644 --- a/tests/test_cmip6_datacube.py +++ b/tests/test_cmip6_datacube.py @@ -35,3 +35,26 @@ def test_datacube_helper(): assert len(schemas) >= 2 assert "item.json" in schemas[0] assert "datacube" in schemas[1] + + +def test_auxiliary_variables(): + # https://github.com/crim-ca/stac-populator/issues/52 + + file_path = DIR / "data" / "clt_Amon_EC-Earth3_historical_r2i1p1f1_gr_185001-201412.xml" + + ds = xncml.Dataset(filepath=str(file_path)) + attrs = ds.to_cf_dict() + attrs["access_urls"] = {"HTTPServer": "http://example.com"} + item = CMIP6Helper(attrs, GeoJSONPolygon).stac_item() + + dc = DataCubeHelper(attrs) + dc_ext = DatacubeExtension.ext(item, add_if_missing=True) + dc_ext.apply(dimensions=dc.dimensions, variables=dc.variables) + + p = dc_ext.properties + assert set(['time', 'lat', 'lon']) == set(p['cube:dimensions'].keys()) + assert p["cube:variables"]["lon_bnds"]["unit"] == "degrees_east" + assert p["cube:variables"]["time_bnds"]["unit"] == "days since 1850-01-01" + assert p["cube:variables"]["time_bnds"]["type"] == "auxiliary" + assert 
p["cube:variables"]["time_bnds"]["description"] == "bounds for the time coordinate" + assert p["cube:variables"]["clt"]["type"] == "data"