diff --git a/STACpopulator/input.py b/STACpopulator/input.py index 272f9ad..2522f15 100644 --- a/STACpopulator/input.py +++ b/STACpopulator/input.py @@ -4,6 +4,7 @@ import pystac import requests +import siphon import xncml from colorlog import ColoredFormatter from siphon.catalog import TDSCatalog @@ -95,7 +96,7 @@ def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]: """Return a generator walking a THREDDS data catalog for datasets.""" if self.catalog_head.datasets.items(): for item_name, ds in self.catalog_head.datasets.items(): - attrs = self.extract_metadata(ds.access_urls["NCML"], self.catalog_head.catalog_url, ds.url_path) + attrs = self.extract_metadata(ds) yield item_name, attrs if self._depth > 0: @@ -104,12 +105,14 @@ def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]: self._depth -= 1 yield from self - def extract_metadata(self, ncml_url: str, catalog_url: str, dataset_path: str) -> MutableMapping[str, Any]: + def extract_metadata(self, ds: siphon.catalog.Dataset) -> MutableMapping[str, Any]: LOGGER.info("Requesting NcML dataset description") - r = requests.get(ncml_url, params={"catalog": catalog_url, "dataset": dataset_path}) + url = ds.access_urls["NCML"] + r = requests.get(url) # Convert NcML to CF-compliant dictionary attrs = xncml.Dataset.from_text(r.content).to_cf_dict() attrs["attributes"] = numpy_to_python_datatypes(attrs["attributes"]) + attrs["access_urls"] = ds.access_urls return attrs diff --git a/STACpopulator/stac_utils.py b/STACpopulator/stac_utils.py index d3786e1..c245ed1 100644 --- a/STACpopulator/stac_utils.py +++ b/STACpopulator/stac_utils.py @@ -191,16 +191,15 @@ def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_prop # Convert pydantic STAC item to a PySTAC Item item = pystac.Item(**json.loads(item.model_dump_json(by_alias=True))) - root = attrs["groups"]["THREDDSMetadata"]["groups"]["services"]["attributes"] + root = attrs["access_urls"] for name, url in root.items(): name = str(name) # converting name from siphon.catalog.CaseInsensitiveStr to str asset = pystac.Asset(href=url, media_type=media_types.get(name), roles=asset_roles.get(name)) - name = asset_name_remaps[name] if name in asset_name_remaps.keys() else name item.add_asset(name, asset) - item.add_link(magpie_resource_link(root["httpserver_service"])) + item.add_link(magpie_resource_link(root["HTTPServer"])) return item @@ -214,17 +213,17 @@ def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_prop } media_types = { - "httpserver_service": "application/x-netcdf", - "opendap_service": pystac.MediaType.HTML, - "wcs_service": pystac.MediaType.XML, - "wms_service": pystac.MediaType.XML, - "nccs_service": "application/x-netcdf", + "HTTPServer": "application/x-netcdf", + "OPENDAP": pystac.MediaType.HTML, + "WCS": pystac.MediaType.XML, + "WMS": pystac.MediaType.XML, + "NetcdfSubset": "application/x-netcdf", } asset_roles = { - "httpserver_service": ["data"], - "opendap_service": ["data"], - "wcs_service": ["data"], - "wms_service": ["visual"], - "nccs_service": ["data"], + "HTTPServer": ["data"], + "OPENDAP": ["data"], + "WCS": ["data"], + "WMS": ["visual"], + "NetcdfSubset": ["data"], }