Skip to content

Commit

Permalink
Add ncattrs function to get nc attributes without the need for siphon.
Browse files Browse the repository at this point in the history
  • Loading branch information
huard committed Nov 16, 2023
1 parent e7fd123 commit 112fc4c
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 31 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

<!-- insert list items of new changes here -->

* New function `ncattrs` to get attributes from netCDF files hosted on a THREDDS server.

## [0.2.0](https://github.com/crim-ca/stac-populator/tree/0.2.0) (2023-11-10)


Expand Down
50 changes: 50 additions & 0 deletions STACpopulator/stac_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,56 @@ def collection2literal(collection, property="label"):
return Literal[terms]


def thredds_catalog_attrs(url: str) -> dict:
    """Return attributes from the catalog.xml THREDDS server response.

    :param url: URL of the THREDDS ``catalog.xml`` document to fetch.
    :return: The XML document parsed into nested dictionaries by ``xmltodict``.
        The THREDDS InvCatalog v1.0 namespace (``http`` and ``https`` spellings)
        is mapped to ``None`` so that it is not prefixed to element names.
    :raises requests.HTTPError: If the server answers with an HTTP error status.
    """
    import requests
    import xmltodict

    response = requests.get(url)
    # Fail loudly on 4xx/5xx instead of handing an error page to the XML parser.
    response.raise_for_status()

    return xmltodict.parse(
        response.text,
        process_namespaces=True,
        namespaces={
            "http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0": None,
            "https://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0": None,
        },
    )


def ncattrs(url: str) -> dict:
    """Return attributes from a THREDDS netCDF dataset.

    The ``catalog.xml`` document located next to the file is queried to discover
    the services exposed for the dataset, then the NCML service is used to read
    the dataset attributes.

    :param url: URL of the netCDF file; its last path segment is taken as the
        file name and everything before it as the catalog location.
    :return: The CF dictionary built by ``xncml`` from the NCML response, with
        its ``"attributes"`` entry passed through ``numpy_to_python_datatypes``
        and an extra ``"access_urls"`` entry mapping each service type to the
        URL of the dataset for that service.
    :raises requests.HTTPError: If the catalog or NCML request returns an HTTP
        error status.
    :raises KeyError: If the catalog lacks the expected entries or does not
        advertise an ``NCML`` service.
    """
    import urllib.parse  # import the submodule explicitly; a bare `import urllib` does not guarantee it is loaded

    import requests
    import xncml

    pr = urllib.parse.urlparse(url)

    # Split the URL into the parent "directory" and the file name.
    parent, _, nc = url.rpartition("/")

    # Get catalog information about available services
    catalog = f"{parent}/catalog.xml"
    cattrs = thredds_catalog_attrs(catalog)["catalog"]

    cid = cattrs["dataset"]["@ID"]

    # xmltodict returns a dict rather than a list when an element occurs only
    # once, so normalize to a list before iterating.
    services = cattrs["service"]["service"]
    if isinstance(services, dict):
        services = [services]

    # Get service URLs for the dataset
    access_urls = {
        service["@serviceType"]: f'{pr.scheme}://{pr.netloc}{service["@base"]}{cid}/{nc}'
        for service in services
    }

    # Get dataset attributes
    r = requests.get(access_urls["NCML"])
    # Do not try to parse an HTTP error page as NCML.
    r.raise_for_status()
    attrs = xncml.Dataset.from_text(r.text).to_cf_dict()
    attrs["attributes"] = numpy_to_python_datatypes(attrs["attributes"])

    # Include service attributes
    attrs["access_urls"] = access_urls
    return attrs


def ncattrs_to_geometry(attrs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
"""Create Polygon geometry from CFMetadata."""
attrs = attrs["groups"]["CFMetadata"]["attributes"]
Expand Down
37 changes: 6 additions & 31 deletions tests/test_standalone_stac_item.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import json
import pytest
import requests
import os
import tempfile
from urllib.parse import quote

import xncml

from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import CMIP6ItemProperties, CMIP6populator
from STACpopulator.input import THREDDSLoader
from STACpopulator.models import GeoJSONPolygon
from STACpopulator.stac_utils import STAC_item_from_metadata
from STACpopulator.stac_utils import STAC_item_from_metadata, ncattrs
from pystac.validation import JsonSchemaSTACValidator
from pystac import STACObjectType

CUR_DIR = os.path.dirname(__file__)

Expand All @@ -21,35 +20,11 @@ def quote_none_safe(url):

@pytest.mark.online
def test_standalone_stac_item_thredds_ncml():
# NOTE(review): this text comes from a rendered diff without indentation or +/- markers, so the
# body below appears to interleave the previous and the new implementation — confirm against the repository.
# Hand-built THREDDS locations for the test dataset.
thredds_url = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds"
thredds_path = "birdhouse/testdata/xclim/cmip6"
thredds_nc = "sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc"
thredds_catalog = f"{thredds_url}/catalog/{thredds_path}/catalog.html"
thredds_ds = f"{thredds_path}/{thredds_nc}"
# NCML service URL, with the catalog and dataset passed as quoted query parameters.
thredds_ncml_url = (
f"{thredds_url}/ncml/{thredds_path}/{thredds_nc}"
f"?catalog={quote_none_safe(thredds_catalog)}&dataset={quote_none_safe(thredds_ds)}"
)

# FIXME: avoid hackish workarounds
# Fetch the NCML response and convert it to a CF dictionary.
data = requests.get(thredds_ncml_url).text
attrs = xncml.Dataset.from_text(data).to_cf_dict()
attrs["access_urls"] = { # FIXME: all following should be automatically added, but they are not!
"HTTPServer": f"{thredds_url}/fileServer/{thredds_path}/{thredds_nc}",
"OPENDAP": f"{thredds_url}/dodsC/{thredds_path}/{thredds_nc}",
"WCS": f"{thredds_url}/wcs/{thredds_path}/{thredds_nc}?service=WCS&version=1.0.0&request=GetCapabilities",
"WMS": f"{thredds_url}/wms/{thredds_path}/{thredds_nc}?service=WMS&version=1.3.0&request=GetCapabilities",
"NetcdfSubset": f"{thredds_url}/ncss/{thredds_path}/{thredds_nc}/dataset.html",
}

# Fetch the dataset attributes (including the service access URLs) through `ncattrs`.
url = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc"
attrs = ncattrs(url)
# Build the STAC item from the dataset attributes.
stac_item_id = CMIP6populator.make_cmip6_item_id(attrs["attributes"])
stac_item = STAC_item_from_metadata(stac_item_id, attrs, CMIP6ItemProperties, GeoJSONPolygon)

# Compare the generated item against the stored reference item.
ref_file = os.path.join(CUR_DIR, "data/stac_item_testdata_xclim_cmip6_ncml.json")
with open(ref_file, mode="r", encoding="utf-8") as ff:
reference = json.load(ff)

assert stac_item.to_dict() == reference
# Check the generated item with pystac's own validation.
assert stac_item.validate()


class MockedNoSTACUpload(CMIP6populator):
Expand Down

0 comments on commit 112fc4c

Please sign in to comment.