diff --git a/CHANGES.md b/CHANGES.md index c57bad3..363f5ff 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,7 +4,7 @@ Changes [Unreleased](https://github.com/crim-ca/ncml2stac/tree/master) (latest) ------------------------------------------------------------------------------------------------------------------ - +- Update STAC Item generation from NCML using `STACpopulator==0.5.0` to employ all latest fixes. [0.2.0](https://github.com/crim-ca/ncml2stac/tree/0.2.0) (2023-10-02) ------------------------------------------------------------------------------------------------------------------ diff --git a/Makefile b/Makefile index d996aed..5c0a603 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,8 @@ PIP_USE_FEATURE := `python -c '\ except ImportError: \ from distutils.version import LooseVersion as Version \ print(Version(pip.__version__) < Version("21.0"))'` -PIP_XARGS ?= +# when a repository must be cloned locally to build/install it, (w)ipe if path conflicts +PIP_XARGS ?= --exists-action=w ifeq ("$(PIP_USE_FEATURE)", "True") PIP_XARGS := --use-feature=2020-resolver $(PIP_XARGS) endif diff --git a/notebooks/ncml2stac.ipynb b/notebooks/ncml2stac.ipynb index cb65f15..8da8ed4 100644 --- a/notebooks/ncml2stac.ipynb +++ b/notebooks/ncml2stac.ipynb @@ -8,7 +8,7 @@ "This notebook should be compiled into a standalone *CWL* definition using the following command:\n", "\n", "```shell\n", - "jupyter-repo2cwl \"https://github.com/crim-ca/ncml2sta\" -o /tmp\n", + "jupyter-repo2cwl \"https://github.com/crim-ca/ncml2stac\" -o /tmp\n", "```\n", "(replace the Git repository URL by the path if the clone locally)\n", "\n", @@ -38,11 +38,11 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 1, "outputs": [], "source": [ "# NOTE:\n", - "# If using indented code block here (eg: 'if TYPE_CHECKING:'),\n", + "# If using code that is not preserved at runtime (eg: 'if TYPE_CHECKING:'),\n", "# it is important to have other things than 'ipython2cwl' imports.\n", "# 
When ported into the generated python script, imports from 'ipython2cwl' are removed,\n", "# which can cause syntax/indent errors.\n", @@ -71,8 +71,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-29T22:45:33.099193431Z", - "start_time": "2023-09-29T22:45:33.014184569Z" + "end_time": "2024-01-09T21:23:06.458322329Z", + "start_time": "2024-01-09T21:23:06.322097107Z" } }, "id": "61f43c81dc3aa6c2" @@ -94,19 +94,19 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 8, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cloning into '/home/francis/.esdoc/pyessv-archive'...\r\n", - "remote: Enumerating objects: 63068, done.\u001B[K\r\n", - "remote: Counting objects: 100% (1557/1557), done.\u001B[K\r\n", - "remote: Compressing objects: 100% (476/476), done.\u001B[K\r\n", - "remote: Total 63068 (delta 1258), reused 1327 (delta 1070), pack-reused 61511\u001B[K\r\n", - "Receiving objects: 100% (63068/63068), 6.06 MiB | 5.05 MiB/s, done.\r\n", - "Resolving deltas: 100% (60270/60270), done.\r\n", + "remote: Enumerating objects: 7728, done.\u001B[K\r\n", + "remote: Counting objects: 100% (7728/7728), done.\u001B[K\r\n", + "remote: Compressing objects: 100% (2840/2840), done.\u001B[K\r\n", + "remote: Total 7728 (delta 6653), reused 5274 (delta 4866), pack-reused 0\u001B[K\r\n", + "Receiving objects: 100% (7728/7728), 806.86 KiB | 5.68 MiB/s, done.\r\n", + "Resolving deltas: 100% (6653/6653), done.\r\n", "\r\n", "Local identity for pyessv-archive set to \"Francis Charette Migneault \"\r\n" ] @@ -115,49 +115,57 @@ "source": [ "!rm -fr ~/.esdoc/pyessv-archive\n", "!mkdir -p ~/.esdoc/\n", - "!git clone https://github.com/ES-DOC/pyessv-archive ~/.esdoc/pyessv-archive" + "!git clone --depth 1 https://github.com/ES-DOC/pyessv-archive ~/.esdoc/pyessv-archive" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-29T22:45:37.397132140Z", - "start_time": "2023-09-29T22:45:33.063477776Z" + 
"end_time": "2024-01-09T20:46:18.837092675Z", + "start_time": "2024-01-09T20:46:17.329893874Z" } }, "id": "f10d85e12b47da43" }, { "cell_type": "code", - "execution_count": 87, - "outputs": [], + "execution_count": 2, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-01-09 21:23:12.406510 [INFO] :: PYESSV :: Loading vocabularies from /home/francis/.esdoc/pyessv-archive ... please wait\n" + ] + } + ], "source": [ - "import hashlib\n", "import json\n", + "import os\n", "import tempfile\n", "from datetime import datetime, date\n", "from enum import Enum\n", + "from urllib.parse import parse_qs, urlparse, unquote\n", "\n", "import numpy as np\n", "import pystac\n", "import requests\n", - "import xncml\n", "from pydantic.networks import Url\n", "\n", - "from STACpopulator.extensions import cmip6\n", - "from STACpopulator.stac_utils import CFJsonItem, DatacubeExt" + "from STACpopulator.input import THREDDSLoader\n", + "from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import CMIP6populator" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-29T22:45:37.444764964Z", - "start_time": "2023-09-29T22:45:37.400457899Z" + "end_time": "2024-01-09T21:23:13.576072319Z", + "start_time": "2024-01-09T21:23:11.298045355Z" } }, "id": "f68ea4339c5e4a9d" }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 3, "outputs": [ { "name": "stdout", @@ -244,7 +252,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -350,6 +358,7 @@ ], "source": [ "# retrieve the file contents\n", + "input_ncml_href = input_ncml\n", "if not (input_ncml.startswith(\"/\") or input_ncml.startswith(\"file:///\")):\n", " resp = requests.get(input_ncml, headers={\"Accept\": \"text/xml, application/xml\"}, timeout=5)\n", " if not resp.status_code == 200 and resp.text.startswith(\"" - }, - "execution_count": 89, - "metadata": {}, - "output_type": "execute_result" + "name": "stderr", + "output_type": "stream", + "text": [ 
+ " \u001B[32mINFO:\u001B[0m \u001B[34m[STACpopulator.input ]\u001B[0m Requesting NcML dataset description\u001B[0m\n" + ] } ], "source": [ - "# FIXME: duplicate code\n", - "# this is defined in:\n", - "# https://github.com/crim-ca/stac-populator/blob/arch-changes/implementations/CMIP6-UofT/add_CMIP6.py#L102-L116\n", - "# but we cannot import it since outside of installed 'STACpopulator' module\n", - "def make_cmip6_item_id(_attrs: \"JsonLike\") -> str:\n", - " \"\"\"Return a unique ID for CMIP6 data item.\"\"\"\n", - " keys = [\n", - " \"activity_id\",\n", - " \"institution_id\",\n", - " \"source_id\",\n", - " \"experiment_id\",\n", - " \"variant_label\",\n", - " \"table_id\",\n", - " \"variable_id\",\n", - " \"grid_label\",\n", - " ]\n", - " name = \"_\".join(_attrs[k] for k in keys)\n", - " return hashlib.md5(name.encode(\"utf-8\")).hexdigest()\n", + "# NOTE:\n", + "# Since we are only interested in converting a single NCML to a STAC Item,\n", + "# override the logic of the provided populator such that it does not\n", + "# automatically iterate over the complete THREDDS catalog contents.\n", "\n", + "class CMIP6SingleFilePopulator(CMIP6populator):\n", + " # WARNING:\n", + " # to limit as much as possible the number of useless iterations done while crawling the datasets,\n", + " # this implementation enforces 'depth=0' (see the 'THREDDSLoader' call in '__init__').\n", + " # therefore, the 'thredds_catalog_url' must be \"right above\" the target NCML file URL\n", + " # example:\n", + " # To describe:\n", + " # https://svc.com/thredds/ncml/some/nested/netcdf.nc\n", + " # thredds_catalog_url should be:\n", + " # https://svc.com/thredds/catalog/some/nested/catalog.xml\n", + " def __init__( # pylint: disable=W0231 # super init not called on purpose to avoid loading missing config\n", + " self,\n", + " stac_host,\n", + " thredds_catalog_url,\n", + " update=False,\n", + " ):\n", + " # FIXME: just reimplement what is needed (no config needed, we don't care about STAC Collections...)\n", + " self._stac_host = stac_host\n", + " 
" self._ingest_pipeline = THREDDSLoader(thredds_catalog_url, depth=0)\n", + " self.update = update\n", "\n", - "# FIXME: temporary patch of URL/Media-Type\n", - "# https://github.com/crim-ca/stac-populator/pull/23#discussion_r1341819744\n", - "class CFJsonItemNetCDF(CFJsonItem):\n", - " def item_link(self) -> pystac.Link:\n", - " url = self.attrs[\"@location\"] # NetCDF URL\n", - " name = self.attrs[\"groups\"][\"THREDDSMetadata\"][\"attributes\"][\"id\"]\n", - " path = url.split(name, 1)[0]\n", - " parts = list(filter(lambda _: bool(_), path.rsplit(\"/\", 3)))\n", - " service = parts[-2] # always 1 path part for the service\n", - " link = pystac.Link(\n", - " rel=\"source\",\n", - " target=url,\n", - " media_type=\"application/x-netcdf\",\n", - " title=f\"{service}:{name}\"\n", - " )\n", - " return link\n", + " # FIXME: perform what ingest() does, but only for a single item and without STAC API POST request\n", + " def ncml2stac(self, target_item: str):\n", + " ds = self._ingest_pipeline[target_item]\n", + " ncml_data = self._ingest_pipeline.extract_metadata(ds)\n", + " stac_item = self.create_stac_item(target_item, ncml_data)\n", + " return stac_item\n", "\n", "\n", - "# FIXME: partial duplicate code\n", - "# https://github.com/crim-ca/stac-populator/blob/arch-changes/implementations/CMIP6-UofT/add_CMIP6.py#L138-L165\n", - "# should be combined into a single callable function that doesn't depend on the rest of the THREDDS crawling iterator\n", - "ds = xncml.Dataset(input_ncml)\n", - "attrs = ds.to_cf_dict()\n", + "input_ncml_href_parsed = urlparse(input_ncml_href)\n", + "input_ncml_href_params = parse_qs(input_ncml_href_parsed.query)\n", + "if \"catalog\" in input_ncml_href_params:\n", + " input_ncml_target_href = input_ncml_href.split(\"?\", 1)[0]\n", + " input_ncml_catalog_href = unquote(input_ncml_href_params[\"catalog\"][0])\n", + " input_ncml_catalog_href = os.path.splitext(input_ncml_catalog_href)[0] + \".xml\" # in case it was HTML\n", + "else:\n", + " 
input_ncml_target_href = input_ncml_href\n", + " input_ncml_catalog_href = input_ncml_href.split(\"?\", 1)[0] # just in case there's extra query params\n", + " input_ncml_catalog_href = input_ncml_catalog_href.replace(\"/ncml\", \"/catalog/\")\n", + " input_ncml_catalog_href = os.path.join(os.path.dirname(input_ncml_catalog_href), \"catalog.xml\")\n", + "input_ncml_headers = {\"Accept\": \"text/xml, application/xml\"}\n", + "input_ncml_catalog_xml = requests.get(input_ncml_catalog_href, headers=input_ncml_headers, timeout=5).text\n", + "input_ncml_target_name = os.path.split(input_ncml_target_href)[-1]\n", "\n", - "# FIXME: AttributeError\n", - "nc_services = getattr(ds, \"access_urls\", None)\n", - "if nc_services:\n", - " attrs[\"access_urls\"] = nc_services\n", + "# technically invalid STAC host, but just need something for URL schema validation\n", + "stac_host_url = f\"{input_ncml_href_parsed.scheme}://{input_ncml_href_parsed.netloc}\"\n", + "cmip6_pop = CMIP6SingleFilePopulator(stac_host_url, input_ncml_catalog_href)\n", "\n", - "stac_item_id = make_cmip6_item_id(attrs[\"attributes\"])\n", - "attrs[\"id\"] = stac_item_id\n", - "stac_item = CFJsonItemNetCDF(stac_item_id, attrs, cmip6.Properties)\n", - "DatacubeExt(stac_item)" + "# generate the STAC Item definition corresponding to the NCML content\n", + "stac_item_data = cmip6_pop.ncml2stac(input_ncml_target_name)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-29T22:45:37.711546409Z", - "start_time": "2023-09-29T22:45:37.646200547Z" + "end_time": "2024-01-09T21:43:16.613980591Z", + "start_time": "2024-01-09T21:43:16.278982130Z" } }, "id": "299946ccd58e2efc" @@ -463,7 +474,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 9, "outputs": [ { "name": "stdout", @@ -472,80 +483,71 @@ "{\n", " \"type\": \"Feature\",\n", " \"stac_version\": \"1.0.0\",\n", - " \"id\": \"36c83a8bb9d382ff2ffed7b9ba422cd3\",\n", + " \"id\": 
\"ScenarioMIP_CCCma_CanESM5_ssp245_r13i1p2f1_SImon_siconc_gn\",\n", " \"properties\": {\n", " \"start_datetime\": \"2019-12-06T12:00:00Z\",\n", " \"end_datetime\": \"2020-11-04T12:00:00Z\",\n", - " \"datetime\": null,\n", - " \"Conventions\": \"CF-1.7 CMIP-6.2\",\n", - " \"activity_id\": \"ScenarioMIP\",\n", - " \"creation_date\": \"2019-09-25T23:01:33Z\",\n", - " \"data_specs_version\": \"01.00.30\",\n", - " \"experiment\": \"update of RCP4.5 based on SSP2\",\n", - " \"experiment_id\": \"ssp245\",\n", - " \"frequency\": \"mon\",\n", - " \"further_info_url\": \"https://furtherinfo.es-doc.org/CMIP6.CCCma.CanESM5.ssp245.none.r13i1p2f1\",\n", - " \"grid_label\": \"gn\",\n", - " \"institution\": \"Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada, Victoria, BC V8P 5C2, Canada\",\n", - " \"institution_id\": \"CCCma\",\n", - " \"nominal_resolution\": \"100 km\",\n", - " \"realm\": [\n", + " \"cmip6:Conventions\": \"CF-1.7 CMIP-6.2\",\n", + " \"cmip6:activity_id\": \"ScenarioMIP\",\n", + " \"cmip6:creation_date\": \"2019-09-25T23:01:33Z\",\n", + " \"cmip6:data_specs_version\": \"01.00.30\",\n", + " \"cmip6:experiment\": \"update of RCP4.5 based on SSP2\",\n", + " \"cmip6:experiment_id\": \"ssp245\",\n", + " \"cmip6:frequency\": \"mon\",\n", + " \"cmip6:further_info_url\": \"https://furtherinfo.es-doc.org/CMIP6.CCCma.CanESM5.ssp245.none.r13i1p2f1\",\n", + " \"cmip6:grid_label\": \"gn\",\n", + " \"cmip6:institution\": \"Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada, Victoria, BC V8P 5C2, Canada\",\n", + " \"cmip6:institution_id\": \"CCCma\",\n", + " \"cmip6:nominal_resolution\": \"100 km\",\n", + " \"cmip6:realm\": [\n", " \"seaIce\"\n", " ],\n", - " \"source\": \"CanESM5 (2019): \\naerosol: interactive\\natmos: CanAM5 (T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa)\\natmosChem: specified oxidants for aerosols\\nland: 
CLASS3.6/CTEM1.2\\nlandIce: specified ice sheets\\nocean: NEMO3.4.1 (ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m)\\nocnBgchem: Canadian Model of Ocean Carbon (CMOC); NPZD ecosystem with OMIP prescribed carbonate chemistry\\nseaIce: LIM2\",\n", - " \"source_id\": \"CanESM5\",\n", - " \"source_type\": [\n", + " \"cmip6:source\": \"CanESM5 (2019): \\naerosol: interactive\\natmos: CanAM5 (T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa)\\natmosChem: specified oxidants for aerosols\\nland: CLASS3.6/CTEM1.2\\nlandIce: specified ice sheets\\nocean: NEMO3.4.1 (ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m)\\nocnBgchem: Canadian Model of Ocean Carbon (CMOC); NPZD ecosystem with OMIP prescribed carbonate chemistry\\nseaIce: LIM2\",\n", + " \"cmip6:source_id\": \"CanESM5\",\n", + " \"cmip6:source_type\": [\n", " \"AOGCM\"\n", " ],\n", - " \"sub_experiment\": \"none\",\n", - " \"sub_experiment_id\": \"none\",\n", - " \"table_id\": \"SImon\",\n", - " \"variable_id\": \"siconc\",\n", - " \"variant_label\": \"r13i1p2f1\",\n", - " \"initialization_index\": 1,\n", - " \"physics_index\": 2,\n", - " \"realization_index\": 13,\n", - " \"forcing_index\": 1,\n", - " \"tracking_id\": \"hdl:21.14100/9e4f804b-c161-44fa-acd1-c2e94e220c95\",\n", - " \"version\": \"v20190429\",\n", - " \"product\": \"model-output\",\n", - " \"license\": \"CMIP6 model data produced by The Government of Canada (Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada) is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). 
Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.\",\n", - " \"grid\": \"ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m\",\n", - " \"mip_era\": \"CMIP6\",\n", + " \"cmip6:sub_experiment\": \"none\",\n", + " \"cmip6:sub_experiment_id\": \"none\",\n", + " \"cmip6:table_id\": \"SImon\",\n", + " \"cmip6:variable_id\": \"siconc\",\n", + " \"cmip6:variant_label\": \"r13i1p2f1\",\n", + " \"cmip6:initialization_index\": 1,\n", + " \"cmip6:physics_index\": 2,\n", + " \"cmip6:realization_index\": 13,\n", + " \"cmip6:forcing_index\": 1,\n", + " \"cmip6:tracking_id\": \"hdl:21.14100/9e4f804b-c161-44fa-acd1-c2e94e220c95\",\n", + " \"cmip6:version\": \"v20190429\",\n", + " \"cmip6:product\": \"model-output\",\n", + " \"cmip6:license\": \"CMIP6 model data produced by The Government of Canada (Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada) is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. 
Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.\",\n", + " \"cmip6:grid\": \"ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m\",\n", + " \"cmip6:mip_era\": \"CMIP6\",\n", " \"cube:dimensions\": {\n", " \"time\": {\n", - " \"axis\": \"t\",\n", " \"type\": \"temporal\",\n", - " \"extent\": null,\n", - " \"description\": [\n", - " \"time\"\n", - " ]\n", + " \"extent\": [\n", + " \"2019-12-06T12:00:00Z\",\n", + " \"2020-11-04T12:00:00Z\"\n", + " ],\n", + " \"description\": \"time\"\n", " },\n", " \"j\": {\n", - " \"axis\": \"y\",\n", " \"type\": \"spatial\",\n", " \"extent\": [\n", " 0,\n", " 291\n", " ],\n", - " \"description\": [\n", - " \"projection_y_coordinate\",\n", - " \"grid_latitude\",\n", - " \"projection_y_angular_coordinate\"\n", - " ]\n", + " \"description\": \"projection_y_coordinate\",\n", + " \"axis\": \"y\"\n", " },\n", " \"i\": {\n", - " \"axis\": \"x\",\n", " \"type\": \"spatial\",\n", " \"extent\": [\n", " 0,\n", " 360\n", " ],\n", - " \"description\": [\n", - " \"projection_x_coordinate\",\n", - " \"grid_longitude\",\n", - " \"projection_x_angular_coordinate\"\n", - " ]\n", + " \"description\": \"projection_x_coordinate\",\n", + " \"axis\": \"x\"\n", " }\n", " },\n", " \"cube:variables\": {\n", @@ -555,8 +557,8 @@ " \"bnds\"\n", " ],\n", " \"type\": \"data\",\n", - " \"description\": null,\n", - " \"unit\": null\n", + " \"description\": \"\",\n", + " \"unit\": \"\"\n", " 
},\n", " \"vertices_latitude\": {\n", " \"dimensions\": [\n", @@ -565,8 +567,8 @@ " \"vertices\"\n", " ],\n", " \"type\": \"data\",\n", - " \"description\": null,\n", - " \"unit\": null\n", + " \"description\": \"\",\n", + " \"unit\": \"\"\n", " },\n", " \"vertices_longitude\": {\n", " \"dimensions\": [\n", @@ -575,8 +577,8 @@ " \"vertices\"\n", " ],\n", " \"type\": \"data\",\n", - " \"description\": null,\n", - " \"unit\": null\n", + " \"description\": \"\",\n", + " \"unit\": \"\"\n", " },\n", " \"siconc\": {\n", " \"dimensions\": [\n", @@ -603,7 +605,7 @@ " ],\n", " \"type\": \"data\",\n", " \"description\": \"Sea Ice area type\",\n", - " \"unit\": null\n", + " \"unit\": \"\"\n", " },\n", " \"latitude\": {\n", " \"dimensions\": [\n", @@ -623,7 +625,8 @@ " \"description\": \"longitude\",\n", " \"unit\": \"degrees_east\"\n", " }\n", - " }\n", + " },\n", + " \"datetime\": null\n", " },\n", " \"geometry\": {\n", " \"type\": \"Polygon\",\n", @@ -655,42 +658,57 @@ " \"links\": [\n", " {\n", " \"rel\": \"source\",\n", - " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", " \"type\": \"application/x-netcdf\",\n", - " \"title\": \"thredds:birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\"\n", + " \"title\": \"birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\"\n", " }\n", " ],\n", " \"assets\": {\n", - " \"httpserver_service\": {\n", + " \"HTTPServer\": {\n", " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", " \"type\": \"application/x-netcdf\",\n", " \"roles\": [\n", " \"data\"\n", " ]\n", " },\n", - " \"opendap_service\": {\n", 
+ " \"OpenDAP\": {\n", " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", " \"type\": \"text/html\",\n", " \"roles\": [\n", " \"data\"\n", " ]\n", " },\n", - " \"wcs_service\": {\n", - " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wcs/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc?service=WCS&version=1.0.0&request=GetCapabilities\",\n", + " \"NcML\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/ncml/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", + " \"type\": \"\",\n", + " \"roles\": []\n", + " },\n", + " \"UDDC\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/uddc/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", + " \"type\": \"\",\n", + " \"roles\": []\n", + " },\n", + " \"ISO\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/iso/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", + " \"type\": \"\",\n", + " \"roles\": []\n", + " },\n", + " \"WCS\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wcs/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", " \"type\": \"application/xml\",\n", " \"roles\": [\n", " \"data\"\n", " ]\n", " },\n", - " \"wms_service\": {\n", - " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wms/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc?service=WMS&version=1.3.0&request=GetCapabilities\",\n", + " \"WMS\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wms/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", " \"type\": \"application/xml\",\n", " \"roles\": [\n", " \"visual\"\n", " ]\n", " },\n", - " 
\"nccs_service\": {\n", - " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/ncss/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc/dataset.html\",\n", + " \"NetcdfSubset\": {\n", + " \"href\": \"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/ncss/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc\",\n", " \"type\": \"application/x-netcdf\",\n", " \"roles\": [\n", " \"data\"\n", @@ -704,16 +722,18 @@ " 89.74176788330078\n", " ],\n", " \"stac_extensions\": [\n", - " \"https://stac-extensions.github.io/datacube/v2.0.0/schema.json\"\n", + " \"https://raw.githubusercontent.com/TomAugspurger/cmip6/main/json-schema/schema.json\",\n", + " \"https://stac-extensions.github.io/datacube/v2.2.0/schema.json\"\n", " ]\n", "}\n" ] } ], "source": [ - "stac_item_data = stac_item.item.to_dict()\n", + "AnyDateTime = Union[datetime, date]\n", + "AnyJsonEncodable = Union[pystac.Item, np.ndarray, np.number, Url, Enum, AnyDateTime, \"JsonLike\"]\n", "\n", - "def json_encode(obj: \"pystac.Item\") -> Union[\"JsonLike\", str]:\n", + "def json_encode(obj: \"AnyJsonEncodable\") -> Union[\"JsonLike\", str]:\n", " if isinstance(obj, (np.ndarray, np.number)):\n", " return obj.tolist()\n", " if isinstance(obj, (Url, Enum)):\n", @@ -728,8 +748,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-29T22:45:37.741924891Z", - "start_time": "2023-09-29T22:45:37.672054869Z" + "end_time": "2024-01-09T21:43:20.412846636Z", + "start_time": "2024-01-09T21:43:20.405259220Z" } }, "id": "4eeb52c23edccb31" @@ -746,7 +766,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 8, "outputs": [], "source": [ "# NOTE:\n", @@ -759,8 +779,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-09-29T22:45:37.742086738Z", - "start_time": "2023-09-29T22:45:37.715603867Z" + "end_time": "2024-01-09T20:54:12.730333186Z", + "start_time": "2024-01-09T20:54:12.724897702Z" } }, 
"id": "e4fa98fcad8b5556" diff --git a/requirements-dev.txt b/requirements-dev.txt index 0f848dc..6999cc9 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -35,7 +35,7 @@ pytest-notebook pytest-rerunfailures pycodestyle pydocstyle -pylint>=2.15.4; python_version >= "3.7" +pylint>=2.15.4,<3 pylint-per-file-ignores; python_version >= "3.7" pylint_quotes safety diff --git a/requirements.txt b/requirements.txt index 0920de8..67bb53b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,3 @@ -r requirements-sys.txt -# Following does not work -# STACpopulator @ https://github.com/crim-ca/stac-populator/archive/refs/heads/weaver-repo2cwl-ncml2stac.zip -# Also, editable '-e' required, otherwise module still not found... --e git+https://github.com/Ouranosinc/stac-populator@collection_link#egg=STACpopulator +# editable '-e' required, otherwise module still not found... +-e git+https://github.com/crim-ca/stac-populator@0.5.0#egg=STACpopulator