From 2db1353350d2fdaed02e73b216e56f91411411d7 Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Sun, 25 Jun 2023 21:33:48 -0600 Subject: [PATCH 01/12] First commit of new implementation --- .gitignore | 2 + .vscode/launch.json | 26 +++++++ .vscode/settings.json | 28 ++++++++ STACpopulator/__init__.py | 3 + STACpopulator/crawlers.py | 19 +++++ STACpopulator/metadata_parsers.py | 61 ++++++++++++++++ STACpopulator/populator_base.py | 113 ++++++++++++++++++++++++++++++ STACpopulator/stac_utils.py | 65 +++++++++++++++++ docker-compose.yml | 49 +++++++++++++ implementations/CMIP6.yml | 6 ++ implementations/add_CMIP6.py | 43 ++++++++++++ implementations/add_some_data.py | 0 pyproject.toml | 11 +++ requirements.txt | 7 +- 14 files changed, 428 insertions(+), 5 deletions(-) create mode 100644 .gitignore create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 STACpopulator/__init__.py create mode 100644 STACpopulator/crawlers.py create mode 100644 STACpopulator/metadata_parsers.py create mode 100644 STACpopulator/populator_base.py create mode 100644 STACpopulator/stac_utils.py create mode 100644 docker-compose.yml create mode 100644 implementations/CMIP6.yml create mode 100644 implementations/add_CMIP6.py create mode 100644 implementations/add_some_data.py create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..72f7985 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.pyc +STACpopulator.egg-info/ \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..0d5b319 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,26 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + + { + "name": "test CMIP6", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + }, + { + "name": "test CMIP5", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true + } + + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..d1595d0 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,28 @@ +{ + "cmake.configureOnOpen": false, + "python.linting.enabled": true, + "python.linting.pylintEnabled": false, + "python.linting.flake8Enabled": true, + "python.linting.flake8Path": "/Users/dchandan/local/mambaforge/bin/flake8", + // "python.linting.flake8Args": [ + // "--max-line-length=120", + // "--ignore=E402,F841,F401,E302,E305,F821", + // ], + "flake8.args": [ + "--ignore=E402,F841,F401,E302,E305,F821", + "--max-line-length=120" + ], + "python.formatting.provider": "black", + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": true + }, + }, + "isort.args": [ + "--profile", + "black" + ], + // "python.formatting.blackPath": "/Users/dchandan/local/mambaforge/bin/black", +} \ No newline at end of file diff --git a/STACpopulator/__init__.py b/STACpopulator/__init__.py new file mode 100644 index 0000000..c8bf250 --- /dev/null +++ b/STACpopulator/__init__.py @@ -0,0 +1,3 @@ +from .crawlers import thredds_crawler +from .metadata_parsers import nc_attrs_from_ncml +from .populator_base import STACpopulatorBase diff --git a/STACpopulator/crawlers.py b/STACpopulator/crawlers.py new file mode 100644 index 0000000..c19241c --- /dev/null +++ b/STACpopulator/crawlers.py @@ -0,0 +1,19 @@ +def thredds_crawler(cat, depth=1): + """Return a generator walking a THREDDS data catalog for datasets. + + Parameters + ---------- + cat : TDSCatalog + THREDDS catalog. + depth : int + Maximum recursive depth. Setting 0 will return only datasets within the top-level catalog. If None, + depth is set to 1000. + """ + yield from cat.datasets.items() + if depth is None: + depth = 1000 + + if depth > 0: + for name, ref in cat.catalog_refs.items(): + child = ref.follow() + yield from thredds_crawler(child, depth=depth - 1) diff --git a/STACpopulator/metadata_parsers.py b/STACpopulator/metadata_parsers.py new file mode 100644 index 0000000..84636f8 --- /dev/null +++ b/STACpopulator/metadata_parsers.py @@ -0,0 +1,61 @@ +import lxml.etree +import requests + + +def nc_attrs_from_ncml(url): + """Extract attributes from NcML file. + + Parameters + ---------- + url : str + Link to NcML service of THREDDS server for a dataset. + + Returns + ------- + dict + Global attribute values keyed by facet names, with variable attributes in `__variable__` nested dict, and + additional specialized attributes in `__group__` nested dict. + """ + parser = lxml.etree.XMLParser(encoding="UTF-8") + + ns = {"ncml": "http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2"} + + # Parse XML content - UTF-8 encoded documents need to be read as bytes + xml = requests.get(url).content + doc = lxml.etree.fromstring(xml, parser=parser) + nc = doc.xpath("/ncml:netcdf", namespaces=ns)[0] + + # Extract global attributes + out = _attrib_to_dict(nc.xpath("ncml:attribute", namespaces=ns)) + + # Extract group attributes + gr = {} + for group in nc.xpath("ncml:group", namespaces=ns): + gr[group.attrib["name"]] = _attrib_to_dict(group.xpath("ncml:attribute", namespaces=ns)) + + # Extract variable attributes + va = {} + for variable in nc.xpath("ncml:variable", namespaces=ns): + if "_CoordinateAxisType" in variable.xpath("ncml:attribute/@name", namespaces=ns): + continue + va[variable.attrib["name"]] = _attrib_to_dict(variable.xpath("ncml:attribute", namespaces=ns)) + + out["__group__"] = gr + out["__variable__"] = va + + return out + + +def _attrib_to_dict(elems): + """Convert element attributes to dictionary. + + Ignore attributes with names starting with _ + """ + hidden_prefix = "_" + out = {} + for e in elems: + a = e.attrib + if a["name"].startswith(hidden_prefix): + continue + out[a["name"]] = a["value"] + return out diff --git a/STACpopulator/populator_base.py b/STACpopulator/populator_base.py new file mode 100644 index 0000000..7d1db7e --- /dev/null +++ b/STACpopulator/populator_base.py @@ -0,0 +1,113 @@ +import hashlib +import logging +from abc import ABC, abstractmethod +from typing import Iterator, Optional + +import yaml +from colorlog import ColoredFormatter +from siphon.catalog import TDSCatalog + +from STACpopulator.stac_utils import ( + create_stac_collection, + post_collection, + stac_collection_exists, +) + +LOGGER = logging.getLogger(__name__) +LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" +formatter = ColoredFormatter(LOGFORMAT) +stream = logging.StreamHandler() +stream.setFormatter(formatter) +LOGGER.addHandler(stream) +LOGGER.setLevel(logging.INFO) +LOGGER.propagate = False + + +class STACpopulatorBase(ABC): + def __init__( + self, + catalog: str, + stac_host: str, + collection_info_fname: str, + crawler: Iterator[str], + crawler_args: Optional[dict] = {}, + ) -> None: + """Constructor + + Parameters + ---------- + catalog : TDSCatalog + stac_host : STAC API address + collection_info_fname : Name of the configuration file containing info about the collection + crawler : callable that knows how to iterate over the organization + structure of the catalog in order to find individual items + crawler_args : any optional arguments to pass to the crawler + """ + + super().__init__() + with open(collection_info_fname) as f: + self._collection_info = yaml.load(f, yaml.Loader) + + req_definitions = ["title", "description", "keywords", "license"] + for req in req_definitions: + if req not in self._collection_info.keys(): + LOGGER.error(f"'{req}' is required in the configuration file") + raise RuntimeError(f"'{req}' is required in the configuration file") + + if catalog.endswith(".html"): + catalog = catalog.replace(".html", ".xml") + LOGGER.info("Convering catalog URL from html to xml") + self._catalog = TDSCatalog(catalog) + self._stac_host = self.validate_host(stac_host) + self._crawler = crawler + self._crawler_args = crawler_args + + self._collection_id = hashlib.md5(self.collection.encode("utf-8")).hexdigest() + LOGGER.info("Initialization complete") + LOGGER.info(f"Collection {self.collection} is assigned id {self._collection_id}") + + @property + def catalog(self) -> TDSCatalog: + return self._catalog + + @property + def collection(self) -> str: + return self._collection_info["title"] + + @property + def stac_host(self) -> str: + return self._stac_host + + @property + def crawler(self) -> Iterator[str]: + return self._crawler + + @property + def collection_id(self): + return self._collection_id + + def validate_host(self, stac_host): + # TODO: check the format of the host is URL type + # TODO: check if the host is reacheable?? + return stac_host + + def ingest(self) -> None: + # First create colelction if it doesn't exist + if not stac_collection_exists(self.stac_host, self.collection_id): + LOGGER.info(f"Creating collection '{self.collection}'") + pystac_collection = create_stac_collection(self.collection_id, self._collection_info) + # print(pystac_collection) + post_collection(self.stac_host, pystac_collection) + LOGGER.info("Collection successfully created") + else: + LOGGER.info(f"Collection '{self.collection}' already exists") + # for item in self.crawler(self.catalog, **self._crawler_args): + # stac_item = self.process_STAC_item(item) + # self.post_item(stac_item) + + def post_item(self, data: dict[str, dict]) -> None: + pass + + @abstractmethod + def process_STAC_item(self): # noqa N802 + pass diff --git a/STACpopulator/stac_utils.py b/STACpopulator/stac_utils.py new file mode 100644 index 0000000..39722ba --- /dev/null +++ b/STACpopulator/stac_utils.py @@ -0,0 +1,65 @@ +import os +from datetime import datetime +from typing import Any + +import pystac +import requests + + +def stac_collection_exists(stac_host: str, collection_id: str): + """ + Get a STAC collection + + Returns the collection JSON. + """ + r = requests.get(os.path.join(stac_host, "collections", collection_id), verify=False) + + return True if r.status_code == 200 else False + + +def create_stac_collection(collection_id: str, collection_info) -> dict[str, Any]: + """ + Create a basic STAC collection. + + Returns the collection. + """ + + sp_extent = pystac.SpatialExtent([collection_info.pop("spatialextent")]) + tmp = collection_info.pop("temporalextent") + tmp_extent = pystac.TemporalExtent( + [ + [ + datetime.strptime(tmp[0], "%Y-%m-%d") if tmp[0] is not None else None, + datetime.strptime(tmp[1], "%Y-%m-%d") if tmp[1] is not None else None, + ] + ] + ) + collection_info["extent"] = pystac.Extent(sp_extent, tmp_extent) + collection_info["summaries"] = pystac.Summaries({"needs_summaries_update": ["true"]}) + + collection = pystac.Collection(id=collection_id, **collection_info) + + return collection.to_dict() + + +def post_collection(stac_host: str, json_data: dict[str, Any]) -> None: + """ + Post a STAC collection. + + Returns the collection id. + """ + collection_id = json_data["id"] + r = requests.post(os.path.join(stac_host, "collections"), json=json_data, verify=False) + + if r.status_code == 200: + print( + f"{bcolors.OKGREEN}[INFO] Pushed STAC collection [{collection_id}] to [{stac_host}] ({r.status_code}){bcolors.ENDC}" + ) + elif r.status_code == 409: + print( + f"{bcolors.WARNING}[INFO] STAC collection [{collection_id}] already exists on [{stac_host}] ({r.status_code}), updating..{bcolors.ENDC}" + ) + r = requests.put(os.path.join(stac_host, "collections"), json=json_data, verify=False) + r.raise_for_status() + else: + r.raise_for_status() diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..544f45c --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,49 @@ +version: "3.4" + +x-logging: + &default-logging + driver: "json-file" + options: + max-size: "50m" + max-file: "10" + +services: + stac: + container_name: stac-populator-test + image: ghcr.io/crim-ca/stac-app:main + depends_on: + - stac-db + ports: + - "8880:8000" + environment: + - POSTGRES_USER=dchandan + - POSTGRES_PASS=password + - POSTGRES_DBNAME=postgis + - POSTGRES_HOST_READER=stac-db + - POSTGRES_HOST_WRITER=stac-db + - POSTGRES_PORT=5432 + - ROUTER_PREFIX=/stac + logging: *default-logging + restart: always + + stac-db: + container_name: stac-populator-test-db + image: ghcr.io/stac-utils/pgstac:v0.6.10 + environment: + - POSTGRES_USER=dchandan + - POSTGRES_PASSWORD=password + - POSTGRES_DB=postgis + - PGUSER=dchandan + - PGPASSWORD=password + - PGHOST=localhost + - PGDATABASE=postgis + volumes: + - stac-db:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready"] + interval: 10s + timeout: 5s + retries: 5 + +volumes: + stac-db: diff --git a/implementations/CMIP6.yml b/implementations/CMIP6.yml new file mode 100644 index 0000000..59824a5 --- /dev/null +++ b/implementations/CMIP6.yml @@ -0,0 +1,6 @@ +title: CMIP6 +description: Coupled Model Intercomparison Project phase 6 +keywords: ['CMIP', 'CMIP6', 'WCRP', 'Cimate Change'] +license: "CC-BY-4.0" +spatialextent: [-180, -90, 180, 90] +temporalextent: ['1850-01-01', null] \ No newline at end of file diff --git a/implementations/add_CMIP6.py b/implementations/add_CMIP6.py new file mode 100644 index 0000000..cb6331a --- /dev/null +++ b/implementations/add_CMIP6.py @@ -0,0 +1,43 @@ +import argparse +import logging + +from colorlog import ColoredFormatter + +from STACpopulator import STACpopulatorBase +from STACpopulator.crawlers import thredds_crawler + +# from STACpopulator.metadata_parsers import nc_attrs_from_ncml + +LOGGER = logging.getLogger(__name__) +LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" +formatter = ColoredFormatter(LOGFORMAT) +stream = logging.StreamHandler() +stream.setFormatter(formatter) +LOGGER.addHandler(stream) +LOGGER.setLevel(logging.INFO) +LOGGER.propagate = False + + +class CMIP6populator(STACpopulatorBase): + def __init__( + self, + catalog: str, + hostname: str, + config: str, + ) -> None: + super().__init__(catalog, hostname, config, thredds_crawler, crawler_args={"depth": None}) + + def process_STAC_item(self): # noqa N802 + print("here") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(prog="CMIP6 STAC populator") + parser.add_argument("hostname", type=str, help="STAC API address") + parser.add_argument("catalog_URL", type=str, help="URL to the CMIP6 thredds catalog") + parser.add_argument("config_file", type=str, help="Name of the configuration file") + + args = parser.parse_args() + LOGGER.info(f"Arguments to call: {args}") + c = CMIP6populator(args.catalog_URL, args.hostname, args.config_file) + c.ingest() diff --git a/implementations/add_some_data.py b/implementations/add_some_data.py new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c90f1a1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "STACpopulator" +version = "0.0.1" +requires-python = ">=3.9" + +[tool.setuptools] +py-modules = ["STACpopulator"] \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 10a7a4a..dc03813 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,6 @@ +colorlog requests pystac pyyaml -git+https://github.com/crim-ca/stac-generator#egg=stac-generator siphon -shapely -boto3 -fsspec -xarray +lxml From fd29687d4f73c69d4c308431b99f2ca8e600849b Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Sun, 25 Jun 2023 21:34:51 -0600 Subject: [PATCH 02/12] Python code formatting with black --- collection_processor.py | 66 ++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/collection_processor.py b/collection_processor.py index 5042351..e676219 100644 --- a/collection_processor.py +++ b/collection_processor.py @@ -12,25 +12,26 @@ __holder__ = "Computer Research Institute of Montreal (CRIM)" __contact__ = "mathieu.provencher@crim.ca" -import requests -import os -import pystac import datetime import hashlib -import yaml +import os import sys +import pystac +import requests +import yaml + class bcolors: - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKCYAN = '\033[96m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKCYAN = "\033[96m" + OKGREEN = "\033[92m" + WARNING = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" class CollectionProcessor: @@ -55,9 +56,7 @@ def __init__(self): self.process_collection(stac_host, col["name"], col["description"]) def process_collection(self, stac_host, collection_name, collection_description): - collection_id = hashlib.md5( - collection_name.encode("utf-8") - ).hexdigest() + collection_id = hashlib.md5(collection_name.encode("utf-8")).hexdigest() stac_collection = self.get_stac_collection(stac_host, collection_id) if stac_collection: @@ -119,23 +118,20 @@ def create_stac_collection(self, collection_id, collection_name, collection_desc """ sp_extent = pystac.SpatialExtent([[-140.99778, 41.6751050889, -52.6480987209, 83.23324]]) - capture_date = datetime.datetime.strptime('2015-10-22', '%Y-%m-%d') - end_capture_date = datetime.datetime.strptime('2100-10-22', '%Y-%m-%d') + capture_date = datetime.datetime.strptime("2015-10-22", "%Y-%m-%d") + end_capture_date = datetime.datetime.strptime("2100-10-22", "%Y-%m-%d") tmp_extent = pystac.TemporalExtent([(capture_date, end_capture_date)]) extent = pystac.Extent(sp_extent, tmp_extent) - collection = pystac.Collection(id=collection_id, - title=collection_name, - description=collection_description, - extent=extent, - keywords=[ - "climate change", - "CMIP5", - "WCRP", - "CMIP" - ], - providers=None, - summaries=pystac.Summaries({"needs_summaries_update": ["true"]})) + collection = pystac.Collection( + id=collection_id, + title=collection_name, + description=collection_description, + extent=extent, + keywords=["climate change", "CMIP5", "WCRP", "CMIP"], + providers=None, + summaries=pystac.Summaries({"needs_summaries_update": ["true"]}), + ) return collection.to_dict() @@ -145,13 +141,17 @@ def post_collection(self, stac_host, json_data): Returns the collection id. """ - collection_id = json_data['id'] + collection_id = json_data["id"] r = requests.post(os.path.join(stac_host, "collections"), json=json_data, verify=False) if r.status_code == 200: - print(f"{bcolors.OKGREEN}[INFO] Pushed STAC collection [{collection_id}] to [{stac_host}] ({r.status_code}){bcolors.ENDC}") + print( + f"{bcolors.OKGREEN}[INFO] Pushed STAC collection [{collection_id}] to [{stac_host}] ({r.status_code}){bcolors.ENDC}" + ) elif r.status_code == 409: - print(f"{bcolors.WARNING}[INFO] STAC collection [{collection_id}] already exists on [{stac_host}] ({r.status_code}), updating..{bcolors.ENDC}") + print( + f"{bcolors.WARNING}[INFO] STAC collection [{collection_id}] already exists on [{stac_host}] ({r.status_code}), updating..{bcolors.ENDC}" + ) r = requests.put(os.path.join(stac_host, "collections"), json=json_data, verify=False) r.raise_for_status() else: From 903b043dee9b039ff0ddd0c96e5ba129547aca41 Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Mon, 26 Jun 2023 11:22:06 -0600 Subject: [PATCH 03/12] Moving old implementation to .deprecated for now --- Dockerfile => .deprecated/Dockerfile | 0 collection_processor.py => .deprecated/collection_processor.py | 0 collections.yaml => .deprecated/collections.yaml | 0 populate.sh => .deprecated/populate.sh | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename Dockerfile => .deprecated/Dockerfile (100%) rename collection_processor.py => .deprecated/collection_processor.py (100%) rename collections.yaml => .deprecated/collections.yaml (100%) rename populate.sh => .deprecated/populate.sh (100%) diff --git a/Dockerfile b/.deprecated/Dockerfile similarity index 100% rename from Dockerfile rename to .deprecated/Dockerfile diff --git a/collection_processor.py b/.deprecated/collection_processor.py similarity index 100% rename from collection_processor.py rename to .deprecated/collection_processor.py diff --git a/collections.yaml b/.deprecated/collections.yaml similarity index 100% rename from collections.yaml rename to .deprecated/collections.yaml diff --git a/populate.sh b/.deprecated/populate.sh similarity index 100% rename from populate.sh rename to .deprecated/populate.sh From 5ddc305d3502d2e63e0f7ca9d0c7ecefb3360af2 Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Mon, 26 Jun 2023 11:22:43 -0600 Subject: [PATCH 04/12] Changes to my IDE settings --- .vscode/settings.json | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.vscode/settings.json b/.vscode/settings.json index d1595d0..834fc93 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -19,10 +19,31 @@ "editor.codeActionsOnSave": { "source.organizeImports": true }, + "editor.wordBasedSuggestions": true, + "editor.quickSuggestions": { + "comments": "on", + "strings": "on", + "other": "on" + }, + }, "isort.args": [ "--profile", "black" ], // "python.formatting.blackPath": "/Users/dchandan/local/mambaforge/bin/black", + + "[markdown]": { + "editor.unicodeHighlight.ambiguousCharacters": false, + "editor.unicodeHighlight.invisibleCharacters": false, + "diffEditor.ignoreTrimWhitespace": false, + "editor.wordWrap": "on", + "editor.quickSuggestions": { + "comments": "on", + "strings": "on", + "other": "on" + }, + "editor.wordBasedSuggestions": true, + + } } \ No newline at end of file From 3122f05e0fff9ca1e5ca02e218ea8d1846a271d8 Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Mon, 26 Jun 2023 11:24:51 -0600 Subject: [PATCH 05/12] minor comment --- implementations/add_CMIP6.py | 1 + 1 file changed, 1 insertion(+) diff --git a/implementations/add_CMIP6.py b/implementations/add_CMIP6.py index cb6331a..f374280 100644 --- a/implementations/add_CMIP6.py +++ b/implementations/add_CMIP6.py @@ -28,6 +28,7 @@ def __init__( super().__init__(catalog, hostname, config, thredds_crawler, crawler_args={"depth": None}) def process_STAC_item(self): # noqa N802 + # TODO: next step is to implement this print("here") From 2aec8ecadae179628c4ce0f427bdb038099135e6 Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Mon, 26 Jun 2023 11:25:16 -0600 Subject: [PATCH 06/12] modifying readme --- README.md | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 50bef0b..bcac544 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,21 @@ # STAC Catalog Populator -Populate STAC catalog with sample collection items via [CEDA STAC Generator](https://github.com/cedadev/stac-generator), employed in sample -[CMIP Dataset Ingestion Workflows](https://github.com/cedadev/stac-generator-example/tree/master/conf). +This repository contains a framework [STACpopulator](STACpopulator) that can be used to implement concrete populators (see [implementations](implementations)) for populating the STAC catalog on a DACCS node. -**Sample call via Docker image** +## Framework + +The framwork is centered around a Python Abstract Base Class: `STACpopulatorBase` that implements all the logic for populating a STAC catalog. This class implements an abstract method called `process_STAC_item` that should be defined in implementations of the class and contain all the logic for constructing the STAC representation for an item in the collection that is to be processed. + +## Implementations + +Currently, one implementation of `STACpopulatorBase` is provided in [add_CMIP6.py](implementations/add_CMIP6.py). + +## Testing + +The provided `docker-compose` file can be used to launch a test STAC server. The `add_CMIP6.py` script can be run as: ``` -docker run -e STAC_HOST=https://stac-dev.crim.ca/stac/ -e STAC_ASSET_GENERATOR_TIMEOUT=300 stac-populator +python implementations/add_CMIP6.py http://localhost:8880/stac/ https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/datasets/simulations/bias_adjusted/catalog.html implementations/CMIP6.yml ``` +Note: in the script above, I am currently using a sample THREDDS catalog URL and not one relevant to the global scale CMIP6 data. \ No newline at end of file From 66286a9695ca543455ca23ff6a3514c1e5a23840 Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Tue, 15 Aug 2023 14:45:02 -0400 Subject: [PATCH 07/12] removing IDE settings from repo --- .gitignore | 3 ++- .vscode/launch.json | 26 ----------------------- .vscode/settings.json | 49 ------------------------------------------- 3 files changed, 2 insertions(+), 76 deletions(-) delete mode 100644 .vscode/launch.json delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index 72f7985..80f0926 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ *.pyc -STACpopulator.egg-info/ \ No newline at end of file +STACpopulator.egg-info/ +.vscode/ diff --git a/.vscode/launch.json b/.vscode/launch.json deleted file mode 100644 index 0d5b319..0000000 --- a/.vscode/launch.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - - { - "name": "test CMIP6", - "type": "python", - "request": "launch", - "program": "${file}", - "console": "integratedTerminal", - "justMyCode": true - }, - { - "name": "test CMIP5", - "type": "python", - "request": "launch", - "program": "${file}", - "console": "integratedTerminal", - "justMyCode": true - } - - ] -} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 834fc93..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "cmake.configureOnOpen": false, - "python.linting.enabled": true, - "python.linting.pylintEnabled": false, - "python.linting.flake8Enabled": true, - "python.linting.flake8Path": "/Users/dchandan/local/mambaforge/bin/flake8", - // "python.linting.flake8Args": [ - // "--max-line-length=120", - // "--ignore=E402,F841,F401,E302,E305,F821", - // ], - "flake8.args": [ - "--ignore=E402,F841,F401,E302,E305,F821", - "--max-line-length=120" - ], - "python.formatting.provider": "black", - "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter", - "editor.formatOnSave": true, - "editor.codeActionsOnSave": { - "source.organizeImports": true - }, - "editor.wordBasedSuggestions": true, - "editor.quickSuggestions": { - "comments": "on", - "strings": "on", - "other": "on" - }, - - }, - "isort.args": [ - "--profile", - "black" - ], - // "python.formatting.blackPath": "/Users/dchandan/local/mambaforge/bin/black", - - "[markdown]": { - "editor.unicodeHighlight.ambiguousCharacters": false, - "editor.unicodeHighlight.invisibleCharacters": false, - "diffEditor.ignoreTrimWhitespace": false, - "editor.wordWrap": "on", - "editor.quickSuggestions": { - "comments": "on", - "strings": "on", - "other": "on" - }, - "editor.wordBasedSuggestions": true, - - } -} \ No newline at end of file From b47b649cb3d42d263dffb73e71a5b8c7514f05bb Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Tue, 15 Aug 2023 17:43:36 -0400 Subject: [PATCH 08/12] various small PR comment related changes --- STACpopulator/populator_base.py | 34 ++++++++++++++++----------------- STACpopulator/stac_utils.py | 6 +++--- docker-compose.yml | 9 +++------ 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/STACpopulator/populator_base.py b/STACpopulator/populator_base.py index 7d1db7e..6e0a1ce 100644 --- a/STACpopulator/populator_base.py +++ b/STACpopulator/populator_base.py @@ -7,6 +7,7 @@ from colorlog import ColoredFormatter from siphon.catalog import TDSCatalog +from STACpopulator.crawlers import Crawler from STACpopulator.stac_utils import ( create_stac_collection, post_collection, @@ -28,24 +29,24 @@ def __init__( self, catalog: str, stac_host: str, - collection_info_fname: str, - crawler: Iterator[str], + collection_info_filename: str, + crawler: Crawler, crawler_args: Optional[dict] = {}, ) -> None: """Constructor Parameters ---------- - catalog : TDSCatalog + catalog : str stac_host : STAC API address - collection_info_fname : Name of the configuration file containing info about the collection + collection_info_filename : Name of the configuration file containing info about the collection crawler : callable that knows how to iterate over the organization structure of the catalog in order to find individual items crawler_args : any optional arguments to pass to the crawler """ super().__init__() - with open(collection_info_fname) as f: + with open(collection_info_filename) as f: self._collection_info = yaml.load(f, yaml.Loader) req_definitions = ["title", "description", "keywords", "license"] @@ -56,22 +57,22 @@ def __init__( if catalog.endswith(".html"): catalog = catalog.replace(".html", ".xml") - LOGGER.info("Convering catalog URL from html to xml") + LOGGER.info("Converting catalog URL from html to xml") self._catalog = TDSCatalog(catalog) self._stac_host = self.validate_host(stac_host) self._crawler = crawler self._crawler_args = crawler_args - self._collection_id = hashlib.md5(self.collection.encode("utf-8")).hexdigest() + self._collection_id = hashlib.md5(self.collection_name.encode("utf-8")).hexdigest() LOGGER.info("Initialization complete") - LOGGER.info(f"Collection {self.collection} is assigned id {self._collection_id}") + LOGGER.info(f"Collection {self.collection_name} is assigned id {self._collection_id}") @property def catalog(self) -> TDSCatalog: return self._catalog @property - def collection(self) -> str: + def collection_name(self) -> str: return self._collection_info["title"] @property @@ -79,28 +80,27 @@ def stac_host(self) -> str: return self._stac_host @property - def crawler(self) -> Iterator[str]: + def crawler(self) -> Crawler: return self._crawler @property - def collection_id(self): + def collection_id(self) -> str: return self._collection_id - def validate_host(self, stac_host): + def validate_host(self, stac_host: str) -> str: # TODO: check the format of the host is URL type - # TODO: check if the host is reacheable?? + # TODO: check if the host is reachable?? return stac_host def ingest(self) -> None: - # First create colelction if it doesn't exist + # First create collection if it doesn't exist if not stac_collection_exists(self.stac_host, self.collection_id): - LOGGER.info(f"Creating collection '{self.collection}'") + LOGGER.info(f"Creating collection '{self.collection_name}'") pystac_collection = create_stac_collection(self.collection_id, self._collection_info) - # print(pystac_collection) post_collection(self.stac_host, pystac_collection) LOGGER.info("Collection successfully created") else: - LOGGER.info(f"Collection '{self.collection}' already exists") + LOGGER.info(f"Collection '{self.collection_name}' already exists") # for item in self.crawler(self.catalog, **self._crawler_args): # stac_item = self.process_STAC_item(item) # self.post_item(stac_item) diff --git a/STACpopulator/stac_utils.py b/STACpopulator/stac_utils.py index 39722ba..c7fd1a3 100644 --- a/STACpopulator/stac_utils.py +++ b/STACpopulator/stac_utils.py @@ -6,7 +6,7 @@ import requests -def stac_collection_exists(stac_host: str, collection_id: str): +def stac_collection_exists(stac_host: str, collection_id: str) -> bool: """ Get a STAC collection @@ -14,10 +14,10 @@ def stac_collection_exists(stac_host: str, collection_id: str): """ r = requests.get(os.path.join(stac_host, "collections", collection_id), verify=False) - return True if r.status_code == 200 else False + return r.status_code == 200 -def create_stac_collection(collection_id: str, collection_info) -> dict[str, Any]: +def create_stac_collection(collection_id: str, collection_info: dict[str, Any]) -> dict[str, Any]: """ Create a basic STAC collection. diff --git a/docker-compose.yml b/docker-compose.yml index 544f45c..23ffae5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,4 @@ -version: "3.4" - -x-logging: - &default-logging +x-logging: &default-logging driver: "json-file" options: max-size: "50m" @@ -30,7 +27,7 @@ services: container_name: stac-populator-test-db image: ghcr.io/stac-utils/pgstac:v0.6.10 environment: - - POSTGRES_USER=dchandan + - POSTGRES_USER=testuser - POSTGRES_PASSWORD=password - POSTGRES_DB=postgis - PGUSER=dchandan @@ -40,7 +37,7 @@ services: volumes: - stac-db:/var/lib/postgresql/data healthcheck: - test: ["CMD-SHELL", "pg_isready"] + test: [ "CMD-SHELL", "pg_isready" ] interval: 10s timeout: 5s retries: 5 From 418f5421cbd16ef35c645a9025c4edddec881a46 Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Tue, 15 Aug 2023 17:44:41 -0400 Subject: [PATCH 09/12] adding abstract CV validation method for STAC items --- STACpopulator/populator_base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/STACpopulator/populator_base.py b/STACpopulator/populator_base.py index 6e0a1ce..7d6d96c 100644 --- a/STACpopulator/populator_base.py +++ b/STACpopulator/populator_base.py @@ -111,3 +111,7 @@ def post_item(self, data: dict[str, dict]) -> None: @abstractmethod def process_STAC_item(self): # noqa N802 pass + + @abstractmethod + def validate_STAC_item_CV(self): # noqa N802 + pass From 2eb8b12000f9cd6081e3c42105189fe5cc178d3d Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Thu, 17 Aug 2023 16:30:10 -0400 Subject: [PATCH 10/12] Adding a generic way to work with input sources --- STACpopulator/__init__.py | 2 - STACpopulator/crawlers.py | 19 -------- STACpopulator/input.py | 86 +++++++++++++++++++++++++++++++++ STACpopulator/populator_base.py | 51 ++++++++----------- STACpopulator/stac_utils.py | 34 +++++++++++++ implementations/CMIP6.yml | 2 +- implementations/add_CMIP6.py | 36 ++++++++++---- 7 files changed, 166 insertions(+), 64 deletions(-) delete mode 100644 STACpopulator/crawlers.py create mode 100644 STACpopulator/input.py diff --git a/STACpopulator/__init__.py b/STACpopulator/__init__.py index c8bf250..f217a91 100644 --- a/STACpopulator/__init__.py +++ b/STACpopulator/__init__.py @@ -1,3 +1 @@ -from .crawlers import thredds_crawler -from .metadata_parsers import nc_attrs_from_ncml from .populator_base import STACpopulatorBase diff --git a/STACpopulator/crawlers.py b/STACpopulator/crawlers.py deleted file mode 100644 index c19241c..0000000 --- a/STACpopulator/crawlers.py +++ /dev/null @@ -1,19 +0,0 @@ -def thredds_crawler(cat, depth=1): - """Return a generator walking a THREDDS data catalog for datasets. - - Parameters - ---------- - cat : TDSCatalog - THREDDS catalog. - depth : int - Maximum recursive depth. Setting 0 will return only datasets within the top-level catalog. If None, - depth is set to 1000. - """ - yield from cat.datasets.items() - if depth is None: - depth = 1000 - - if depth > 0: - for name, ref in cat.catalog_refs.items(): - child = ref.follow() - yield from thredds_crawler(child, depth=depth - 1) diff --git a/STACpopulator/input.py b/STACpopulator/input.py new file mode 100644 index 0000000..f59328f --- /dev/null +++ b/STACpopulator/input.py @@ -0,0 +1,86 @@ +import logging +from abc import ABC, abstractmethod +from typing import Optional + +from colorlog import ColoredFormatter +from siphon.catalog import TDSCatalog + +LOGGER = logging.getLogger(__name__) +LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" +formatter = ColoredFormatter(LOGFORMAT) +stream = logging.StreamHandler() +stream.setFormatter(formatter) +LOGGER.addHandler(stream) +LOGGER.setLevel(logging.INFO) +LOGGER.propagate = False + + +class GenericLoader(ABC): + def __init__(self) -> None: + pass + + @abstractmethod + def __iter__(self): + """ + A generator that returns an item from the input. The item could be anything + depending on the specific concrete implementation of this abstract class. + """ + pass + + @abstractmethod + def reset(self): + """Reset the internal state of the generator.""" + pass + + +class THREDDSLoader(GenericLoader): + def __init__(self, thredds_catalog_url: str, depth: Optional[int] = None) -> None: + """Constructor + + :param thredds_catalog_url: the URL to the THREDDS catalog to ingest + :type thredds_catalog_url: str + :param depth: Maximum recursive depth for the class's generator. Setting 0 will return only datasets within the + top-level catalog. If None, depth is set to 1000, defaults to None + :type depth: int, optional + """ + super().__init__() + self._depth = depth if depth is not None else 1000 + + if thredds_catalog_url.endswith(".html"): + thredds_catalog_url = thredds_catalog_url.replace(".html", ".xml") + LOGGER.info("Converting catalog URL from html to xml") + + self.thredds_catalog_URL = thredds_catalog_url + self.catalog = TDSCatalog(self.thredds_catalog_URL) + self.catalog_head = self.catalog + + def reset(self): + """Reset the generator.""" + self.catalog_head = self.catalog + + def __iter__(self): + """Return a generator walking a THREDDS data catalog for datasets.""" + yield from self.catalog_head.datasets.items() + + if self._depth > 0: + for name, ref in self.catalog_head.catalog_refs.items(): + self.catalog_head = ref.follow() + self._depth -= 1 + yield from self + + +class RemoteTHREDDSLoader(THREDDSLoader): + def __init__(self, thredds_catalog_url: str, depth: int | None = None) -> None: + super().__init__(thredds_catalog_url, depth) + # more stuff to follow based on needs of a concrete implementation + + +class GeoServerLoader(GenericLoader): + def __init__(self) -> None: + super().__init__() + + def __iter__(self): + raise NotImplementedError + + def reset(self): + raise NotImplementedError diff --git a/STACpopulator/populator_base.py b/STACpopulator/populator_base.py index 7d6d96c..8c6465c 100644 --- a/STACpopulator/populator_base.py +++ b/STACpopulator/populator_base.py @@ -1,17 +1,17 @@ import hashlib import logging from abc import ABC, abstractmethod -from typing import Iterator, Optional import yaml from colorlog import ColoredFormatter -from siphon.catalog import TDSCatalog -from STACpopulator.crawlers import Crawler +from STACpopulator.input import GenericLoader from STACpopulator.stac_utils import ( create_stac_collection, post_collection, stac_collection_exists, + stac_host_reachable, + url_validate, ) LOGGER = logging.getLogger(__name__) @@ -27,22 +27,19 @@ class STACpopulatorBase(ABC): def __init__( self, - catalog: str, stac_host: str, + data_loader: GenericLoader, collection_info_filename: str, - crawler: Crawler, - crawler_args: Optional[dict] = {}, ) -> None: """Constructor - Parameters - ---------- - catalog : str - stac_host : STAC API address - collection_info_filename : Name of the configuration file containing info about the collection - crawler : callable that knows how to iterate over the organization - structure of the catalog in order to find individual items - crawler_args : any optional arguments to pass to the crawler + :param stac_host: URL to the STAC API + :type stac_host: str + :param data_loader: A concrete implementation of the GenericLoader abstract base class + :type data_loader: GenericLoader + :param collection_info_filename: Yaml file containing the information about the collection to populate + :type collection_info_filename: str + :raises RuntimeError: Raised if one of the required definitions is not found in the collection info filename """ super().__init__() @@ -55,22 +52,13 @@ def __init__( LOGGER.error(f"'{req}' is required in the configuration file") raise RuntimeError(f"'{req}' is required in the configuration file") - if catalog.endswith(".html"): - catalog = catalog.replace(".html", ".xml") - LOGGER.info("Converting catalog URL from html to xml") - self._catalog = TDSCatalog(catalog) + self._ingest_pipeline = data_loader self._stac_host = self.validate_host(stac_host) - self._crawler = crawler - self._crawler_args = crawler_args self._collection_id = hashlib.md5(self.collection_name.encode("utf-8")).hexdigest() LOGGER.info("Initialization complete") LOGGER.info(f"Collection {self.collection_name} is assigned id {self._collection_id}") - @property - def catalog(self) -> TDSCatalog: - return self._catalog - @property def collection_name(self) -> str: return self._collection_info["title"] @@ -79,17 +67,16 @@ def collection_name(self) -> str: def stac_host(self) -> str: return self._stac_host - @property - def crawler(self) -> Crawler: - return self._crawler - @property def collection_id(self) -> str: return self._collection_id def validate_host(self, stac_host: str) -> str: - # TODO: check the format of the host is URL type - # TODO: check if the host is reachable?? + if not url_validate(stac_host): + raise ValueError("stac_host URL is not appropriately formatted") + if not stac_host_reachable(stac_host): + raise ValueError("stac_host is not reachable") + return stac_host def ingest(self) -> None: @@ -109,9 +96,9 @@ def post_item(self, data: dict[str, dict]) -> None: pass @abstractmethod - def process_STAC_item(self): # noqa N802 + def process_stac_item(self): # noqa N802 pass @abstractmethod - def validate_STAC_item_CV(self): # noqa N802 + def validate_stac_item_cv(self): # noqa N802 pass diff --git a/STACpopulator/stac_utils.py b/STACpopulator/stac_utils.py index c7fd1a3..743f53a 100644 --- a/STACpopulator/stac_utils.py +++ b/STACpopulator/stac_utils.py @@ -1,4 +1,5 @@ import os +import re from datetime import datetime from typing import Any @@ -6,6 +7,39 @@ import requests +def url_validate(target: str) -> bool: + """Validate whether a supplied URL is reliably written. + + Parameters + ---------- + target : str + + References + ---------- + https://stackoverflow.com/a/7160778/7322852 + """ + url_regex = re.compile( + r"^(?:http|ftp)s?://" # http:// or https:// + # domain... + r"(?:(?:[A-Z\d](?:[A-Z\d-]{0,61}[A-Z\d])?\.)+(?:[A-Z]{2,6}\.?|[A-Z\d-]{2,}\.?)|" + r"localhost|" # localhost... + r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})" # ...or ip + r"(?::\d+)?" # optional port + r"(?:/?|[/?]\S+)$", + re.IGNORECASE, + ) + return True if re.match(url_regex, target) else False + + +def stac_host_reachable(url: str) -> bool: + try: + registry = requests.get(url) + registry.raise_for_status() + return True + except (requests.exceptions.RequestException, requests.exceptions.ConnectionError): + return False + + def stac_collection_exists(stac_host: str, collection_id: str) -> bool: """ Get a STAC collection diff --git a/implementations/CMIP6.yml b/implementations/CMIP6.yml index 59824a5..a57875b 100644 --- a/implementations/CMIP6.yml +++ b/implementations/CMIP6.yml @@ -1,6 +1,6 @@ title: CMIP6 description: Coupled Model Intercomparison Project phase 6 -keywords: ['CMIP', 'CMIP6', 'WCRP', 'Cimate Change'] +keywords: ['CMIP', 'CMIP6', 'WCRP', 'Climate Change'] license: "CC-BY-4.0" spatialextent: [-180, -90, 180, 90] temporalextent: ['1850-01-01', null] \ No newline at end of file diff --git a/implementations/add_CMIP6.py b/implementations/add_CMIP6.py index f374280..349540d 100644 --- a/implementations/add_CMIP6.py +++ b/implementations/add_CMIP6.py @@ -4,7 +4,7 @@ from colorlog import ColoredFormatter from STACpopulator import STACpopulatorBase -from STACpopulator.crawlers import thredds_crawler +from STACpopulator.input import THREDDSLoader # from STACpopulator.metadata_parsers import nc_attrs_from_ncml @@ -21,24 +21,40 @@ class CMIP6populator(STACpopulatorBase): def __init__( self, - catalog: str, - hostname: str, - config: str, + stac_host: str, + thredds_catalog_url: str, + config_filename: str, ) -> None: - super().__init__(catalog, hostname, config, thredds_crawler, crawler_args={"depth": None}) - - def process_STAC_item(self): # noqa N802 + """Constructor + + :param stac_host: URL to the STAC API + :type stac_host: str + :param thredds_catalog_url: the URL to the THREDDS catalog to ingest + :type thredds_catalog_url: str + :param config_filename: Yaml file containing the information about the collection to populate + :type config_filename: str + """ + data_loader = THREDDSLoader(thredds_catalog_url) + for item in data_loader: + print(item) + super().__init__(stac_host, data_loader, config_filename) + + def process_stac_item(self): # noqa N802 # TODO: next step is to implement this print("here") + def validate_stac_item_cv(self): + # TODO: next step is to implement this + pass + if __name__ == "__main__": parser = argparse.ArgumentParser(prog="CMIP6 STAC populator") - parser.add_argument("hostname", type=str, help="STAC API address") - parser.add_argument("catalog_URL", type=str, help="URL to the CMIP6 thredds catalog") + parser.add_argument("stac_host", type=str, help="STAC API address") + parser.add_argument("thredds_catalog_URL", type=str, help="URL to the CMIP6 THREDDS catalog") parser.add_argument("config_file", type=str, help="Name of the configuration file") args = parser.parse_args() LOGGER.info(f"Arguments to call: {args}") - c = CMIP6populator(args.catalog_URL, args.hostname, args.config_file) + c = CMIP6populator(args.stac_host, args.thredds_catalog_URL, args.config_file) c.ingest() From 21712a5a88745757b8fb381e0db494dc4054cc4a Mon Sep 17 00:00:00 2001 From: Deepak Chandan Date: Fri, 18 Aug 2023 12:00:50 -0400 Subject: [PATCH 11/12] adding implementation directories --- implementations/{ => CMIP6-UofT}/CMIP6.yml | 0 implementations/{ => CMIP6-UofT}/add_CMIP6.py | 0 .../{add_some_data.py => NEX-GDDP-UofT/add_NEX-GDDP.py} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename implementations/{ => CMIP6-UofT}/CMIP6.yml (100%) rename implementations/{ => CMIP6-UofT}/add_CMIP6.py (100%) rename implementations/{add_some_data.py => NEX-GDDP-UofT/add_NEX-GDDP.py} (100%) diff --git a/implementations/CMIP6.yml b/implementations/CMIP6-UofT/CMIP6.yml similarity index 100% rename from implementations/CMIP6.yml rename to implementations/CMIP6-UofT/CMIP6.yml diff --git a/implementations/add_CMIP6.py b/implementations/CMIP6-UofT/add_CMIP6.py similarity index 100% rename from implementations/add_CMIP6.py rename to implementations/CMIP6-UofT/add_CMIP6.py diff --git a/implementations/add_some_data.py b/implementations/NEX-GDDP-UofT/add_NEX-GDDP.py similarity index 100% rename from implementations/add_some_data.py rename to implementations/NEX-GDDP-UofT/add_NEX-GDDP.py From 4cade234c85701dac3021bed538a1cd1ffd31e8d Mon Sep 17 00:00:00 2001 From: David Huard Date: Mon, 21 Aug 2023 11:23:31 -0400 Subject: [PATCH 12/12] Set min version to 3.10 and add dependencies --- pyproject.toml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c90f1a1..1c94eaf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,15 @@ build-backend = "setuptools.build_meta" [project] name = "STACpopulator" version = "0.0.1" -requires-python = ">=3.9" +requires-python = ">=3.10" +dependencies = [ + "colorlog", + "pyyaml", + "siphon", + "pystac" +] [tool.setuptools] -py-modules = ["STACpopulator"] \ No newline at end of file +py-modules = ["STACpopulator"] + +