diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..cf551b9 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,28 @@ +## IDE +.idea/ +.vscode/ + +## SCM +.git* + +## Configurations +.* +*.rc + +## Environment +.conda/ +.env* +*.env +.venv/ +jupyter/ + +## Tests +.coverage +.pytest_cache +reports + +## Caches +**/__pycache__/ +STACpopulator.egg-info/ +build +*.pyc diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 47c1e7b..ec87644 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,6 +15,7 @@ jobs: release: name: release runs-on: ubuntu-latest + if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} steps: - name: Checkout uses: actions/checkout@v2 @@ -47,5 +48,6 @@ jobs: uses: docker/build-push-action@v3 with: context: . + file: docker/Dockerfile push: true tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.extract_branch.outputs.branch }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..8a33819 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,156 @@ +# run test suites + +name: Tests +on: + - pull_request + - push + - release + - workflow_dispatch + +# cancel the current workflow if another commit was pushed on the same PR or reference +# uses the GitHub workflow name to avoid collision with other workflows running on the same PR/reference +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + # see: https://github.com/fkirc/skip-duplicate-actions +# skip_duplicate: +# continue-on-error: true +# runs-on: ubuntu-latest +# outputs: +# should_skip: ${{ steps.skip_check.outputs.should_skip && ! 
contains(github.ref, 'refs/tags') }} +# steps: +# - id: skip_check +# uses: fkirc/skip-duplicate-actions@master +# with: +# concurrent_skipping: "same_content" +# skip_after_successful_duplicate: "true" +# do_not_skip: '["pull_request", "workflow_dispatch", "schedule", "release"]' + + # see: https://github.com/actions/setup-python + tests: + # FIXME: https://github.com/fkirc/skip-duplicate-actions/issues/90 + #needs: skip_duplicate + #if: ${{ needs.skip_duplicate.outputs.should_skip != 'true' }} + runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.allow-failure }} + env: + # override make command to install directly in active python + CONDA_CMD: "" + + strategy: + matrix: + os: [ubuntu-latest] + python-version: ["3.10", "3.11", "3.12"] + allow-failure: [false] + test-case: [test-cov] +# include: +# # experimental python +# - os: ubuntu-latest +# python-version: "3.13" +# allow-failure: true +# test-case: test-unit-only +# - os: ubuntu-latest +# python-version: "3.13" +# allow-failure: true +# test-case: test-func-only +# # linter tests +# - os: ubuntu-latest +# python-version: "3.10" +# allow-failure: false +# test-case: check-all +# # documentation build +# - os: ubuntu-latest +# python-version: "3.10" +# allow-failure: false +# test-case: docs +# # coverage test +# - os: ubuntu-latest +# python-version: "3.10" +# allow-failure: false +# test-case: test-coverage-only +# # smoke test of Docker image +# - os: ubuntu-latest +# python-version: "3.10" # doesn't matter which one (in docker), but match default of repo +# allow-failure: false +# test-case: test-docker + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: "0" + - name: Setup Python + # skip python setup if running with docker + if: ${{ matrix.test-case != 'test-docker' }} + uses: actions/setup-python@v2 + with: + python-version: "${{ matrix.python-version }}" + - name: Parse Python Version + id: python-semver + run: | + echo "::set-output name=major::$(echo ${{ matrix.python-version }} | cut
-d '.' -f 1)" + echo "::set-output name=minor::$(echo ${{ matrix.python-version }} | cut -d '.' -f 2)" + - uses: actions/cache@v3 + name: Check Proj Lib Pre-Built in Cache + id: cache-proj + with: + # note: '22' is v8, '21' is v7 + path: /tmp/proj-8.2.1/install + key: ${{ runner.os }}-python${{ matrix.python-version }}-proj + - name: Install Dependencies + # skip python setup if running with docker + if: ${{ matrix.test-case != 'test-docker' }} + # install package and dependencies directly, + # skip sys/conda setup to use active python + run: make setup-pyessv-archive install-dev version + - name: Display Packages + # skip python setup if running with docker + if: ${{ matrix.test-case != 'test-docker' }} + run: pip freeze + #- name: Setup Environment Variables + # uses: c-py/action-dotenv-to-setenv@v2 + # with: + # env-file: ./ci/weaver.env + - name: Display Environment Variables + run: | + hash -r + env | sort + - name: Run Tests + run: make ${{ matrix.test-case }} + - name: Upload coverage report + uses: codecov/codecov-action@v2 + if: ${{ success() && matrix.test-case == 'test-coverage-only' }} + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ./reports/coverage.xml + fail_ci_if_error: true + verbose: true + +# deploy-docker: +# needs: tests +# if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} +# runs-on: ubuntu-latest +# steps: +# - uses: actions/checkout@v2 +# with: +# fetch-depth: "0" +# - name: Get Tag Version +# id: version +# shell: bash +# run: | +# if [[ "${GITHUB_REF}" == "refs/heads/master" ]]; then +# echo "::set-output name=TAG_VERSION::latest" +# else +# echo "::set-output name=TAG_VERSION::${GITHUB_REF##*/}" +# fi +# - name: Build Docker +# run: | +# make DOCKER_REPO=pavics/weaver APP_VERSION=${{ steps.version.outputs.TAG_VERSION }} docker-info docker-build +# - name: Login to DockerHub +# uses: docker/login-action@v1 +# with: +# username: ${{ secrets.DOCKERHUB_USERNAME }} +# password: ${{
secrets.DOCKERHUB_TOKEN }} +# - name: Push to DockerHub +# run: | +# make DOCKER_REPO=pavics/weaver APP_VERSION=${{ steps.version.outputs.TAG_VERSION }} docker-push diff --git a/.gitignore b/.gitignore index 7e12211..9d5674a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,21 @@ -*.pyc -STACpopulator.egg-info/ +## IDE +.idea/ .vscode/ + +## Environment +.conda/ +.env* +*.env .venv/ jupyter/ -.idea -.vscode + +## Tests +.coverage +.pytest_cache +reports + +## Caches +**/__pycache__/ +STACpopulator.egg-info/ +build +*.pyc diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..3a3eece --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,23 @@ +# Changes + +## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) + +* Add `LICENSE` file. +* Add `bump-my-version` with `make version` and `make VERSION=<...> bump` utilities to self-update release versions. +* Add more metadata to `pyproject.toml`. +* Adjust `README.md` with updated references and release version indicators. +* Add `CHANGES.md` to record version updates. +* Add `dev` dependencies to `pyproject.toml` for testing the package (install with `pip install ".[dev]"`). +* Add GitHub CI tests. +* Remove `requirements.txt` in favor of all dependencies combined in `pyproject.toml`. +* Add test to validate STAC Collection and Item contain `source` with expected THREDDS format. +* Fix broken tests and invalid imports. + +## [0.1.0](https://github.com/crim-ca/stac-populator/tree/0.1.0) (2023-11-08) + + +* Refactor of `CMIP6_UofT` with more robust parsing strategies and STAC Item generation from THREDDS NCML metadata. + +## [0.0.1](https://github.com/crim-ca/stac-populator/tree/0.0.1) (2023-08-22) + +* Initial release with implementation of `CMIP6_UofT`. 
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..17ee2a6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2013-2014 Computer Research Institute of Montreal (CRIM) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile index d4b42f3..bd5daeb 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,79 @@ -IMP_DIR = STACpopulator/implementations -STAC_HOST = http://localhost:8880/stac +MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) +# Include custom config if it is available +-include Makefile.config +APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..) 
+APP_NAME := STACpopulator +APP_VERSION ?= 0.1.0 + +DOCKER_COMPOSE_FILES := -f "$(APP_ROOT)/docker/docker-compose.yml" +DOCKER_TAG := ghcr.io/crim-ca/stac-populator:$(APP_VERSION) + +IMP_DIR := $(APP_NAME)/implementations +STAC_HOST ?= http://localhost:8880/stac # CATALOG = https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html CATALOG = https://daccs.cs.toronto.edu/twitcher/ows/proxy/thredds/catalog/datasets/CMIP6/catalog.html # CATALOG = https://daccs.cs.toronto.edu/twitcher/ows/proxy/thredds/catalog/datasets/CMIP6/CMIP/NOAA-GFDL/catalog.html # CATALOG = https://daccs.cs.toronto.edu/twitcher/ows/proxy/thredds/catalog/datasets/CMIP6/CMIP/AS-RCEC/catalog.html -testcmip6: +## -- Testing targets -------------------------------------------------------------------------------------------- ## + +setup-pyessv-archive: + git clone "https://github.com/ES-DOC/pyessv-archive" ~/.esdoc/pyessv-archive + +test-cmip6: python $(IMP_DIR)/CMIP6_UofT/add_CMIP6.py $(STAC_HOST) $(CATALOG) -delcmip6: +del-cmip6: curl --location --request DELETE '$(STAC_HOST)/collections/CMIP6_UofT' @echo "" -starthost: - docker compose up +docker-start: + docker compose $(DOCKER_COMPOSE_FILES) up +starthost: docker-start -stophost: - docker compose down +docker-stop: + docker compose $(DOCKER_COMPOSE_FILES) down +stophost: docker-stop + +docker-build: + docker build "$(APP_ROOT)" -f "$(APP_ROOT)/docker/Dockerfile" -t "$(DOCKER_TAG)" del_docker_volume: stophost docker volume rm stac-populator_stac-db resethost: del_docker_volume starthost + +install: + pip install "$(APP_ROOT)" + +install-dev: + pip install "$(APP_ROOT)[dev]" + +test-unit: + pytest "$(APP_ROOT)" + +test-cov: + pytest "$(APP_ROOT)" --cov="$(APP_NAME)" --cov-report=term --cov-report=html + +## -- Versioning targets -------------------------------------------------------------------------------------------- ## + +# Bumpversion 'dry' config +# if 'dry' is specified as target, any bumpversion 
call using 'BUMP_XARGS' will not apply changes +BUMP_TOOL := bump-my-version +BUMP_XARGS ?= --verbose --allow-dirty +ifeq ($(filter dry, $(MAKECMDGOALS)), dry) + BUMP_XARGS := $(BUMP_XARGS) --dry-run +endif +.PHONY: dry +dry: pyproject.toml ## run 'bump' target without applying changes (dry-run) [make VERSION=<...> bump dry] + @-echo > /dev/null + +.PHONY: bump +bump: ## bump version using VERSION specified as user input [make VERSION=<...> bump] + @-echo "Updating package version ..." + @[ "${VERSION}" ] || ( echo ">> 'VERSION' is not set"; exit 1 ) + @-bash -c '$(CONDA_CMD) $(BUMP_TOOL) $(BUMP_XARGS) --new-version "${VERSION}" patch;' + +.PHONY: version +version: ## display current version + @-echo "$(APP_NAME) version: $(APP_VERSION)" diff --git a/README.md b/README.md index 808926c..4efb3ee 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,62 @@ # STAC Catalog Populator +![Latest Version](https://img.shields.io/badge/latest%20version-0.1.0-blue?logo=github) +![Commits Since Latest](https://img.shields.io/github/commits-since/crim-ca/stac-populator/0.1.0.svg?logo=github) -This repository contains a framework [STACpopulator](STACpopulator) that can be used to implement concrete populators (see [implementations](implementations)) for populating the STAC catalog on a DACCS node. +This repository contains a framework [STACpopulator](STACpopulator) +that can be used to implement concrete populators (see [implementations](STACpopulator/implementations)) +for populating the STAC Catalog, Collections and Items from various dataset/catalog sources, and pushed using +STAC API on a server node. ## Framework -The framwork is centered around a Python Abstract Base Class: `STACpopulatorBase` that implements all the logic for populating a STAC catalog.
This class implements an abstract method called `process_STAC_item` that should be defined in implementations of the class and contain all the logic for constructing the STAC representation for an item in the collection that is to be processed. +The framework is centered around a Python Abstract Base Class: `STACpopulatorBase` that implements all the logic +for populating a STAC catalog. This class provides abstract methods that should be overridden by implementations that +contain all the logic for constructing the STAC representation for an item in the collection that is to be processed. ## Implementations -Currently, one implementation of `STACpopulatorBase` is provided in [add_CMIP6.py](implementations/add_CMIP6.py). +Provided implementations of `STACpopulatorBase`: -## Testing +- [CMIP6_UofT][CMIP6_UofT] + +[CMIP6_UofT]: STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py + +## Installation and Execution + +Either with Python directly (in an environment of your choosing): + +```shell +pip install . +# OR +make install +``` + +With development packages: -The provided `docker-compose` file can be used to launch a test STAC server. The `add_CMIP6.py` script can be run as: +```shell +pip install .[dev] +# OR +make install-dev +``` + +You can also employ the pre-built Docker: +```shell +docker run -ti ghcr.io/crim-ca/stac-populator:0.1.0 [command] ``` -python implementations/CMIP6-UofT/add_CMIP6.py http://localhost:8880/stac/ https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html implementations/CMIP6-UofT/CMIP6.yml + +## Testing + +The provided [`docker-compose`](docker/docker-compose.yml) configuration file can be used to launch a test STAC server. 
+For example, the [CMIP6_UofT][CMIP6_UofT] script can be run as: + +```shell +python STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py \ + "http://localhost:8880/stac/" \ + "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html" \ + "STACpopulator/implementations/CMIP6_UofT/collection_config.yml" ``` -Note: in the script above, I am currently using a sample THREDDS catalog URL and not one relevant to the global scale CMIP6 data. + +*Note*: +In the script above, a sample THREDDS catalog URL is employed and not one relevant to the global scale CMIP6 data. diff --git a/STACpopulator/__init__.py b/STACpopulator/__init__.py index f217a91..3dc1f76 100644 --- a/STACpopulator/__init__.py +++ b/STACpopulator/__init__.py @@ -1 +1 @@ -from .populator_base import STACpopulatorBase +__version__ = "0.1.0" diff --git a/STACpopulator/api_requests.py b/STACpopulator/api_requests.py index 35b0dc2..e7a5380 100644 --- a/STACpopulator/api_requests.py +++ b/STACpopulator/api_requests.py @@ -62,7 +62,11 @@ def post_stac_collection(stac_host: str, json_data: dict[str, Any], update: Opti def post_stac_item( - stac_host: str, collection_id: str, item_name: str, json_data: dict[str, dict], update: Optional[bool] = True + stac_host: str, + collection_id: str, + item_name: str, + json_data: dict[str, dict], + update: Optional[bool] = True, ) -> None: """Post a STAC item to the host server. 
diff --git a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py index d966ead..ae86892 100644 --- a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py +++ b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py @@ -10,10 +10,10 @@ from pydantic import AnyHttpUrl, ConfigDict, Field, FieldValidationInfo, field_validator from pystac.extensions.datacube import DatacubeExtension -from STACpopulator import STACpopulatorBase from STACpopulator.implementations.CMIP6_UofT.extensions import DataCubeHelper -from STACpopulator.input import GenericLoader, THREDDSLoader +from STACpopulator.input import GenericLoader, ErrorLoader, THREDDSLoader from STACpopulator.models import GeoJSONPolygon, STACItemProperties +from STACpopulator.populator_base import STACpopulatorBase from STACpopulator.stac_utils import STAC_item_from_metadata, collection2literal LOGGER = logging.getLogger(__name__) diff --git a/STACpopulator/input.py b/STACpopulator/input.py index 25750c0..f72bc6e 100644 --- a/STACpopulator/input.py +++ b/STACpopulator/input.py @@ -31,12 +31,23 @@ def __iter__(self): A generator that returns an item from the input. The item could be anything depending on the specific concrete implementation of this abstract class. 
""" - pass + raise NotImplementedError @abstractmethod def reset(self): """Reset the internal state of the generator.""" - pass + raise NotImplementedError + + +class ErrorLoader(GenericLoader): + def __init__(self): # noqa + raise NotImplementedError + + def __iter__(self): + raise NotImplementedError + + def reset(self): + raise NotImplementedError class THREDDSLoader(GenericLoader): @@ -84,9 +95,10 @@ def magpie_collection_link(self) -> pystac.Link: url = self.thredds_catalog_URL parts = url.split("/") i = parts.index("catalog") - # service = parts[i - 1] + service = parts[i - 1] path = "/".join(parts[i + 1 : -1]) - return pystac.Link(rel="source", target=url, media_type="text/xml", title=path) + title = f"{service}:{path}" + return pystac.Link(rel="source", target=url, media_type="text/xml", title=title) def reset(self): """Reset the generator.""" @@ -113,7 +125,7 @@ def extract_metadata(self, ds: siphon.catalog.Dataset) -> MutableMapping[str, An url = ds.access_urls["NCML"] r = requests.get(url) # Convert NcML to CF-compliant dictionary - attrs = xncml.Dataset.from_text(r.content).to_cf_dict() + attrs = xncml.Dataset.from_text(r.text).to_cf_dict() attrs["attributes"] = numpy_to_python_datatypes(attrs["attributes"]) attrs["access_urls"] = ds.access_urls return attrs diff --git a/STACpopulator/populator_base.py b/STACpopulator/populator_base.py index a13abb9..55db015 100644 --- a/STACpopulator/populator_base.py +++ b/STACpopulator/populator_base.py @@ -1,7 +1,8 @@ +import functools import logging from abc import ABC, abstractmethod from datetime import datetime -from typing import Any, MutableMapping, Optional +from typing import Any, Optional import pystac from colorlog import ColoredFormatter @@ -41,7 +42,8 @@ def __init__( """ super().__init__() - self._collection_info = load_collection_configuration() + self._collection_info = None + self.load_config() self._ingest_pipeline = data_loader self._stac_host = self.validate_host(stac_host) @@ -52,6 +54,9 @@ def 
__init__( LOGGER.info(f"Collection {self.collection_name} is assigned id {self._collection_id}") self.create_stac_collection() + def load_config(self): + self._collection_info = load_collection_configuration() + @property def collection_name(self) -> str: return self._collection_info["title"] @@ -79,7 +84,7 @@ def item_geometry_model(self): raise NotImplementedError @abstractmethod - def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + def create_stac_item(self, item_name: str, item_data: dict[str, Any]) -> dict[str, Any]: raise NotImplementedError def validate_host(self, stac_host: str) -> str: @@ -90,7 +95,11 @@ def validate_host(self, stac_host: str) -> str: return stac_host - def create_stac_collection(self) -> None: + # FIXME: should provide a way to update after item generation + # STAC collections are supposed to include 'summaries' with + # an aggregation of all supported 'properties' by its child items + @functools.cache + def create_stac_collection(self) -> dict[str, Any]: """ Create a basic STAC collection. @@ -112,8 +121,12 @@ def create_stac_collection(self) -> None: collection = pystac.Collection(**self._collection_info) collection.add_links(self._ingest_pipeline.links) + collection_data = collection.to_dict() + self.publish_stac_collection(collection_data) + return collection_data - post_stac_collection(self.stac_host, collection.to_dict(), self.update) + def publish_stac_collection(self, collection_data: dict[str, Any]) -> None: + post_stac_collection(self.stac_host, collection_data, self.update) def ingest(self) -> None: LOGGER.info("Data ingestion") diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..07c269c --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,32 @@ +FROM python:3.10-slim +LABEL description.short="STAC Populator" +LABEL description.long="Utility to populate STAC Catalog, Collections and Items from various dataset/catalog sources." 
+LABEL maintainer="Francis Charette-Migneault <francis.charette-migneault@crim.ca>" +LABEL vendor="CRIM" +LABEL version="0.1.0" + +# setup paths +ENV APP_DIR=/opt/local/src/stac-populator +WORKDIR ${APP_DIR} + +# obtain source files +COPY STACpopulator/ ${APP_DIR}/STACpopulator/ +COPY README.md LICENSE pyproject.toml ${APP_DIR}/ + +# install runtime/package dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + netbase \ + git \ + && mkdir -p /home/stac/.esdoc/ \ + && git clone "https://github.com/ES-DOC/pyessv-archive" /home/stac/.esdoc/pyessv-archive/ \ + && pip install --no-cache-dir ${APP_DIR} \ + && apt-get remove -y \ + git \ + && rm -rf /var/lib/apt/lists/* + +RUN groupadd -r stac && useradd -r -g stac stac +USER stac + +# FIXME: use common CLI +CMD ["bash"] diff --git a/docker-compose.yml b/docker/docker-compose.yml similarity index 100% rename from docker-compose.yml rename to docker/docker-compose.yml diff --git a/pyproject.toml b/pyproject.toml index dc08b7b..ca64cba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,21 +2,157 @@ requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" +[tool.setuptools.packages.find] +include = ["STACpopulator*"] +exclude = [ + ".deprecated", + ".pytest_cache", + "__pycache__", + "__pycache__.*", + "__pycache__*", + "STACpopulator.__pycache__*", + "tests*", +] + [project] name = "STACpopulator" -version = "0.0.1" +version = "0.1.0" +description = "Utility to populate STAC Catalog, Collections and Items from various dataset/catalog sources."
requires-python = ">=3.10" dependencies = [ "colorlog", "pyyaml", "siphon", "pystac", - "xncml", + "xncml>=0.3.1", # python 3.12 support "pydantic", - "pyessv" + "pyessv", + "requests", + "lxml", +] +readme = "README.md" +license = { file = "LICENSE" } +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Programming Language :: Python", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Topic :: Database :: Database Engines/Servers", + "Topic :: Internet :: WWW/HTTP :: Dynamic Content", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Information Analysis", + "Topic :: Scientific/Engineering :: Interface Engine/Protocol Translator", + "Topic :: Utilities", +] +authors = [ + { name = "Francis Charette-Migneault", email = "francis.charette-migneault@crim.ca" }, + { name = "Deepak Chandan", email = "dchandan@cs.toronto.edu" }, + { name = "David Huard", email = "huard.david@ouranos.ca" }, +] +maintainers = [ + { name = "Francis Charette-Migneault", email = "francis.charette-migneault@crim.ca" }, + { name = "Deepak Chandan", email = "dchandan@cs.toronto.edu" }, + { name = "David Huard", email = "huard.david@ouranos.ca" }, +] +keywords = [ + "STAC", + "SpatioTemporal Asset Catalog", + "Data Ingestion", + "THREDDS", + "CMIP6" +] + +[project.urls] +Repository = "https://github.com/crim-ca/stac-populator" +Changelog = "https://github.com/crim-ca/stac-populator/blob/master/CHANGES.md" + +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-cov", + "coverage", + "bump-my-version", +] + +[tool.pytest.ini_options] +norecursedirs = [ + ".*", + "build", + "dist", + "{arch}", + "*.egg", + "venv", + "requirements*", + "lib", ] +python_files = "test*.py" +# these break
debugger breakpoints +# add them manually with 'make test-cov' +#addopts = [ +# "--cov", +# "--cov-report=term", +# "--cov-report=html", +#] +markers = [ + "online" +] + +[tool.coverage.html] +directory = "reports/coverage/html" + +[tool.coverage.xml] +output = "reports/coverage.xml" + +[tool.bumpversion] +current_version = "0.1.0" +commit = true +commit_args = "--no-verify" +tag = true +tag_name = "{new_version}" +allow_dirty = true +parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)(\\.dev\\d+)?" +serialize = [ + "{major}.{minor}.{patch}.dev{distance_to_latest_tag}", + "{major}.{minor}.{patch}" +] +message = "Version updated from {current_version} to {new_version}" + +[[tool.bumpversion.files]] +filename = "STACpopulator/__init__.py" + +[[tool.bumpversion.files]] +filename = "README.md" + +[[tool.bumpversion.files]] +filename = "Makefile" +search = "APP_VERSION ?= {current_version}" +replace = "APP_VERSION ?= {new_version}" + +[[tool.bumpversion.files]] +filename = "docker/Dockerfile" +search = "LABEL version=\"{current_version}\"" +replace = "LABEL version=\"{new_version}\"" + +[[tool.bumpversion.files]] +filename = "CHANGES.md" +search = "## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)" +replace = """ +## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) - -[tool.setuptools] -py-modules = ["STACpopulator"] + +## [{new_version}](https://github.com/crim-ca/stac-populator/tree/{new_version}) ({now:%Y-%m-%d}) +""" +[[tool.bumpversion.files]] +filename = "pyproject.toml" +# ensure the regex does not match another version by mistake using the package name as guide +regex = true +search = "^name = \"STACpopulator\"\nversion = \"{current_version}\"$" +replace = "name = \"STACpopulator\"\nversion = \"{new_version}\"" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index dc03813..0000000 --- a/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -colorlog -requests -pystac -pyyaml -siphon -lxml diff --git
a/tests/data/stac_collection_testdata_xclim_cmip6_catalog.json b/tests/data/stac_collection_testdata_xclim_cmip6_catalog.json new file mode 100644 index 0000000..982e9e5 --- /dev/null +++ b/tests/data/stac_collection_testdata_xclim_cmip6_catalog.json @@ -0,0 +1,30 @@ +{ + "type": "Collection", + "id": "test", + "title": "test", + "stac_version": "1.0.0", + "description": "test", + "summaries": {"needs_summaries_update": ["true"]}, + "extent": { + "spatial": { + "bbox": [ + [-180, -90, 180, 90] + ] + }, + "temporal": { + "interval": [ + ["1850-01-01T00:00:00Z", null] + ] + } + }, + "license": "MIT", + "keywords": ["test"], + "links": [ + { + "href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.xml", + "rel": "source", + "title": "thredds:birdhouse/testdata/xclim/cmip6", + "type": "text/xml" + } + ] +} diff --git a/tests/ref.json b/tests/data/stac_item_testdata_xclim_cmip6_ncml.json similarity index 100% rename from tests/ref.json rename to tests/data/stac_item_testdata_xclim_cmip6_ncml.json diff --git a/tests/test_standalone_stac_item.py b/tests/test_standalone_stac_item.py index d7239a8..3163cd5 100644 --- a/tests/test_standalone_stac_item.py +++ b/tests/test_standalone_stac_item.py @@ -1,30 +1,88 @@ import json - +import pytest import requests +import os +import tempfile +from urllib.parse import quote + import xncml -from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import ( - CMIP6ItemProperties, - make_cmip6_item_id, -) +from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import CMIP6ItemProperties, CMIP6populator +from STACpopulator.input import THREDDSLoader from STACpopulator.models import GeoJSONPolygon from STACpopulator.stac_utils import STAC_item_from_metadata +CUR_DIR = os.path.dirname(__file__) + + +def quote_none_safe(url): + return quote(url, safe="") -def test_standalone_stac_item(): - url = ( - "https://pavics.ouranos.ca/twitcher/ows/proxy/" - 
"thredds/ncml/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc" - "?catalog=https%3A%2F%2Fpavics.ouranos.ca%2Ftwitcher%2Fows%2Fproxy%2F" - "thredds%2Fcatalog%2Fbirdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fcatalog.html" - "&dataset=birdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fsic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc" + +@pytest.mark.online +def test_standalone_stac_item_thredds_ncml(): + thredds_url = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds" + thredds_path = "birdhouse/testdata/xclim/cmip6" + thredds_nc = "sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc" + thredds_catalog = f"{thredds_url}/catalog/{thredds_path}/catalog.html" + thredds_ds = f"{thredds_path}/{thredds_nc}" + thredds_ncml_url = ( + f"{thredds_url}/ncml/{thredds_path}/{thredds_nc}" + f"?catalog={quote_none_safe(thredds_catalog)}&dataset={quote_none_safe(thredds_ds)}" ) - attrs = xncml.Dataset.from_text(requests.get(url).content).to_cf_dict() - stac_item_id = make_cmip6_item_id(attrs["attributes"]) + # FIXME: avoid hackish workarounds + data = requests.get(thredds_ncml_url).text + attrs = xncml.Dataset.from_text(data).to_cf_dict() + attrs["access_urls"] = { # FIXME: all following should be automatically added, but they are not! 
+ "HTTPServer": f"{thredds_url}/fileServer/{thredds_path}/{thredds_nc}", + "OPENDAP": f"{thredds_url}/dodsC/{thredds_path}/{thredds_nc}", + "WCS": f"{thredds_url}/wcs/{thredds_path}/{thredds_nc}?service=WCS&version=1.0.0&request=GetCapabilities", + "WMS": f"{thredds_url}/wms/{thredds_path}/{thredds_nc}?service=WMS&version=1.3.0&request=GetCapabilities", + "NetcdfSubset": f"{thredds_url}/ncss/{thredds_path}/{thredds_nc}/dataset.html", + } + + stac_item_id = CMIP6populator.make_cmip6_item_id(attrs["attributes"]) stac_item = STAC_item_from_metadata(stac_item_id, attrs, CMIP6ItemProperties, GeoJSONPolygon) - with open("tests/ref.json", "r") as ff: + ref_file = os.path.join(CUR_DIR, "data/stac_item_testdata_xclim_cmip6_ncml.json") + with open(ref_file, mode="r", encoding="utf-8") as ff: reference = json.load(ff) assert stac_item.to_dict() == reference + + +class MockedNoSTACUpload(CMIP6populator): + def load_config(self): + # bypass auto-load config + self._collection_info = { + "id": "test", + "title": "test", + "description": "test", + "keywords": ["test"], + "license": "MIT", + "spatialextent": [-180, -90, 180, 90], + "temporalextent": ['1850-01-01', None] + } + + def validate_host(self, stac_host: str) -> str: + pass # don't care + + def publish_stac_collection(self, *_) -> None: + pass # don't push to STAC API + + +@pytest.mark.online +def test_cmip6_stac_thredds_catalog_parsing(): + url = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html" + loader = THREDDSLoader(url) + with tempfile.NamedTemporaryFile(): + populator = MockedNoSTACUpload("https://host-dont-care.com", loader) + + result = populator.create_stac_collection() + + ref_file = os.path.join(CUR_DIR, "data/stac_collection_testdata_xclim_cmip6_catalog.json") + with open(ref_file, mode="r", encoding="utf-8") as ff: + reference = json.load(ff) + + assert result == reference