Skip to content

Commit

Permalink
patch tests
Browse files Browse the repository at this point in the history
  • Loading branch information
fmigneault-crim committed Nov 9, 2023
1 parent 4acf96f commit 3277b70
Show file tree
Hide file tree
Showing 12 changed files with 178 additions and 38 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
os: [ubuntu-latest]
python-version: ["3.10", "3.11", "3.12"]
allow-failure: [false]
test-case: [test-unit]
test-case: [test-cov]
# include:
# # experimental python
# - os: ubuntu-latest
Expand Down Expand Up @@ -102,7 +102,7 @@ jobs:
if: ${{ matrix.test-case != 'test-docker' }}
# install package and dependencies directly,
# skip sys/conda setup to use active python
run: make install-dev version
run: make setup-pyessv-archive install-dev version
- name: Display Packages
# skip python setup if running with docker
if: ${{ matrix.test-case != 'test-docker' }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
.coverage
.pytest_cache
build
reports
*.pyc
STACpopulator.egg-info/
Expand Down
4 changes: 3 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Changes

## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)"
## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)

* Add `LICENSE` file.
* Add `bump-my-version` with `make version` and `make VERSION=<...> bump` utilities to self-update release versions.
Expand All @@ -10,6 +10,8 @@
* Add `dev` dependencies to `pyproject.toml` for testing the package (install with `pip install ".[dev]"`).
* Add GitHub CI tests.
* Remove `requirements.txt` in favor of all dependencies combined in `pyproject.toml`.
* Add tests to validate that the STAC Collection and Item contain a `source` link with the expected THREDDS format.
* Fix broken tests and invalid imports.

## [0.1.0](https://github.com/crim-ca/stac-populator/tree/0.1.0) (2023-11-08)

Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ STAC_HOST ?= http://localhost:8880/stac

## -- Testing targets -------------------------------------------------------------------------------------------- ##

# Clone the pyessv-archive repository under the user's home directory.
# The clone is skipped when the destination already exists, so the target can be re-run safely.
setup-pyessv-archive:
	[ -d ~/.esdoc/pyessv-archive ] || git clone "https://github.com/ES-DOC/pyessv-archive" ~/.esdoc/pyessv-archive

test-cmip6:
python $(IMP_DIR)/CMIP6_UofT/add_CMIP6.py $(STAC_HOST) https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html

Expand Down Expand Up @@ -40,6 +43,9 @@ install-dev:
test-unit:
pytest "$(APP_ROOT)"

test-cov:
pytest "$(APP_ROOT)" --cov --cov-report=term --cov-report=html

## -- Versioning targets -------------------------------------------------------------------------------------------- ##

# Bumpversion 'dry' config
Expand Down
10 changes: 7 additions & 3 deletions STACpopulator/api_requests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import os
from typing import Any, Optional
from typing import Any, MutableMapping, Optional

import requests
from colorlog import ColoredFormatter
Expand Down Expand Up @@ -35,7 +35,7 @@ def stac_collection_exists(stac_host: str, collection_id: str) -> bool:
return r.status_code == 200


def post_stac_collection(stac_host: str, json_data: dict[str, Any], update: Optional[bool] = True) -> None:
def post_stac_collection(stac_host: str, json_data: MutableMapping[str, Any], update: Optional[bool] = True) -> None:
"""Post/create a collection on the STAC host
:param stac_host: address of the STAC host
Expand All @@ -62,7 +62,11 @@ def post_stac_collection(stac_host: str, json_data: dict[str, Any], update: Opti


def post_stac_item(
stac_host: str, collection_id: str, item_name: str, json_data: dict[str, dict], update: Optional[bool] = True
stac_host: str,
collection_id: str,
item_name: str,
json_data: MutableMapping[str, dict],
update: Optional[bool] = True,
) -> None:
"""Post a STAC item to the host server.
Expand Down
4 changes: 2 additions & 2 deletions STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from pydantic import AnyHttpUrl, ConfigDict, Field, FieldValidationInfo, field_validator
from pystac.extensions.datacube import DatacubeExtension

from STACpopulator import STACpopulatorBase
from STACpopulator.implementations.CMIP6_UofT.extensions import DataCubeHelper
from STACpopulator.input import GenericLoader, THREDDSLoader
from STACpopulator.input import GenericLoader, ErrorLoader, THREDDSLoader
from STACpopulator.models import GeoJSONPolygon, STACItemProperties
from STACpopulator.populator_base import STACpopulatorBase
from STACpopulator.stac_utils import STAC_item_from_metadata, collection2literal

LOGGER = logging.getLogger(__name__)
Expand Down
20 changes: 16 additions & 4 deletions STACpopulator/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,23 @@ def __iter__(self):
A generator that returns an item from the input. The item could be anything
depending on the specific concrete implementation of this abstract class.
"""
pass
raise NotImplementedError

@abstractmethod
def reset(self):
"""Reset the internal state of the generator."""
pass
raise NotImplementedError


class ErrorLoader(GenericLoader):
    """Loader in which every operation raises ``NotImplementedError``.

    Construction, iteration and reset all fail immediately.
    NOTE(review): presumably a placeholder for inputs without a supported loader - confirm intended use.
    """

    def __init__(self):  # noqa
        """Always raise ``NotImplementedError``, so no instance can be created."""
        raise NotImplementedError()

    def __iter__(self):
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError()

    def reset(self):
        """Always raise ``NotImplementedError``."""
        raise NotImplementedError()


class THREDDSLoader(GenericLoader):
Expand Down Expand Up @@ -84,9 +95,10 @@ def magpie_collection_link(self) -> pystac.Link:
url = self.thredds_catalog_URL
parts = url.split("/")
i = parts.index("catalog")
# service = parts[i - 1]
service = parts[i - 1]
path = "/".join(parts[i + 1 : -1])
return pystac.Link(rel="source", target=url, media_type="text/xml", title=path)
title = f"{service}:{path}"
return pystac.Link(rel="source", target=url, media_type="text/xml", title=title)

def reset(self):
"""Reset the generator."""
Expand Down
19 changes: 16 additions & 3 deletions STACpopulator/populator_base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import functools
import logging
from abc import ABC, abstractmethod
from datetime import datetime
Expand Down Expand Up @@ -41,7 +42,8 @@ def __init__(
"""

super().__init__()
self._collection_info = load_collection_configuration()
self._collection_info = None
self.load_config()

self._ingest_pipeline = data_loader
self._stac_host = self.validate_host(stac_host)
Expand All @@ -52,6 +54,9 @@ def __init__(
LOGGER.info(f"Collection {self.collection_name} is assigned id {self._collection_id}")
self.create_stac_collection()

def load_config(self):
    """Load the collection configuration and store it on ``self._collection_info``.

    Kept as a separate method so that subclasses can override how the configuration is obtained.
    """
    collection_config = load_collection_configuration()
    self._collection_info = collection_config

@property
def collection_name(self) -> str:
return self._collection_info["title"]
Expand Down Expand Up @@ -90,7 +95,11 @@ def validate_host(self, stac_host: str) -> str:

return stac_host

def create_stac_collection(self) -> None:
# FIXME: provide a way to update the collection after its items are generated.
# STAC collections are supposed to include 'summaries' that aggregate
# all the 'properties' supported by their child items.
@functools.cache
def create_stac_collection(self) -> MutableMapping[str, Any]:
"""
Create a basic STAC collection.
Expand All @@ -112,8 +121,12 @@ def create_stac_collection(self) -> None:
collection = pystac.Collection(**self._collection_info)

collection.add_links(self._ingest_pipeline.links)
collection_data = collection.to_dict()
self.publish_stac_collection(collection_data)
return collection_data

post_stac_collection(self.stac_host, collection.to_dict(), self.update)
def publish_stac_collection(self, collection_data: MutableMapping[str, Any]) -> None:
    """Post the collection to the STAC host.

    :param collection_data: collection definition passed as-is to ``post_stac_collection``.
    """
    host = self.stac_host
    allow_update = self.update
    post_stac_collection(host, collection_data, allow_update)

def ingest(self) -> None:
LOGGER.info("Data ingestion")
Expand Down
30 changes: 22 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
include = ["STACpopulator*"]
exclude = [
".deprecated",
".pytest_cache",
"__pycache__",
"__pycache__.*",
"__pycache__*",
"STACpopulator.__pycache__*",
"tests*",
]

[project]
name = "STACpopulator"
version = "0.1.0"
Expand Down Expand Up @@ -60,9 +72,6 @@ keywords = [
Repository = "https://github.com/crim-ca/stac-populator"
Changelog = "https://github.com/crim-ca/stac-populator/blob/master/CHANGES.md"

[tool.setuptools]
py-modules = ["STACpopulator"]

[project.optional-dependencies]
dev = [
"pytest",
Expand All @@ -83,10 +92,15 @@ norecursedirs = [
"lib",
]
python_files = "test*.py"
addopts = [
"--cov",
"--cov-report=term",
"--cov-report=html",
# the coverage options below break debugger breakpoints, so they are disabled by default
# run 'make test-cov' to apply them explicitly
#addopts = [
# "--cov",
# "--cov-report=term",
# "--cov-report=html",
#]
markers = [
"online"
]

[tool.coverage.html]
Expand Down Expand Up @@ -124,7 +138,7 @@ replace = "APP_VERSION ?= {new_version}"
filename = "CHANGES.md"
search = "## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)"
replace = """
## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)"
## [Unreleased](https://github.com/crim-ca/stac-populator) (latest)
<!-- insert list items of new changes here -->
Expand Down
30 changes: 30 additions & 0 deletions tests/data/stac_collection_testdata_xclim_cmip6_catalog.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"type": "Collection",
"id": "test",
"title": "test",
"stac_version": "1.0.0",
"description": "test",
"summaries": {"needs_summaries_update": ["true"]},
"extent": {
"spatial": {
"bbox": [
[-180, -90, 180, 90]
]
},
"temporal": {
"interval": [
["1850-01-01T00:00:00Z", null]
]
}
},
"license": "MIT",
"keywords": ["test"],
"links": [
{
"href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.xml",
"rel": "source",
"title": "thredds:birdhouse/testdata/xclim/cmip6",
"type": "text/xml"
}
]
}
File renamed without changes.
88 changes: 73 additions & 15 deletions tests/test_standalone_stac_item.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,88 @@
import json

import pytest
import requests
import os
import tempfile
from urllib.parse import quote

import xncml

from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import (
CMIP6ItemProperties,
make_cmip6_item_id,
)
from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import CMIP6ItemProperties, CMIP6populator
from STACpopulator.input import THREDDSLoader
from STACpopulator.models import GeoJSONPolygon
from STACpopulator.stac_utils import STAC_item_from_metadata

CUR_DIR = os.path.dirname(__file__)


def quote_none_safe(url: str) -> str:
    """Percent-encode *url* with no character exempted as "safe", so that ``/`` is escaped as well."""
    no_safe_chars = ""
    return quote(url, safe=no_safe_chars)

def test_standalone_stac_item():
url = (
"https://pavics.ouranos.ca/twitcher/ows/proxy/"
"thredds/ncml/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc"
"?catalog=https%3A%2F%2Fpavics.ouranos.ca%2Ftwitcher%2Fows%2Fproxy%2F"
"thredds%2Fcatalog%2Fbirdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fcatalog.html"
"&dataset=birdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fsic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc"

@pytest.mark.online
def test_standalone_stac_item_thredds_ncml():
thredds_url = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds"
thredds_path = "birdhouse/testdata/xclim/cmip6"
thredds_nc = "sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc"
thredds_catalog = f"{thredds_url}/catalog/{thredds_path}/catalog.html"
thredds_ds = f"{thredds_path}/{thredds_nc}"
thredds_ncml_url = (
f"{thredds_url}/ncml/{thredds_path}/{thredds_nc}"
f"?catalog={quote_none_safe(thredds_catalog)}&dataset={quote_none_safe(thredds_ds)}"
)

attrs = xncml.Dataset.from_text(requests.get(url).content).to_cf_dict()
stac_item_id = make_cmip6_item_id(attrs["attributes"])
# FIXME: avoid hackish workarounds
data = requests.get(thredds_ncml_url).text
attrs = xncml.Dataset.from_text(data).to_cf_dict()
attrs["access_urls"] = { # FIXME: all following should be automatically added, but they are not!
"HTTPServer": f"{thredds_url}/fileServer/{thredds_path}/{thredds_nc}",
"OPENDAP": f"{thredds_url}/dodsC/{thredds_path}/{thredds_nc}",
"WCS": f"{thredds_url}/wcs/{thredds_path}/{thredds_nc}?service=WCS&version=1.0.0&request=GetCapabilities",
"WMS": f"{thredds_url}/wms/{thredds_path}/{thredds_nc}?service=WMS&version=1.3.0&request=GetCapabilities",
"NetcdfSubset": f"{thredds_url}/ncss/{thredds_path}/{thredds_nc}/dataset.html",
}

stac_item_id = CMIP6populator.make_cmip6_item_id(attrs["attributes"])
stac_item = STAC_item_from_metadata(stac_item_id, attrs, CMIP6ItemProperties, GeoJSONPolygon)

with open("tests/ref.json", "r") as ff:
ref_file = os.path.join(CUR_DIR, "data/stac_item_testdata_xclim_cmip6_ncml.json")
with open(ref_file, mode="r", encoding="utf-8") as ff:
reference = json.load(ff)

assert stac_item.to_dict() == reference


class MockedNoSTACUpload(CMIP6populator):
    """CMIP6 populator variant for tests that never contacts a STAC API.

    The collection configuration is hard-coded instead of auto-loaded, the host is
    accepted without validation, and publishing the collection does nothing.
    """

    def load_config(self):
        """Set a fixed, minimal collection configuration (bypasses the auto-loaded config)."""
        self._collection_info = {
            "id": "test",
            "title": "test",
            "description": "test",
            "keywords": ["test"],
            "license": "MIT",
            "spatialextent": [-180, -90, 180, 90],
            "temporalextent": ["1850-01-01", None],
        }

    def validate_host(self, stac_host: str) -> str:
        """Accept any host without checking it.

        The host is returned unchanged so that the declared ``str`` result is honored
        and the stored host is not silently replaced by ``None``.
        """
        return stac_host

    def publish_stac_collection(self, *_) -> None:
        """Do nothing, so that the collection is not pushed to a STAC API."""


@pytest.mark.online
def test_cmip6_stac_thredds_catalog_parsing():
    """Compare the STAC collection built from the THREDDS test catalog with the stored reference JSON."""
    url = "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html"
    loader = THREDDSLoader(url)
    populator = MockedNoSTACUpload("https://host-dont-care.com", loader)

    result = populator.create_stac_collection()

    ref_file = os.path.join(CUR_DIR, "data/stac_collection_testdata_xclim_cmip6_catalog.json")
    with open(ref_file, mode="r", encoding="utf-8") as ff:
        reference = json.load(ff)

    assert result == reference

0 comments on commit 3277b70

Please sign in to comment.