From ba656e00a920f67df6dcfcc53b81a218ce2ba729 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Thu, 27 Jun 2024 14:17:42 +0200 Subject: [PATCH] Issue #147 openeo.cloud configs: only consider "vito" for SENTINEL2_L2A collection --- CHANGELOG.md | 4 +++ conf/aggregator.dev.py | 8 ++++++ conf/aggregator.prod.py | 8 ++++++ src/openeo_aggregator/about.py | 2 +- src/openeo_aggregator/backend.py | 9 +++++++ src/openeo_aggregator/config.py | 6 +++++ tests/test_views.py | 46 ++++++++++++++++++++++++++++++++ 7 files changed, 82 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd8a45b..de7c302 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is roughly based on [Keep a Changelog](https://keepachangelog.com/en/ +## 0.36.0 + +- openeo.cloud configs: only consider "vito" for SENTINEL2_L2A collection ([#147](https://github.com/Open-EO/openeo-aggregator/issues/147)) + ## 0.35.1 - Add `aggregator.dummy.py` to wheel ([#117](https://github.com/Open-EO/openeo-aggregator/issues/117)) diff --git a/conf/aggregator.dev.py b/conf/aggregator.dev.py index d6ea460..9348118 100644 --- a/conf/aggregator.dev.py +++ b/conf/aggregator.dev.py @@ -1,3 +1,5 @@ +import re + from openeo_driver.users.oidc import OidcProvider from openeo_aggregator.config import AggregatorBackendConfig @@ -55,6 +57,12 @@ # Sentinel Hub OpenEO by Sinergise "sentinelhub": "https://openeo.sentinel-hub.com/production/", }, + collection_allow_list=[ + # Special case: only consider Terrascope for SENTINEL2_L2A + {"collection_id": "SENTINEL2_L2A", "allowed_backends": ["vito"]}, + # Still allow all other collections + re.compile("(?!SENTINEL2_L2A).*"), + ], zookeeper_prefix="/openeo/aggregator-dev/", partitioned_job_tracking={ "zk_hosts": ZK_HOSTS, diff --git a/conf/aggregator.prod.py b/conf/aggregator.prod.py index 1de2021..7c1b094 100644 --- a/conf/aggregator.prod.py +++ b/conf/aggregator.prod.py @@ -1,3 +1,5 @@ +import re + from openeo_driver.users.oidc import 
OidcProvider from openeo_aggregator.config import AggregatorBackendConfig @@ -47,6 +49,12 @@ # Sentinel Hub OpenEO by Sinergise "sentinelhub": "https://openeo.sentinel-hub.com/production/", }, + collection_allow_list=[ + # Special case: only consider Terrascope for SENTINEL2_L2A + {"collection_id": "SENTINEL2_L2A", "allowed_backends": ["vito"]}, + # Still allow all other collections + re.compile("(?!SENTINEL2_L2A).*"), + ], zookeeper_prefix="/openeo/aggregator/", partitioned_job_tracking={ "zk_hosts": ZK_HOSTS, diff --git a/src/openeo_aggregator/about.py b/src/openeo_aggregator/about.py index 521aacd..fa9a1e7 100644 --- a/src/openeo_aggregator/about.py +++ b/src/openeo_aggregator/about.py @@ -2,7 +2,7 @@ import sys from typing import Optional -__version__ = "0.35.1a1" +__version__ = "0.36.0a1" def log_version_info(logger: Optional[logging.Logger] = None): diff --git a/src/openeo_aggregator/backend.py b/src/openeo_aggregator/backend.py index 94e5ddd..0aa2903 100644 --- a/src/openeo_aggregator/backend.py +++ b/src/openeo_aggregator/backend.py @@ -178,6 +178,15 @@ def match(self, collection_id: str, backend_id: str) -> bool: class CollectionAllowList: """Allow list for collections, where filtering is based on collection id and (optionally) backend id.""" + # TODO: The use case where one wants to exclude one backend from a particular collection, + # while still keeping all other collections like the default behaviour + # is a bit cumbersome and error-prone, e.g.: + # collection_allow_list=[ + # {"collection_id": "SENTINEL2_L2A", "allowed_backends": ["vito"]}, + # re.compile("(?!SENTINEL2_L2A).*"), + # ], + # That last "catch-all-but-one" rule is unintuitive and is easy to get wrong.
+ def __init__(self, items: List[Union[str, re.Pattern, dict]]): """ :param items: list of allow list items, where each item can be: diff --git a/src/openeo_aggregator/config.py b/src/openeo_aggregator/config.py index 8b67680..926c2aa 100644 --- a/src/openeo_aggregator/config.py +++ b/src/openeo_aggregator/config.py @@ -78,6 +78,12 @@ class AggregatorBackendConfig(OpenEoBackendConfig): # # and additionally only consider specific backends by id (per `aggregator_backends` config) # {"collection_id": "SENTINEL2_L2A", "allowed_backends": ["b2"]}, # ] + # A collection+backend combo will be included if any item matches. + # This means that if you want to exclude one backend from a particular collection, + # while still keeping all other collections (like the default behavior), + # you need to add an item that matches everything but that collection (note: the lookahead below also rejects any id that merely starts with that collection id), e.g.: + # {"collection_id": "SENTINEL2_L2A", "allowed_backends": ["b2"]}, + # re.compile("(?!SENTINEL2_L2A).*"), collection_allow_list: Optional[List[Union[str, re.Pattern, dict]]] = None # Process allow list (as callable) for process ids to cover with the aggregator. Accept all by default.
diff --git a/tests/test_views.py b/tests/test_views.py index a5236fc..34a99ec 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -390,6 +390,52 @@ def test_collections_allow_list_allowed_backend( else: res.assert_error(404, "CollectionNotFound") + def test_collections_allow_list_deny_on_one_keep_the_rest( + self, + api100, + requests_mock, + backend1, + backend2, + ): + """ + exclude one backend for a particular collection and keep the rest + """ + for backend, cids in { + backend1: ["S1", "S2", "S3"], + backend2: ["S2", "S3"], + }.items(): + requests_mock.get(backend + "/collections", json={"collections": [{"id": cid} for cid in cids]}) + for cid in cids: + requests_mock.get(backend + f"/collections/{cid}", json={"id": cid, "title": f"{backend} {cid}"}) + + collection_allow_list = [ + re.compile("(?!S3).*"), + {"collection_id": "S3", "allowed_backends": ["b2"]}, + ] + + with config_overrides(collection_allow_list=collection_allow_list): + res = api100.get("/collections").assert_status_code(200).json + assert set(c["id"] for c in res["collections"]) == {"S1", "S2", "S3"} + + assert api100.get("/collections/S1").assert_status_code(200).json == DictSubSet( + { + "id": "S1", + "summaries": DictSubSet({"federation:backends": ["b1"]}), + } + ) + assert api100.get("/collections/S2").assert_status_code(200).json == DictSubSet( + { + "id": "S2", + "summaries": DictSubSet({"federation:backends": ["b1", "b2"]}), + } + ) + assert api100.get("/collections/S3").assert_status_code(200).json == DictSubSet( + { + "id": "S3", + "summaries": DictSubSet({"federation:backends": ["b2"]}), + } + ) + class TestAuthentication: def test_credentials_oidc_default(self, api100, backend1, backend2):