From acc340f1977cb0358594da132df945d8acb006b3 Mon Sep 17 00:00:00 2001
From: Matic Lubej
Date: Tue, 4 Apr 2023 14:05:17 +0200
Subject: [PATCH] Move get_timestamps utility to sh-py (#444)

* move get timestamps utility to sh-py catalog api

* add test for timestamps query utility

* update maxcc parameter check
---
 sentinelhub/api/catalog.py | 47 +++++++++++++++++++++++++++++++++++++-
 tests/api/test_catalog.py  | 40 +++++++++++++++++++++++++++++---
 2 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/sentinelhub/api/catalog.py b/sentinelhub/api/catalog.py
index 02e2a9d2..e402973f 100644
--- a/sentinelhub/api/catalog.py
+++ b/sentinelhub/api/catalog.py
@@ -5,9 +5,10 @@
 from typing import Any, Dict, Iterable, List, Literal, Optional, Union
 
 from ..base import FeatureIterator
+from ..config import SHConfig
 from ..data_collections import DataCollection, OrbitDirection
 from ..geometry import CRS, BBox, Geometry
-from ..time_utils import parse_time, parse_time_interval, serialize_time
+from ..time_utils import filter_times, parse_time, parse_time_interval, serialize_time
 from ..types import JsonDict, RawTimeIntervalType, RawTimeType
 from .base import SentinelHubService
 from .utils import remove_undefined
@@ -265,3 +266,47 @@ def get_ids(self) -> List[str]:
         :return: A list of IDs
         """
         return [feature["id"] for feature in self]
+
+
+def get_available_timestamps(
+    bbox: BBox,
+    time_interval: Optional[RawTimeIntervalType],
+    data_collection: DataCollection,
+    *,
+    time_difference: Optional[dt.timedelta] = None,
+    ignore_tz: bool = True,
+    maxcc: Optional[float] = 1.0,
+    config: Optional[SHConfig] = None,
+) -> List[dt.datetime]:
+    """Helper function to search for all available timestamps for a given area and query parameters.
+
+    :param bbox: A bounding box of the search area.
+    :param time_interval: A time interval from which to provide the timestamps.
+    :param data_collection: A collection specifying the satellite data source for finding available timestamps.
+    :param time_difference: Shortest allowed time difference. Consecutive timestamps will be skipped if too close to
+        the previous one. Defaults to keeping all timestamps.
+    :param ignore_tz: Ignore the time zone part in the returned timestamps. Default is True.
+    :param maxcc: Maximum cloud coverage filter from interval [0, 1]. Default is 1.0, use None to skip the filter.
+    :param config: The SH configuration object.
+    :return: A list of timestamps of available observations.
+    :raises ValueError: If `maxcc` is a number outside of the interval [0, 1].
+    """
+    query_filter = None
+    time_difference = time_difference if time_difference is not None else dt.timedelta(seconds=-1)
+    fields = {"include": ["properties.datetime"], "exclude": []}
+
+    if maxcc is not None and data_collection.has_cloud_coverage:
+        if isinstance(maxcc, (int, float)) and (maxcc < 0 or maxcc > 1):
+            raise ValueError('Maximum cloud coverage "maxcc" parameter should be a float on an interval [0, 1]')
+        query_filter = f"eo:cloud_cover < {round(maxcc * 100)}"  # int() would truncate 0.29 * 100 to 28
+
+    if data_collection.service_url is not None:
+        config = config.copy() if config else SHConfig()  # copy to leave the caller's config unmodified
+        config.sh_base_url = data_collection.service_url
+
+    search_iterator = SentinelHubCatalog(config=config).search(
+        collection=data_collection, bbox=bbox, time=time_interval, filter=query_filter, fields=fields
+    )
+
+    timestamps = [parse_time(ts, force_datetime=True, ignoretz=ignore_tz) for ts in search_iterator.get_timestamps()]
+    return filter_times(timestamps, time_difference)
diff --git a/tests/api/test_catalog.py b/tests/api/test_catalog.py
index 9d013829..30acadf4 100644
--- a/tests/api/test_catalog.py
+++ b/tests/api/test_catalog.py
@@ -2,15 +2,17 @@
 Tests for the module with Catalog API interface
 """
 import datetime as dt
+from functools import partial
 from typing import Union
 
 import dateutil.tz
+import numpy as np
 import pytest
 
-from sentinelhub import CRS, BBox, DataCollection, Geometry, SentinelHubCatalog, SHConfig
-from sentinelhub.api.catalog import CatalogSearchIterator
+from sentinelhub import CRS, BBox, DataCollection, Geometry, SentinelHubCatalog, SHConfig, parse_time
+from sentinelhub.api.catalog import CatalogSearchIterator, get_available_timestamps
 
-TEST_BBOX = BBox([46.16, -16.15, 46.51, -15.58], CRS.WGS84)
+TEST_BBOX = BBox((46.16, -16.15, 46.51, -15.58), CRS.WGS84)
 
 pytestmark = pytest.mark.sh_integration
 
@@ -196,3 +198,35 @@ def test_search_with_ids(config: SHConfig) -> None:
     results = list(search_iterator)
     assert len(results) == 1
     assert results[0]["id"] == tile_id
+
+
+@pytest.mark.parametrize(
+    "data_collection, time_difference_hours, maxcc, n_timestamps",
+    [
+        (DataCollection.SENTINEL1_IW, 2, None, 4),
+        (DataCollection.SENTINEL2_L2A, 1, 0.7, 8),
+        (DataCollection.SENTINEL2_L2A, 2 * 30 * 24, None, 1),
+        (DataCollection.SENTINEL2_L1C.define_from("COLLECTION_WITHOUT_URL", service_url=None), -1, None, 10),
+    ],
+)
+def test_get_available_timestamps(
+    data_collection: DataCollection, time_difference_hours: int, maxcc: Union[float, None], n_timestamps: int
+) -> None:
+    interval_start, interval_end = "2019-04-20", "2019-06-09"
+    get_test_timestamps = partial(
+        get_available_timestamps,
+        bbox=TEST_BBOX,
+        data_collection=data_collection,
+        time_difference=dt.timedelta(hours=time_difference_hours),
+        time_interval=(interval_start, interval_end),
+        maxcc=maxcc,
+    )
+
+    timestamps = get_test_timestamps(ignore_tz=True)
+    assert len(timestamps) == n_timestamps
+    assert all(ts >= parse_time(interval_start, force_datetime=True) for ts in timestamps)
+    assert all(ts <= parse_time(interval_end, force_datetime=True) for ts in timestamps)
+    assert all(ts_diff.total_seconds() / 3600 > time_difference_hours for ts_diff in np.diff(np.array(timestamps)))
+
+    timestamps_with_tz = get_test_timestamps(ignore_tz=False)
+    assert all(timestamp.tzinfo is not None for timestamp in timestamps_with_tz)