From 7e7d070146335716c4ead5b1700055390942c118 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 23 Jan 2025 15:27:29 +0100 Subject: [PATCH] get_root_link/get_child_links/get_item_links: Ensure correct media types (#1497) * `get_root|child|item_links` should be json-like * Allow media_type on get_links and get_single_link to be an iterable * Use `application/json`, `application/geo+json` or None as the media_type for `get_root_link`, `get_child_links` and `get_item_links` * Dry up --- CHANGELOG.md | 2 ++ pystac/catalog.py | 10 +++++++-- pystac/media_type.py | 4 ++++ pystac/stac_object.py | 43 +++++++++++++++++++++----------------- tests/test_catalog.py | 48 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 86 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c46827062..63a935711 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Top-level `item_assets` dict on `Collection`s ([#1476](https://github.com/stac-utils/pystac/pull/1476)) - Render Extension ([#1465](https://github.com/stac-utils/pystac/pull/1465)) +- Filter by links by list of media_types ### Changed @@ -16,6 +17,7 @@ - Update Projection Extension to version 2 - proj:epsg -> proj:code ([#1287](https://github.com/stac-utils/pystac/pull/1287)) - Update migrate code to handle license changes in STAC spec 1.1.0 ([#1491](https://github.com/stac-utils/pystac/pull/1491)) - Allow links to have `file://` prefix - but don't write them that way by default ([#1489](https://github.com/stac-utils/pystac/pull/1489)) +- For `get_root_link`, `get_child_links`, `get_item_links`: Ensure json media types ([#1497](https://github.com/stac-utils/pystac/pull/1497)) - Raise `STACError` with message when a link is expected to resolve to a STAC object but doesn't ([#1500](https://github.com/stac-utils/pystac/pull/1500)) - Raise an error on APILayoutStrategy when root_href is non-url ([#1498](https://github.com/stac-utils/pystac/pull/1498)) diff --git a/pystac/catalog.py 
b/pystac/catalog.py index f8bd28f83..718650be7 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -13,6 +13,7 @@ ) import pystac +import pystac.media_type from pystac.cache import ResolvedObjectCache from pystac.errors import STACError, STACTypeError from pystac.layout import ( @@ -466,7 +467,10 @@ def get_child_links(self) -> list[Link]: Return: List[Link]: List of links of this catalog with ``rel == 'child'`` """ - return self.get_links(pystac.RelType.CHILD) + return self.get_links( + rel=pystac.RelType.CHILD, + media_type=pystac.media_type.STAC_JSON, + ) def clear_children(self) -> None: """Removes all children from this catalog. @@ -626,7 +630,9 @@ def get_item_links(self) -> list[Link]: Return: List[Link]: List of links of this catalog with ``rel == 'item'`` """ - return self.get_links(pystac.RelType.ITEM) + return self.get_links( + rel=pystac.RelType.ITEM, media_type=pystac.media_type.STAC_JSON + ) def to_dict( self, include_self_link: bool = True, transform_hrefs: bool = True diff --git a/pystac/media_type.py b/pystac/media_type.py index 734291024..4f69148a7 100644 --- a/pystac/media_type.py +++ b/pystac/media_type.py @@ -24,3 +24,7 @@ class MediaType(StringEnum): PDF = "application/pdf" ZARR = "application/vnd+zarr" # https://github.com/openMetadataInitiative/openMINDS_core/blob/v4/instances/data/contentTypes/zarr.jsonld NETCDF = "application/netcdf" # https://github.com/Unidata/netcdf/issues/42#issuecomment-1007618822 + + +#: Media types that can be resolved as STAC Objects +STAC_JSON = [None, MediaType.GEOJSON, MediaType.JSON] diff --git a/pystac/stac_object.py b/pystac/stac_object.py index 1043d867f..591a5c862 100644 --- a/pystac/stac_object.py +++ b/pystac/stac_object.py @@ -3,12 +3,7 @@ from abc import ABC, abstractmethod from collections.abc import Callable, Iterable from html import escape -from typing import ( - TYPE_CHECKING, - Any, - TypeVar, - cast, -) +from typing import TYPE_CHECKING, Any, TypeAlias, TypeVar, cast import pystac from 
pystac import STACError @@ -27,6 +22,8 @@ S = TypeVar("S", bound="STACObject") +OptionalMediaType: TypeAlias = str | pystac.MediaType | None + class STACObjectType(StringEnum): CATALOG = "Catalog" @@ -177,7 +174,7 @@ def traverse(obj: str | STACObject, visited: set[str | STACObject]) -> bool: def get_single_link( self, rel: str | pystac.RelType | None = None, - media_type: str | pystac.MediaType | None = None, + media_type: OptionalMediaType | Iterable[OptionalMediaType] = None, ) -> Link | None: """Get a single :class:`~pystac.Link` instance associated with this object. @@ -186,7 +183,8 @@ def get_single_link( rel : If set, filter links such that only those matching this relationship are returned. media_type: If set, filter the links such that only - those matching media_type are returned + those matching media_type are returned. media_type can + be a single value or a list of values. Returns: :class:`~pystac.Link` | None: First link that matches ``rel`` @@ -195,12 +193,14 @@ def get_single_link( """ if rel is None and media_type is None: return next(iter(self.links), None) + if media_type and isinstance(media_type, (str, pystac.MediaType)): + media_type = [media_type] return next( ( link for link in self.links if (rel is None or link.rel == rel) - and (media_type is None or link.media_type == media_type) + and (media_type is None or link.media_type in media_type) ), None, ) @@ -208,7 +208,7 @@ def get_single_link( def get_links( self, rel: str | pystac.RelType | None = None, - media_type: str | pystac.MediaType | None = None, + media_type: OptionalMediaType | Iterable[OptionalMediaType] = None, ) -> list[Link]: """Gets the :class:`~pystac.Link` instances associated with this object. @@ -216,7 +216,8 @@ def get_links( rel : If set, filter links such that only those matching this relationship are returned. media_type: If set, filter the links such that only - those matching media_type are returned + those matching media_type are returned. 
media_type can + be a single value or a list of values. Returns: List[:class:`~pystac.Link`]: A list of links that match ``rel`` and/ @@ -225,13 +226,14 @@ def get_links( """ if rel is None and media_type is None: return self.links - else: - return [ - link - for link in self.links - if (rel is None or link.rel == rel) - and (media_type is None or link.media_type == media_type) - ] + if media_type and isinstance(media_type, (str, pystac.MediaType)): + media_type = [media_type] + return [ + link + for link in self.links + if (rel is None or link.rel == rel) + and (media_type is None or link.media_type in media_type) + ] def clear_links(self, rel: str | pystac.RelType | None = None) -> None: """Clears all :class:`~pystac.Link` instances associated with this object. @@ -252,7 +254,10 @@ def get_root_link(self) -> Link | None: :class:`~pystac.Link` or None: The root link for this object, or ``None`` if no root link is set. """ - return self.get_single_link(pystac.RelType.ROOT) + return self.get_single_link( + rel=pystac.RelType.ROOT, + media_type=pystac.media_type.STAC_JSON, + ) @property def self_href(self) -> str: diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 9efebabff..36adf4b1d 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -1409,6 +1409,10 @@ def test_get_links(self) -> None: len(catalog.get_links(rel="search", media_type="application/geo+json")) == 1 ) assert len(catalog.get_links(media_type="text/html")) == 1 + assert ( + len(catalog.get_links(media_type=["text/html", "application/geo+json"])) + == 2 + ) assert len(catalog.get_links(rel="search")) == 2 assert len(catalog.get_links(rel="via")) == 0 assert len(catalog.get_links()) == 6 @@ -1982,3 +1986,47 @@ def test_APILayoutStrategy_requires_root_to_be_url( match="When using APILayoutStrategy the root_href must be a URL", ): catalog.normalize_hrefs(root_href="issues-1486", strategy=APILayoutStrategy()) + + +def test_get_child_links_cares_about_media_type(catalog: 
pystac.Catalog) -> None: + catalog.links.extend( + [ + pystac.Link( + rel="child", target="./child-1.json", media_type="application/json" + ), + pystac.Link( + rel="child", target="./child-2.json", media_type="application/geo+json" + ), + pystac.Link(rel="child", target="./child-3.json"), + # this one won't get counted since it's the wrong media_type + pystac.Link(rel="child", target="./child.html", media_type="text/html"), + ] + ) + + assert len(catalog.get_child_links()) == 3 + + +def test_get_item_links_cares_about_media_type(catalog: pystac.Catalog) -> None: + catalog.links.extend( + [ + pystac.Link( + rel="item", target="./item-1.json", media_type="application/json" + ), + pystac.Link( + rel="item", target="./item-2.json", media_type="application/geo+json" + ), + pystac.Link(rel="item", target="./item-3.json"), + # this one won't get counted since it's the wrong media_type + pystac.Link(rel="item", target="./item.html", media_type="text/html"), + ] + ) + + assert len(catalog.get_item_links()) == 3 + + +def test_get_root_link_cares_about_media_type(catalog: pystac.Catalog) -> None: + catalog.links.insert( + 0, pystac.Link(rel="root", target="./self.json", media_type="text/html") + ) + root_link = catalog.get_root_link() + assert root_link and root_link.target != "./self.json"