Skip to content

Commit

Permalink
get_root_link/get_child_links/get_item_links: Ensure correct media ty…
Browse files Browse the repository at this point in the history
…pes (#1497)

* `get_root|child|item_links` should be json-like

* Allow media_type on get_links and get_single_link to be
  an iterable
* Use `"application/json"`, `"application/geo+json"` or `None`
  as the media_type for `get_root_link`, `get_child_links`
  and `get_item_links`

* Dry up
  • Loading branch information
jsignell authored Jan 23, 2025
1 parent c033b51 commit 7e7d070
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 21 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

- Top-level `item_assets` dict on `Collection`s ([#1476](https://github.com/stac-utils/pystac/pull/1476))
- Render Extension ([#1465](https://github.com/stac-utils/pystac/pull/1465))
- Filter links by a list of media types

### Changed

Expand All @@ -16,6 +17,7 @@
- Update Projection Extension to version 2 - proj:epsg -> proj:code ([#1287](https://github.com/stac-utils/pystac/pull/1287))
- Update migrate code to handle license changes in STAC spec 1.1.0 ([#1491](https://github.com/stac-utils/pystac/pull/1491))
- Allow links to have `file://` prefix - but don't write them that way by default ([#1489](https://github.com/stac-utils/pystac/pull/1489))
- For `get_root_link`, `get_child_links`, `get_item_links`: Ensure json media types ([#1497](https://github.com/stac-utils/pystac/pull/1497))
- Raise `STACError` with message when a link is expected to resolve to a STAC object but doesn't ([#1500](https://github.com/stac-utils/pystac/pull/1500))
- Raise an error on APILayoutStrategy when root_href is non-url ([#1498](https://github.com/stac-utils/pystac/pull/1498))

Expand Down
10 changes: 8 additions & 2 deletions pystac/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)

import pystac
import pystac.media_type
from pystac.cache import ResolvedObjectCache
from pystac.errors import STACError, STACTypeError
from pystac.layout import (
Expand Down Expand Up @@ -466,7 +467,10 @@ def get_child_links(self) -> list[Link]:
Return:
List[Link]: List of links of this catalog with ``rel == 'child'``
"""
return self.get_links(pystac.RelType.CHILD)
return self.get_links(
rel=pystac.RelType.CHILD,
media_type=pystac.media_type.STAC_JSON,
)

def clear_children(self) -> None:
"""Removes all children from this catalog.
Expand Down Expand Up @@ -626,7 +630,9 @@ def get_item_links(self) -> list[Link]:
Return:
List[Link]: List of links of this catalog with ``rel == 'item'``
"""
return self.get_links(pystac.RelType.ITEM)
return self.get_links(
rel=pystac.RelType.ITEM, media_type=pystac.media_type.STAC_JSON
)

def to_dict(
self, include_self_link: bool = True, transform_hrefs: bool = True
Expand Down
4 changes: 4 additions & 0 deletions pystac/media_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,7 @@ class MediaType(StringEnum):
PDF = "application/pdf"
ZARR = "application/vnd+zarr" # https://github.com/openMetadataInitiative/openMINDS_core/blob/v4/instances/data/contentTypes/zarr.jsonld
NETCDF = "application/netcdf" # https://github.com/Unidata/netcdf/issues/42#issuecomment-1007618822


#: Media types that can be resolved as STAC Objects. ``None`` is included so
#: that links which do not declare a media type are matched as well.
STAC_JSON = [None, MediaType.GEOJSON, MediaType.JSON]
43 changes: 24 additions & 19 deletions pystac/stac_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterable
from html import escape
from typing import (
TYPE_CHECKING,
Any,
TypeVar,
cast,
)
from typing import TYPE_CHECKING, Any, TypeAlias, TypeVar, cast

import pystac
from pystac import STACError
Expand All @@ -27,6 +22,8 @@

S = TypeVar("S", bound="STACObject")

# A single media type filter value: a raw string, a ``pystac.MediaType``
# member, or ``None``.
OptionalMediaType: TypeAlias = str | pystac.MediaType | None


class STACObjectType(StringEnum):
CATALOG = "Catalog"
Expand Down Expand Up @@ -177,7 +174,7 @@ def traverse(obj: str | STACObject, visited: set[str | STACObject]) -> bool:
def get_single_link(
self,
rel: str | pystac.RelType | None = None,
media_type: str | pystac.MediaType | None = None,
media_type: OptionalMediaType | Iterable[OptionalMediaType] = None,
) -> Link | None:
"""Get a single :class:`~pystac.Link` instance associated with this
object.
Expand All @@ -186,7 +183,8 @@ def get_single_link(
rel : If set, filter links such that only those
matching this relationship are returned.
media_type: If set, filter the links such that only
those matching media_type are returned
those matching media_type are returned. media_type can
be a single value or a list of values.
Returns:
:class:`~pystac.Link` | None: First link that matches ``rel``
Expand All @@ -195,28 +193,31 @@ def get_single_link(
"""
if rel is None and media_type is None:
return next(iter(self.links), None)
if media_type and isinstance(media_type, (str, pystac.MediaType)):
media_type = [media_type]
return next(
(
link
for link in self.links
if (rel is None or link.rel == rel)
and (media_type is None or link.media_type == media_type)
and (media_type is None or link.media_type in media_type)
),
None,
)

def get_links(
self,
rel: str | pystac.RelType | None = None,
media_type: str | pystac.MediaType | None = None,
media_type: OptionalMediaType | Iterable[OptionalMediaType] = None,
) -> list[Link]:
"""Gets the :class:`~pystac.Link` instances associated with this object.
Args:
rel : If set, filter links such that only those
matching this relationship are returned.
media_type: If set, filter the links such that only
those matching media_type are returned
those matching media_type are returned. media_type can
be a single value or a list of values.
Returns:
List[:class:`~pystac.Link`]: A list of links that match ``rel`` and/
Expand All @@ -225,13 +226,14 @@ def get_links(
"""
if rel is None and media_type is None:
return self.links
else:
return [
link
for link in self.links
if (rel is None or link.rel == rel)
and (media_type is None or link.media_type == media_type)
]
if media_type and isinstance(media_type, (str, pystac.MediaType)):
media_type = [media_type]
return [
link
for link in self.links
if (rel is None or link.rel == rel)
and (media_type is None or link.media_type in media_type)
]

def clear_links(self, rel: str | pystac.RelType | None = None) -> None:
"""Clears all :class:`~pystac.Link` instances associated with this object.
Expand All @@ -252,7 +254,10 @@ def get_root_link(self) -> Link | None:
:class:`~pystac.Link` or None: The root link for this object,
or ``None`` if no root link is set.
"""
return self.get_single_link(pystac.RelType.ROOT)
return self.get_single_link(
rel=pystac.RelType.ROOT,
media_type=pystac.media_type.STAC_JSON,
)

@property
def self_href(self) -> str:
Expand Down
48 changes: 48 additions & 0 deletions tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -1409,6 +1409,10 @@ def test_get_links(self) -> None:
len(catalog.get_links(rel="search", media_type="application/geo+json")) == 1
)
assert len(catalog.get_links(media_type="text/html")) == 1
assert (
len(catalog.get_links(media_type=["text/html", "application/geo+json"]))
== 2
)
assert len(catalog.get_links(rel="search")) == 2
assert len(catalog.get_links(rel="via")) == 0
assert len(catalog.get_links()) == 6
Expand Down Expand Up @@ -1982,3 +1986,47 @@ def test_APILayoutStrategy_requires_root_to_be_url(
match="When using APILayoutStrategy the root_href must be a URL",
):
catalog.normalize_hrefs(root_href="issues-1486", strategy=APILayoutStrategy())


def test_get_child_links_cares_about_media_type(catalog: pystac.Catalog) -> None:
    """``get_child_links`` skips child links whose media type is not JSON-like."""
    json_link = pystac.Link(
        rel="child", target="./child-1.json", media_type="application/json"
    )
    geojson_link = pystac.Link(
        rel="child", target="./child-2.json", media_type="application/geo+json"
    )
    untyped_link = pystac.Link(rel="child", target="./child-3.json")
    # The HTML link has the wrong media_type, so it must not be counted.
    html_link = pystac.Link(rel="child", target="./child.html", media_type="text/html")
    catalog.links.extend([json_link, geojson_link, untyped_link, html_link])

    child_links = catalog.get_child_links()
    assert len(child_links) == 3


def test_get_item_links_cares_about_media_type(catalog: pystac.Catalog) -> None:
    """``get_item_links`` skips item links whose media type is not JSON-like."""
    json_link = pystac.Link(
        rel="item", target="./item-1.json", media_type="application/json"
    )
    geojson_link = pystac.Link(
        rel="item", target="./item-2.json", media_type="application/geo+json"
    )
    untyped_link = pystac.Link(rel="item", target="./item-3.json")
    # The HTML link has the wrong media_type, so it must not be counted.
    html_link = pystac.Link(rel="item", target="./item.html", media_type="text/html")
    catalog.links.extend([json_link, geojson_link, untyped_link, html_link])

    item_links = catalog.get_item_links()
    assert len(item_links) == 3


def test_get_root_link_cares_about_media_type(catalog: pystac.Catalog) -> None:
    """``get_root_link`` ignores a leading ``root`` link that is typed as HTML."""
    # Put the HTML-typed root link first so a media-type-blind lookup would
    # return it.
    html_root = pystac.Link(rel="root", target="./self.json", media_type="text/html")
    catalog.links.insert(0, html_root)

    resolved = catalog.get_root_link()
    assert resolved
    assert resolved.target != "./self.json"

0 comments on commit 7e7d070

Please sign in to comment.