Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: use Pystac (WIP) TDE-1358 #1239

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,179 changes: 636 additions & 543 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ pytest-dependency = "*"
pytest-mock = "*"
pytest-subtests = "*"
shellcheck-py = "*"
pystac = "^1.11.0"
10 changes: 7 additions & 3 deletions scripts/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def format_rfc_3339_datetime_string(datetime_object: datetime) -> str:
return datetime_object.astimezone(timezone.utc).strftime(RFC_3339_DATETIME_FORMAT)


def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> str:
def format_rfc_3339_nz_midnight_datetime(datetime_object: datetime) -> datetime:
"""Convert datetime to New Zealand midnight and format it to UTC"""
naive_midnight_datetime_string = f"{datetime_object.strftime(RFC_3339_DATE_FORMAT)}T00:00:00.000"

Expand All @@ -34,9 +34,13 @@ def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> st
raise Exception(f"Not a valid date: {err}") from err

utc_tz = tz.gettz("UTC")
datetime_utc = nz_datetime.astimezone(utc_tz)
nz_time: datetime = nz_datetime.astimezone(utc_tz)
return nz_time


return format_rfc_3339_datetime_string(datetime_utc)
def format_rfc_3339_nz_midnight_datetime_string(datetime_object: datetime) -> str:
    """Return the RFC 3339 string for New Zealand midnight of the given datetime.

    Args:
        datetime_object: the datetime passed to `format_rfc_3339_nz_midnight_datetime`

    Returns:
        the result of `format_rfc_3339_nz_midnight_datetime` formatted by `format_rfc_3339_datetime_string`
    """
    nz_midnight = format_rfc_3339_nz_midnight_datetime(datetime_object)
    return format_rfc_3339_datetime_string(nz_midnight)


class NaiveDatetimeError(Exception):
Expand Down
2 changes: 1 addition & 1 deletion scripts/files/geotiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from scripts.gdal.gdalinfo import GdalInfo


def get_extents(gdalinfo_result: GdalInfo) -> tuple[dict[str, list[list[list[float]]]], tuple[float, float, float, float]]:
def get_extents(gdalinfo_result: GdalInfo) -> tuple[dict[str, list[list[list[float]]]], list[float]]:
"""Get the geometry and bounding box from the `gdalinfo`.

Args:
Expand Down
11 changes: 11 additions & 0 deletions scripts/stac/imagery/asset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from pystac import Asset, MediaType


def create_visual_asset(href: str, created: str, checksum: str, updated: str | None = None) -> Asset:
    """Build the Asset describing a visual (COG) file.

    Args:
        href: value passed to the Asset as its `href`
        created: value stored in the `created` extra field
        checksum: value stored in the `file:checksum` extra field; the field is omitted when this is empty
        updated: value stored in the `updated` extra field; falls back to `created` when not provided

    Returns:
        an Asset titled "visual" with the COG media type and the extra fields described above
    """
    asset_fields = {"created": created, "updated": updated or created}
    if checksum:
        asset_fields["file:checksum"] = checksum
    return Asset(href=href, title="visual", media_type=MediaType.COG, extra_fields=asset_fields)
12 changes: 6 additions & 6 deletions scripts/stac/imagery/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Any

import ulid
from pystac import MediaType, RelType
from shapely.geometry.base import BaseGeometry

from scripts.datetimes import format_rfc_3339_datetime_string, parse_rfc_3339_datetime
Expand All @@ -23,10 +24,9 @@
SubtypeParameterError,
)
from scripts.stac.imagery.provider import Provider, ProviderRole, merge_provider_roles
from scripts.stac.link import Link, Relation
from scripts.stac.link import create_link_with_checksum
from scripts.stac.util import checksum
from scripts.stac.util.STAC_VERSION import STAC_VERSION
from scripts.stac.util.media_type import StacMediaType
from scripts.stac.util.stac_extensions import StacExtensions

CAPTURE_AREA_FILE_NAME = "capture-area.geojson"
Expand Down Expand Up @@ -166,12 +166,12 @@ def add_item(self, item: dict[Any, Any]) -> None:
item_self_link = next((feat for feat in item["links"] if feat["rel"] == "self"), None)
if item_self_link:
self.stac["links"].append(
Link(
create_link_with_checksum(
path=item_self_link["href"],
rel=Relation.ITEM,
media_type=StacMediaType.GEOJSON,
rel=RelType.ITEM,
media_type=MediaType.GEOJSON,
file_content=dict_to_json_bytes(item),
).stac
).to_dict()
)
self.update_temporal_extent(item["properties"]["start_datetime"], item["properties"]["end_datetime"])
self.update_spatial_extent(item["bbox"])
Expand Down
63 changes: 27 additions & 36 deletions scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Any, TypeAlias, cast

from linz_logger import get_log
from pystac import RelType
from shapely.geometry.base import BaseGeometry

from scripts.files import fs
Expand All @@ -11,13 +12,12 @@
from scripts.files.geotiff import get_extents
from scripts.gdal.gdal_helper import gdal_info
from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.asset import create_visual_asset
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem, STACAsset, STACProcessing, STACProcessingSoftware
from scripts.stac.imagery.item import ImageryItem, STACProcessing, STACProcessingSoftware
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.imagery.provider import Provider, ProviderRole
from scripts.stac.link import Link, Relation
from scripts.stac.util import checksum
from scripts.stac.util.media_type import StacMediaType

JSON: TypeAlias = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None
JSON_Dict: TypeAlias = dict[str, "JSON"]
Expand Down Expand Up @@ -123,48 +123,42 @@ def create_item(
Returns:
a STAC Item wrapped in ImageryItem
"""
item = create_or_load_base_item(asset_path, gdal_version, current_datetime, odr_url)
base_stac = item.stac.copy()
item = create_or_load_base_item(asset_path, gdal_version, current_datetime, start_datetime, end_datetime, odr_url)
base_stac = item.clone()

if not gdalinfo_result:
gdalinfo_result = gdal_info(asset_path)

if item.stac.get("links") is not None:
# Remove existing derived_from links in case of resupply
item.stac["links"] = [link for link in item.stac["links"] if link["rel"] != "derived_from"]

item.remove_links(RelType.DERIVED_FROM)
if derived_from is not None:
for derived in derived_from:
derived_item_content = read(derived)
derived_stac = json.loads(derived_item_content.decode("UTF-8"))
if not start_datetime or derived_stac["properties"]["start_datetime"] < start_datetime:
start_datetime = derived_stac["properties"]["start_datetime"]
if not end_datetime or derived_stac["properties"]["end_datetime"] > end_datetime:
end_datetime = derived_stac["properties"]["end_datetime"]
item.add_link(
Link(
path=derived,
rel=Relation.DERIVED_FROM,
media_type=StacMediaType.GEOJSON,
file_content=derived_item_content,
)
)
derived_from_item = ImageryItem.from_file(derived)
if not start_datetime or derived_from_item.properties["start_datetime"] < start_datetime:
start_datetime = derived_from_item.properties["start_datetime"]
if not end_datetime or derived_from_item.properties["end_datetime"] > end_datetime:
end_datetime = derived_from_item.properties["end_datetime"]
item.add_derived_from(derived_from_item)

item.update_datetime(start_datetime, end_datetime)
item.update_spatial(*get_extents(gdalinfo_result))
item.add_collection(collection_id)

if item.stac != base_stac and item.stac["properties"]["updated"] != current_datetime:
item.stac["properties"][
"updated"
] = current_datetime # some of the metadata has changed, so we need to make sure the `updated` time is set correctly
if item.to_dict() != base_stac.to_dict() and item.properties["updated"] != current_datetime:
item.properties["updated"] = (
current_datetime # some of the metadata has changed, so we need to make sure the `updated` time is set correctly
)

get_log().info("ImageryItem created", path=asset_path)
return item


def create_or_load_base_item(
asset_path: str, gdal_version: str, current_datetime: str, odr_url: str | None = None
asset_path: str,
gdal_version: str,
current_datetime: str,
start_datetime: str,
end_datetime: str,
odr_url: str | None = None,
) -> ImageryItem:
"""
Args:
Expand Down Expand Up @@ -202,16 +196,13 @@ def create_or_load_base_item(
except NoSuchFileError:
get_log().info(f"No Item is published for ID: {id_}")

stac_asset = STACAsset(
**{
"href": os.path.join(".", os.path.basename(asset_path)),
"file:checksum": file_content_checksum,
"created": current_datetime,
"updated": current_datetime,
}
asset = create_visual_asset(
href=os.path.join(".", os.path.basename(asset_path)),
created=current_datetime,
checksum=file_content_checksum,
)

return ImageryItem(id_, stac_asset, stac_processing)
return ImageryItem(id_, asset, stac_processing, start_datetime, end_datetime)


def get_published_file_contents(odr_url: str, filename: str) -> JSON_Dict:
Expand Down
Loading
Loading