Skip to content

Commit 27a239e

Browse files
Merge pull request #152 from Open-EO/update-stac-splitter
Update stac splitter
2 parents 8d9a808 + cebdbe9 commit 27a239e

File tree

2 files changed

+55
-69
lines changed

2 files changed

+55
-69
lines changed

src/openeo_gfmap/utils/catalogue.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
import requests
55
from pyproj.crs import CRS
66
from rasterio.warp import transform_bounds
7-
from shapely import unary_union
87
from shapely.geometry import box, shape
8+
from shapely.ops import unary_union
99

1010
from openeo_gfmap import (
1111
Backend,

src/openeo_gfmap/utils/split_stac.py

Lines changed: 54 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import os
66
from pathlib import Path
7-
from typing import Union
7+
from typing import Iterator, Union
88

99
import pystac
1010

@@ -30,92 +30,59 @@ def _extract_epsg_from_stac_item(stac_item: pystac.Item) -> int:
3030
raise KeyError("The 'proj:epsg' property is missing from the STAC item.")
3131

3232

33-
def _create_item_by_epsg_dict(collection: pystac.Collection) -> dict:
33+
def _get_items_by_epsg(
34+
collection: pystac.Collection,
35+
) -> Iterator[tuple[int, pystac.Item]]:
3436
"""
35-
Create a dictionary that groups items by their EPSG code.
37+
Generator function that yields each item paired with its EPSG code.
3638
3739
Parameters:
3840
collection (pystac.Collection): The STAC collection.
3941
40-
Returns:
41-
dict: A dictionary that maps EPSG codes to lists of items.
42+
Yields:
43+
tuple[int, pystac.Item]: EPSG code and corresponding STAC item.
4244
"""
43-
# Dictionary to store items grouped by their EPSG codes
44-
items_by_epsg = {}
45-
46-
# Iterate through items and group them
47-
for item in collection.get_items():
45+
for item in collection.get_all_items():
4846
epsg = _extract_epsg_from_stac_item(item)
49-
if epsg not in items_by_epsg:
50-
items_by_epsg[epsg] = []
51-
items_by_epsg[epsg].append(item)
52-
53-
return items_by_epsg
47+
yield epsg, item
5448

5549

56-
def _create_new_epsg_collection(
57-
epsg: int, items: list, collection: pystac.Collection
50+
def _create_collection_skeleton(
51+
collection: pystac.Collection, epsg: int
5852
) -> pystac.Collection:
5953
"""
60-
Create a new STAC collection with a given EPSG code.
54+
Create a skeleton for a new STAC collection with a given EPSG code.
6155
6256
Parameters:
63-
epsg (int): The EPSG code.
64-
items (list): The list of items.
6557
collection (pystac.Collection): The original STAC collection.
58+
epsg (int): The EPSG code.
6659
6760
Returns:
68-
pystac.Collection: The new STAC collection.
61+
pystac.Collection: The skeleton of the new STAC collection.
6962
"""
70-
new_collection = collection.clone()
71-
new_collection.id = f"{collection.id}_{epsg}"
72-
new_collection.description = (
73-
f"{collection.description} Containing only items with EPSG code {epsg}"
63+
new_collection = pystac.Collection(
64+
id=f"{collection.id}_{epsg}",
65+
description=f"{collection.description} Containing only items with EPSG code {epsg}",
66+
extent=collection.extent.clone(),
67+
summaries=collection.summaries,
68+
license=collection.license,
69+
stac_extensions=collection.stac_extensions,
7470
)
75-
new_collection.clear_items()
76-
for item in items:
77-
new_collection.add_item(item)
78-
79-
new_collection.update_extent_from_items()
80-
71+
if "item_assets" in collection.extra_fields:
72+
item_assets_extension = pystac.extensions.item_assets.ItemAssetsExtension.ext(
73+
collection
74+
)
75+
76+
new_item_assets_extension = (
77+
pystac.extensions.item_assets.ItemAssetsExtension.ext(
78+
new_collection, add_if_missing=True
79+
)
80+
)
81+
82+
new_item_assets_extension.item_assets = item_assets_extension.item_assets
8183
return new_collection
8284

8385

84-
def _create_collection_by_epsg_dict(collection: pystac.Collection) -> dict:
85-
"""
86-
Create a dictionary that groups collections by their EPSG code.
87-
88-
Parameters:
89-
collection (pystac.Collection): The STAC collection.
90-
91-
Returns:
92-
dict: A dictionary that maps EPSG codes to STAC collections.
93-
"""
94-
items_by_epsg = _create_item_by_epsg_dict(collection)
95-
collections_by_epsg = {}
96-
for epsg, items in items_by_epsg.items():
97-
new_collection = _create_new_epsg_collection(epsg, items, collection)
98-
collections_by_epsg[epsg] = new_collection
99-
100-
return collections_by_epsg
101-
102-
103-
def _write_collection_dict(collection_dict: dict, output_dir: Union[str, Path]):
104-
"""
105-
Write the collection dictionary to disk.
106-
107-
Parameters:
108-
collection_dict (dict): The dictionary that maps EPSG codes to STAC collections.
109-
output_dir (str): The output directory.
110-
"""
111-
output_dir = Path(output_dir)
112-
os.makedirs(output_dir, exist_ok=True)
113-
114-
for epsg, collection in collection_dict.items():
115-
collection.normalize_hrefs(os.path.join(output_dir, f"collection-{epsg}"))
116-
collection.save()
117-
118-
11986
def split_collection_by_epsg(path: Union[str, Path], output_dir: Union[str, Path]):
12087
"""
12188
Split a STAC collection into multiple STAC collections based on EPSG code.
@@ -124,10 +91,29 @@ def split_collection_by_epsg(path: Union[str, Path], output_dir: Union[str, Path
12491
path (str or Path): The path to the STAC collection.
12592
output_dir (str or Path): The output directory.
12693
"""
94+
12795
path = Path(path)
96+
output_dir = Path(output_dir)
97+
os.makedirs(output_dir, exist_ok=True)
98+
12899
try:
129100
collection = pystac.read_file(path)
130101
except pystac.STACError:
131102
print("Please provide a path to a valid STAC collection.")
132-
collection_dict = _create_collection_by_epsg_dict(collection)
133-
_write_collection_dict(collection_dict, output_dir)
103+
return
104+
105+
collections_by_epsg = {}
106+
107+
for epsg, item in _get_items_by_epsg(collection):
108+
if epsg not in collections_by_epsg:
109+
collections_by_epsg[epsg] = _create_collection_skeleton(collection, epsg)
110+
111+
# Add item to the corresponding collection
112+
collections_by_epsg[epsg].add_item(item)
113+
114+
# Write each collection to disk
115+
for epsg, new_collection in collections_by_epsg.items():
116+
new_collection.update_extent_from_items() # Update extent based on added items
117+
collection_path = output_dir / f"collection-{epsg}"
118+
new_collection.normalize_hrefs(str(collection_path))
119+
new_collection.save()

0 commit comments

Comments
 (0)