12
12
from openeo_gfmap .manager import _log
13
13
14
14
15
def load_s2_grid(web_mercator: bool = False) -> gpd.GeoDataFrame:
    """Return the Sentinel-2 tiling grid as a GeoDataFrame.

    The grid is cached under ``~/.openeo-gfmap``; on first use it is
    downloaded from the VITO artifactory and stored as a geoparquet file.

    Parameters
    ----------
    web_mercator:
        If True, load the grid bounds in Web Mercator (EPSG:3857);
        otherwise in lat/lon (EPSG:4326, the default).

    Returns
    -------
    gpd.GeoDataFrame
        The S2 grid read from the cached geoparquet file.

    Raises
    ------
    requests.HTTPError
        If the download fails with an HTTP error status.
    """
    # Builds the cache path and download URL for the requested CRS.
    epsg = "3857" if web_mercator else "4326"
    gdf_path = Path.home() / ".openeo-gfmap" / f"s2grid_bounds_{epsg}.geoparquet"
    url = (
        "https://artifactory.vgt.vito.be/artifactory/auxdata-public/gfmap/"
        f"s2grid_bounds_{epsg}.geoparquet"
    )

    if not gdf_path.exists():
        _log.info("S2 grid not found, downloading it from artifactory.")
        # Downloads the file from the artifactory URL
        gdf_path.parent.mkdir(exist_ok=True)
        response = requests.get(
            url,
            timeout=180,  # 3mins
        )
        # Fail loudly on an HTTP error instead of permanently caching the
        # error payload as a broken .geoparquet file.
        response.raise_for_status()
        with open(gdf_path, "wb") as f:
            f.write(response.content)
    return gpd.read_parquet(gdf_path)
30
36
31
37
32
38
def _resplit_group (
@@ -38,7 +44,7 @@ def _resplit_group(
38
44
39
45
40
46
def split_job_s2grid (
41
- polygons : gpd .GeoDataFrame , max_points : int = 500
47
+ polygons : gpd .GeoDataFrame , max_points : int = 500 , web_mercator : bool = False
42
48
) -> List [gpd .GeoDataFrame ]:
43
49
"""Split a job into multiple jobs from the position of the polygons/points. The centroid of
44
50
the geometries to extract are used to select tile in the Sentinel-2 tile grid.
@@ -60,17 +66,23 @@ def split_job_s2grid(
60
66
if polygons .crs is None :
61
67
raise ValueError ("The GeoDataFrame must contain a CRS" )
62
68
63
- polygons = polygons .to_crs (epsg = 4326 )
64
- if polygons .geometry .geom_type [0 ] != "Point" :
65
- polygons ["geometry" ] = polygons .geometry .centroid
69
+ epsg = 3857 if web_mercator else 4326
70
+
71
+ original_crs = polygons .crs
72
+
73
+ polygons = polygons .to_crs (epsg = epsg )
74
+
75
+ polygons ["centroid" ] = polygons .geometry .centroid
66
76
67
77
# Dataset containing all the S2 tiles, find the nearest S2 tile for each point
68
- s2_grid = load_s2_grid ()
78
+ s2_grid = load_s2_grid (web_mercator )
69
79
s2_grid ["geometry" ] = s2_grid .geometry .centroid
70
80
71
- polygons = gpd .sjoin_nearest (polygons , s2_grid [["tile" , "geometry" ]]).drop (
72
- columns = ["index_right" ]
73
- )
81
+ polygons = gpd .sjoin_nearest (
82
+ polygons .set_geometry ("centroid" ), s2_grid [["tile" , "geometry" ]]
83
+ ).drop (columns = ["index_right" , "centroid" ])
84
+
85
+ polygons = polygons .set_geometry ("geometry" ).to_crs (original_crs )
74
86
75
87
split_datasets = []
76
88
for _ , sub_gdf in polygons .groupby ("tile" ):
@@ -86,10 +98,13 @@ def append_h3_index(
86
98
polygons : gpd .GeoDataFrame , grid_resolution : int = 3
87
99
) -> gpd .GeoDataFrame :
88
100
"""Append the H3 index to the polygons."""
89
- if polygons .geometry .geom_type [0 ] != "Point" :
90
- geom_col = polygons .geometry .centroid
91
- else :
92
- geom_col = polygons .geometry
101
+
102
+ # Project to Web mercator to calculate centroids
103
+ polygons = polygons .to_crs (epsg = 3857 )
104
+ geom_col = polygons .geometry .centroid
105
+ # Project to lat lon to calculate the h3 index
106
+ geom_col = geom_col .to_crs (epsg = 4326 )
107
+
93
108
polygons ["h3index" ] = geom_col .apply (
94
109
lambda pt : h3 .geo_to_h3 (pt .y , pt .x , grid_resolution )
95
110
)
@@ -127,12 +142,13 @@ def split_job_hex(
127
142
if polygons .crs is None :
128
143
raise ValueError ("The GeoDataFrame must contain a CRS" )
129
144
130
- # Project to lat/lon positions
131
- polygons = polygons .to_crs (epsg = 4326 )
145
+ original_crs = polygons .crs
132
146
133
147
# Split the polygons into multiple jobs
134
148
polygons = append_h3_index (polygons , grid_resolution )
135
149
150
+ polygons = polygons .to_crs (original_crs )
151
+
136
152
split_datasets = []
137
153
for _ , sub_gdf in polygons .groupby ("h3index" ):
138
154
if len (sub_gdf ) > max_points :
0 commit comments