Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cif 293 set tile size in meters for fishnet functions #89

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 45 additions & 40 deletions city_metrix/layers/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import shapely.geometry as geometry
import pandas as pd

MAX_TILE_SIZE = 0.5
MAX_TILE_SIDE_SIZE_METERS = 60000 # Approx. 0.5 degrees latitudinal offset. TODO How selected?

class Layer:
def __init__(self, aggregate=None, masks=[]):
Expand Down Expand Up @@ -55,23 +55,22 @@ def groupby(self, zones, layer=None):
"""
return LayerGroupBy(self.aggregate, zones, layer, self.masks)

def write(self, bbox, output_path, tile_degrees=None, buffer_meters=None, **kwargs):
def write(self, bbox, output_path, tile_side_meters=None, tile_buffer_meters=None, **kwargs):
"""
Write the layer to a path. Does not apply masks.

:param bbox: (min x, min y, max x, max y)
:param output_path: local or s3 path to output to
:param tile_degrees: optional param to tile the results into multiple files with a VRT.
Degrees to tile by. `output_path` should be a folder path to store the tiles.
: param buffer_meters: tile buffer distance in meters
:param tile_side_meters: optional param to tile the results into multiple files specified as tile length on a side in meters
: param tile_buffer_meters: tile buffer distance in meters
:return:
"""

if tile_degrees is None:
if tile_side_meters is None:
clipped_data = self.aggregate.get_data(bbox)
write_layer(output_path, clipped_data)
else:
tile_grid, unbuffered_tile_grid = _get_tile_boundaries(bbox, tile_degrees, buffer_meters)
tile_grid, unbuffered_tile_grid = _get_tile_boundaries(bbox, tile_side_meters, tile_buffer_meters)

# write raster data to files
if not os.path.exists(output_path):
Expand All @@ -93,14 +92,13 @@ def write(self, bbox, output_path, tile_degrees=None, buffer_meters=None, **kwar
_write_tile_grid(unbuffered_tile_grid, output_path, 'tile_grid_unbuffered.geojson')


def _get_tile_boundaries(bbox, tile_degrees, buffer_meters):
has_buffer = True if buffer_meters is not None and buffer_meters != 0 else False
def _get_tile_boundaries(bbox, tile_side_meters, tile_buffer_meters):
has_buffer = True if tile_buffer_meters is not None and tile_buffer_meters != 0 else False
if has_buffer:
lon_degree_offset, lat_degree_offset = meters_to_offset_degrees(bbox, buffer_meters)
tiles = create_fishnet_grid(*bbox, tile_degrees, lon_degree_offset, lat_degree_offset)
unbuffered_tiles = create_fishnet_grid(*bbox, tile_degrees)
tiles = create_fishnet_grid(*bbox, tile_side_meters, tile_buffer_meters)
unbuffered_tiles = create_fishnet_grid(*bbox, tile_side_meters)
else:
tiles = create_fishnet_grid(*bbox, tile_degrees)
tiles = create_fishnet_grid(*bbox, tile_side_meters)
unbuffered_tiles = None

tile_grid = []
Expand Down Expand Up @@ -138,7 +136,7 @@ def count(self):
return self._zonal_stats("count")

def _zonal_stats(self, stats_func):
if box(*self.zones.total_bounds).area <= MAX_TILE_SIZE**2:
if box(*self.zones.total_bounds).area <= MAX_TILE_SIDE_SIZE_METERS**2:
stats = self._zonal_stats_tile(self.zones, [stats_func])
else:
stats = self._zonal_stats_fishnet(stats_func)
Expand All @@ -162,7 +160,7 @@ def group_layer_values(df):

def _zonal_stats_fishnet(self, stats_func):
# fishnet GeoDataFrame into smaller tiles
fishnet = create_fishnet_grid(*self.zones.total_bounds, MAX_TILE_SIZE)
fishnet = create_fishnet_grid(*self.zones.total_bounds, MAX_TILE_SIDE_SIZE_METERS)

# spatial join with fishnet grid and then intersect geometries with fishnet tiles
joined = self.zones.sjoin(fishnet)
Expand Down Expand Up @@ -259,30 +257,39 @@ def get_utm_zone_epsg(bbox) -> str:
return f"EPSG:{epsg}"


def create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size, lon_degree_buffer=0, lat_degree_buffer=0):
x, y = (min_x, min_y)
def create_fishnet_grid(min_lon, min_lat, max_lon, max_lat, tile_side_meters, tile_buffer_meters=0):
lon_coord, lat_coord = (min_lon, min_lat)
geom_array = []

center_lat = (min_lat + max_lat) / 2
lon_side_offset, lat_side_offset = offset_meters_to_geographic_degrees(center_lat, tile_side_meters)
if tile_buffer_meters == 0:
lon_buffer_offset = 0
lat_buffer_offset = 0
else:
lon_buffer_offset, lat_buffer_offset = offset_meters_to_geographic_degrees(center_lat, tile_buffer_meters)

# Polygon Size
while y < max_y:
while x < max_x:
cell_min_x = x - lon_degree_buffer
cell_min_y = y - lat_degree_buffer
cell_max_x = x + cell_size + lon_degree_buffer
cell_max_y = y + cell_size + lat_degree_buffer
while lat_coord < max_lat:
while lon_coord < max_lon:
cell_min_lon = lon_coord - lon_buffer_offset
cell_min_lat = lat_coord - lat_buffer_offset
cell_max_lon = lon_coord + lon_side_offset + lon_buffer_offset
cell_max_lat = lat_coord + lat_side_offset + lat_buffer_offset
geom = geometry.Polygon(
[
(cell_min_x, cell_min_y),
(cell_min_x, cell_max_y),
(cell_max_x, cell_max_y),
(cell_max_x, cell_min_y),
(cell_min_x, cell_min_y),
(cell_min_lon, cell_min_lat),
(cell_min_lon, cell_max_lat),
(cell_max_lon, cell_max_lat),
(cell_max_lon, cell_min_lat),
(cell_min_lon, cell_min_lat),
]
)
geom_array.append(geom)
x += cell_size
x = min_x
y += cell_size
lon_coord += lon_side_offset
lon_coord = min_lon

lat_coord += lat_side_offset

fishnet = gpd.GeoDataFrame(geom_array, columns=["geometry"]).set_crs("EPSG:4326")
fishnet["fishnet_geometry"] = fishnet["geometry"]
Expand Down Expand Up @@ -367,14 +374,12 @@ def write_dataarray(path, data):
else:
data.rio.to_raster(raster_path=path, driver="COG")

def meters_to_offset_degrees(bbox, offset_meters):
min_lat = bbox[1]
max_lat = bbox[3]
center_lat = (min_lat + max_lat) / 2
def offset_meters_to_geographic_degrees(decimal_latitude, length_m):
# TODO consider replacing this spherical calculation with ellipsoidal
earth_radius_m = 6378137
rad = 180/math.pi

meters_per_degree_of_lat = 111111
earth_circumference_meters = 40075000
lon_degree_offset = offset_meters/ (earth_circumference_meters * math.cos(center_lat) / 360)
lat_degree_offset = offset_meters / meters_per_degree_of_lat
lon_degree_offset = abs((length_m / (earth_radius_m * math.cos(math.pi*decimal_latitude/180))) * rad)
lat_degree_offset = abs((length_m / earth_radius_m) * rad)

return abs(lon_degree_offset), abs(lat_degree_offset)
return lon_degree_offset, lat_degree_offset
Original file line number Diff line number Diff line change
Expand Up @@ -31,71 +31,73 @@
def test_write_albedo(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'albedo.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo())
Albedo(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
Albedo(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_albedo_tiled_unbuffered(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'albedo_tiled.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo())
(Albedo(spatial_resolution=target_resolution).
write(bbox_info.bounds, file_path, tile_degrees=0.01, buffer_meters=None))
write(bbox_info.bounds, file_path, tile_side_meters=1000, tile_buffer_meters=None))
file_count = get_file_count_in_folder(file_path)

assert file_count == 5
expected_file_count = 7 # includes 6 tiles and one geojson file
assert file_count == expected_file_count

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_albedo_tiled_buffered(target_folder, bbox_info, target_spatial_resolution_multiplier):
buffer_meters = 500
file_path = prep_output_path(target_folder, 'albedo_tiled.tif')
file_path = prep_output_path(target_folder, 'albedo_tiled_buffered.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(Albedo())
(Albedo(spatial_resolution=target_resolution).
write(bbox_info.bounds, file_path, tile_degrees=0.01, buffer_meters=buffer_meters))
write(bbox_info.bounds, file_path, tile_side_meters=1000, tile_buffer_meters=buffer_meters))
file_count = get_file_count_in_folder(file_path)

assert file_count == 6
expected_file_count = 8 # includes 6 tiles and two geojson files
assert file_count == expected_file_count


@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_alos_dsm(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'alos_dsm.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(AlosDSM())
AlosDSM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
AlosDSM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_average_net_building_height(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'average_net_building_height.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(AverageNetBuildingHeight())
AverageNetBuildingHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
AverageNetBuildingHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_esa_world_cover(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'esa_world_cover.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(EsaWorldCover())
EsaWorldCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
EsaWorldCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_high_land_surface_temperature(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'high_land_surface_temperature.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(HighLandSurfaceTemperature())
HighLandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
HighLandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_impervious_surface(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'impervious_surface.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(ImperviousSurface())
LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_land_surface_temperature(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'land_surface_temperature.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(LandSurfaceTemperature())
LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
LandSurfaceTemperature(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

# TODO Class is no longer used, but may be useful later
Expand All @@ -110,27 +112,27 @@ def test_write_land_surface_temperature(target_folder, bbox_info, target_spatial
def test_write_nasa_dem(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'nasa_dem.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(NasaDEM())
NasaDEM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
NasaDEM(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_natural_areas(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'natural_areas.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(NaturalAreas())
NaturalAreas(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
NaturalAreas(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_ndvi_sentinel2_gee(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'ndvi_sentinel2_gee.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(NdviSentinel2())
NdviSentinel2(year=2023, spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
NdviSentinel2(year=2023, spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_openbuildings(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'open_buildings.geojson')
OpenBuildings(bbox_info.country).write(bbox_info.bounds, file_path, tile_degrees=None)
OpenBuildings(bbox_info.country).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

# TODO Class write is not functional. Is class still needed or have we switched to overture?
Expand All @@ -143,7 +145,7 @@ def test_write_openbuildings(target_folder, bbox_info, target_spatial_resolution
@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_overture_buildings(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'overture_buildings.geojson')
OvertureBuildings().write(bbox_info.bounds, file_path, tile_degrees=None)
OvertureBuildings().write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

# TODO Class is no longer used, but may be useful later
Expand All @@ -158,33 +160,33 @@ def test_write_overture_buildings(target_folder, bbox_info, target_spatial_resol
def test_write_smart_surface_lulc(target_folder, bbox_info, target_spatial_resolution_multiplier):
# Note: spatial_resolution not implemented for this raster class
file_path = prep_output_path(target_folder, 'smart_surface_lulc.tif')
SmartSurfaceLULC().write(bbox_info.bounds, file_path, tile_degrees=None)
SmartSurfaceLULC().write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_tree_canopy_height(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'tree_canopy_height.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(TreeCanopyHeight())
TreeCanopyHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
TreeCanopyHeight(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_tree_cover(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'tree_cover.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(TreeCover())
TreeCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
TreeCover(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_urban_land_use(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'urban_land_use.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(UrbanLandUse())
UrbanLandUse(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
UrbanLandUse(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)

@pytest.mark.skipif(RUN_DUMPS == False, reason='Skipping since RUN_DUMPS set to False')
def test_write_world_pop(target_folder, bbox_info, target_spatial_resolution_multiplier):
file_path = prep_output_path(target_folder, 'world_pop.tif')
target_resolution = target_spatial_resolution_multiplier * get_class_default_spatial_resolution(WorldPop())
WorldPop(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_degrees=None)
WorldPop(spatial_resolution=target_resolution).write(bbox_info.bounds, file_path, tile_side_meters=None)
assert verify_file_is_populated(file_path)
Loading
Loading