-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'feature/zarr-output' into 'master'
zarr and netcdf support; split multitemporal gtiff into separate gtiffs, each containing only 1 time point. See merge request team-6/openeo-sentinelhub-python-driver!315
- Loading branch information
Showing
13 changed files
with
1,262 additions
and
626 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"gis_data_types": [ | ||
"raster" | ||
], | ||
"parameters": { | ||
"datatype": { | ||
"type": "string", | ||
"description": "The values data type.", | ||
"enum": [ | ||
"byte", | ||
"uint16", | ||
"float32" | ||
], | ||
"default": "float32" | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"gis_data_types": [ | ||
"raster" | ||
], | ||
"parameters": { | ||
"datatype": { | ||
"type": "string", | ||
"description": "The values data type.", | ||
"enum": [ | ||
"byte", | ||
"uint16", | ||
"float32" | ||
], | ||
"default": "float32" | ||
} | ||
} | ||
} |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from sentinelhub import MimeType | ||
from processing.const import CustomMimeType | ||
|
||
# Local scratch directory for post-processing output. The trailing slash is
# significant: paths under it are built by plain string concatenation.
TMP_FOLDER = "/tmp/"

# Base file name shared by the parsed output of every supported format.
_OUTPUT_BASENAME = "output"

# Per output format: the base name and extension of the file(s) written
# after the multitemporal GeoTIFF has been parsed.
parsed_output_file_name = {
    MimeType.TIFF: {"name": _OUTPUT_BASENAME, "ext": ".tif"},
    CustomMimeType.ZARR: {"name": _OUTPUT_BASENAME, "ext": ".zarr"},
    CustomMimeType.NETCDF: {"name": _OUTPUT_BASENAME, "ext": ".nc"},
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import os | ||
import shutil | ||
import rioxarray | ||
from dateutil import parser | ||
import pandas as pd | ||
import xarray as xr | ||
import requests | ||
from sentinelhub import MimeType | ||
|
||
from processing.const import CustomMimeType | ||
from openeoerrors import Internal | ||
|
||
# assume it's only 1 time and 1 bands dimension | ||
def check_dimensions(time_dimensions, bands_dimensions):
    """Verify that exactly one temporal and one bands dimension are present.

    Args:
        time_dimensions: Sized collection of temporal dimension descriptions.
        bands_dimensions: Sized collection of bands dimension descriptions.

    Raises:
        Internal: If either collection does not hold exactly one entry. The
            temporal dimensions are validated before the bands dimensions.
    """
    time_count = len(time_dimensions)
    if time_count < 1:
        raise Internal("No time dimensions exist. Only 1 time dimension is supported.")
    if time_count > 1:
        raise Internal("More than 1 time dimension exist. Only 1 time dimension is supported.")

    bands_count = len(bands_dimensions)
    if bands_count < 1:
        raise Internal("No bands dimensions exist. Only 1 bands dimension is supported.")
    if bands_count > 1:
        raise Internal("More than 1 bands dimension exist. Only 1 bands dimension is supported.")
|
||
|
||
def get_timestamps_arrays(datacube_time_as_bands, time_dimensions, bands_dimensions, output_format):
    """Cut a datacube with time steps stacked along its band axis into per-timestamp arrays.

    The image bands are consumed in consecutive groups, each as long as the
    label list of the first bands dimension. The n-th group is paired with
    the n-th label of the first temporal dimension.

    Args:
        datacube_time_as_bands: Object that exposes its image bands under the
            "band" key and can be sliced along its leading axis.
        time_dimensions: Temporal dimension descriptions; only the "labels"
            of the first entry are read.
        bands_dimensions: Bands dimension descriptions; only the "labels" of
            the first entry are read.
        output_format: Target format. For NetCDF and Zarr every slice
            additionally gets band labels and a leading "t" dimension holding
            the parsed timestamp.

    Returns:
        A tuple ``(timestamps, arrays)`` of two equally long lists.
    """
    total_img_bands = len(datacube_time_as_bands["band"])
    bands_per_step = len(bands_dimensions[0]["labels"])

    timestamps = []
    arrays = []

    for step_index, start in enumerate(range(0, total_img_bands, bands_per_step)):
        date = time_dimensions[0]["labels"][step_index]
        step_array = datacube_time_as_bands[start : start + bands_per_step]

        if output_format in (CustomMimeType.NETCDF, CustomMimeType.ZARR):
            # These formats carry an explicit time axis, so label the bands and
            # turn the timestamp into a length-1 "t" dimension.
            step_time = pd.to_datetime(parser.parse(date))
            step_array = (
                step_array.assign_coords(band=bands_dimensions[0]["labels"])
                .assign_coords(t=step_time)
                .expand_dims(dim="t")
            )

        timestamps.append(date)
        arrays.append(step_array)

    return timestamps, arrays
|
||
|
||
def save_as_gtiff(list_of_timestamps, list_of_timestamp_arrays, output_dir, output_name):
    """Write each per-timestamp array to its own GeoTIFF file.

    Files are named ``<name>_<timestamp><ext>`` inside ``output_dir``.

    Args:
        list_of_timestamps: Timestamp labels, one per array.
        list_of_timestamp_arrays: Arrays exposing ``.rio.to_raster``.
        output_dir: Directory the files are written to.
        output_name: Mapping with the "name" and "ext" of the output file.

    Returns:
        List of the written file paths, in input order.
    """
    written_paths = []
    for date, array in zip(list_of_timestamps, list_of_timestamp_arrays):
        target_path = os.path.join(
            output_dir,
            "{}_{}{}".format(output_name["name"], date, output_name["ext"]),
        )
        # tiled=True: GDAL creates striped TIFFs by default; force a tiled layout.
        # windowed=True: rioxarray reads and writes one window at a time.
        array.rio.to_raster(target_path, tiled=True, windowed=True)
        written_paths.append(target_path)
    return written_paths
|
||
|
||
def save_as_netcdf(list_of_timestamp_arrays, output_dir, output_name):
    """Combine the per-timestamp arrays by coordinates and store them as one NetCDF file.

    Args:
        list_of_timestamp_arrays: Arrays to combine with ``xr.combine_by_coords``.
        output_dir: Directory the file is written to.
        output_name: Mapping with the "name" and "ext" of the output file.

    Returns:
        Single-element list holding the path of the written file.
    """
    combined = xr.combine_by_coords(list_of_timestamp_arrays)
    file_name = "{}{}".format(output_name["name"], output_name["ext"])
    target_path = os.path.join(output_dir, file_name)
    combined.to_netcdf(target_path)
    return [target_path]
|
||
|
||
def save_as_zarr(list_of_timestamp_arrays, output_dir, output_name):
    """Combine the per-timestamp arrays by coordinates and store them as a zipped Zarr store.

    Args:
        list_of_timestamp_arrays: Arrays to combine with ``xr.combine_by_coords``.
        output_dir: Directory the store and its archive are written to.
        output_name: Mapping with the "name" and "ext" of the output store.

    Returns:
        Single-element list holding the path of the zip archive
        (the store path with ".zip" appended).
    """
    combined = xr.combine_by_coords(list_of_timestamp_arrays)
    store_name = "{}{}".format(output_name["name"], output_name["ext"])
    store_path = os.path.join(output_dir, store_name)
    combined.to_zarr(store_path)

    # A Zarr store is a folder of many small files; ship it as one archive
    # so the results listing is not flooded with them.
    shutil.make_archive(base_name=store_path, format="zip", root_dir=store_path)
    return [f"{store_path}.zip"]
|
||
|
||
# Upper bound (seconds) for fetching the metadata JSON, so a stalled
# connection cannot block post-processing forever.
_METADATA_REQUEST_TIMEOUT_SECONDS = 60


def parse_multitemporal_gtiff_to_format(input_tiff, input_metadata, output_dir, output_name, output_format):
    """Convert a multitemporal GeoTIFF (time steps stacked as bands) to the requested format.

    Args:
        input_tiff: Path or URL of the GeoTIFF, as accepted by
            ``rioxarray.open_rasterio``.
        input_metadata: URL of the JSON metadata document. Its
            "outputDimensions" list describes the temporal and bands
            dimensions of the datacube.
        output_dir: Directory the output file(s) are written to.
        output_name: Mapping with the "name" and "ext" of the output file.
        output_format: ``MimeType.TIFF``, ``CustomMimeType.NETCDF`` or
            ``CustomMimeType.ZARR``.

    Returns:
        List of paths of the written files: one GeoTIFF per timestamp, or a
        single NetCDF file / zipped Zarr store.

    Raises:
        Internal: If the format is not supported or the metadata does not
            describe exactly one temporal and at most one bands dimension.
        requests.RequestException: If the metadata cannot be fetched
            (timeout, connection error or non-success HTTP status).
    """
    # Reject unsupported formats before any download or raster access happens.
    if output_format not in (MimeType.TIFF, CustomMimeType.NETCDF, CustomMimeType.ZARR):
        raise Internal(f"Parsing to format {output_format} is not supported")

    response = requests.get(input_metadata, timeout=_METADATA_REQUEST_TIMEOUT_SECONDS)
    # Surface HTTP errors directly instead of failing later while decoding an error body.
    response.raise_for_status()
    datacube_metadata = response.json()

    output_dimensions = datacube_metadata["outputDimensions"]
    time_dimensions = [dim for dim in output_dimensions if dim["type"] == "temporal"]
    bands_dimensions = [dim for dim in output_dimensions if dim["type"] == "bands"]

    # mock a bands dimension (with 1 band) if it's not present in the data
    # e.g. save_result process right after ndvi process which doesn't have a target band set
    if not bands_dimensions:
        bands_dimensions = [{"name": "bands", "type": "bands", "labels": ["results"]}]

    check_dimensions(time_dimensions, bands_dimensions)

    # Keep the raster open only while it is being read and written, and make
    # sure the underlying file handle is released even if writing fails.
    with rioxarray.open_rasterio(input_tiff) as datacube_time_as_bands:
        list_of_timestamps, list_of_timestamp_arrays = get_timestamps_arrays(
            datacube_time_as_bands, time_dimensions, bands_dimensions, output_format
        )

        if output_format == MimeType.TIFF:
            return save_as_gtiff(list_of_timestamps, list_of_timestamp_arrays, output_dir, output_name)

        if output_format == CustomMimeType.NETCDF:
            return save_as_netcdf(list_of_timestamp_arrays, output_dir, output_name)

        return save_as_zarr(list_of_timestamp_arrays, output_dir, output_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import os | ||
import json | ||
import shutil | ||
|
||
from processing.const import ShBatchResponseOutput, ProcessingRequestTypes | ||
from processing.processing import new_process | ||
from post_processing.gtiff_parser import parse_multitemporal_gtiff_to_format | ||
from post_processing.const import TMP_FOLDER, parsed_output_file_name | ||
|
||
|
||
def check_if_already_parsed(results, output_format):
    """Report whether the bucket listing already contains a parsed output file.

    Args:
        results: Iterable of bucket entries, each with its object key under "Key".
        output_format: Output format used to look up the expected file
            name and extension in ``parsed_output_file_name``.

    Returns:
        True if any key contains both the expected name and the expected
        extension (plain substring checks), otherwise False.
    """
    return any(
        parsed_output_file_name[output_format]["name"] in entry["Key"]
        and parsed_output_file_name[output_format]["ext"] in entry["Key"]
        for entry in results
    )
|
||
|
||
def generate_subfolder_groups(batch_request_id, bucket, results):
    """Group presigned URLs of the batch data and metadata outputs by subfolder.

    The subfolder name is derived from the object key by removing the batch
    request id and every "/" and keeping what precedes the output marker.

    Args:
        batch_request_id: Id of the batch request; removed from each key.
        bucket: Object providing ``generate_presigned_url(object_key=...)``.
        results: Iterable of bucket entries, each with its object key under "Key".

    Returns:
        Dict mapping subfolder name to a dict of ``{output marker: presigned URL}``.
    """
    groups = {}
    for entry in results:
        for kind in (ShBatchResponseOutput.DATA, ShBatchResponseOutput.METADATA):
            marker = kind.value
            if marker not in entry["Key"]:
                continue

            presigned_url = bucket.generate_presigned_url(object_key=entry["Key"])
            key_without_id = entry["Key"].replace(f"{batch_request_id}", "")
            subfolder = key_without_id.replace("/", "").split(marker)[0]
            groups.setdefault(subfolder, {})[marker] = presigned_url

    return groups
|
||
|
||
def upload_output_to_bucket(local_file_paths, bucket):
    """Upload local output files to the bucket.

    The object key is the local path with its first ``len(TMP_FOLDER)``
    characters cut off.

    Args:
        local_file_paths: Iterable of local file paths to upload.
        bucket: Object providing ``upload_file_to_bucket``.
    """
    for local_path in local_file_paths:
        prefix_length = len(f"{TMP_FOLDER}")
        object_key = local_path[prefix_length:]
        bucket.upload_file_to_bucket(local_path, None, object_key)
|
||
|
||
def parse_sh_gtiff_to_format(job, bucket):
    """Convert the batch results of a job to the job's output format and upload them.

    Nothing is done if the bucket already holds a parsed output for the job.
    Otherwise every subfolder of the batch results is parsed into a scratch
    directory under ``TMP_FOLDER``, uploaded to the bucket, and the scratch
    directory is removed again, even when parsing or uploading fails.

    Args:
        job: Mapping with the "batch_request_id" and the JSON-encoded "process".
        bucket: Object used to list, presign and upload bucket objects.

    Returns:
        None.
    """
    batch_request_id = job["batch_request_id"]
    results = bucket.get_data_from_bucket(prefix=batch_request_id)

    process = new_process(json.loads(job["process"]), request_type=ProcessingRequestTypes.BATCH)
    output_format = process.get_mimetype()

    if check_if_already_parsed(results, output_format):
        return

    subfolder_groups = generate_subfolder_groups(batch_request_id, bucket, results)
    batch_request_dir = f"{TMP_FOLDER}{batch_request_id}"

    for subfolder_id, subfolder_group in subfolder_groups.items():
        input_tiff = subfolder_group[ShBatchResponseOutput.DATA.value]
        input_metadata = subfolder_group[ShBatchResponseOutput.METADATA.value]

        # preventively remove directory and create it again
        batch_subfolder = f"{batch_request_dir}/{subfolder_id}/"
        if os.path.exists(batch_request_dir):
            shutil.rmtree(batch_request_dir)
        os.makedirs(batch_subfolder)

        try:
            output_file_paths = parse_multitemporal_gtiff_to_format(
                input_tiff, input_metadata, batch_subfolder, parsed_output_file_name[output_format], output_format
            )
            upload_output_to_bucket(output_file_paths, bucket)
        finally:
            # Always remove the scratch folder, also on failure, so a broken
            # job does not leave (potentially large) files behind.
            shutil.rmtree(batch_request_dir)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters