From 35a581f2cb860f11f22aae97f4028980d20887b9 Mon Sep 17 00:00:00 2001
From: Ziga Cernigoj
Date: Mon, 20 Nov 2023 15:25:25 +0100
Subject: [PATCH] don't use redundant bands in post-processing

---
NOTE(review): this patch was restored from a whitespace-collapsed copy.
Leading indentation of every hunk line is reconstructed (the nesting depth
in rest/app.py is a guess) -- confirm against the target tree, or apply with
`git apply --ignore-whitespace`. Three added lines were corrected relative
to the collapsed copy, so the post-image blob ids on the `index` lines are
stale:
  * rest/app.py: log the failure BEFORE raising; previously the log() call
    followed the raise and was unreachable.
  * gtiff_parser.py: dropped the `f` prefix from a placeholder-less f-string.
  * gtiff_parser.py: use integer floor division (`//`) for the time-label
    index instead of int(float division).

 rest/app.py                          |  5 +-
 rest/post_processing/gtiff_parser.py | 74 +++++++---------------------
 2 files changed, 19 insertions(+), 60 deletions(-)

diff --git a/rest/app.py b/rest/app.py
index dbd3f090..52e42486 100644
--- a/rest/app.py
+++ b/rest/app.py
@@ -600,9 +600,8 @@ def add_job_to_queue(job_id):
         try:
             parse_sh_gtiff_to_format(job, bucket)
         except Exception as e:
-            print("parsing didn't succeed")
-            print(e)
-
+            log(ERROR, f"Post-processing did not succeed: {e}")
+            raise Internal("Post-processing did not succeed")
 
         # END OF POST_PROCESSING
 
diff --git a/rest/post_processing/gtiff_parser.py b/rest/post_processing/gtiff_parser.py
index 45ae1015..d9c93d07 100644
--- a/rest/post_processing/gtiff_parser.py
+++ b/rest/post_processing/gtiff_parser.py
@@ -14,68 +14,36 @@
 
 # assume it's only 1 time and 1 bands dimension
 def check_dimensions(time_dimensions, bands_dimensions):
-    if len(time_dimensions) == 0:
-        print("No time dimensions exist. Only 1 time dimension is supported.")
-        # raise Internal("No time dimensions exist. Only 1 time dimension is supported.")
-
     if len(time_dimensions) > 1:
-        raise Internal("More than 1 time dimension exist. Only 1 time dimension is supported.")
-
-    if len(bands_dimensions) == 0:
-        raise Internal("No bands dimensions exist. Only 1 bands dimension is supported.")
+        raise Internal("More than 1 time dimension exist. Only 0 or 1 time dimension is supported.")
 
     if len(bands_dimensions) > 1:
-        raise Internal("More than 1 bands dimension exist. Only 1 bands dimension is supported.")
+        raise Internal("More than 1 bands dimension exist. Only 0 or 1 bands dimension is supported.")
 
 
 def get_timestamps_arrays(datacube_time_as_bands, time_dimensions, bands_dimensions, output_format):
-    num_of_img_bands = len(datacube_time_as_bands["band"])
-    num_of_bands_dimension = len(bands_dimensions[0]["labels"])
+    bands_dimension = bands_dimensions[0] if len(bands_dimensions) > 0 else None
+    time_dimension = time_dimensions[0] if len(time_dimensions) > 0 else None
 
-    num_time_labels = 0
-    for time_dim in time_dimensions:
-        num_time_labels += len(time_dim["labels"])
-
-    num_band_labels = 0
-    for band_dim in bands_dimensions:
-        num_band_labels += len(band_dim["labels"])
+    num_of_img_bands = len(datacube_time_as_bands["band"])
+    num_of_band_labels = len(bands_dimension["labels"]) if bands_dimension else 1
+    num_of_time_labels = len(time_dimension["labels"]) if time_dimension else 1
+    num_of_usable_img_bands = num_of_time_labels * num_of_band_labels
 
-    num_actual_img_bands = (num_time_labels or 1) * (num_band_labels or 1)
+    if num_of_img_bands < num_of_usable_img_bands:
+        raise Internal("Datacube dimensions not compatible with returned image.")
 
     list_of_timestamps = []
     list_of_timestamp_arrays = []
 
-    printdata = {
-        "data": datacube_time_as_bands,
-        "data_length": len(datacube_time_as_bands),
-        "data_len/num_of_bands_dimension": len(datacube_time_as_bands) / num_of_bands_dimension,
-        "bands_dimensions": bands_dimensions,
-        "time_dimensions": time_dimensions,
-        "num_img_bands": num_of_img_bands,
-        "num_bands_dim": num_of_bands_dimension,
-        "range": range(0, num_actual_img_bands, num_of_bands_dimension),
-        "num_actual_img_bands": num_actual_img_bands,
-        "num_band_labels": num_band_labels,
-        "num_time_labels": num_time_labels,
-    }
-
-    print("get_timestamps_arrays")
-    print(json.dumps(printdata, sort_keys=True, indent=4, default=str))
-
-    for i in range(0, num_actual_img_bands, num_of_bands_dimension):
-
-        print("for loop", {
-            "time_dims": time_dimensions,
-            "time_dim_index": int(i / num_of_bands_dimension)
-        })
-
-
-        date = time_dimensions[0]["labels"][int(i / num_of_bands_dimension)] if num_time_labels > 0 else ""
-        timestamp_array = datacube_time_as_bands[i : i + num_of_bands_dimension]
+    for i in range(0, num_of_usable_img_bands, num_of_band_labels):
+        date = time_dimension["labels"][i // num_of_band_labels] if time_dimension else None
+        timestamp_array = datacube_time_as_bands[i : i + num_of_band_labels]
 
         if output_format in [CustomMimeType.NETCDF, CustomMimeType.ZARR]:
-            timestamp_array = timestamp_array.assign_coords(band=bands_dimensions[0]["labels"])
-            if num_time_labels > 0:
+            if bands_dimension:
+                timestamp_array = timestamp_array.assign_coords(band=bands_dimension["labels"])
+            if time_dimension:
                 timestamp_array = timestamp_array.assign_coords(t=pd.to_datetime(parser.parse(date)))
                 timestamp_array = timestamp_array.expand_dims(dim="t")
 
@@ -88,7 +56,7 @@ def get_timestamps_arrays(datacube_time_as_bands, time_dimensions, bands_dimensi
 def save_as_gtiff(list_of_timestamps, list_of_timestamp_arrays, output_dir, output_name):
     output_file_paths = []
     for array, date in zip(list_of_timestamp_arrays, list_of_timestamps):
-        date_string = "" if date == "" else f"_{date}"
+        date_string = f"_{date}" if date else ""
         file_name = f"{output_name['name']}{date_string}{output_name['ext']}"
         file_path = os.path.join(output_dir, file_name)
         output_file_paths.append(file_path)
@@ -125,14 +93,6 @@ def parse_multitemporal_gtiff_to_format(input_tiff, input_metadata, output_dir,
     time_dimensions = [dim for dim in datacube_metadata["outputDimensions"] if dim["type"] == "temporal"]
     bands_dimensions = [dim for dim in datacube_metadata["outputDimensions"] if dim["type"] == "bands"]
 
-    # mock a bands dimension (with 1 band) if it's not present in the data
-    # e.g. save_result process right after ndvi process which doesn't have a target band set
-    if len(bands_dimensions) == 0:
-        bands_dimensions = [{"name": "bands", "type": "bands", "labels": ["results"]}]
-
-    # if len(time_dimensions) == 0:
-    #     time_dimensions = [{"name": "t", "type": "temporal", "labels": [""]}]
-
     check_dimensions(time_dimensions, bands_dimensions)
 
     list_of_timestamps, list_of_timestamp_arrays = get_timestamps_arrays(