Skip to content

Commit

Permalink
don't use redundant bands in post-processing
Browse files Browse the repository at this point in the history
  • Loading branch information
zcernigoj committed Nov 20, 2023
1 parent c3202e7 commit 35a581f
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 60 deletions.
5 changes: 2 additions & 3 deletions rest/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,9 +600,8 @@ def add_job_to_queue(job_id):
try:
parse_sh_gtiff_to_format(job, bucket)
except Exception as e:
print("parsing didn't succeed")
print(e)

raise Internal("Post-processing did not succeed")
log(ERROR, f"Post-processing did not succeed: {e}")

# END OF POST_PROCESSING

Expand Down
74 changes: 17 additions & 57 deletions rest/post_processing/gtiff_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,68 +14,36 @@

# assume it's only 1 time and 1 bands dimension
def check_dimensions(time_dimensions, bands_dimensions):
if len(time_dimensions) == 0:
print("No time dimensions exist. Only 1 time dimension is supported.")
# raise Internal("No time dimensions exist. Only 1 time dimension is supported.")

if len(time_dimensions) > 1:
raise Internal("More than 1 time dimension exist. Only 1 time dimension is supported.")

if len(bands_dimensions) == 0:
raise Internal("No bands dimensions exist. Only 1 bands dimension is supported.")
raise Internal("More than 1 time dimension exist. Only 0 or 1 time dimension is supported.")

if len(bands_dimensions) > 1:
raise Internal("More than 1 bands dimension exist. Only 1 bands dimension is supported.")
raise Internal("More than 1 bands dimension exist. Only 0 or 1 bands dimension is supported.")


def get_timestamps_arrays(datacube_time_as_bands, time_dimensions, bands_dimensions, output_format):
num_of_img_bands = len(datacube_time_as_bands["band"])
num_of_bands_dimension = len(bands_dimensions[0]["labels"])
bands_dimension = bands_dimensions[0] if len(bands_dimensions) > 0 else None
time_dimension = time_dimensions[0] if len(time_dimensions) > 0 else None

num_time_labels = 0
for time_dim in time_dimensions:
num_time_labels += len(time_dim["labels"])

num_band_labels = 0
for band_dim in bands_dimensions:
num_band_labels += len(band_dim["labels"])
num_of_img_bands = len(datacube_time_as_bands["band"])
num_of_band_labels = len(bands_dimension["labels"]) if bands_dimension else 1
num_of_time_labels = len(time_dimension["labels"]) if time_dimension else 1
num_of_usable_img_bands = num_of_time_labels * num_of_band_labels

num_actual_img_bands = (num_time_labels or 1) * (num_band_labels or 1)
if num_of_img_bands < num_of_usable_img_bands:
raise Internal(f"Datacube dimensions not compatible with returned image.")

list_of_timestamps = []
list_of_timestamp_arrays = []

printdata = {
"data": datacube_time_as_bands,
"data_length": len(datacube_time_as_bands),
"data_len/num_of_bands_dimension": len(datacube_time_as_bands) / num_of_bands_dimension,
"bands_dimensions": bands_dimensions,
"time_dimensions": time_dimensions,
"num_img_bands": num_of_img_bands,
"num_bands_dim": num_of_bands_dimension,
"range": range(0, num_actual_img_bands, num_of_bands_dimension),
"num_actual_img_bands": num_actual_img_bands,
"num_band_labels": num_band_labels,
"num_time_labels": num_time_labels,
}

print("get_timestamps_arrays")
print(json.dumps(printdata, sort_keys=True, indent=4, default=str))

for i in range(0, num_actual_img_bands, num_of_bands_dimension):

print("for loop", {
"time_dims": time_dimensions,
"time_dim_index": int(i / num_of_bands_dimension)
})


date = time_dimensions[0]["labels"][int(i / num_of_bands_dimension)] if num_time_labels > 0 else ""
timestamp_array = datacube_time_as_bands[i : i + num_of_bands_dimension]
for i in range(0, num_of_usable_img_bands, num_of_band_labels):
date = time_dimension["labels"][int(i / num_of_band_labels)] if time_dimension else None
timestamp_array = datacube_time_as_bands[i : i + num_of_band_labels]

if output_format in [CustomMimeType.NETCDF, CustomMimeType.ZARR]:
timestamp_array = timestamp_array.assign_coords(band=bands_dimensions[0]["labels"])
if num_time_labels > 0:
if bands_dimension:
timestamp_array = timestamp_array.assign_coords(band=bands_dimension["labels"])
if time_dimension:
timestamp_array = timestamp_array.assign_coords(t=pd.to_datetime(parser.parse(date)))
timestamp_array = timestamp_array.expand_dims(dim="t")

Expand All @@ -88,7 +56,7 @@ def get_timestamps_arrays(datacube_time_as_bands, time_dimensions, bands_dimensi
def save_as_gtiff(list_of_timestamps, list_of_timestamp_arrays, output_dir, output_name):
output_file_paths = []
for array, date in zip(list_of_timestamp_arrays, list_of_timestamps):
date_string = "" if date == "" else f"_{date}"
date_string = f"_{date}" if date else ""
file_name = f"{output_name['name']}{date_string}{output_name['ext']}"
file_path = os.path.join(output_dir, file_name)
output_file_paths.append(file_path)
Expand Down Expand Up @@ -125,14 +93,6 @@ def parse_multitemporal_gtiff_to_format(input_tiff, input_metadata, output_dir,
time_dimensions = [dim for dim in datacube_metadata["outputDimensions"] if dim["type"] == "temporal"]
bands_dimensions = [dim for dim in datacube_metadata["outputDimensions"] if dim["type"] == "bands"]

# mock a bands dimension (with 1 band) if it's not present in the data
# e.g. save_result process right after ndvi process which doesn't have a target band set
if len(bands_dimensions) == 0:
bands_dimensions = [{"name": "bands", "type": "bands", "labels": ["results"]}]

# if len(time_dimensions) == 0:
# time_dimensions = [{"name": "t", "type": "temporal", "labels": [""]}]

check_dimensions(time_dimensions, bands_dimensions)

list_of_timestamps, list_of_timestamp_arrays = get_timestamps_arrays(
Expand Down

0 comments on commit 35a581f

Please sign in to comment.