From 35a581f2cb860f11f22aae97f4028980d20887b9 Mon Sep 17 00:00:00 2001
From: Ziga Cernigoj <ziga.cernigoj@sinergise.com>
Date: Mon, 20 Nov 2023 15:25:25 +0100
Subject: [PATCH] don't use redundant bands in post-processing

---
 rest/app.py                          |  5 +-
 rest/post_processing/gtiff_parser.py | 74 +++++++---------------------
 2 files changed, 19 insertions(+), 60 deletions(-)

diff --git a/rest/app.py b/rest/app.py
index dbd3f090..52e42486 100644
--- a/rest/app.py
+++ b/rest/app.py
@@ -600,9 +600,8 @@ def add_job_to_queue(job_id):
         try:
             parse_sh_gtiff_to_format(job, bucket)
         except Exception as e:
-            print("parsing didn't succeed")
-            print(e)
-
+            log(ERROR, f"Post-processing did not succeed:  {e}")  # raised message is generic; keep details in the log
+            raise Internal("Post-processing did not succeed") from e
 
         # END OF POST_PROCESSING
 
diff --git a/rest/post_processing/gtiff_parser.py b/rest/post_processing/gtiff_parser.py
index 45ae1015..d9c93d07 100644
--- a/rest/post_processing/gtiff_parser.py
+++ b/rest/post_processing/gtiff_parser.py
@@ -14,68 +14,36 @@
 
 # assume it's only 1 time and 1 bands dimension
 def check_dimensions(time_dimensions, bands_dimensions):
-    if len(time_dimensions) == 0:
-        print("No time dimensions exist. Only 1 time dimension is supported.")
-        # raise Internal("No time dimensions exist. Only 1 time dimension is supported.")
-
     if len(time_dimensions) > 1:
-        raise Internal("More than 1 time dimension exist. Only 1 time dimension is supported.")
-
-    if len(bands_dimensions) == 0:
-        raise Internal("No bands dimensions exist. Only 1 bands dimension is supported.")
+        raise Internal("More than 1 time dimension exist. Only 0 or 1 time dimension is supported.")
 
     if len(bands_dimensions) > 1:
-        raise Internal("More than 1 bands dimension exist. Only 1 bands dimension is supported.")
+        raise Internal("More than 1 bands dimension exist. Only 0 or 1 bands dimension is supported.")
 
 
 def get_timestamps_arrays(datacube_time_as_bands, time_dimensions, bands_dimensions, output_format):
-    num_of_img_bands = len(datacube_time_as_bands["band"])
-    num_of_bands_dimension = len(bands_dimensions[0]["labels"])
+    bands_dimension = bands_dimensions[0] if len(bands_dimensions) > 0 else None
+    time_dimension = time_dimensions[0] if len(time_dimensions) > 0 else None
 
-    num_time_labels = 0
-    for time_dim in time_dimensions:
-        num_time_labels += len(time_dim["labels"])
-    
-    num_band_labels = 0
-    for band_dim in bands_dimensions:
-        num_band_labels += len(band_dim["labels"])
+    num_of_img_bands = len(datacube_time_as_bands["band"])
+    num_of_band_labels = len(bands_dimension["labels"]) if bands_dimension else 1
+    num_of_time_labels = len(time_dimension["labels"]) if time_dimension else 1
+    num_of_usable_img_bands = num_of_time_labels * num_of_band_labels
 
-    num_actual_img_bands = (num_time_labels or 1) * (num_band_labels or 1)
+    if num_of_img_bands < num_of_usable_img_bands:
+        raise Internal(f"Datacube dimensions not compatible with returned image.")
 
     list_of_timestamps = []
     list_of_timestamp_arrays = []
 
-    printdata = {
-        "data": datacube_time_as_bands,
-        "data_length": len(datacube_time_as_bands),
-        "data_len/num_of_bands_dimension": len(datacube_time_as_bands) / num_of_bands_dimension,
-        "bands_dimensions": bands_dimensions,
-        "time_dimensions": time_dimensions,
-        "num_img_bands": num_of_img_bands, 
-        "num_bands_dim": num_of_bands_dimension, 
-        "range": range(0, num_actual_img_bands, num_of_bands_dimension),
-        "num_actual_img_bands": num_actual_img_bands,
-        "num_band_labels": num_band_labels,
-        "num_time_labels": num_time_labels,
-    }
-
-    print("get_timestamps_arrays")
-    print(json.dumps(printdata, sort_keys=True, indent=4, default=str))
-
-    for i in range(0, num_actual_img_bands, num_of_bands_dimension):
-
-        print("for loop", {
-            "time_dims": time_dimensions, 
-            "time_dim_index": int(i / num_of_bands_dimension)
-        })
-
-
-        date = time_dimensions[0]["labels"][int(i / num_of_bands_dimension)] if num_time_labels > 0 else ""
-        timestamp_array = datacube_time_as_bands[i : i + num_of_bands_dimension]
+    for i in range(0, num_of_usable_img_bands, num_of_band_labels):
+        date = time_dimension["labels"][int(i / num_of_band_labels)] if time_dimension else None
+        timestamp_array = datacube_time_as_bands[i : i + num_of_band_labels]
 
         if output_format in [CustomMimeType.NETCDF, CustomMimeType.ZARR]:
-            timestamp_array = timestamp_array.assign_coords(band=bands_dimensions[0]["labels"])
-            if num_time_labels > 0:
+            if bands_dimension:
+                timestamp_array = timestamp_array.assign_coords(band=bands_dimension["labels"])
+            if time_dimension:
                 timestamp_array = timestamp_array.assign_coords(t=pd.to_datetime(parser.parse(date)))
                 timestamp_array = timestamp_array.expand_dims(dim="t")
 
@@ -88,7 +56,7 @@ def get_timestamps_arrays(datacube_time_as_bands, time_dimensions, bands_dimensi
 def save_as_gtiff(list_of_timestamps, list_of_timestamp_arrays, output_dir, output_name):
     output_file_paths = []
     for array, date in zip(list_of_timestamp_arrays, list_of_timestamps):
-        date_string = "" if date == "" else f"_{date}"
+        date_string = f"_{date}" if date else ""
         file_name = f"{output_name['name']}{date_string}{output_name['ext']}"
         file_path = os.path.join(output_dir, file_name)
         output_file_paths.append(file_path)
@@ -125,14 +93,6 @@ def parse_multitemporal_gtiff_to_format(input_tiff, input_metadata, output_dir,
     time_dimensions = [dim for dim in datacube_metadata["outputDimensions"] if dim["type"] == "temporal"]
     bands_dimensions = [dim for dim in datacube_metadata["outputDimensions"] if dim["type"] == "bands"]
 
-    # mock a bands dimension (with 1 band) if it's not present in the data
-    # e.g. save_result process right after ndvi process which doesn't have a target band set
-    if len(bands_dimensions) == 0:
-        bands_dimensions = [{"name": "bands", "type": "bands", "labels": ["results"]}]
-
-    # if len(time_dimensions) == 0:
-    #     time_dimensions = [{"name": "t", "type": "temporal", "labels": [""]}]
-
     check_dimensions(time_dimensions, bands_dimensions)
 
     list_of_timestamps, list_of_timestamp_arrays = get_timestamps_arrays(