From 8db3bd65a1a43cf6c046951d33ceb1c6fc8cbd42 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Thu, 1 Jun 2023 12:17:19 +0200
Subject: [PATCH 01/17] Functional pacasam config: 20230601_lidarhd_pacasam_dataset

---
 .../20230601_lidarhd_pacasam_dataset.yaml | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml

diff --git a/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml b/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml
new file mode 100644
index 00000000..aaa5d850
--- /dev/null
+++ b/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml
@@ -0,0 +1,15 @@
+_convert_: all # For omegaconf structs to be converted to python dictionaries
+# classification_preprocessing_dict = {source_class_code_int: target_class_code_int},
+# 3: medium vegetation -> vegetation
+# 4: high vegetation -> vegetation
+# 0: no processing --> unclassified
+# 66: synthetic points --> noise (synthetic points are useful for specific modelling tasks on already classified data).
+# We set them to noise so that they are ignored during training.
+classification_preprocessing_dict: {3: 5, 4: 5, 0: 1, 66: 65}
+
+# classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
+classification_dict: {1: "unclassified", 2: "ground", 5: vegetation, 6: "building", 9: water, 17: bridge, 64: lasting_above, 66: 65}
+
+# Input and output dims of neural net are dataset dependent:
+d_in: 9
+num_classes: 7
\ No newline at end of file
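
Review note: the `_convert_: all` flag above asks hydra to hand plain python containers, not omegaconf `DictConfig` wrappers, to whatever consumes this config group. A minimal sketch of the conversion it requests, using `OmegaConf.to_container`, which performs the same conversion hydra applies at instantiation time:

```python
from omegaconf import OmegaConf

cfg = OmegaConf.create({"classification_preprocessing_dict": {3: 5, 4: 5, 0: 1, 66: 65}})
print(type(cfg.classification_preprocessing_dict))  # omegaconf.dictconfig.DictConfig

# With conversion (what `_convert_: all` requests), values become plain dicts:
plain = OmegaConf.to_container(cfg, resolve=True)
print(type(plain["classification_preprocessing_dict"]))  # dict
```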
From 7cac6727ed223690ebeaf5932440f857e64f034a Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Tue, 1 Aug 2023 11:16:27 +0200
Subject: [PATCH 02/17] fix: standardization std fixed to not give nan values when num_nodes=1

---
 myria3d/pctl/transforms/transforms.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/myria3d/pctl/transforms/transforms.py b/myria3d/pctl/transforms/transforms.py
index a8f7a991..38852322 100755
--- a/myria3d/pctl/transforms/transforms.py
+++ b/myria3d/pctl/transforms/transforms.py
@@ -124,6 +124,8 @@ def standardize_channel(self, channel_data: torch.Tensor, clamp_sigma: int = 3):
         """Sample-wise standardization y* = (y-y_mean)/y_std. clamping to ignore large values."""
         mean = channel_data.mean()
         std = channel_data.std() + 10**-6
+        if torch.isnan(std):
+            std = 1.0
         standard = (channel_data - mean) / std
         clamp = clamp_sigma * std
         clamped = torch.clamp(input=standard, min=-clamp, max=clamp)
@@ -177,7 +179,6 @@ def __init__(
         classification_preprocessing_dict: Dict[int, int],
         classification_dict: Dict[int, str],
     ):
-
         self._set_preprocessing_mapper(classification_preprocessing_dict)
         self._set_mapper(classification_dict)
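
Review note: the edge case behind this fix, as a standalone sketch. `torch.Tensor.std()` uses the unbiased estimator and divides by n - 1, so a cloud with a single point yields NaN, which would propagate through the whole standardized channel without the new guard:

```python
import torch

channel_data = torch.tensor([42.0])  # a cloud with num_nodes=1
std = channel_data.std() + 10**-6    # unbiased std divides by (n - 1) = 0 -> nan
assert torch.isnan(std)

if torch.isnan(std):                 # the guard added by this patch
    std = 1.0
standard = (channel_data - channel_data.mean()) / std
assert not torch.isnan(standard).any()
```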
From 59cb20fd8c5b136676318163b163c705c5880e08 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Tue, 1 Aug 2023 11:17:36 +0200
Subject: [PATCH 03/17] dev: pre_filter_below_n_points now requires at least 1 node instead of 50

---
 configs/datamodule/hdf5_datamodule.yaml |  2 +-
 myria3d/pctl/dataset/utils.py           | 38 +++++++------------------
 2 files changed, 11 insertions(+), 29 deletions(-)

diff --git a/configs/datamodule/hdf5_datamodule.yaml b/configs/datamodule/hdf5_datamodule.yaml
index e430d1dc..c39147a3 100755
--- a/configs/datamodule/hdf5_datamodule.yaml
+++ b/configs/datamodule/hdf5_datamodule.yaml
@@ -15,7 +15,7 @@ pre_filter:
   _target_: functools.partial
   _args_:
     - "${get_method:myria3d.pctl.dataset.utils.pre_filter_below_n_points}"
-  min_num_nodes: 50
+  min_num_nodes: 1

 tile_width: 1000
 subtile_width: 50

diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index 1a23f397..286c6945 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -31,9 +31,7 @@ def find_file_in_dir(data_dir: str, basename: str) -> str:
     return files[0]


-def get_mosaic_of_centers(
-    tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0
-):
+def get_mosaic_of_centers(tile_width: Number, subtile_width: Number, subtile_overlap: Number = 0):
     if subtile_overlap < 0:
         raise ValueError("datamodule.subtile_overlap must be positive.")
@@ -63,9 +61,7 @@ def pdal_read_las_array(las_path: str):
 def pdal_read_las_array_as_float32(las_path: str):
     """Read LAS as a named array, casted to floats."""
     arr = pdal_read_las_array(las_path)
-    all_floats = np.dtype(
-        {"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)}
-    )
+    all_floats = np.dtype({"names": arr.dtype.names, "formats": ["f4"] * len(arr.dtype.names)})
     return arr.astype(all_floats)
@@ -101,6 +97,7 @@ def get_pdal_info_metadata(las_path: str) -> Dict:
     return json_info["metadata"]


+
 # hdf5, iterable
@@ -125,13 +122,9 @@ def split_cloud_into_samples(
     """
     points = pdal_read_las_array_as_float32(las_path)
-    pos = np.asarray(
-        [points["X"], points["Y"], points["Z"]], dtype=np.float32
-    ).transpose()
+    pos = np.asarray([points["X"], points["Y"], points["Z"]], dtype=np.float32).transpose()
     kd_tree = cKDTree(pos[:, :2] - pos[:, :2].min(axis=0))
-    XYs = get_mosaic_of_centers(
-        tile_width, subtile_width, subtile_overlap=subtile_overlap
-    )
+    XYs = get_mosaic_of_centers(tile_width, subtile_width, subtile_overlap=subtile_overlap)
     for center in tqdm(XYs, desc="Centers"):
         radius = subtile_width // 2  # Square receptive field.
         minkowski_p = np.inf
         if shape == "disk":
             # Adapt radius to have complete coverage of the data, with a slight overlap between samples.
             minkowski_p = 2
             radius = radius * math.sqrt(2)
-        sample_idx = np.array(
-            kd_tree.query_ball_point(center, r=radius, p=minkowski_p)
-        )
+        sample_idx = np.array(kd_tree.query_ball_point(center, r=radius, p=minkowski_p))
         if not len(sample_idx):
             # no points in this receptive field
             continue
@@ -150,7 +141,7 @@ def split_cloud_into_samples(
         yield sample_idx, sample_points


-def pre_filter_below_n_points(data, min_num_nodes=50):
+def pre_filter_below_n_points(data, min_num_nodes=1):
     return data.pos.shape[0] < min_num_nodes
@@ -175,19 +166,10 @@ def get_las_paths_by_split_dict(data_dir: str, split_csv_path: str) -> LAS_PATHS
     las_paths_by_split_dict: LAS_PATHS_BY_SPLIT_DICT_TYPE = {}
     split_df = pd.read_csv(split_csv_path)
     for phase in ["train", "val", "test"]:
-        basenames = split_df[
-            split_df.split == phase
-        ].basename.tolist()
-        las_paths_by_split_dict[phase] = [
-            find_file_in_dir(data_dir, b) for b in basenames
-        ]
+        basenames = split_df[split_df.split == phase].basename.tolist()
+        las_paths_by_split_dict[phase] = [find_file_in_dir(data_dir, b) for b in basenames]
     if not las_paths_by_split_dict:
-        raise FileNotFoundError(
-            (
-                f"No basename found while parsing directory {data_dir}"
-                f"using {split_csv_path} as split CSV."
-            )
-        )
+        raise FileNotFoundError((f"No basename found while parsing directory {data_dir}" f"using {split_csv_path} as split CSV."))
     return las_paths_by_split_dict
From c65ea24aa05e50bff8668d78ec7e2f8a1d082660 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Tue, 1 Aug 2023 11:49:09 +0200
Subject: [PATCH 05/17] Update default configuration to accept num_nodes>=1

---
 .github/workflows/cicd.yaml                   |  2 +-
 run.py                                        |  5 +--
 tests/conftest.py                             |  2 ++
 tests/myria3d/test_train_and_predict.py       | 32 +++++++++++++------
 ..._Myria3DV3.1.0_predict_config_V3.4.0.yaml} |  2 +-
 5 files changed, 30 insertions(+), 13 deletions(-)
 rename trained_model_assets/{proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml => proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.4.0.yaml} (99%)

diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index 03dc47a0..5baccdc8 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -61,7 +61,7 @@ jobs:
           myria3d
           python run.py
           --config-path /inputs/
-          --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0
+          --config-name proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.4.0
           predict.ckpt_path=/inputs/proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt
           predict.src_las=/inputs/792000_6272000_subset_buildings.las
           predict.output_dir=/outputs/

diff --git a/run.py b/run.py
index af1bdaf3..769b8e57 100755
--- a/run.py
+++ b/run.py
@@ -20,7 +20,7 @@
 TASK_NAME_DETECTION_STRING = "task.task_name="

 DEFAULT_DIRECTORY = "trained_model_assets/"
-DEFAULT_CONFIG_FILE = "proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml"
+DEFAULT_CONFIG_FILE = "proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.4.0.yaml"
 DEFAULT_CHECKPOINT = "proto151_V2.0_epoch_100_Myria3DV3.1.0.ckpt"
 DEFAULT_ENV = "placeholder.env"
@@ -42,6 +42,7 @@ def launch_train(config: DictConfig):  # pragma: no cover (it's just an initial
     # Imports should be nested inside @hydra.main to optimize tab completion
     # Read more here: https://github.com/facebookresearch/hydra/issues/934
     from myria3d.train import train
+
     utils.extras(config)

     # Pretty print config using Rich library
@@ -88,7 +89,7 @@ def launch_hdf5(config: DictConfig):
         subtile_shape=config.datamodule.get("subtile_shape"),
         pre_filter=hydra.utils.instantiate(config.datamodule.get("pre_filter")),
         subtile_overlap_train=config.datamodule.get("subtile_overlap_train"),
-        points_pre_transform=hydra.utils.instantiate(config.datamodule.get("points_pre_transform"))
+        points_pre_transform=hydra.utils.instantiate(config.datamodule.get("points_pre_transform")),
     )

diff --git a/tests/conftest.py b/tests/conftest.py
index f824a613..09f9e34b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -8,6 +8,8 @@
 from myria3d.pctl.dataset.toy_dataset import make_toy_dataset_from_test_file

+SINGLE_POINT_CLOUD = "tests/data/single-point-cloud.laz"
+

 @pytest.fixture(scope="session")
 def toy_dataset_hdf5_path(tmpdir_factory):

diff --git a/tests/myria3d/test_train_and_predict.py b/tests/myria3d/test_train_and_predict.py
index 2c9eaac1..b057cb12 100644
--- a/tests/myria3d/test_train_and_predict.py
+++ b/tests/myria3d/test_train_and_predict.py
@@ -8,7 +8,11 @@
 from myria3d.pctl.dataset.utils import pdal_read_las_array
 from myria3d.predict import predict
 from myria3d.train import train
-from tests.conftest import make_default_hydra_cfg, run_hydra_decorated_command
+from tests.conftest import (
+    make_default_hydra_cfg,
+    run_hydra_decorated_command,
+    SINGLE_POINT_CLOUD,
+)
 from tests.runif import RunIf

 """
@@ -56,8 +60,7 @@ def test_FrenchLidar_RandLaNetDebug_with_gpu(toy_dataset_hdf5_path, tmpdir_facto
     # Attention to concurrency with other processes using the GPU when running tests.
     gpu_id = 0
     cfg_one_epoch = make_default_hydra_cfg(
-        overrides=["experiment=RandLaNetDebug", f"trainer.gpus=[{gpu_id}]"]
-        + tmp_paths_overrides
+        overrides=["experiment=RandLaNetDebug", f"trainer.gpus=[{gpu_id}]"] + tmp_paths_overrides
     )
     train(cfg_one_epoch)
@@ -84,9 +87,22 @@ def test_predict_as_command(one_epoch_trained_RandLaNet_checkpoint, tmpdir):
     run_hydra_decorated_command(command)


-def test_RandLaNet_predict_with_invariance_checks(
-    one_epoch_trained_RandLaNet_checkpoint, tmpdir
-):
+def test_predict_on_single_point_cloud(one_epoch_trained_RandLaNet_checkpoint, tmpdir):
+    """Test running inference by CLI for a cloud with a single point (edge case addressed in V3.4.0)."""
+    # Hydra changes CWD, and therefore absolute paths are preferred
+    abs_path_to_single_point_cloud = osp.abspath(SINGLE_POINT_CLOUD)
+    command = [
+        "run.py",
+        f"predict.ckpt_path={one_epoch_trained_RandLaNet_checkpoint}",
+        f"predict.src_las={abs_path_to_single_point_cloud}",
+        f"predict.output_dir={tmpdir}",
+        "+predict.interpolator.probas_to_save=[building,unclassified]",
+        "task.task_name=predict",
+    ]
+    run_hydra_decorated_command(command)
+
+
+def test_RandLaNet_predict_with_invariance_checks(one_epoch_trained_RandLaNet_checkpoint, tmpdir):
     """Train a model for one epoch, and run test and predict functions using the trained model.

     Args:
@@ -235,9 +251,7 @@ def check_las_invariance(las_path_1: str, las_path_2: str):
         assert pytest.approx(np.sum(a2[d]), rel_tolerance) == np.sum(a1[d])


-def _make_list_of_necesary_hydra_overrides_with_tmp_paths(
-    toy_dataset_hdf5_path: str, tmpdir: str
-):
+def _make_list_of_necesary_hydra_overrides_with_tmp_paths(toy_dataset_hdf5_path: str, tmpdir: str):
    """Get list of overrides for hydra, the ones that are always needed when calling train/test.

    Args:

diff --git a/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml b/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.4.0.yaml
similarity index 99%
rename from trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml
rename to trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.4.0.yaml
index e89f39b1..acd15868 100644
--- a/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.3.0.yaml
+++ b/trained_model_assets/proto151_V2.0_epoch_100_Myria3DV3.1.0_predict_config_V3.4.0.yaml
@@ -129,7 +129,7 @@ datamodule:
     _target_: functools.partial
     _args_:
       - ${get_method:myria3d.pctl.dataset.utils.pre_filter_below_n_points}
-    min_num_nodes: 50
+    min_num_nodes: 1
   tile_width: 1000
   subtile_width: 50
   subtile_shape: square

From c2208cc784a87b4a20416e393c272e907a28961c Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Tue, 1 Aug 2023 12:17:13 +0200
Subject: [PATCH 06/17] fix: update dir of cicd inputs in workflow

---
 .github/workflows/cicd.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/cicd.yaml b/.github/workflows/cicd.yaml
index 5baccdc8..7371105a 100644
--- a/.github/workflows/cicd.yaml
+++ b/.github/workflows/cicd.yaml
@@ -40,8 +40,8 @@ jobs:
       - name: Example inference run via Docker with default config and checkpoint
         run: >
           docker run
-          -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/inputs/:/inputs/
-          -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/outputs/:/outputs/
+          -v /var/data/cicd/CICD_github_assets/myria3d_V3.4.0/inputs/:/inputs/
+          -v /var/data/cicd/CICD_github_assets/myria3d_V3.4.0/outputs/:/outputs/
           --ipc=host
           --shm-size=2gb
           myria3d
@@ -54,8 +54,8 @@ jobs:
      - name: Example inference run via Docker with inference-time subtiles overlap to smooth-out results.
        run: >
          docker run
-          -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/inputs/:/inputs/
-          -v /var/data/cicd/CICD_github_assets/myria3d_V3.3.0/outputs/:/outputs/
+          -v /var/data/cicd/CICD_github_assets/myria3d_V3.4.0/inputs/:/inputs/
+          -v /var/data/cicd/CICD_github_assets/myria3d_V3.4.0/outputs/:/outputs/
           --ipc=host
           --shm-size=2gb
           myria3d

From 8391faa55c6ba77b13a2d75a6771b013379678a9 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 2 Aug 2023 14:20:29 +0200
Subject: [PATCH 07/17] fix: fix dataset description for pacasam

---
 .../dataset_description/20230601_lidarhd_pacasam_dataset.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml b/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml
index aaa5d850..90289a5a 100644
--- a/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml
+++ b/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml
@@ -8,7 +8,7 @@ _convert_: all # For omegaconf structs to be converted to python dictionaries
 classification_preprocessing_dict: {3: 5, 4: 5, 0: 1, 66: 65}

 # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
-classification_dict: {1: "unclassified", 2: "ground", 5: vegetation, 6: "building", 9: water, 17: bridge, 64: lasting_above, 66: 65}
+classification_dict: {1: "unclassified", 2: "ground", 5: vegetation, 6: "building", 9: water, 17: bridge, 64: lasting_above}

 # Input and output dims of neural net are dataset dependent:
 d_in: 9
 num_classes: 7
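
Review note: a sketch of how the two mappings compose, using plain dicts; the remapping loop is illustrative rather than the exact myria3d implementation:

```python
import numpy as np

classification_preprocessing_dict = {3: 5, 4: 5, 0: 1, 66: 65}
classification_dict = {
    1: "unclassified", 2: "ground", 5: "vegetation", 6: "building",
    9: "water", 17: "bridge", 64: "lasting_above",
}

codes = np.array([0, 3, 4, 6, 66])
remapped = np.array([classification_preprocessing_dict.get(int(c), int(c)) for c in codes])
print(remapped)  # [ 1  5  5  6 65]; 65 (noise) has no class entry and is ignored in training

# Before this fix, the stray `66: 65` entry made classification_dict map a code to
# another code instead of to a class name string.
```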
From 7157d369b84111c1bd0b2c4ea67aaf215a387188 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 2 Aug 2023 14:24:26 +0200
Subject: [PATCH 08/17] dev: update minor version to V3.4.1, and begin a CHANGELOG.md

---
 CHANGELOG.md          | 7 +++++++
 package_metadata.yaml | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 CHANGELOG.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..18371688
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,7 @@
+# main
+
+# 3.4.1
+- Fix dataset description for pacasam: there was an unwanted int-to-int mapping in classification_dict.
+
+# 3.4.0
+- Allow inference for the smallest possible patches (num_nodes=1) to have consistent inference behavior
\ No newline at end of file
diff --git a/package_metadata.yaml b/package_metadata.yaml
index 30c2caea..6b10d0dd 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "3.4.0"
+__version__: "3.4.1"
 __name__: "myria3d"
 __url__: "https://github.com/IGNF/myria3d"
 __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds"

From ccb2259e908daf7c1d50deae96c998cdba41aba4 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 2 Aug 2023 14:54:20 +0200
Subject: [PATCH 09/17] dev: Reconstruct absolute path of input LAS files explicitly, removing a costly glob operation

---
 CHANGELOG.md                             |  3 +++
 docs/source/tutorials/prepare_dataset.md | 14 ++++++++------
 myria3d/pctl/dataset/utils.py            | 16 +++++++++++++---
 package_metadata.yaml                    |  2 +-
 4 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 18371688..86f21758 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # main

+# 3.4.2
+- Reconstruct absolute path of input LAS files explicitly, removing a costly glob operation.
+
 # 3.4.1
 - Fix dataset description for pacasam: there was an unwanted int-to-int mapping in classification_dict.

diff --git a/docs/source/tutorials/prepare_dataset.md b/docs/source/tutorials/prepare_dataset.md
index c9646672..f50b03ca 100644
--- a/docs/source/tutorials/prepare_dataset.md
+++ b/docs/source/tutorials/prepare_dataset.md
@@ -17,15 +17,17 @@ Additionally, you can control cloud sampling parameters via two configurations:

 ## Preparing the dataset

-Input point clouds need to be splitted in subtiles that can be digested by segmentation models. We found that a receptive field of 50m*50m was a good balance between context and memory intensity. For faster training, this split can be done once, to avoid loading large file in memory multiple times.
-
-To perform a training, you will need to specify these parameters of the datamodule config group:
-- `data_dir`: path to a directory in which a set of LAS files are stored (can be nested in subdirectories).
+To perform a training, you will need to specify these parameters in the datamodule config group:
+- `data_dir`: path to a directory in which a set of LAS files are stored. Clouds must be nested in subdirectories named according to their split: train, val, or test.
 - `split_csv_path`: path to a CSV file with schema `basename,split`, specifying a train/val/test split for your data.

-These will be composed into a single file dataset for which you can specify a path via the `datamodule.hdf5_file_path` parameter. This happens on the fly, therefore a first training might take some time, but this should only happens once.
+Under the hood, the path of each LAS file will be reconstructed like this: '{data_dir}/{split}/{basename}'.
+
+Large input point clouds need to be divided in smaller clouds that can be digested by segmentation models. We found that a receptive field of 50m x 50m was a good balance between context and memory intensity. The division is performed once, to avoid loading large files in memory multiple times during training.
+
+After division, the smaller clouds are preprocessed (i.e. selection of specific LAS dimensions, on-the-fly creation of dimensions) and regrouped into a single HDF5 file whose path is specified via the `datamodule.hdf5_file_path` parameter.

-Once this is done, you do not need sources anymore, and simply specifying the path to the HDF5 dataset is enough.
+The HDF5 dataset is created at training time. It should only happen once. Once this is done, you do not need sources anymore, and simply specifying the path to the HDF5 dataset is enough (there is no need for data_dir or split_csv_path parameters anymore).

 It's also possible to create the hdf5 file without training any model: just fill the `datamodule.hdf5_file_path` parameter as before to specify the file path, but use `task=create_hdf5` instead of `task=fit`.

diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index 286c6945..0762377a 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -1,6 +1,7 @@
 import glob
 import json
 import math
+from pathlib import Path
 import subprocess as sp
 from numbers import Number
 from typing import Dict, List, Literal, Union
@@ -162,14 +163,23 @@ def make_circle_wkt(center, subtile_width):
     return wkt


-def get_las_paths_by_split_dict(data_dir: str, split_csv_path: str) -> LAS_PATHS_BY_SPLIT_DICT_TYPE:
+def get_las_paths_by_split_dict(
+    data_dir: str, split_csv_path: str
+) -> LAS_PATHS_BY_SPLIT_DICT_TYPE:
     las_paths_by_split_dict: LAS_PATHS_BY_SPLIT_DICT_TYPE = {}
     split_df = pd.read_csv(split_csv_path)
     for phase in ["train", "val", "test"]:
         basenames = split_df[split_df.split == phase].basename.tolist()
-        las_paths_by_split_dict[phase] = [find_file_in_dir(data_dir, b) for b in basenames]
+        # Explicit data structure with ./val, ./train, ./test subfolders is required.
+        # TODO: indicate this in the doc as well.
+        las_paths_by_split_dict[phase] = [str(Path(data_dir) / phase / b) for b in basenames]
     if not las_paths_by_split_dict:
-        raise FileNotFoundError((f"No basename found while parsing directory {data_dir}" f"using {split_csv_path} as split CSV."))
+        raise FileNotFoundError(
+            (
+                f"No basename found while parsing directory {data_dir}"
+                f"using {split_csv_path} as split CSV."
+            )
+        )
     return las_paths_by_split_dict

diff --git a/package_metadata.yaml b/package_metadata.yaml
index 6b10d0dd..a34ca158 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "3.4.1"
+__version__: "3.4.2"
 __name__: "myria3d"
 __url__: "https://github.com/IGNF/myria3d"
 __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds"
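
Review note: what the new path reconstruction expects on disk, sketched with placeholder file names (the basenames and data_dir below are illustrative, not files from the repository):

```python
from pathlib import Path

# split.csv, schema `basename,split`:
#   basename,split
#   tile_a.las,train
#   tile_b.las,val
#   tile_c.las,test
data_dir = "/data/lidar"

for basename, split in [("tile_a.las", "train"), ("tile_b.las", "val"), ("tile_c.las", "test")]:
    # {data_dir}/{split}/{basename}, instead of globbing the whole directory tree:
    print(Path(data_dir) / split / basename)
# /data/lidar/train/tile_a.las
# /data/lidar/val/tile_b.las
# /data/lidar/test/tile_c.las
```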
From 040207f05d40cad7171e017018b290a0e3fb2b4b Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 2 Aug 2023 14:57:50 +0200
Subject: [PATCH 10/17] chore: remove outdated and incorrect hydra parameter in config.yaml

---
 CHANGELOG.md          | 3 +++
 configs/config.yaml   | 4 ----
 package_metadata.yaml | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 86f21758..ca5985ae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # main

+# 3.4.3
+- Remove outdated and incorrect hydra parameter in config.yaml
+
 # 3.4.2
 - Reconstruct absolute path of input LAS files explicitly, removing a costly glob operation.

diff --git a/configs/config.yaml b/configs/config.yaml
index 80463192..13a07eb7 100755
--- a/configs/config.yaml
+++ b/configs/config.yaml
@@ -42,7 +42,3 @@ defaults:
   # enable color logging
   - override hydra/hydra_logging: colorlog
   - override hydra/job_logging: colorlog
-
-hydra:
-  searchpath:
-    - pkg://default_files_for_predict
diff --git a/package_metadata.yaml b/package_metadata.yaml
index a34ca158..3d77391b 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "3.4.2"
+__version__: "3.4.3"
 __name__: "myria3d"
 __url__: "https://github.com/IGNF/myria3d"
 __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds"

From 1218aec8f91c026214252243afa5ea0088083e12 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 2 Aug 2023 15:02:41 +0200
Subject: [PATCH 11/17] chore: Remove duplicated experiment configuration

---
 CHANGELOG.md                                  |  3 +++
 .../RandLaNet_base_run_FR_pyg_randla_net.yaml | 18 ------------------
 package_metadata.yaml                         |  2 +-
 3 files changed, 4 insertions(+), 19 deletions(-)
 delete mode 100755 configs/experiment/RandLaNet_base_run_FR_pyg_randla_net.yaml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ca5985ae..591d420b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # main

+# 3.4.4
+- Remove duplicated experiment configuration
+
 # 3.4.3
 - Remove outdated and incorrect hydra parameter in config.yaml

diff --git a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net.yaml b/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net.yaml
deleted file mode 100755
index 5083ac1d..00000000
--- a/configs/experiment/RandLaNet_base_run_FR_pyg_randla_net.yaml
+++ /dev/null
@@ -1,18 +0,0 @@
-# @package _global_
-defaults:
-  - override /model: pyg_randla_net_model.yaml
-  - override /datamodule/transforms/augmentations: light.yaml
-
-logger:
-  comet:
-    experiment_name: "Pyg RandLaNet - FR Data"
-
-trainer:
-  num_sanity_val_steps: 2
-  min_epochs: 100
-  max_epochs: 150
-  # gpus: [1]
-
-predict:
-  interpolator:
-    interpolation_k: 10
\ No newline at end of file
diff --git a/package_metadata.yaml b/package_metadata.yaml
index 3d77391b..3b7f143f 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "3.4.3"
+__version__: "3.4.4"
 __name__: "myria3d"
 __url__: "https://github.com/IGNF/myria3d"
 __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds"
From 1648313c96ca915a450784887cae11c5d05731c8 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 2 Aug 2023 15:24:46 +0200
Subject: [PATCH 12/17] dev: set a default task_name (fit) to avoid common error at launch time.

---
 CHANGELOG.md          |  3 +++
 package_metadata.yaml |  2 +-
 run.py                | 56 +++++++++++++++++++++++++------------------
 3 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 591d420b..95235e78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # main

+# 3.4.5
+- Set a default task_name (fit) to avoid common error at launch time.
+
 # 3.4.4
 - Remove duplicated experiment configuration

diff --git a/package_metadata.yaml b/package_metadata.yaml
index 3b7f143f..7e80f8a5 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "3.4.4"
+__version__: "3.4.5"
 __name__: "myria3d"
 __url__: "https://github.com/IGNF/myria3d"
 __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds"
diff --git a/run.py b/run.py
index 769b8e57..e125437c 100755
--- a/run.py
+++ b/run.py
@@ -2,7 +2,9 @@
     # It is safer to import comet before all other imports.
     import comet_ml  # noqa
 except ImportError:
-    print("Warning: package comet_ml not found. This may break things if you use a comet callback.")
+    print(
+        "Warning: package comet_ml not found. This may break things if you use a comet callback."
+    )

 from enum import Enum
@@ -33,11 +35,15 @@ class TASK_NAMES(Enum):
     HDF5 = "create_hdf5"


+DEFAULT_TASK = TASK_NAMES.FIT.value
+
 log = utils.get_logger(__name__)


 @hydra.main(config_path="configs/", config_name="config.yaml")
-def launch_train(config: DictConfig):  # pragma: no cover (it's just an initializer of a class/method tested elsewhere)
+def launch_train(
+    config: DictConfig,
+):  # pragma: no cover (it's just an initializer of a class/method tested elsewhere)
     """Training, evaluation, testing, or finetuning of a neural network."""
     # Imports should be nested inside @hydra.main to optimize tab completion
     # Read more here: https://github.com/facebookresearch/hydra/issues/934
@@ -60,7 +66,9 @@ def launch_predict(config: DictConfig):

     # hydra changes current directory, so we make sure the checkpoint has an absolute path
     if not os.path.isabs(config.predict.ckpt_path):
-        config.predict.ckpt_path = os.path.join(os.path.dirname(__file__), config.predict.ckpt_path)
+        config.predict.ckpt_path = os.path.join(
+            os.path.dirname(__file__), config.predict.ckpt_path
+        )

     # Pretty print config using Rich library
@@ -80,7 +88,9 @@ def launch_hdf5(config: DictConfig):
     if config.get("print_config"):
         utils.print_config(config, resolve=False)

-    las_paths_by_split_dict = get_las_paths_by_split_dict(config.datamodule.get("data_dir"), config.datamodule.get("split_csv_path"))
+    las_paths_by_split_dict = get_las_paths_by_split_dict(
+        config.datamodule.get("data_dir"), config.datamodule.get("split_csv_path")
+    )
     create_hdf5(
         las_paths_by_split_dict=las_paths_by_split_dict,
         hdf5_file_path=config.datamodule.get("hdf5_file_path"),
@@ -89,35 +99,35 @@ def launch_hdf5(config: DictConfig):
         subtile_shape=config.datamodule.get("subtile_shape"),
         pre_filter=hydra.utils.instantiate(config.datamodule.get("pre_filter")),
         subtile_overlap_train=config.datamodule.get("subtile_overlap_train"),
-        points_pre_transform=hydra.utils.instantiate(config.datamodule.get("points_pre_transform")),
+        points_pre_transform=hydra.utils.instantiate(
+            config.datamodule.get("points_pre_transform")
+        ),
     )


 if __name__ == "__main__":
+    task_name = "fit"
     for arg in sys.argv:
         if TASK_NAME_DETECTION_STRING in arg:
             _, task_name = arg.split("=")
             break

-    try:
-        log.info(f"task selected: {task_name}")
-
-        if task_name in [TASK_NAMES.FIT.value, TASK_NAMES.TEST.value, TASK_NAMES.FINETUNE.value]:
-            # load environment variables from `.env` file if it exists
-            # recursively searches for `.env` in all folders starting from work dir
-            dotenv.load_dotenv(override=True)
-            launch_train()
+    log.info(f"Task: {task_name}")

-        elif task_name == TASK_NAMES.PREDICT.value:
-            dotenv.load_dotenv(os.path.join(DEFAULT_DIRECTORY, DEFAULT_ENV))
-            launch_predict()
+    if task_name in [TASK_NAMES.FIT.value, TASK_NAMES.TEST.value, TASK_NAMES.FINETUNE.value]:
+        # load environment variables from `.env` file if it exists
+        # recursively searches for `.env` in all folders starting from work dir
+        dotenv.load_dotenv(override=True)
+        launch_train()

-        elif task_name == TASK_NAMES.HDF5.value:
-            launch_hdf5()
+    elif task_name == TASK_NAMES.PREDICT.value:
+        dotenv.load_dotenv(os.path.join(DEFAULT_DIRECTORY, DEFAULT_ENV))
+        launch_predict()

-        else:
-            log.warning("Task unknown")
+    elif task_name == TASK_NAMES.HDF5.value:
+        launch_hdf5()

-    except NameError as e:
-        log.error('a task name must be defined, with the argument "task.task_name=..."')
-        raise e
+    else:
+        log.warning(
+            f"Task {task_name} is not known. Specify a valid task name via task.task_name=..."
+        )
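
Review note: a sketch of the argv scan in isolation, showing that a bare `python run.py` now falls back to the fit task instead of failing on an undefined `task_name`:

```python
TASK_NAME_DETECTION_STRING = "task.task_name="

def detect_task_name(argv, default="fit"):
    # Mirror of the __main__ block above: scan CLI overrides for task.task_name=...
    for arg in argv:
        if TASK_NAME_DETECTION_STRING in arg:
            _, task_name = arg.split("=")
            return task_name
    return default

assert detect_task_name(["run.py"]) == "fit"
assert detect_task_name(["run.py", "task.task_name=predict"]) == "predict"
```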
From 6c6b9a19ecdc4531b6c1b7619525193984f77af4 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 2 Aug 2023 15:36:59 +0200
Subject: [PATCH 13/17] dev: document the possible use of ign-pdal-tools for colorization

---
 CHANGELOG.md                             | 7 +++++--
 docs/source/tutorials/prepare_dataset.md | 8 ++++----
 package_metadata.yaml                    | 2 +-
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 95235e78..5d72aa22 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,13 +1,16 @@
 # main

+# 3.4.6
+- Document the possible use of ign-pdal-tools for colorization.
+
 # 3.4.5
 - Set a default task_name (fit) to avoid common error at launch time.

 # 3.4.4
-- Remove duplicated experiment configuration
+- Remove duplicated experiment configuration.

 # 3.4.3
-- Remove outdated and incorrect hydra parameter in config.yaml
+- Remove outdated and incorrect hydra parameter in config.yaml.

 # 3.4.2
 - Reconstruct absolute path of input LAS files explicitly, removing a costly glob operation.

diff --git a/docs/source/tutorials/prepare_dataset.md b/docs/source/tutorials/prepare_dataset.md
index f50b03ca..1dff18a6 100644
--- a/docs/source/tutorials/prepare_dataset.md
+++ b/docs/source/tutorials/prepare_dataset.md
@@ -2,13 +2,13 @@

 ## Preprocessing functions

-The loading function is dataset dependant, and is `lidar_hd_pre_transform` by default. The function takes points loaded from a LAS file via pdal as input, and returns a `pytorch_geometric.Data` object following the standard naming convention of `pytorch_geometric`, plus a list of features names for later use in transforms.
+The loading function is dataset dependent, and is `lidar_hd_pre_transform` by default. The function takes points loaded from a LAS file via pdal as input, and returns a `pytorch_geometric.Data` object following the standard naming convention of `pytorch_geometric`, plus a list of feature names for later use in transforms. In the loading function, the return number and color information (RGBI) are scaled to the 0-1 interval, an NDVI and an average color ((R+G+B)/3) dimension are created, and points that may be occluded (as indicated by a higher return number) have their color set to 0.

-It is adapted to the French Lidar HD data provided by IGN (see [the official page](https://geoservices.ign.fr/lidarhd) - link in French). Return number and color information (RGBI) are scaled to 0-1 interval, a NDVI and an average color ((R+G+B)/3) dimension are created, and points that may be occluded (as indicated by higher return number) have their color set to 0.
+Customization: You may want to implement your own logic (e.g. with custom, additional features) in directory `points_pre_transform`. It then needs to be referenced similarly to `lidar_hd_pre_transform`.

-You may want to implement your own logic (e.g. with custom, additional features) in directory `points_pre_transform`. It then needs to be referenced similarly to `lidar_hd_pre_transform`.
+The loading function is designed for the French Lidar HD data provided by IGN (see [the official page](https://geoservices.ign.fr/lidarhd) - link in French). Note that the clouds are shared without color information, and should be colorized (RGB+Infrared) to use myria3d. The [open-source ign-pdal-tools library](https://pypi.org/project/ign-pdal-tools/) is a convenient toolkit that can be used to colorize the raw clouds with IGN aerial imagery (see function 'pdaltools.color.color(...)').

-If you use your own classification convention , you will need to create a `dataset_description` configuration (for an example see `configs/dataset_description/20220607_151_dalles_proto.yaml`).
+Customization: If you use a different classification (e.g. additional classes), you will need to create a `dataset_description` configuration (similar to `configs/dataset_description/20220607_151_dalles_proto.yaml`).

 Additionally, you can control cloud sampling parameters via two configurations:
 - `configs/datamodule/transforms/preparations/points_budget.yaml`: (default) allows variable cloud size within lower and higher boundaries.

diff --git a/package_metadata.yaml b/package_metadata.yaml
index 7e80f8a5..3d7474aa 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "3.4.5"
+__version__: "3.4.6"
 __name__: "myria3d"
 __url__: "https://github.com/IGNF/myria3d"
 __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds"
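
Review note: a hedged sketch of the colorization step the updated documentation points to. The exact signature of `pdaltools.color.color` is an assumption here and may differ between ign-pdal-tools releases; check the installed version before relying on it:

```python
# Assumed usage of ign-pdal-tools (pip install ign-pdal-tools); the positional
# input/output arguments are not guaranteed to match your installed release.
from pdaltools.color import color

# Colorize a raw Lidar HD tile (RGB + Infrared from IGN aerial imagery)
# before running myria3d on it.
color("raw_tile.laz", "colorized_tile.laz")
```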
From 31fc2814bb4eb0f0e5143517a5c5aaa196a7f3f9 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 2 Aug 2023 15:42:02 +0200
Subject: [PATCH 14/17] dev: Remove tqdm when splitting a lidar tile to avoid cluttered logs during data preparation

---
 CHANGELOG.md                  | 3 +++
 myria3d/pctl/dataset/utils.py | 3 +--
 package_metadata.yaml         | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5d72aa22..49e92387 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # main

+# 3.4.7
+- Remove tqdm when splitting a lidar tile to avoid cluttered logs during data preparation.
+
 # 3.4.6
 - Document the possible use of ign-pdal-tools for colorization.

diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index 0762377a..f9f3d836 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -11,7 +11,6 @@
 import pdal
 from scipy.spatial import cKDTree
 from shapely.geometry import Point
-from tqdm import tqdm

 SPLIT_TYPE = Union[Literal["train"], Literal["val"], Literal["test"]]
 SHAPE_TYPE = Union[Literal["disk"], Literal["square"]]
@@ -126,7 +125,7 @@ def split_cloud_into_samples(
     pos = np.asarray([points["X"], points["Y"], points["Z"]], dtype=np.float32).transpose()
     kd_tree = cKDTree(pos[:, :2] - pos[:, :2].min(axis=0))
     XYs = get_mosaic_of_centers(tile_width, subtile_width, subtile_overlap=subtile_overlap)
-    for center in tqdm(XYs, desc="Centers"):
+    for center in XYs:
         radius = subtile_width // 2  # Square receptive field.
         minkowski_p = np.inf
         if shape == "disk":
diff --git a/package_metadata.yaml b/package_metadata.yaml
index 3d7474aa..6c187a83 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "3.4.6"
+__version__: "3.4.7"
 __name__: "myria3d"
 __url__: "https://github.com/IGNF/myria3d"
 __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds"

From 944e2e136b6e0f3594a5d2be70cacb8c8c0fbb2c Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Wed, 2 Aug 2023 18:06:35 +0200
Subject: [PATCH 15/17] dev: Add exception codes to pacasam config

---
 .../dataset_description/20230601_lidarhd_pacasam_dataset.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml b/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml
index 90289a5a..98ef0c72 100644
--- a/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml
+++ b/configs/dataset_description/20230601_lidarhd_pacasam_dataset.yaml
@@ -5,7 +5,8 @@ _convert_: all # For omegaconf structs to be converted to python dictionaries
 # 0: no processing --> unclassified
 # 66: synthetic points --> noise (synthetic points are useful for specific modelling tasks on already classified data).
 # We set them to noise so that they are ignored during training.
-classification_preprocessing_dict: {3: 5, 4: 5, 0: 1, 66: 65}
+# Codes that should not have been in the data: 100, 101.
+classification_preprocessing_dict: {3: 5, 4: 5, 0: 1, 66: 65, 100: 1, 101: 1}

 # classification_dict = {code_int: name_str, ...} and MUST be sorted (increasing order).
 classification_dict: {1: "unclassified", 2: "ground", 5: vegetation, 6: "building", 9: water, 17: bridge, 64: lasting_above}

From 3047670d2169eb39ba8356cfaa312072ef8b4074 Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Tue, 8 Aug 2023 18:35:49 +0200
Subject: [PATCH 16/17] chore: remove dead comment.

---
 myria3d/pctl/dataset/utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/myria3d/pctl/dataset/utils.py b/myria3d/pctl/dataset/utils.py
index f9f3d836..7e62e958 100644
--- a/myria3d/pctl/dataset/utils.py
+++ b/myria3d/pctl/dataset/utils.py
@@ -169,8 +169,7 @@ def get_las_paths_by_split_dict(
     split_df = pd.read_csv(split_csv_path)
     for phase in ["train", "val", "test"]:
         basenames = split_df[split_df.split == phase].basename.tolist()
-        # Explicit data structure with ./val, ./train, ./test subfolders is required.
-        # TODO: indicate this in the doc as well.
+        # Reminder: an explicit data structure with ./val, ./train, ./test subfolders is required.
         las_paths_by_split_dict[phase] = [str(Path(data_dir) / phase / b) for b in basenames]

     if not las_paths_by_split_dict:

From b98f120977aa78ecf5321eab61af4cba2b81689c Mon Sep 17 00:00:00 2001
From: Charles Gaydon
Date: Tue, 8 Aug 2023 18:42:33 +0200
Subject: [PATCH 17/17] patch: Raise an informative error in case of unexpected task_name

---
 CHANGELOG.md          | 3 +++
 package_metadata.yaml | 2 +-
 run.py                | 5 +++--
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 49e92387..d627d075 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # main

+# 3.4.8
+- Raise an informative error in case of unexpected task_name.
+
 # 3.4.7
 - Remove tqdm when splitting a lidar tile to avoid cluttered logs during data preparation.

diff --git a/package_metadata.yaml b/package_metadata.yaml
index 6c187a83..72a706c6 100644
--- a/package_metadata.yaml
+++ b/package_metadata.yaml
@@ -1,4 +1,4 @@
-__version__: "3.4.7"
+__version__: "3.4.8"
 __name__: "myria3d"
 __url__: "https://github.com/IGNF/myria3d"
 __description__: "Deep Learning for the Semantic Segmentation of Aerial Lidar Point Clouds"
diff --git a/run.py b/run.py
index e125437c..3d56cad1 100755
--- a/run.py
+++ b/run.py
@@ -128,6 +128,7 @@ def launch_hdf5(config: DictConfig):
         launch_hdf5()

     else:
-        log.warning(
-            f"Task {task_name} is not known. Specify a valid task name via task.task_name=..."
+        choices = ", ".join(task.value for task in TASK_NAMES)
+        raise ValueError(
+            f"Task '{task_name}' is not known. Specify a valid task name via task.task_name. Valid choices are: {choices}"
         )