diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index c1a1e82..cb744c4 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -18,7 +18,6 @@ jobs:
   run-benchmark:
     name: Run Benchmark
     runs-on: ubuntu-latest
-    ## runs-on: self-hosted
     steps:
       - name: Check out repository
         uses: actions/checkout@v4
@@ -49,27 +48,26 @@ jobs:
         shell: bash -l {0}
         run: |
           mamba install -y pip
-          pip install git+https://github.com/omnibenchmark/omnibenchmark.git@reduce_install_scope
+          pip install git+https://github.com/omnibenchmark/omnibenchmark.git@main
 
       - name: Load benchmark cache
         id: cache-benchmark
         uses: actions/cache@v3
         with:
           path: out/
-          key: benchmark-${{ runner.os }}-${{ hashFiles('Clustering.yaml') }}
+          key: benchmark-${{ runner.os }}-${{ hashFiles('Clustering_conda_short.yml') }}
 
       - name: Run benchmark
         shell: bash -l {0}
-        continue-on-error: true
+        continue-on-error: false
         run: |
-          echo "y" | ob run benchmark -b Clustering.yaml --local --cores 3 --continue-on-error
+          ob run benchmark -b Clustering_conda_short.yml --local --cores 3 --continue-on-error --yes
 
   upload-artifact:
     name: Benchmark Artifact
     runs-on: ubuntu-latest
-    ## runs-on: self-hosted
     needs: run-benchmark
-    if: always()
+    if: github.ref == 'refs/heads/main' && github.repository_owner == 'omnibenchmark'
     steps:
       - name: Check out repository
         uses: actions/checkout@v4
@@ -100,7 +98,7 @@ jobs:
 
       - name: Deploy to GitHub Pages
         uses: actions/deploy-pages@v4
-          
+
       - name: Create Job Summary
         if: always()
         run: |
@@ -108,4 +106,3 @@ jobs:
           echo "- [Plotting Report](https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }})" >> $GITHUB_STEP_SUMMARY
           echo "### All Outputs" >> $GITHUB_STEP_SUMMARY
           echo "- [Complete Benchmark Output](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts)" >> $GITHUB_STEP_SUMMARY
-    
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4d38534
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+# Apptainer image build artifacts (.sif files under envs/)
+envs/*.sif
+
+# Snakemake outputs: run log and the .snakemake/ directory
+snakemake.log
+.snakemake/
+
+# Vim swap files
+*.swp
+*.swo
diff --git a/Clustering.yaml b/Clustering.yaml
deleted file mode 100644
index 0007ea5..0000000
--- a/Clustering.yaml
+++ /dev/null
@@ -1,232 +0,0 @@
-id: clustering_example
-description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2.
-version: 1.2
-benchmarker: "Izaskun Mallona, Daniel Incicau"
-storage: https://play.min.io
-benchmark_yaml_spec: 0.04
-storage_api: S3
-storage_bucket_name: clustering_example
-software_backend: conda
-software_environments:
-  clustbench:
-    description: "clustbench on py3.12.6"
-    conda: envs/clustbench.yml
-    envmodule: clustbench
-    apptainer: envs/clustbench.sif
-  sklearn:
-    description: "Daniel's on py3.12.6"
-    conda: envs/sklearn.yml
-    apptainer: envs/sklearn.sif
-    envmodule: clustbench # not true, but
-  R:
-    description: "Daniel's R with readr, dplyr, mclust, caret"
-    conda: envs/r.yml
-    apptainer: envs/r.sif
-    envmodule: fcps # not true, but
-  rmarkdown:
-    description: "R with some plotting dependencies"
-    conda: envs/rmarkdown.yml
-    apptainer: envs/r.sif # not true, but
-    envmodule: fcps # not true, but
-  fcps:
-    description: "CRAN's FCPS"
-    conda: envs/fcps.yml
-    apptainer: envs/fcps.sif
-    envmodule: fcps
-metric_collectors:
-  - id: plotting
-    name: "Single-backend metric collector."
-    software_environment: "rmarkdown"
-    repository:
-      url: https://github.com/imallona/clustering_report
-      commit: 1d6bdf5
-    inputs:
-      - metrics.scores
-    outputs:
-      - id: plotting.html
-        path: "{input}/{name}/plotting_report.html"
-
-stages:
-  ## clustbench data ##########################################################
-
-  - id: data
-    modules:
-      - id: clustbench
-        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
-        software_environment: "clustbench"
-        repository:
-          url: https://github.com/imallona/clustbench_data
-          commit: 366c5a2
-        parameters: # comments depict the possible cardinalities and the number of curated labelsets
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1
-          # - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] #  2 1
-          # - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2
-          # - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] #  7 1
-          # - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1
-          # - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6  2
-          # - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] #  4 1
-          # - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] #  2 1
-          # - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] #  2 1
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] #  2 1
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] #  2, 4  2
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] #  2, 5  2
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5  2
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5  2
-          # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] #  3, 5  2
-          # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1
-          # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1
-          # - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1
-          # - values: ["--dataset_generator", "other", "--dataset_name", "iris"] #  3 1
-          # - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1
-          # - values: ["--dataset_generator", "other", "--dataset_name", "square"] #  2 1
-          # - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] #  7 1
-          # - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5
-          # - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] #  2 2
-          # - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1
-          # - values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] #  3, 4  2
-          # - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] #  8, 9, 15  3
-          # - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1
-          # - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] #  8 1
-          # - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1
-          # - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] #  2 1
-          # - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1
-          # - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1
-          # - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] #  2 1
-          # - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] #  3 1
-          # - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10  1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "cross"] # 4 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10  1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "smile"] # 4, 6  2
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] # 2 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] #  4 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] #  3 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] #  3 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] #  3 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "x3"] #  4 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] #  3 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] #  5 1
-          # - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] #  4 1
-    outputs:
-      - id: data.matrix
-        path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
-      - id: data.true_labels
-        path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
-
-  ## clustbench methods (fastcluster) ###################################################################
-  
-  - id: clustering
-    modules:
-      - id: fastcluster
-        name: "fastcluster algorithm"
-        software_environment: "clustbench"
-        repository:
-          url: https://github.com/imallona/clustbench_fastcluster
-          # url: /home/imallona/src/clustbench_fastcluster/
-          commit: "45e43d3"
-        parameters:
-          - values: ["--linkage", "complete"]
-          - values: ["--linkage", "ward"]
-          # - values: ["--linkage", "average"]
-          # - values: ["--linkage", "weighted"]
-          # - values: ["--linkage", "median"]
-          # - values: ["--linkage", "centroid"]
-      - id: sklearn
-        name: "sklearn"
-        software_environment: "clustbench"
-        repository:
-          url: https://github.com/imallona/clustbench_sklearn
-          #url: /home/imallona/src/clustbench_sklearn
-          commit: 5877378
-        parameters:
-          - values: ["--method", "birch"]
-          - values: ["--method", "kmeans"]
-          # - values: ["--method", "spectral"] ## too slow
-          # - values: ["--method", "gm"]
-      - id: agglomerative
-        name: "agglomerative"
-        software_environment: "clustbench"
-        repository:
-          url: https://github.com/imallona/clustbench_agglomerative
-          commit: 5454368
-        parameters:
-          # - values: ["--linkage", "average"]
-          - values: ["--linkage", "complete"]
-          - values: ["--linkage", "ward"]
-      - id: genieclust
-        name: "genieclust"
-        software_environment: "clustbench"
-        repository:
-          url: https://github.com/imallona/clustbench_genieclust
-          commit: 6090043
-        parameters:
-          - values: ["--method", "genie", "--gini_threshold", 0.5]
-          - values: ["--method", "gic"]
-          # - values: ["--method", "ica"]
-      - id: fcps
-        name: "fcps"
-        software_environment: "fcps"
-        repository:
-          url: https://github.com/imallona/clustbench_fcps
-          commit: 272fa5f
-        parameters:
-          # - values: ["--method", "FCPS_AdaptiveDensityPeak"] # not in conda
-          - values: ["--method", "FCPS_Minimax"]
-          - values: ["--method", "FCPS_MinEnergy"]
-          # - values: ["--method", "FCPS_HDBSCAN_2"]
-          # - values: ["--method", "FCPS_HDBSCAN_4"]
-          # - values: ["--method", "FCPS_HDBSCAN_8"]
-          # - values: ["--method", "FCPS_Diana"]
-          # - values: ["--method", "FCPS_Fanny"]
-          # - values: ["--method", "FCPS_Hardcl"]
-          # - values: ["--method", "FCPS_Softcl"]
-          # - values: ["--method", "FCPS_Clara"]
-          # - values: ["--method", "FCPS_PAM"]
-    inputs:
-      - entries:
-          - data.matrix
-          - data.true_labels
-    outputs:
-      - id: clustering.predicted_ks_range
-        path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
-
-  - id: metrics
-    modules:
-      - id: partition_metrics
-        name: "clustbench partition metrics"
-        software_environment: "clustbench"
-        repository:
-          url: https://github.com/imallona/clustbench_metrics
-          commit: 9132d45
-        parameters:
-          - values: ["--metric", "normalized_clustering_accuracy"]
-          - values: ["--metric", "adjusted_fm_score"]
-          # - values: ["--metric", "adjusted_mi_score"]
-          # - values: ["--metric", "adjusted_rand_score"]
-          # - values: ["--metric", "fm_score"]
-          # - values: ["--metric", "mi_score"]
-          # - values: ["--metric", "normalized_clustering_accuracy"]
-          # - values: ["--metric", "normalized_mi_score"]
-          # - values: ["--metric", "normalized_pivoted_accuracy"]
-          # - values: ["--metric", "pair_sets_index"]
-          # - values: ["--metric", "rand_score"]
-    inputs:
-      - entries:
-          - clustering.predicted_ks_range
-          - data.true_labels
-    outputs:
-      - id: metrics.scores
-        path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
diff --git a/Clustering_oras.yml b/Clustering_apptainer.yml
similarity index 67%
rename from Clustering_oras.yml
rename to Clustering_apptainer.yml
index 6640461..e075d81 100644
--- a/Clustering_oras.yml
+++ b/Clustering_apptainer.yml
@@ -1,46 +1,45 @@
-id: clustering_example
-description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2. Caution dirty apptainer sifs.
-version: 1.2
-benchmarker: "Izaskun Mallona, Daniel Incicau"
-storage: https://play.min.io
-benchmark_yaml_spec: 0.04
-storage_api: S3
-storage_bucket_name: clustering_example
-software_backend: apptainer
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.4
 software_environments:
   clustbench:
-    description: "clustbench on py3.12.6"
+    description: "clustbench on py3.12.9, optimized python build"
     conda: envs/clustbench.yml
-    envmodule: clustbench
-    apptainer: oras://registry.renkulab.io/izaskun.mallona/clustering_example/clustbench:latest
-  sklearn:
-    description: "Daniel's on py3.12.6"
-    conda: envs/sklearn.yml
-    apptainer: oras://registry.renkulab.io/izaskun.mallona/clustering_example/r:latest
-    envmodule: clustbench # not true, but
-  R:
-    description: "Daniel's R with readr, dplyr, mclust, caret"
-    conda: envs/r.yml
-    apptainer: oras://registry.renkulab.io/izaskun.mallona/clustering_example/sklearn:latest
-    envmodule: fcps # not true, but
+    envmodule: clustbench/0.1.0-foss-2023b
+    apptainer: oras://quay.io/omnibenchmark/clustbench-vanilla:0.1.0  # NOTE(review): "vanilla" image, but the description says "optimized python build" — confirm which is intended
   fcps:
     description: "CRAN's FCPS"
     conda: envs/fcps.yml
-    apptainer: oras://registry.renkulab.io/izaskun.mallona/clustering_example/fcps:latest
-    envmodule: fcps
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0  # NOTE(review): same image as fcps — confirm it covers the plotting dependencies
+metric_collectors:  # consumes metrics.scores and writes plotting_report.html
+  - id: 'plotting'
+    name: 'Single-backend metric collector.'
+    software_environment: 'rmarkdown'
+    repository:
+      url: 'https://github.com/imallona/clustering_report'
+      commit: '1d6bdf5'
+    inputs:
+      - 'metrics.scores'
+    outputs:
+      - id: 'plotting.html'
+        path: '{input}/{name}/plotting_report.html'
 stages:
-
-  ## clustbench data ##########################################################
-
   - id: data
     modules:
       - id: clustbench
-        name: "clustbench datasets"
-        software_environment: "clustbench"
+        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_data
-          commit: 366c5a2
-        parameters:
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
           - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
           - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] #	2	1
           - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] #	2	2
@@ -102,23 +101,19 @@ stages:
           - values: ["--dataset_generator", "wut", "--dataset_name", "x3"] #	4	1
           - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] #	3	1
           - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] #	5	1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] #	4	1  
+          - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] #	4	1
     outputs:
       - id: data.matrix
         path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
       - id: data.true_labels
         path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
-
-  ## clustbench methods (fastcluster) ###################################################################
-  
   - id: clustering
     modules:
       - id: fastcluster
         name: "fastcluster algorithm"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_fastcluster
-          # url: /home/imallona/src/clustbench_fastcluster/
           commit: "45e43d3"
         parameters:
           - values: ["--linkage", "complete"]
@@ -128,11 +123,10 @@ stages:
           - values: ["--linkage", "median"]
           - values: ["--linkage", "centroid"]
       - id: sklearn
-        name: "sklearn"
-        software_environment: "clustbench"
+        name: sklearn
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_sklearn
-          #url: /home/imallona/src/clustbench_sklearn
           commit: 5877378
         parameters:
           - values: ["--method", "birch"]
@@ -150,8 +144,8 @@ stages:
           - values: ["--linkage", "complete"]
           - values: ["--linkage", "ward"]
       - id: genieclust
-        name: "genieclust"
-        software_environment: "clustbench"
+        name: genieclust
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_genieclust
           commit: 6090043
@@ -185,12 +179,11 @@ stages:
     outputs:
       - id: clustering.predicted_ks_range
         path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
-
   - id: metrics
     modules:
       - id: partition_metrics
         name: "clustbench partition metrics"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_metrics
           commit: 9132d45
@@ -213,89 +206,6 @@ stages:
     outputs:
       - id: metrics.scores
         path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
-
-  # ## daniel's data ###########################################################################
-  
-  # - id: danielsdata
-  #   modules:
-  #     - id: iris_manual
-  #       name: "Iris Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/iris.git
-  #         commit: 47c63f0
-  #     - id: penguins
-  #       name: "Penguins Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/penguins.git
-  #         commit: 9032478
-  #   outputs:
-  #     - id: data.features
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv"
-  #     - id: data.labels
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv"
-  
-  # ## daniel's distances ########################################################################
-  
-  # - id: distances
-  #   modules:
-  #     - id: D1
-  #       software_environment: "sklearn"
-  #       parameters:
-  #         - values: ["--measure", "cosine"]
-  #         - values: ["--measure", "euclidean"]
-  #         - values: ["--measure", "manhattan"]
-  #         - values: ["--measure", "chebyshev"]
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/distance.git
-  #         commit: dd99d4f
-  #   inputs:
-  #     - entries:
-  #         - data.features
-  #   outputs:
-  #     - id: distances
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv"
-        
-  # ## daniel's methods ###################################################################
-  
-  # - id: danielmethods
-  #   modules:
-  #     - id: kmeans
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/kmeans.git
-  #         commit: 049c8b1
-  #     - id: ward
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ward.git
-  #         commit: 976e3f3
-  #   inputs:
-  #     - entries:
-  #         - distances
-  #   outputs:
-  #     - id: methods.clusters
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv"
-
-  # ## daniel's metrics ###################################################################
-
-  # - id: danielsmetrics
-  #   modules:
-  #     - id: ari
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ari.git
-  #         commit: 72708f0
-  #     - id: accuracy
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/accuracy.git
-  #         commit: e26b32f
-  #   inputs:
-  #     - entries:
-  #         - methods.clusters
-  #         - data.labels
-  #   outputs:
-  #     - id: metrics.mapping
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt"
+id: clustering_benchmark_apptainer_oras
+description: Clustering benchmark on Gagolewski's. Using apptainer from omnibenchmark ORAS registry.
+software_backend: apptainer
diff --git a/Clustering_apptainer_optimized.yml b/Clustering_apptainer_optimized.yml
new file mode 100644
index 0000000..d536ddc
--- /dev/null
+++ b/Clustering_apptainer_optimized.yml
@@ -0,0 +1,211 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.4
+software_environments:  # each environment offers conda, envmodule and apptainer variants
+  clustbench:
+    description: 'clustbench on py3.12.9, optimized python build'
+    conda: 'envs/clustbench.yml'
+    envmodule: 'clustbench/0.1.0-foss-2023b'
+    apptainer: 'envs/clustbench-optimized.sif'
+  fcps:
+    description: 'CRAN''s FCPS'
+    conda: 'envs/fcps.yml'
+    envmodule: 'fcps/1.3.4-foss-2023a-r-4.3.2'
+    apptainer: 'envs/fcps.sif'
+  rmarkdown:
+    description: 'R with some plotting dependencies'
+    conda: 'envs/rmarkdown.yml'
+    envmodule: 'rmarkdown/0.1.0-gfbf-2024a-r-4.4.2'
+    apptainer: 'envs/fcps.sif'  # NOTE(review): same image as fcps — confirm it covers the plotting dependencies
+metric_collectors:  # consumes metrics.scores and writes plotting_report.html
+  - id: 'plotting'
+    name: 'Single-backend metric collector.'
+    software_environment: 'rmarkdown'
+    repository:
+      url: 'https://github.com/imallona/clustering_report'
+      commit: '1d6bdf5'
+    inputs:
+      - 'metrics.scores'
+    outputs:
+      - id: 'plotting.html'
+        path: '{input}/{name}/plotting_report.html'
+stages:
+  - id: data
+    modules:
+      - id: clustbench
+        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_data
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] #	2	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] #	2	2
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] #	7	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] #	3	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] #	2, 6	2
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] #	4	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] #	2	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] #	2, 4, 5	6
+          - values: ["--dataset_generator", "graves", "--dataset_name", "line"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] #	2, 4	2
+          - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] #	2, 5	2
+          - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] #	3, 5	2
+          - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] #	3, 5	2
+          - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] #	3, 5	2
+          - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] #	6	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] #	6	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] #	6	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "iris"] #	3	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] #	3	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "square"] #	2	1
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] #	7	1
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] #	4, 5, 6	5
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] #	2	2
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] #	2	1
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] #	3, 4	2
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] #	8, 9, 15	3
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] #	3	1
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] #	8	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] #	8	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] #	2	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] #	2	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] #	7	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] #	2	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] #	3	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] #	10	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] #	4	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "cross"] #	4	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] #	10	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] #	6	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] #	2	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] #	5	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "smile"] #	4, 6	2
+          - values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] #	2	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] #	4	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] #	2	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] #	5	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "x3"] #	4	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] #	5	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] #	4	1
+    outputs:
+      - id: data.matrix
+        path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
+      - id: data.true_labels
+        path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
+  - id: clustering
+    modules:
+      - id: 'fastcluster'
+        name: 'fastcluster algorithm'
+        software_environment: 'clustbench'
+        repository:
+          url: 'https://github.com/imallona/clustbench_fastcluster'
+          commit: '45e43d3'
+        parameters:  # each entry is one --linkage setting
+          - values: ['--linkage', 'complete']
+          - values: ['--linkage', 'ward']
+          - values: ['--linkage', 'average']
+          - values: ['--linkage', 'weighted']
+          - values: ['--linkage', 'median']
+          - values: ['--linkage', 'centroid']
+      - id: sklearn  # clustering module sourced from the clustbench_sklearn repository
+        name: sklearn
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_sklearn
+          commit: "5877378"  # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "birch"]
+          - values: ["--method", "kmeans"]
+          # - values: ["--method", "spectral"] ## too slow
+          - values: ["--method", "gm"]
+      - id: agglomerative  # clustering module sourced from the clustbench_agglomerative repository
+        name: agglomerative
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_agglomerative
+          commit: "5454368"  # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--linkage", "average"]
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: genieclust  # clustering module sourced from the clustbench_genieclust repository
+        name: genieclust
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_genieclust
+          commit: "6090043"  # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "genie", "--gini_threshold", 0.5]
+          - values: ["--method", "gic"]
+          - values: ["--method", "ica"]
+      - id: 'fcps'
+        name: 'fcps'
+        software_environment: 'fcps'
+        repository:
+          url: 'https://github.com/imallona/clustbench_fcps'
+          commit: '272fa5f'
+        parameters:  # each entry is one --method setting
+          # - values: ['--method', 'FCPS_AdaptiveDensityPeak']  # disabled: not in conda
+          - values: ['--method', 'FCPS_Minimax']
+          - values: ['--method', 'FCPS_MinEnergy']
+          - values: ['--method', 'FCPS_HDBSCAN_2']
+          - values: ['--method', 'FCPS_HDBSCAN_4']
+          - values: ['--method', 'FCPS_HDBSCAN_8']
+          - values: ['--method', 'FCPS_Diana']
+          - values: ['--method', 'FCPS_Fanny']
+          - values: ['--method', 'FCPS_Hardcl']
+          - values: ['--method', 'FCPS_Softcl']
+          - values: ['--method', 'FCPS_Clara']
+          - values: ['--method', 'FCPS_PAM']
+    inputs:
+      - entries:
+          - data.matrix
+          - data.true_labels
+    outputs:
+      - id: clustering.predicted_ks_range
+        path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
+  - id: metrics
+    modules:
+      - id: partition_metrics  # metrics module sourced from the clustbench_metrics repository
+        name: "clustbench partition metrics"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_metrics
+          commit: "9132d45"
+        parameters:  # one entry per --metric; each metric is listed once
+          - values: ["--metric", "normalized_clustering_accuracy"]
+          - values: ["--metric", "adjusted_fm_score"]
+          - values: ["--metric", "adjusted_mi_score"]
+          - values: ["--metric", "adjusted_rand_score"]
+          - values: ["--metric", "fm_score"]
+          - values: ["--metric", "mi_score"]
+          # - values: ["--metric", "normalized_clustering_accuracy"]  # disabled: duplicate of the first entry
+          - values: ["--metric", "normalized_mi_score"]
+          - values: ["--metric", "normalized_pivoted_accuracy"]
+          - values: ["--metric", "pair_sets_index"]
+          - values: ["--metric", "rand_score"]
+    inputs:
+      - entries:
+          - clustering.predicted_ks_range
+          - data.true_labels
+    outputs:
+      - id: metrics.scores
+        path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
+id: clustering_benchmark_apptainer_optimized_local
+description: Clustering benchmark on Gagolewski's. Using apptainer (locally built image, optimized python)
+software_backend: apptainer
diff --git a/Clustering_apptainer_optimized_short.yml b/Clustering_apptainer_optimized_short.yml
new file mode 100644
index 0000000..5bbd791
--- /dev/null
+++ b/Clustering_apptainer_optimized_short.yml
@@ -0,0 +1,121 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.5
+software_environments: # one entry per environment; each lists envmodule, conda and apptainer handles
+  clustbench:
+    description: "clustbench on py3.12.3, default python" # NOTE(review): the apptainer image below is clustbench-optimized.sif — confirm "default python" still describes it
+    envmodule: clustbench/0.1.0-foss-2023b
+    conda: envs/clustbench.yml
+    apptainer: envs/clustbench-optimized.sif
+  fcps:
+    description: "CRAN's FCPS"
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    conda: envs/fcps.yml
+    apptainer: envs/fcps.sif
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: envs/fcps.sif # NOTE(review): same image as the fcps environment — confirm it provides the plotting dependencies
+metric_collectors:
+  - id: plotting
+    name: "Single-backend metric collector."
+    software_environment: rmarkdown
+    repository:
+      url: https://github.com/imallona/clustering_report
+      commit: 1d6bdf5
+    inputs:
+      - metrics.scores
+    outputs:
+      - id: plotting.html
+        path: "{input}/{name}/plotting_report.html"
+stages:
+  - id: data
+    modules:
+      - id: clustbench
+        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_data
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
+    outputs:
+      - id: data.matrix
+        path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
+      - id: data.true_labels
+        path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
+  - id: clustering
+    modules:
+      - id: fastcluster
+        name: "fastcluster algorithm"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_fastcluster
+          commit: "45e43d3"
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: sklearn
+        name: "sklearn"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_sklearn
+          commit: "5877378" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "birch"]
+          - values: ["--method", "kmeans"]
+      - id: agglomerative
+        name: "agglomerative"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_agglomerative
+          commit: "5454368" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: genieclust
+        name: "genieclust"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_genieclust
+          commit: "6090043" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "genie", "--gini_threshold", 0.5]
+      - id: fcps
+        name: "fcps"
+        software_environment: fcps
+        repository:
+          url: https://github.com/imallona/clustbench_fcps
+          commit: 272fa5f
+        parameters:
+          - values: ["--method", "FCPS_Minimax"]
+    inputs:
+      - entries:
+          - data.matrix
+          - data.true_labels
+    outputs:
+      - id: clustering.predicted_ks_range
+        path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
+  - id: metrics
+    modules:
+      - id: partition_metrics
+        name: "clustbench partition metrics"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_metrics
+          commit: 8184cd4
+        parameters:
+          - values: ["--metric", "normalized_clustering_accuracy"]
+          - values: ["--metric", "adjusted_fm_score"]
+    inputs:
+      - entries:
+          - clustering.predicted_ks_range
+          - data.true_labels
+    outputs:
+      - id: metrics.scores
+        path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
+id: clustering_benchmark_apptainer_optimized_local
+description: Clustering benchmark on Gagolewski's. Using apptainer (locally built image, optimized python)
+software_backend: apptainer
diff --git a/Clustering_apptainer_short.yml b/Clustering_apptainer_short.yml
new file mode 100644
index 0000000..71bdd6f
--- /dev/null
+++ b/Clustering_apptainer_short.yml
@@ -0,0 +1,121 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.5
+software_environments: # one entry per environment; each lists envmodule, conda and apptainer handles
+  clustbench:
+    description: "clustbench on py3.12.3, default python"
+    envmodule: clustbench/0.1.0-foss-2023b
+    conda: envs/clustbench.yml
+    apptainer: oras://quay.io/omnibenchmark/clustbench-vanilla:0.1.0
+  fcps:
+    description: "CRAN's FCPS"
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    conda: envs/fcps.yml
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0 # NOTE(review): same image as the fcps environment — confirm it provides the plotting dependencies
+metric_collectors:
+  - id: plotting
+    name: "Single-backend metric collector."
+    software_environment: rmarkdown
+    repository:
+      url: https://github.com/imallona/clustering_report
+      commit: 1d6bdf5
+    inputs:
+      - metrics.scores
+    outputs:
+      - id: plotting.html
+        path: "{input}/{name}/plotting_report.html"
+stages:
+  - id: data
+    modules:
+      - id: clustbench
+        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_data
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
+    outputs:
+      - id: data.matrix
+        path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
+      - id: data.true_labels
+        path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
+  - id: clustering
+    modules:
+      - id: fastcluster
+        name: "fastcluster algorithm"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_fastcluster
+          commit: "45e43d3"
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: sklearn
+        name: "sklearn"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_sklearn
+          commit: "5877378" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "birch"]
+          - values: ["--method", "kmeans"]
+      - id: agglomerative
+        name: "agglomerative"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_agglomerative
+          commit: "5454368" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: genieclust
+        name: "genieclust"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_genieclust
+          commit: "6090043" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "genie", "--gini_threshold", 0.5]
+      - id: fcps
+        name: "fcps"
+        software_environment: fcps
+        repository:
+          url: https://github.com/imallona/clustbench_fcps
+          commit: 272fa5f
+        parameters:
+          - values: ["--method", "FCPS_Minimax"]
+    inputs:
+      - entries:
+          - data.matrix
+          - data.true_labels
+    outputs:
+      - id: clustering.predicted_ks_range
+        path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
+  - id: metrics
+    modules:
+      - id: partition_metrics
+        name: "clustbench partition metrics"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_metrics
+          commit: 8184cd4
+        parameters:
+          - values: ["--metric", "normalized_clustering_accuracy"]
+          - values: ["--metric", "adjusted_fm_score"]
+    inputs:
+      - entries:
+          - clustering.predicted_ks_range
+          - data.true_labels
+    outputs:
+      - id: metrics.scores
+        path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
+id: clustering_benchmark_apptainer_oras
+description: Clustering benchmark on Gagolewski's. Using apptainer from omnibenchmark ORAS registry.
+software_backend: apptainer
diff --git a/Clustering_apptainer_vanilla.yml b/Clustering_apptainer_vanilla.yml
new file mode 100644
index 0000000..cd4ba56
--- /dev/null
+++ b/Clustering_apptainer_vanilla.yml
@@ -0,0 +1,211 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.4
+software_environments: # one entry per environment; each lists conda, envmodule and apptainer handles
+  clustbench:
+    description: "clustbench on py3.12.9, optimized python build" # NOTE(review): the apptainer image below is the plain clustbench.sif — confirm "optimized python build" describes it
+    conda: envs/clustbench.yml
+    envmodule: clustbench/0.1.0-foss-2023b
+    apptainer: envs/clustbench.sif
+  fcps:
+    description: "CRAN's FCPS"
+    conda: envs/fcps.yml
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    apptainer: envs/fcps.sif
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: envs/fcps.sif # NOTE(review): same image as the fcps environment — confirm it provides the plotting dependencies
+metric_collectors:
+  - id: plotting
+    name: "Single-backend metric collector."
+    software_environment: rmarkdown
+    repository:
+      url: https://github.com/imallona/clustering_report
+      commit: 1d6bdf5
+    inputs:
+      - metrics.scores
+    outputs:
+      - id: plotting.html
+        path: "{input}/{name}/plotting_report.html"
+stages:
+  - id: data
+    modules:
+      - id: clustbench
+        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_data
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] #	2	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] #	2	2
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] #	7	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] #	3	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] #	2, 6	2
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] #	4	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] #	2	1
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] #	2, 4, 5	6
+          - values: ["--dataset_generator", "graves", "--dataset_name", "line"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] #	2, 4	2
+          - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] #	2	1
+          - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] #	2, 5	2
+          - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] #	3, 5	2
+          - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] #	3, 5	2
+          - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] #	3, 5	2
+          - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] #	6	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] #	6	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] #	6	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "iris"] #	3	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] #	3	1
+          - values: ["--dataset_generator", "other", "--dataset_name", "square"] #	2	1
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] #	7	1
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] #	4, 5, 6	5
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] #	2	2
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] #	2	1
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] #	3, 4	2
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] #	8, 9, 15	3
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] #	3	1
+          - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] #	8	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] #	8	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] #	2	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] #	2	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] #	7	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] #	2	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] #	3	1
+          - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] #	10	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] #	4	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "cross"] #	4	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] #	10	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] #	6	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] #	2	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] #	5	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "smile"] #	4, 6	2
+          - values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] #	2	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] #	4	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] #	2	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] #	5	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "x3"] #	4	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] #	3	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] #	5	1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] #	4	1
+    outputs:
+      - id: data.matrix
+        path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
+      - id: data.true_labels
+        path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
+  - id: clustering
+    modules:
+      - id: fastcluster
+        name: "fastcluster algorithm"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_fastcluster
+          commit: "45e43d3"
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+          - values: ["--linkage", "average"]
+          - values: ["--linkage", "weighted"]
+          - values: ["--linkage", "median"]
+          - values: ["--linkage", "centroid"]
+      - id: sklearn
+        name: sklearn
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_sklearn
+          commit: "5877378" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "birch"]
+          - values: ["--method", "kmeans"]
+          # - values: ["--method", "spectral"] ## too slow
+          - values: ["--method", "gm"]
+      - id: agglomerative
+        name: "agglomerative"
+        software_environment: "clustbench"
+        repository:
+          url: https://github.com/imallona/clustbench_agglomerative
+          commit: "5454368" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--linkage", "average"]
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: genieclust
+        name: genieclust
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_genieclust
+          commit: "6090043" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "genie", "--gini_threshold", 0.5]
+          - values: ["--method", "gic"]
+          - values: ["--method", "ica"]
+      - id: fcps
+        name: "fcps"
+        software_environment: "fcps"
+        repository:
+          url: https://github.com/imallona/clustbench_fcps
+          commit: 272fa5f
+        parameters:
+          # - values: ["--method", "FCPS_AdaptiveDensityPeak"] # not in conda
+          - values: ["--method", "FCPS_Minimax"]
+          - values: ["--method", "FCPS_MinEnergy"]
+          - values: ["--method", "FCPS_HDBSCAN_2"]
+          - values: ["--method", "FCPS_HDBSCAN_4"]
+          - values: ["--method", "FCPS_HDBSCAN_8"]
+          - values: ["--method", "FCPS_Diana"]
+          - values: ["--method", "FCPS_Fanny"]
+          - values: ["--method", "FCPS_Hardcl"]
+          - values: ["--method", "FCPS_Softcl"]
+          - values: ["--method", "FCPS_Clara"]
+          - values: ["--method", "FCPS_PAM"]
+    inputs:
+      - entries:
+          - data.matrix
+          - data.true_labels
+    outputs:
+      - id: clustering.predicted_ks_range
+        path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
+  - id: metrics # stage that reads clustering.predicted_ks_range and data.true_labels and emits metrics.scores
+    modules:
+      - id: partition_metrics
+        name: "clustbench partition metrics"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_metrics
+          commit: 9132d45
+        parameters:
+          - values: ["--metric", "normalized_clustering_accuracy"]
+          - values: ["--metric", "adjusted_fm_score"]
+          - values: ["--metric", "adjusted_mi_score"]
+          - values: ["--metric", "adjusted_rand_score"]
+          - values: ["--metric", "fm_score"]
+          - values: ["--metric", "mi_score"]
+          - values: ["--metric", "normalized_clustering_accuracy"] # NOTE(review): repeats the first entry of this list — confirm the duplicate is intended
+          - values: ["--metric", "normalized_mi_score"]
+          - values: ["--metric", "normalized_pivoted_accuracy"]
+          - values: ["--metric", "pair_sets_index"]
+          - values: ["--metric", "rand_score"]
+    inputs:
+      - entries:
+          - clustering.predicted_ks_range
+          - data.true_labels
+    outputs:
+      - id: metrics.scores
+        path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
+id: clustering_benchmark_apptainer_vanilla_local
+description: Clustering benchmark on Gagolewski's. Using apptainer (locally built image)
+software_backend: apptainer
diff --git a/Clustering_apptainer_vanilla_short.yml b/Clustering_apptainer_vanilla_short.yml
new file mode 100644
index 0000000..01a1fe2
--- /dev/null
+++ b/Clustering_apptainer_vanilla_short.yml
@@ -0,0 +1,121 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.5
+software_environments: # one entry per environment; each lists envmodule, conda and apptainer handles
+  clustbench:
+    description: "clustbench on py3.12.3, default python"
+    envmodule: clustbench/0.1.0-foss-2023b
+    conda: envs/clustbench.yml
+    apptainer: envs/clustbench.sif
+  fcps:
+    description: "CRAN's FCPS"
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    conda: envs/fcps.yml
+    apptainer: envs/fcps.sif
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: envs/fcps.sif # NOTE(review): same image as the fcps environment — confirm it provides the plotting dependencies
+metric_collectors:
+  - id: plotting
+    name: "Single-backend metric collector."
+    software_environment: rmarkdown
+    repository:
+      url: https://github.com/imallona/clustering_report
+      commit: 1d6bdf5
+    inputs:
+      - metrics.scores
+    outputs:
+      - id: plotting.html
+        path: "{input}/{name}/plotting_report.html"
+stages:
+  - id: data
+    modules:
+      - id: clustbench
+        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_data
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
+    outputs:
+      - id: data.matrix
+        path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
+      - id: data.true_labels
+        path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
+  - id: clustering
+    modules:
+      - id: fastcluster
+        name: "fastcluster algorithm"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_fastcluster
+          commit: "45e43d3"
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: sklearn
+        name: "sklearn"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_sklearn
+          commit: "5877378" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "birch"]
+          - values: ["--method", "kmeans"]
+      - id: agglomerative
+        name: "agglomerative"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_agglomerative
+          commit: "5454368" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: genieclust
+        name: "genieclust"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_genieclust
+          commit: "6090043" # quoted: an all-digit short SHA would otherwise load as an integer
+        parameters:
+          - values: ["--method", "genie", "--gini_threshold", 0.5]
+      - id: fcps
+        name: "fcps"
+        software_environment: fcps
+        repository:
+          url: https://github.com/imallona/clustbench_fcps
+          commit: 272fa5f
+        parameters:
+          - values: ["--method", "FCPS_Minimax"]
+    inputs:
+      - entries:
+          - data.matrix
+          - data.true_labels
+    outputs:
+      - id: clustering.predicted_ks_range
+        path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
+  - id: metrics
+    modules:
+      - id: partition_metrics
+        name: "clustbench partition metrics"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_metrics
+          commit: 8184cd4
+        parameters:
+          - values: ["--metric", "normalized_clustering_accuracy"]
+          - values: ["--metric", "adjusted_fm_score"]
+    inputs:
+      - entries:
+          - clustering.predicted_ks_range
+          - data.true_labels
+    outputs:
+      - id: metrics.scores
+        path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
+id: clustering_benchmark_apptainer_vanilla_local
+description: Clustering benchmark on Gagolewski's. Using apptainer (locally built image)
+software_backend: apptainer
diff --git a/Clustering_conda.yml b/Clustering_conda.yml
index 7ac1629..5fd45d2 100644
--- a/Clustering_conda.yml
+++ b/Clustering_conda.yml
@@ -1,42 +1,27 @@
-id: clustering_example_conda
-description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2.
-version: 1.4
-benchmarker: "Izaskun Mallona, Daniel Incicau"
-storage: http://omnibenchmark.org:9000
-benchmark_yaml_spec: 0.04
-storage_api: S3
-storage_bucket_name: clusteringexampleconda
-software_backend: conda
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.4
 software_environments:
   clustbench:
-    description: "clustbench on py3.12.6"
+    description: "clustbench on py3.12.9, optimized python build"
     conda: envs/clustbench.yml
-    envmodule: clustbench
-    apptainer: envs/clustbench.sif
-  sklearn:
-    description: "Daniel's on py3.12.6"
-    conda: envs/sklearn.yml
-    apptainer: envs/sklearn.sif
-    envmodule: clustbench # not true, but
-  R:
-    description: "Daniel's R with readr, dplyr, mclust, caret"
-    conda: envs/r.yml
-    apptainer: envs/r.sif
-    envmodule: fcps # not true, but
-  rmarkdown:
-    description: "R with some plotting dependencies"
-    conda: envs/rmarkdown.yml
-    apptainer: envs/r.sif # not true, but
-    envmodule: fcps # not true, but
+    envmodule: clustbench/0.1.0-foss-2023b
+    apptainer: oras://quay.io/omnibenchmark/clustbench-vanilla:0.1.0
   fcps:
     description: "CRAN's FCPS"
     conda: envs/fcps.yml
-    apptainer: envs/fcps.sif
-    envmodule: fcps
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
 metric_collectors:
   - id: plotting
     name: "Single-backend metric collector."
-    software_environment: "rmarkdown"
+    software_environment: rmarkdown
     repository:
       url: https://github.com/imallona/clustering_report
       commit: 1d6bdf5
@@ -46,17 +31,15 @@ metric_collectors:
       - id: plotting.html
         path: "{input}/{name}/plotting_report.html"
 stages:
-  ## clustbench data ##########################################################
-
   - id: data
     modules:
       - id: clustbench
         name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_data
-          commit: 366c5a2
-        parameters:  # comments depict the possible cardinalities and the number of curated labelsets
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
           - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
           - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] #	2	1
           - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] #	2	2
@@ -124,17 +107,13 @@ stages:
         path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
       - id: data.true_labels
         path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
-
-  ## clustbench methods (fastcluster) ###################################################################
-  
   - id: clustering
     modules:
       - id: fastcluster
         name: "fastcluster algorithm"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_fastcluster
-          # url: /home/imallona/src/clustbench_fastcluster/
           commit: "45e43d3"
         parameters:
           - values: ["--linkage", "complete"]
@@ -144,11 +123,10 @@ stages:
           - values: ["--linkage", "median"]
           - values: ["--linkage", "centroid"]
       - id: sklearn
-        name: "sklearn"
-        software_environment: "clustbench"
+        name: sklearn
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_sklearn
-          #url: /home/imallona/src/clustbench_sklearn
           commit: 5877378
         parameters:
           - values: ["--method", "birch"]
@@ -166,8 +144,8 @@ stages:
           - values: ["--linkage", "complete"]
           - values: ["--linkage", "ward"]
       - id: genieclust
-        name: "genieclust"
-        software_environment: "clustbench"
+        name: genieclust
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_genieclust
           commit: 6090043
@@ -201,12 +179,11 @@ stages:
     outputs:
       - id: clustering.predicted_ks_range
         path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
-
   - id: metrics
     modules:
       - id: partition_metrics
         name: "clustbench partition metrics"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_metrics
           commit: 9132d45
@@ -229,89 +206,6 @@ stages:
     outputs:
       - id: metrics.scores
         path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
-
-  # ## daniel's data ###########################################################################
-  
-  # - id: danielsdata
-  #   modules:
-  #     - id: iris_manual
-  #       name: "Iris Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/iris.git
-  #         commit: 47c63f0
-  #     - id: penguins
-  #       name: "Penguins Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/penguins.git
-  #         commit: 9032478
-  #   outputs:
-  #     - id: data.features
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv"
-  #     - id: data.labels
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv"
-  
-  # ## daniel's distances ########################################################################
-  
-  # - id: distances
-  #   modules:
-  #     - id: D1
-  #       software_environment: "sklearn"
-  #       parameters:
-  #         - values: ["--measure", "cosine"]
-  #         - values: ["--measure", "euclidean"]
-  #         - values: ["--measure", "manhattan"]
-  #         - values: ["--measure", "chebyshev"]
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/distance.git
-  #         commit: dd99d4f
-  #   inputs:
-  #     - entries:
-  #         - data.features
-  #   outputs:
-  #     - id: distances
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv"
-        
-  # ## daniel's methods ###################################################################
-  
-  # - id: danielmethods
-  #   modules:
-  #     - id: kmeans
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/kmeans.git
-  #         commit: 049c8b1
-  #     - id: ward
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ward.git
-  #         commit: 976e3f3
-  #   inputs:
-  #     - entries:
-  #         - distances
-  #   outputs:
-  #     - id: methods.clusters
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv"
-
-  # ## daniel's metrics ###################################################################
-
-  # - id: danielsmetrics
-  #   modules:
-  #     - id: ari
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ari.git
-  #         commit: 72708f0
-  #     - id: accuracy
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/accuracy.git
-  #         commit: e26b32f
-  #   inputs:
-  #     - entries:
-  #         - methods.clusters
-  #         - data.labels
-  #   outputs:
-  #     - id: metrics.mapping
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt"
+id: clustering_benchmark_conda
+description: Clustering benchmark on Gagolewski's. Using conda.
+software_backend: conda
diff --git a/Clustering_conda_short.yml b/Clustering_conda_short.yml
new file mode 100644
index 0000000..fd9ae01
--- /dev/null
+++ b/Clustering_conda_short.yml
@@ -0,0 +1,121 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.5
+software_environments:
+  clustbench:
+    description: "clustbench on py3.12.3, default python"
+    envmodule: clustbench/0.1.0-foss-2023b
+    conda: envs/clustbench.yml
+    apptainer: oras://quay.io/omnibenchmark/clustbench-vanilla:0.1.0
+  fcps:
+    description: "CRAN's FCPS"
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    conda: envs/fcps.yml
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+metric_collectors:
+  - id: plotting
+    name: "Single-backend metric collector."
+    software_environment: rmarkdown
+    repository:
+      url: https://github.com/imallona/clustering_report
+      commit: 1d6bdf5
+    inputs:
+      - metrics.scores
+    outputs:
+      - id: plotting.html
+        path: "{input}/{name}/plotting_report.html"
+stages:
+  - id: data
+    modules:
+      - id: clustbench
+        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_data
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
+    outputs:
+      - id: data.matrix
+        path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
+      - id: data.true_labels
+        path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
+  - id: clustering
+    modules:
+      - id: fastcluster
+        name: "fastcluster algorithm"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_fastcluster
+          commit: "45e43d3"
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: sklearn
+        name: "sklearn"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_sklearn
+          commit: 5877378
+        parameters:
+          - values: ["--method", "birch"]
+          - values: ["--method", "kmeans"]
+      - id: agglomerative
+        name: "agglomerative"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_agglomerative
+          commit: 5454368
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: genieclust
+        name: "genieclust"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_genieclust
+          commit: 6090043
+        parameters:
+          - values: ["--method", "genie", "--gini_threshold", 0.5]
+      - id: fcps
+        name: "fcps"
+        software_environment: fcps
+        repository:
+          url: https://github.com/imallona/clustbench_fcps
+          commit: 272fa5f
+        parameters:
+          - values: ["--method", "FCPS_Minimax"]
+    inputs:
+      - entries:
+          - data.matrix
+          - data.true_labels
+    outputs:
+      - id: clustering.predicted_ks_range
+        path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
+  - id: metrics
+    modules:
+      - id: partition_metrics
+        name: "clustbench partition metrics"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_metrics
+          commit: 8184cd4
+        parameters:
+          - values: ["--metric", "normalized_clustering_accuracy"]
+          - values: ["--metric", "adjusted_fm_score"]
+    inputs:
+      - entries:
+          - clustering.predicted_ks_range
+          - data.true_labels
+    outputs:
+      - id: metrics.scores
+        path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
+id: clustering_benchmark_conda
+description: Clustering benchmark on Gagolewski's. Using conda.
+software_backend: conda
diff --git a/Clustering_envmodules.yml b/Clustering_envmodules.yml
index 3c2b8bd..805e130 100644
--- a/Clustering_envmodules.yml
+++ b/Clustering_envmodules.yml
@@ -1,42 +1,27 @@
-id: clustering_example_envmodules
-description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2.
-version: 1.4
-benchmarker: "Izaskun Mallona, Daniel Incicau"
-storage: http://omnibenchmark.org:9000
-benchmark_yaml_spec: 0.04
-storage_api: S3
-storage_bucket_name: clusteringexampleenvmodules
-software_backend: envmodules
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.4
 software_environments:
   clustbench:
-    description: "clustbench on py3.12.6"
+    description: "clustbench on py3.12.9, optimized python build"
     conda: envs/clustbench.yml
-    envmodule: clustbench
-    apptainer: envs/clustbench.sif
-  sklearn:
-    description: "Daniel's on py3.12.6"
-    conda: envs/sklearn.yml
-    apptainer: envs/sklearn.sif
-    envmodule: clustbench # not true, but
-  R:
-    description: "Daniel's R with readr, dplyr, mclust, caret"
-    conda: envs/r.yml
-    apptainer: envs/r.sif
-    envmodule: fcps # not true, but
-  rmarkdown:
-    description: "R with some plotting dependencies"
-    conda: envs/rmarkdown.yml
-    apptainer: envs/r.sif # not true, but
-    envmodule: fcps # not true, but
+    envmodule: clustbench/0.1.0-foss-2023b
+    apptainer: oras://quay.io/omnibenchmark/clustbench-vanilla:0.1.0
   fcps:
     description: "CRAN's FCPS"
     conda: envs/fcps.yml
-    apptainer: envs/fcps.sif
-    envmodule: fcps
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
 metric_collectors:
   - id: plotting
     name: "Single-backend metric collector."
-    software_environment: "rmarkdown"
+    software_environment: rmarkdown
     repository:
       url: https://github.com/imallona/clustering_report
       commit: 1d6bdf5
@@ -46,17 +31,15 @@ metric_collectors:
       - id: plotting.html
         path: "{input}/{name}/plotting_report.html"
 stages:
-  ## clustbench data ##########################################################
-
   - id: data
     modules:
       - id: clustbench
         name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_data
-          commit: 366c5a2
-        parameters:  # comments depict the possible cardinalities and the number of curated labelsets
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
           - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
           - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] #	2	1
           - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] #	2	2
@@ -124,17 +107,13 @@ stages:
         path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
       - id: data.true_labels
         path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
-
-  ## clustbench methods (fastcluster) ###################################################################
-  
   - id: clustering
     modules:
       - id: fastcluster
         name: "fastcluster algorithm"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_fastcluster
-          # url: /home/imallona/src/clustbench_fastcluster/
           commit: "45e43d3"
         parameters:
           - values: ["--linkage", "complete"]
@@ -144,11 +123,10 @@ stages:
           - values: ["--linkage", "median"]
           - values: ["--linkage", "centroid"]
       - id: sklearn
-        name: "sklearn"
-        software_environment: "clustbench"
+        name: sklearn
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_sklearn
-          #url: /home/imallona/src/clustbench_sklearn
           commit: 5877378
         parameters:
           - values: ["--method", "birch"]
@@ -166,8 +144,8 @@ stages:
           - values: ["--linkage", "complete"]
           - values: ["--linkage", "ward"]
       - id: genieclust
-        name: "genieclust"
-        software_environment: "clustbench"
+        name: genieclust
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_genieclust
           commit: 6090043
@@ -201,12 +179,11 @@ stages:
     outputs:
       - id: clustering.predicted_ks_range
         path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
-
   - id: metrics
     modules:
       - id: partition_metrics
         name: "clustbench partition metrics"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_metrics
           commit: 9132d45
@@ -229,89 +206,6 @@ stages:
     outputs:
       - id: metrics.scores
         path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
-
-  # ## daniel's data ###########################################################################
-  
-  # - id: danielsdata
-  #   modules:
-  #     - id: iris_manual
-  #       name: "Iris Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/iris.git
-  #         commit: 47c63f0
-  #     - id: penguins
-  #       name: "Penguins Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/penguins.git
-  #         commit: 9032478
-  #   outputs:
-  #     - id: data.features
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv"
-  #     - id: data.labels
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv"
-  
-  # ## daniel's distances ########################################################################
-  
-  # - id: distances
-  #   modules:
-  #     - id: D1
-  #       software_environment: "sklearn"
-  #       parameters:
-  #         - values: ["--measure", "cosine"]
-  #         - values: ["--measure", "euclidean"]
-  #         - values: ["--measure", "manhattan"]
-  #         - values: ["--measure", "chebyshev"]
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/distance.git
-  #         commit: dd99d4f
-  #   inputs:
-  #     - entries:
-  #         - data.features
-  #   outputs:
-  #     - id: distances
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv"
-        
-  # ## daniel's methods ###################################################################
-  
-  # - id: danielmethods
-  #   modules:
-  #     - id: kmeans
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/kmeans.git
-  #         commit: 049c8b1
-  #     - id: ward
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ward.git
-  #         commit: 976e3f3
-  #   inputs:
-  #     - entries:
-  #         - distances
-  #   outputs:
-  #     - id: methods.clusters
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv"
-
-  # ## daniel's metrics ###################################################################
-
-  # - id: danielsmetrics
-  #   modules:
-  #     - id: ari
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ari.git
-  #         commit: 72708f0
-  #     - id: accuracy
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/accuracy.git
-  #         commit: e26b32f
-  #   inputs:
-  #     - entries:
-  #         - methods.clusters
-  #         - data.labels
-  #   outputs:
-  #     - id: metrics.mapping
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt"
+id: clustering_benchmark_envmodules
+description: Clustering benchmark on Gagolewski's. Using envmodules.
+software_backend: envmodules
diff --git a/Clustering_envmodules_short.yml b/Clustering_envmodules_short.yml
new file mode 100644
index 0000000..e3dc0fd
--- /dev/null
+++ b/Clustering_envmodules_short.yml
@@ -0,0 +1,121 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.5
+software_environments:
+  clustbench:
+    description: "clustbench on py3.12.3, default python"
+    envmodule: clustbench/0.1.0-foss-2023b
+    conda: envs/clustbench.yml
+    apptainer: oras://quay.io/omnibenchmark/clustbench-vanilla:0.1.0
+  fcps:
+    description: "CRAN's FCPS"
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    conda: envs/fcps.yml
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+metric_collectors:
+  - id: plotting
+    name: "Single-backend metric collector."
+    software_environment: rmarkdown
+    repository:
+      url: https://github.com/imallona/clustering_report
+      commit: 1d6bdf5
+    inputs:
+      - metrics.scores
+    outputs:
+      - id: plotting.html
+        path: "{input}/{name}/plotting_report.html"
+stages:
+  - id: data
+    modules:
+      - id: clustbench
+        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_data
+          commit: 31ac323
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
+    outputs:
+      - id: data.matrix
+        path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
+      - id: data.true_labels
+        path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
+  - id: clustering
+    modules:
+      - id: fastcluster
+        name: "fastcluster algorithm"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_fastcluster
+          commit: "45e43d3"
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: sklearn
+        name: "sklearn"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_sklearn
+          commit: 5877378
+        parameters:
+          - values: ["--method", "birch"]
+          - values: ["--method", "kmeans"]
+      - id: agglomerative
+        name: "agglomerative"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_agglomerative
+          commit: 5454368
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: genieclust
+        name: "genieclust"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_genieclust
+          commit: 6090043
+        parameters:
+          - values: ["--method", "genie", "--gini_threshold", 0.5]
+      - id: fcps
+        name: "fcps"
+        software_environment: fcps
+        repository:
+          url: https://github.com/imallona/clustbench_fcps
+          commit: 272fa5f
+        parameters:
+          - values: ["--method", "FCPS_Minimax"]
+    inputs:
+      - entries:
+          - data.matrix
+          - data.true_labels
+    outputs:
+      - id: clustering.predicted_ks_range
+        path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
+  - id: metrics
+    modules:
+      - id: partition_metrics
+        name: "clustbench partition metrics"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_metrics
+          commit: 8184cd4
+        parameters:
+          - values: ["--metric", "normalized_clustering_accuracy"]
+          - values: ["--metric", "adjusted_fm_score"]
+    inputs:
+      - entries:
+          - clustering.predicted_ks_range
+          - data.true_labels
+    outputs:
+      - id: metrics.scores
+        path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
+id: clustering_benchmark_envmodules
+description: Clustering benchmark on Gagolewski's. Using envmodules.
+software_backend: envmodules
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..71b6860
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,78 @@
+MAX_CORES ?= 10
+TIMEOUT ?= 4h
+YQ_MERGE=yq eval-all 'select(fileIndex==1) * select(fileIndex==0)'
+YQ_REPOS=yq '.stages[].modules[] | .id + ": " + .repository.url + "@" + .repository.commit'
+
+# by default, we want to run all snakemake rules even if there are failures (-k)
+OB_CMD=ob run benchmark -k --local --task-timeout ${TIMEOUT} --cores ${MAX_CORES} --yes
+
+APPTR = apptainer
+APPTV = apptainer_vanilla
+APPTO = apptainer_optimized
+CONDA = conda
+ENVMD = envmodules
+
+BASE       = base.yml
+BASE_SHORT = smoketest/base.yml
+
+# Install dependencies to generate files (requires go in the system)
+deps:
+	go install github.com/mikefarah/yq/v4@latest
+
+# Generate all the yaml files from base + overrides
+.SILENT: generate
+generate:
+	${YQ_MERGE} overrides/${APPTR}.yml ${BASE} > Clustering_${APPTR}.yml
+	${YQ_MERGE} overrides/${APPTV}.yml ${BASE} > Clustering_${APPTV}.yml
+	${YQ_MERGE} overrides/${APPTO}.yml ${BASE} > Clustering_${APPTO}.yml
+	${YQ_MERGE} overrides/${CONDA}.yml ${BASE} > Clustering_${CONDA}.yml
+	${YQ_MERGE} overrides/${ENVMD}.yml ${BASE} > Clustering_${ENVMD}.yml
+	${YQ_MERGE} overrides/${APPTR}.yml ${BASE_SHORT} > Clustering_${APPTR}_short.yml
+	${YQ_MERGE} overrides/${APPTV}.yml ${BASE_SHORT} > Clustering_${APPTV}_short.yml
+	${YQ_MERGE} overrides/${APPTO}.yml ${BASE_SHORT} > Clustering_${APPTO}_short.yml
+	${YQ_MERGE} overrides/${CONDA}.yml ${BASE_SHORT} > Clustering_${CONDA}_short.yml
+	${YQ_MERGE} overrides/${ENVMD}.yml ${BASE_SHORT} > Clustering_${ENVMD}_short.yml
+	echo "[+] The following files have been generated:"
+	ls Clustering_*.yml
+	echo "[+] You can use 'make clean' to delete them"
+
+clean:  # remove the generated benchmark files; -f so an already-clean tree is not an error
+	rm -f Clustering_*.yml
+
+prepare_apptainer_env:
+	cd envs && ./build_singularity.sh
+prepare_envmodules_env:
+	cd envs && eb clustbench.eb --robot
+	cd envs && eb fcps.eb --robot
+	cd envs && eb rmarkdown.eb --robot
+
+# short versions, to debug runs & environments
+run_with_apptainer_backend_short:
+	 ${OB_CMD} -b Clustering_${APPTR}_short.yml
+	 mv out out_${APPTR}_short-$(shell date +'%Y%m%d%H%M')
+run_with_apptainer_backend_vanilla_short:
+	 ${OB_CMD} -b Clustering_${APPTV}_short.yml
+	 mv out out_${APPTV}_short-$(shell date +'%Y%m%d%H%M')
+run_with_conda_backend_short:
+	 ${OB_CMD} -b Clustering_${CONDA}_short.yml
+	 mv out out_${CONDA}_short-$(shell date +'%Y%m%d%H%M')
+run_with_envmodules_backend_short:
+	 ${OB_CMD} -b Clustering_${ENVMD}_short.yml
+	 mv out out_${ENVMD}_short-$(shell date +'%Y%m%d%H%M')
+
+# full versions (expect hours)
+run_with_apptainer_backend:
+	 ${OB_CMD} -b Clustering_${APPTR}.yml
+	 mv out out_${APPTR}-$(shell date +'%Y%m%d%H%M')
+run_with_apptainer_backend_vanilla:
+	 ${OB_CMD} -b Clustering_${APPTV}.yml
+	 mv out out_${APPTV}-$(shell date +'%Y%m%d%H%M')
+run_with_conda_backend:
+	 ${OB_CMD} -b Clustering_${CONDA}.yml
+	 mv out out_${CONDA}-$(shell date +'%Y%m%d%H%M')
+run_with_envmodules_backend:
+	 ${OB_CMD} -b Clustering_${ENVMD}.yml
+	 mv out out_${ENVMD}-$(shell date +'%Y%m%d%H%M')
+
+extract_modules:
+	@${YQ_REPOS} base.yml
diff --git a/README.md b/README.md
index a75c594..7859113 100644
--- a/README.md
+++ b/README.md
@@ -2,29 +2,30 @@ A clustering example for omnibenchmark
 
 # How to run
 
-1. Install omnibenchmark using [our tutorial](https://omnibenchmark.org/tutorial/)
-2. Clone the benchmark definition / this repository with `git clone git@github.com:omnibenchmark/clustering_example.git`
-3. Move to the cloned repository `cd clustering_example`
-4. Run locally, somewhat in parallel `ob run benchmark -b CLUSTERING.YAML  --local --threads 6`. Choose `Clustering.yml` specification based on whether running it with conda, easybuild, apptainer, etc. [More details about the available backends](https://github.com/omnibenchmark/clustering_example/blob/main/envs/README.md).
+1. Install omnibenchmark: `pip install "omnibenchmark>=0.2.0"` (quoted so the shell does not treat `>` as a redirect)
+2. Clone the benchmark definition in this repository with `git clone https://github.com/omnibenchmark/clustering_example`
+3. Move into the cloned folder: `cd clustering_example`
+4. Run locally, with the desired degree of parallelism:
+   `ob run benchmark -b <Clustering_flavor.yml> --local --cores 6`.
+   Choose your `Clustering_*.yml` specification based on the backend you want to run (conda, easybuild or apptainer). [More details about the available backends and how to build or enable them](https://github.com/omnibenchmark/clustering_example/blob/main/envs/README.md).
 
-# Clustbench attribution
+# Software backends and variants
 
-by Marek Gagolewski, modified by Izaskun Mallona
+* All needed recipes can be found under `envs`: conda, apptainer, easybuild (lmod modules)
+* The `_short` variants (`Clustering_<backend>_short.yml`, generated from `smoketest/base.yml`) are meant for [quick testing](https://en.wikipedia.org/wiki/Smoke_testing_(software)).
+* The default `apptainer` container fetches images from an online registry.
+* `apptainer_vanilla` refers to a container image with stock python (`3.12`).
+* `apptainer_optimized` refers to a container image with a custom-compiled python (`3.12.9`), to check whether optimization flags have a noticeable effect.
+* `envmodules` requires you to first build the `.eb` easyconfigs with easybuild. We plan to make these modules publicly available in the future.
+* `conda` environments fetch software from the configured conda channels and PyPI. Nothing is compiled; pre-built binaries are fetched (assuming those channels provide a build for your architecture).
 
-# Data disclaimer
+[More info in the envs/ folder](https://github.com/omnibenchmark/clustering_example/blob/main/envs/README.md).
 
-Some datasets are commented out to speed up calculations.
-
-From [Are cluster validity measures (in) valid?](https://www.sciencedirect.com/science/article/pii/S0020025521010082):
-
-> The original benchmark battery consists of 79 data instances, however 16 datasets are accompanied by labels that yield ; they were omitted for their computation would be too lengthy (namely: mnist/digits, mnist/fashion, other/chameleon_t7_10k, other/chameleon_t8_8k, sipu/a1, sipu/a2, sipu/a3, sipu/birch1, sipu/birch2, sipu/d31, sipu/s1, sipu/s2, sipu/s3, sipu/s4, sipu/worms_2, sipu/worms_64). Also uci/glass has been removed as one of its 25-near-neighbour graph’s connected components was too small for the NN-based methods to succeed. This leaves us with 62 datasets in total, see Table 1.
-
-A yaml such as [0a88c91](https://github.com/omnibenchmark/clustering_example/blob/0a88c910bbda62d1b593f4215a682770227f39ff/Clustering.yaml) with 30 cores should run half of the stuff in ~4 h and reach 97% completion in ~8h.
 
 # Summary
 
 - Data. Example datasets (not a comprehensive list, it's >79 of them):
-  - https://github.com/imallona/clustbench_data 
+  - https://github.com/imallona/clustbench_data
     - args: ["--dataset_generator", "mnist", "--dataset_name", "fashion"]
     - args: ["--dataset_generator", "other", "--dataset_name", "iris"]
     - args: ["--dataset_generator", "mnist", "--dataset_name", "digits"]
@@ -37,7 +38,7 @@ A yaml such as [0a88c91](https://github.com/omnibenchmark/clustering_example/blo
     - args: ["--linkage", "weighted"]
     - args: ["--linkage", "median"]
     - args: ["--linkage", "centroid"]
-  - https://github.com/imallona/clustbench_sklearn 
+  - https://github.com/imallona/clustbench_sklearn
     - args: ["--method", "birch"]
     - args: ["--method", "kmeans"]
     - args: ["--method", "spectral"] ## too slow
@@ -84,14 +85,29 @@ A yaml such as [0a88c91](https://github.com/omnibenchmark/clustering_example/blo
   - https://github.com/omnibenchmark-example/ward.git
   - https://github.com/omnibenchmark-example/ari.git
   - https://github.com/omnibenchmark-example/accuracy.git
-  
-  
-# Software backends
 
-In `envs`: conda, apptainer, easybuild (lmod modules)
+
+# Omnibenchmark YAML generation
+
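+This repository keeps base templates (`base.yml`, and `smoketest/base.yml` for the short variants) that are merged with the per-backend files in `overrides/`.
+If you modify a template or an override, install [yq](https://github.com/mikefarah/yq) and run `make generate` to regenerate the `Clustering_*.yml` files.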
 
 # Warnings
 
 Mind we try to run clusterings specifying the true number of clusters +- 2. But sometimes the true number is k=3. Then we do `k=2, k=2, k=3, k=5, k=6` filling with k=2s as needed, and recomputing the same values multiple times (so runtimes are comparable across datasets, regardless of their true number of clusters).
 
 Also, we have modules by Daniel not fully incorporated into Gagolewski's flow.
+
+# Data disclaimer
+
+Some datasets are commented out to speed up calculations.
+
+From [Are cluster validity measures (in) valid?](https://www.sciencedirect.com/science/article/pii/S0020025521010082):
+
+> The original benchmark battery consists of 79 data instances, however 16 datasets are accompanied by labels that yield ; they were omitted for their computation would be too lengthy (namely: mnist/digits, mnist/fashion, other/chameleon_t7_10k, other/chameleon_t8_8k, sipu/a1, sipu/a2, sipu/a3, sipu/birch1, sipu/birch2, sipu/d31, sipu/s1, sipu/s2, sipu/s3, sipu/s4, sipu/worms_2, sipu/worms_64). Also uci/glass has been removed as one of its 25-near-neighbour graph’s connected components was too small for the NN-based methods to succeed. This leaves us with 62 datasets in total, see Table 1.
+
+A yaml such as [0a88c91](https://github.com/omnibenchmark/clustering_example/blob/0a88c910bbda62d1b593f4215a682770227f39ff/Clustering.yaml) with 30 cores should run half of the stuff in ~4 h and reach 97% completion in ~8h.
+
+# Clustbench attribution
+
+by Marek Gagolewski, modified by Izaskun Mallona
diff --git a/Clustering_singularity.yml b/base.yml
similarity index 69%
rename from Clustering_singularity.yml
rename to base.yml
index c80b498..bfadca6 100644
--- a/Clustering_singularity.yml
+++ b/base.yml
@@ -1,42 +1,32 @@
-id: clustering_example_apptainer
-description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2.
-version: 1.4
-benchmarker: "Izaskun Mallona, Daniel Incicau"
-storage: http://omnibenchmark.org:9000
-benchmark_yaml_spec: 0.04
-storage_api: S3
-storage_bucket_name: clusteringexampleapptainer
-software_backend: apptainer
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.4
+
 software_environments:
+
   clustbench:
-    description: "clustbench on py3.12.6"
+    description: "clustbench on py3.12.9, optimized python build"
     conda: envs/clustbench.yml
-    envmodule: clustbench
-    apptainer: envs/clustbench.sif
-  sklearn:
-    description: "Daniel's on py3.12.6"
-    conda: envs/sklearn.yml
-    apptainer: envs/sklearn.sif
-    envmodule: clustbench # not true, but
-  R:
-    description: "Daniel's R with readr, dplyr, mclust, caret"
-    conda: envs/r.yml
-    apptainer: envs/r.sif
-    envmodule: fcps # not true, but
-  rmarkdown:
-    description: "R with some plotting dependencies"
-    conda: envs/rmarkdown.yml
-    apptainer: envs/r.sif # not true, but
-    envmodule: fcps # not true, but
+    envmodule: clustbench/0.1.0-foss-2023b
+    apptainer: oras://quay.io/omnibenchmark/clustbench-vanilla:0.1.0
+
   fcps:
     description: "CRAN's FCPS"
     conda: envs/fcps.yml
-    apptainer: envs/fcps.sif
-    envmodule: fcps
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+
+
 metric_collectors:
   - id: plotting
     name: "Single-backend metric collector."
-    software_environment: "rmarkdown"
+    software_environment: rmarkdown
     repository:
       url: https://github.com/imallona/clustering_report
       commit: 1d6bdf5
@@ -45,17 +35,18 @@ metric_collectors:
     outputs:
       - id: plotting.html
         path: "{input}/{name}/plotting_report.html"
+
 stages:
-  ## clustbench data ##########################################################
 
   - id: data
     modules:
       - id: clustbench
         name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_data
-          commit: 366c5a2
+          commit: 31ac323
+
         parameters:  # comments depict the possible cardinalities and the number of curated labelsets
           - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
           - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] #	2	1
@@ -125,16 +116,13 @@ stages:
       - id: data.true_labels
         path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
 
-  ## clustbench methods (fastcluster) ###################################################################
-  
   - id: clustering
     modules:
       - id: fastcluster
         name: "fastcluster algorithm"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_fastcluster
-          # url: /home/imallona/src/clustbench_fastcluster/
           commit: "45e43d3"
         parameters:
           - values: ["--linkage", "complete"]
@@ -143,12 +131,12 @@ stages:
           - values: ["--linkage", "weighted"]
           - values: ["--linkage", "median"]
           - values: ["--linkage", "centroid"]
+
       - id: sklearn
-        name: "sklearn"
-        software_environment: "clustbench"
+        name: sklearn
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_sklearn
-          #url: /home/imallona/src/clustbench_sklearn
           commit: 5877378
         parameters:
           - values: ["--method", "birch"]
@@ -166,8 +154,8 @@ stages:
           - values: ["--linkage", "complete"]
           - values: ["--linkage", "ward"]
       - id: genieclust
-        name: "genieclust"
-        software_environment: "clustbench"
+        name: genieclust
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_genieclust
           commit: 6090043
@@ -206,7 +194,7 @@ stages:
     modules:
       - id: partition_metrics
         name: "clustbench partition metrics"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_metrics
           commit: 9132d45
@@ -230,88 +218,3 @@ stages:
       - id: metrics.scores
         path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
 
-  # ## daniel's data ###########################################################################
-  
-  # - id: danielsdata
-  #   modules:
-  #     - id: iris_manual
-  #       name: "Iris Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/iris.git
-  #         commit: 47c63f0
-  #     - id: penguins
-  #       name: "Penguins Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/penguins.git
-  #         commit: 9032478
-  #   outputs:
-  #     - id: data.features
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv"
-  #     - id: data.labels
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv"
-  
-  # ## daniel's distances ########################################################################
-  
-  # - id: distances
-  #   modules:
-  #     - id: D1
-  #       software_environment: "sklearn"
-  #       parameters:
-  #         - values: ["--measure", "cosine"]
-  #         - values: ["--measure", "euclidean"]
-  #         - values: ["--measure", "manhattan"]
-  #         - values: ["--measure", "chebyshev"]
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/distance.git
-  #         commit: dd99d4f
-  #   inputs:
-  #     - entries:
-  #         - data.features
-  #   outputs:
-  #     - id: distances
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv"
-        
-  # ## daniel's methods ###################################################################
-  
-  # - id: danielmethods
-  #   modules:
-  #     - id: kmeans
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/kmeans.git
-  #         commit: 049c8b1
-  #     - id: ward
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ward.git
-  #         commit: 976e3f3
-  #   inputs:
-  #     - entries:
-  #         - distances
-  #   outputs:
-  #     - id: methods.clusters
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv"
-
-  # ## daniel's metrics ###################################################################
-
-  # - id: danielsmetrics
-  #   modules:
-  #     - id: ari
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ari.git
-  #         commit: 72708f0
-  #     - id: accuracy
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/accuracy.git
-  #         commit: e26b32f
-  #   inputs:
-  #     - entries:
-  #         - methods.clusters
-  #         - data.labels
-  #   outputs:
-  #     - id: metrics.mapping
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt"
diff --git a/envs/README.md b/envs/README.md
index 69aa5c1..335a9d7 100644
--- a/envs/README.md
+++ b/envs/README.md
@@ -1,58 +1,61 @@
 We distribute `Clustering.yml` runs with different backends.
 
-- `Clustering_conda.yml`. Conda semi-reproducible (no pinning, pip)
-- `Clustering_singularity.yml`. Singularity semi-reproducible, local SIF files.
-- `Clustering_oras.yml`. Singularity semi-reproducible, prebuilt remote images.
-- `Clustering_envmodules.yml`. Easybuilt with default optimization.
+- `Clustering_envmodules.yml`. Easybuild backend with default optimization.
+- `Clustering_apptainer.yml`. Apptainer, pinned, prebuilt remote images from [omnibenchmark's registry](https://quay.io/organization/omnibenchmark).
+- `Clustering_apptainer_vanilla.yml`. Singularity, pinned, from local SIF images.
+- `Clustering_apptainer_optimized.yml`. Singularity, pinned, from local SIF images. This image compiles a custom python with optimization flags.
+- `Clustering_conda.yml`. Conda semi-reproducible (no pinning, using pip)
 
+The `_short` variants are meant for smoke tests: they check that the environments run without operational problems (abnormal terminations, etc.).
 
-## Conda
+
+## envmodules - reproducible builds with easybuild
 
 ### Files
 
-- `clustbench.yml`
-- `fcps.yml`
-- `r.yml`
-- `sklearn.yml`
+- `clustbench.eb`
+- `fcps.eb`
+- `rmarkdown.eb`
+- `rmarkdown-python.eb`
 
 ### How to build
 
-No need to `ob software conda pin / prepare`; let `ob run benchmark -b Clustering_conda.yml --local` do it.
+- `make prepare_envmodules_env` from the root folder.
 
-## Apptainer semi-reproducible and local
+## Apptainer, pinned, with registry pull
 
-### Files
+No need to prepare/build anything, since the apptainer images are fetched from a remote registry:
 
-- `clustbench_singularity.def`
-- `fcps_singularity.def`
-- `r_singularity.def`
-- `sklearn_singularity.def`
+```bash
+make run_with_apptainer_backend
+```
 
-### How to build
+## Apptainer, pinned, local build
 
-- `build_singularity.sh`
+### Files
 
-## Aptainer semi-reproducible and remote
+The apptainer images are based on ubuntu-noble docker images.
 
-No need to prepare/build anything; let `ob run benchmark -b Clustering_oras.yml --local` do it using pre-built images from https://gitlab.renkulab.io/izaskun.mallona/clustering_example/container_registry.
+The "optimized" flavor does a custom python 3.12 compilation; the "vanillapy" flavor uses the stock py3.12 interpreter from the official ubuntu docker image.
 
-## Apptainer (reproducible) with easybuild
+- `clustbench_apptainer_optimized.def`
+- `clustbench_apptainer_vanillapy.def`
+- `fcps.def`
 
-Doing...
+### How to build the SIF images
 
-Lorem ipsum.
+- `make prepare_apptainer_env` from the root folder.
 
-## envmodules - reproducible builds with easybuild
+## Conda
 
 ### Files
 
-- `clustbench.eb`
-- `fcps.eb`
+- `clustbench.yml`
+- `fcps.yml`
+- `rmarkdown.yml`
 
 ### How to build
 
-1. Mind https://github.com/easybuilders/easybuild-easyconfigs/commit/e29210626f076e3a207f1abf3759ea124e28f8b2
-2. Mind `clustbench` is only installable from https://github.com/gagolews/genieclust/archive/refs/tags/v1.1.6.tar.gz and not from pypi's tgz (!), download it locally and ideally update the easyconfig to automate this
-3. `python3-wget` from pypi doesn't look very well maintaned
-4. `eb fcps.eb --robot`
-5. `eb clustbench.eb --robot`
+No need to `ob software conda pin / prepare`. Just use `ob run benchmark -b Clustering_conda.yml --local`.
+
+
diff --git a/envs/build_singularity.sh b/envs/build_singularity.sh
old mode 100644
new mode 100755
index 86e053f..099c4c1
--- a/envs/build_singularity.sh
+++ b/envs/build_singularity.sh
@@ -1,9 +1,11 @@
-#!/bin/bash
-
-sudo singularity build sklearn.sif sklearn_singularity.def
-
-sudo singularity build clustbench.sif clustbench_singularity.def
-
-sudo singularity build r.sif r_singularity.def
-
-sudo singularity build fcps.sif fcps_singularity.def
+#!/bin/sh
+# Builds singularity images.
+# Installation guide: check https://apptainer.org/docs/user/latest/quick_start.html#installation
+# Additionally, you will need:
+# apt install fakeroot uidmap
+CMD=singularity
+BUILD='build --fakeroot'
+# enable this if you want to compare with the custom python compilation
+# $CMD ${BUILD} clustbench-optimized.sif clustbench_apptainer_optimized.def
+$CMD ${BUILD} clustbench.sif clustbench_apptainer_vanillapy.def
+$CMD ${BUILD} fcps.sif fcps.def
diff --git a/envs/clustbench.eb b/envs/clustbench.eb
index 22597fb..7064c67 100644
--- a/envs/clustbench.eb
+++ b/envs/clustbench.eb
@@ -1,108 +1,42 @@
-## largely as https://github.com/easybuilders/easybuild-easyconfigs/blob/949c266db9e17440ec2829eb8ffdbdb87ceaf543/easybuild/easyconfigs/c/cooler/cooler-0.10.2-foss-2023b.eb#L4
-
 easyblock = 'PythonBundle'
 
 name = 'clustbench'
-version = '1'
+version = '0.1.0'
 
-homepage = 'https://python.org/'
+homepage = 'https://omnibenchmark.org'
 description = "Bundle of Python packages for ob clustering_example"
 
 toolchain = {'name': 'foss', 'version': '2023b'}
 
-
 dependencies = [
     ('Python', '3.11.5'),
-    ('Python-bundle-PyPI', '2023.10'), ## so GCC 13.2.0 like foss-2023b
     ('SciPy-bundle', '2023.11'),
-    ('meson-python', '0.15.0'),
     ('matplotlib', '3.8.2'),
-    ('scikit-learn', '1.4.0')
-
+    ('scikit-learn', '1.4.0'),
 ]
 
-sanity_pip_check = True 
-use_pip = True
-
-exts_default_options = {
-    'sanity_pip_check': True,
-    'use_pip' : True
-}
-
-## https://files.pythonhosted.org/packages/27/fe/e78538f4cd7b1b28e9c625eabd21f314004d00644a8347d0b01473e72ffa/clustering_benchmarks-1.1.5.tar.gz
-## https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip
-## https://files.pythonhosted.org/packages/5d/b8/f143d907d93bd4a3dd51d07c4e79b37bedbfc2177f4949bfa0d6ba0af647/fastcluster-1.2.6.tar.gz
-## https://files.pythonhosted.org/packages/68/7c/d465bab9f98b75c5c1f5e80165dd82847a504ced655d162b585df08a717b/genieclust-1.1.6.tar.gz
-## https://files.pythonhosted.org/packages/a2/45/eaaacaa4f4f2931a80d40e453df275d9af7c07616c5d753272d3055fb79e/genieclust-1.1.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
-
-source_urls = [PYPI_SOURCE,
-               'https://files.pythonhosted.org/packages/27/fe/e78538f4cd7b1b28e9c625eabd21f314004d00644a8347d0b01473e72ffa/',
-               'https://files.pythonhosted.org/packages/68/7c/d465bab9f98b75c5c1f5e80165dd82847a504ced655d162b585df08a717b/',
-               'https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/',
-               'https://files.pythonhosted.org/packages/5d/b8/f143d907d93bd4a3dd51d07c4e79b37bedbfc2177f4949bfa0d6ba0af647/',
-               'https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/',
-               'https://files.pythonhosted.org/packages/84/4d/b720d6000f4ca77f030bd70f12550820f0766b568e43f11af7f7ad9061aa/',
-               'https://files.pythonhosted.org/packages/68/dd/fa2e1a45fce2d09f4aea3cee169760e672c8262325aa5796c49d543dc7e6/',
-               'https://files.pythonhosted.org/packages/84/4d/b720d6000f4ca77f030bd70f12550820f0766b568e43f11af7f7ad9061aa',
-               'https://files.pythonhosted.org/packages/67/66/91d242ea8dd1729addd36069318ba2cd03874872764f316c3bb51b633ed2/',
-               'https://files.pythonhosted.org/packages/e2/a9/a0c57aee75f77794adaf35322f8b6404cbd0f89ad45c87197a937764b7d0/',
-               'https://files.pythonhosted.org/packages/d2/c1/72b9622fcb32ff98b054f724e213c7f70d6898baa714f4516288456ceaba/',
-               'https://github.com/pybind/pybind11/archive/',
-               'https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/',
-               'https://files.pythonhosted.org/packages/84/a4/d9da2989a3d937e94616ef07f0630c507f6baa77ad37f94ceb06b36cacc1/python3-wget-0.0.2-beta1.tar.gz',
-               'https://files.pythonhosted.org/packages/6a/ef/6e3736663ee67369f7f5b697674bfbd3efc91e7096ddd4452bbbc80065ff/hypothesis-6.124.7.tar.gz',
-               'https://files.pythonhosted.org/packages/03/c6/14a17e10813b8db20d1e800ff9a3a898e65d25f2b0e9d6a94616f1e3362c/numpy-1.23.0.tar.gz',
-               'https://files.pythonhosted.org/packages/f6/d8/ab692a75f584d13c6542c3994f75def5bce52ded9399f52e230fe402819d/numpy-1.22.4.zip',
-               'https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz',
-               'https://files.pythonhosted.org/packages/64/33/60135848598c076ce4b231e1b1895170f45fbcaeaa2c9d5e38b04db70c35/joblib-1.4.2.tar.gz',
-               'https://files.pythonhosted.org/packages/bd/55/b5148dcbf72f5cde221f8bfe3b6a540da7aa1842f6b491ad979a6c8b84af/threadpoolctl-3.5.0.tar.gz',
-               'https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz',
-               'https://files.pythonhosted.org/packages/ef/e5/c09d20723bfd91315f6f4ddc77912b0dcc09588b4ca7ad2ffa204607ad7f/scikit-learn-1.4.2.tar.gz',
-               'https://files.pythonhosted.org/packages/ee/5e/16e17bedcf54d5b618dc0771690deda77178e5c310402881c3d2d6c5f27c/hurry.filesize-0.9.tar.gz']
-
-
-## caution download genieclust here, not pypi, they differ and pypi's it's not installable!
-## cd /home/imallona/.local/easybuild/sources/c/clustbench/extensions/
-## wget wget https://github.com/gagolews/genieclust/archive/refs/tags/v1.1.6.tar.gz -O genieclust-1.1.6.tar.gz
-## todo automate this within the easyconfig!
-
 exts_list = [
     ('natsort', '8.4.0', {
         'checksums': ['45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581'],
     }),
-    ('cython', '3.0.11', {
-        'checksums': ['7146dd2af8682b4ca61331851e6aebce9fe5158e75300343f80c07ca80b1faff'],
-    }),
     ('hypothesis', '6.124.7', {
         'checksums': ['8ed6c6ae47e7d26d869c1dc3dee04e8fc50c95240715bb9915ded88d6d920f0e'],
     }),
-    ('numpy', '1.26.4', {
-        'checksums': ['2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010'],
-    }),
     ('fastcluster', '1.2.6', {
         'checksums': ['aab886efa7b6bba7ac124f4498153d053e5a08b822d2254926b7206cdf5a8aa6'],
     }),
-    ('genieclust', '1.1.6', {
-        'checksums': ['fb5b4ff68eef9e73496afa5949e726c8522c72e51f092716a6a598b03d5c09d6'],
-    }),
     ('hurry.filesize', '0.9', {
         'checksums': ['f5368329adbef86accd3bc9490522340bb79260455ae89b1a42c10f63801b9a6'],
     }),
-    ('python3-wget', '0.0.2-beta1', {
-        'modulename': 'wget',
-        'checksums': ['bbe7f44b3c28c4f7126aff20e8a438e78f6e4f1878d8b0c4940e87363813c17d'],
+    ('genieclust', '1.1.6', {
+        'download_dep_fail': False,
+        'install_src': 'https://files.pythonhosted.org/packages/2a/09/d1fd7b02cfabe76262d0f88d74fa71dc93e857525f8249539ec5ab174292/genieclust-1.1.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl',
+        'checksums': ['4c159f507b84b6d6d171883223648d837c520a9bcce650944a6ee0cb320e2151'],
     }),
-    ('clustering_benchmarks', '1.1.5', {
+    ('clustering_benchmarks', '1.1.6', {
         'modulename': 'clustbench',
-        'checksums': ['1732c262fb13be2f88814ef9a19c60108e91a7f6cfb9b960a42feaa299034ea3'],
+        'checksums': ['8c3ac0aed7c4c4925df6e5000db29aed6359341bd1ef2e516f230e13d8b66a0c'],
     }),
 ]
 
-sanity_check_paths = {
-    'files': [],
-    'dirs': ['lib/python3.11/site-packages/clustbench/']
-}
-
 moduleclass = 'bio'
-
-
diff --git a/envs/clustbench.yml b/envs/clustbench.yml
index 6cb6201..f894c22 100644
--- a/envs/clustbench.yml
+++ b/envs/clustbench.yml
@@ -6,9 +6,7 @@ dependencies:
   - conda-forge::python=3.12.6
   - conda-forge::pip
   - pip:
-    #- "clustering-benchmarks==1.1.5"
-    - 'https://github.com/gagolews/clustering-benchmarks/releases/download/v1.1.5/clustering_benchmarks-1.1.5.tar.gz'
-    - "wget"
+    - "clustering-benchmarks==1.1.6"
     - "fastcluster==1.2.6"
     - "numpy==1.26.4"
     - "scipy==1.14.1"
diff --git a/envs/clustbench_apptainer_optimized.def b/envs/clustbench_apptainer_optimized.def
new file mode 100644
index 0000000..846bae3
--- /dev/null
+++ b/envs/clustbench_apptainer_optimized.def
@@ -0,0 +1,100 @@
+Bootstrap: docker
+From: ubuntu:noble-20250404
+
+%labels
+    Author izaskun.mallona@gmail.com
+    Author ben.uzh@proton.me
+
+%post
+    PYTHON_VERSION=3.12.9
+    PYTHON_MAJOR_VERSION=$(echo $PYTHON_VERSION | cut -d. -f1,2)
+
+    # Update and enable deb-src
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    echo "deb-src http://archive.ubuntu.com/ubuntu/ noble main restricted universe multiverse" >> /etc/apt/sources.list
+    echo "deb-src http://archive.ubuntu.com/ubuntu/ noble-updates main restricted universe multiverse" >> /etc/apt/sources.list
+    apt-get update
+
+    # Get build dependencies for Python
+    apt-get build-dep -y python3
+
+    # Extra dependencies
+    apt-get install -y git \
+        python-is-python3 \
+        wget \
+        zlib1g-dev \
+        libbz2-dev \
+        libssl-dev \
+        libffi-dev \
+        && apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+
+    # Calculate half the number of available cores
+    HALF_NPROC=$(( $(nproc) / 2 ))
+    # Ensure at least one core is used
+    CORES_TO_USE=$(( HALF_NPROC > 0 ? HALF_NPROC : 1 ))
+
+    # Download and build Python from source, with optimizations
+
+    mkdir ~/src && cd ~/src
+    wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz
+    tar -xf Python-${PYTHON_VERSION}.tgz
+    cd Python-${PYTHON_VERSION}*/
+
+    # Enable all possible optimizations
+    ./configure \
+        --enable-optimizations \
+        --with-lto \
+        --enable-shared \
+        LDFLAGS="-Wl,-rpath /usr/local/lib"
+    make -j ${CORES_TO_USE}
+    make altinstall
+
+    # Create virtualenv using the locally built Python
+    cd /opt
+    /usr/local/bin/python${PYTHON_MAJOR_VERSION} -m venv "default"
+    . default/bin/activate
+
+    # Install required packages with pip
+
+    pip install -U pip wheel
+
+    pip install \
+      "clustering-benchmarks==1.1.6" \
+      "contourpy==1.3.2" \
+      "cycler==0.12.1" \
+      "cython==3.1.0" \
+      "fonttools==4.58.0" \
+      "genieclust==1.1.6" \
+      "joblib==1.5.0" \
+      "kiwisolver==1.4.8" \
+      "matplotlib==3.10.3" \
+      "natsort==8.4.0" \
+      "numpy==2.2.5" \
+      "packaging==25.0" \
+      "pandas==2.2.3" \
+      "pillow==11.2.1" \
+      "pyparsing==3.2.3" \
+      "python-dateutil==2.9.0.post0" \
+      "pytz==2025.2" \
+      "scikit-learn==1.6.1" \
+      "scipy==1.15.3" \
+      "six==1.17.0" \
+      "threadpoolctl==3.6.0" \
+      "tzdata==2025.2" \
+      "fastcluster==1.3.0" \
+      "gitpython==3.1.43" \
+      "isodate==0.7.2" \
+      "pydantic-core==2.34.1"
+
+    # Do some cleanup to keep the image slim
+    rm -rf ~/.cache
+    rm -rf ~/src
+
+    echo '. /opt/default/bin/activate' >> $SINGULARITY_ENVIRONMENT
+
+%environment
+
+    . /opt/default/bin/activate
+
diff --git a/envs/clustbench_apptainer_vanillapy.def b/envs/clustbench_apptainer_vanillapy.def
new file mode 100644
index 0000000..a40366c
--- /dev/null
+++ b/envs/clustbench_apptainer_vanillapy.def
@@ -0,0 +1,65 @@
+Bootstrap: docker
+From: ubuntu:noble-20250404
+
+%labels
+    Author izaskun.mallona@gmail.com
+    Author ben.uzh@proton.me
+
+%post
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update && \
+        apt-get install -y \
+        python3 \
+        python3-venv \
+        python3-pip \
+        ca-certificates \
+        git \
+        && apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+
+    # Create virtualenv using the default Python
+    mkdir -p /opt && cd /opt
+    /usr/bin/python3 -m venv "default"
+    . default/bin/activate
+
+    # Install required packages with pip
+
+    pip install -U pip wheel
+
+    pip install \
+      "clustering-benchmarks==1.1.6" \
+      "contourpy==1.3.2" \
+      "cycler==0.12.1" \
+      "cython==3.1.0" \
+      "fonttools==4.58.0" \
+      "genieclust==1.1.6" \
+      "joblib==1.5.0" \
+      "kiwisolver==1.4.8" \
+      "matplotlib==3.10.3" \
+      "natsort==8.4.0" \
+      "numpy==2.2.5" \
+      "packaging==25.0" \
+      "pandas==2.2.3" \
+      "pillow==11.2.1" \
+      "pyparsing==3.2.3" \
+      "python-dateutil==2.9.0.post0" \
+      "pytz==2025.2" \
+      "scikit-learn==1.6.1" \
+      "scipy==1.15.3" \
+      "six==1.17.0" \
+      "threadpoolctl==3.6.0" \
+      "tzdata==2025.2" \
+      "fastcluster==1.3.0" \
+      "gitpython==3.1.43" \
+      "isodate==0.7.2" \
+      "pydantic-core==2.34.1"
+
+    # Do some cleanup to keep the image slim
+    rm -rf ~/.cache
+
+    echo '. /opt/default/bin/activate' >> $SINGULARITY_ENVIRONMENT
+
+%environment
+
+    . /opt/default/bin/activate
+
diff --git a/envs/clustbench_singularity.def b/envs/clustbench_singularity.def
deleted file mode 100644
index 8c2ae85..0000000
--- a/envs/clustbench_singularity.def
+++ /dev/null
@@ -1,35 +0,0 @@
-Bootstrap: docker 
-From: ubuntu:jammy-20240911.1
-
-%labels
-
-    AUTHOR izaskun.mallona@gmail.com
-
-%post
-    
-    # Install python3.12
-    apt-get update
-    apt install -y build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev \
-        libreadline-dev libffi-dev libsqlite3-dev wget libbz2-dev python-is-python3 git
-
-    wget https://www.python.org/ftp/python/3.12.6/Python-3.12.6.tgz
-    tar -xf Python-3.12.6.tgz
-    cd Python-3.12.*/
-    ./configure --enable-optimizations
-    make -j 4
-    make altinstall
-
-    # virtualenv
-    cd /opt
-    python3.12 -m venv "default"
-    . default/bin/activate
-    
-    pip3 install "clustering-benchmarks==1.1.5" "wget" "fastcluster==1.2.6" "numpy==1.26.4" "scipy==1.14.1" \
-      "isodate" "pydantic-core"  \
-      "genieclust==1.1.6" "pandas==2.2.3" "gitpython==3.1.43"
-
-    echo '. /opt/default/bin/activate' >> $SINGULARITY_ENVIRONMENT
-
-%environment
-
-    . /opt/default/bin/activate
diff --git a/envs/fcps.def b/envs/fcps.def
new file mode 100644
index 0000000..a4996d6
--- /dev/null
+++ b/envs/fcps.def
@@ -0,0 +1,39 @@
+Bootstrap: docker
+From: rocker/tidyverse:4.4
+
+%labels
+
+    AUTHOR izaskun.mallona@gmail.com
+    AUTHOR ben.uzh@proton.me
+
+%post
+
+    # Install python (3.12 as of noble)
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    apt-get install -y git \
+        python-is-python3 \
+        python3.12 \
+        python3.12-venv \
+        && apt-get clean && \
+        rm -rf /var/lib/apt/lists/*
+
+    # virtualenv
+    cd /opt
+    python3.12 -m venv "default"
+    . default/bin/activate
+
+    pip install \
+        "gitpython==3.1.43" \
+        "isodate==0.7.2" \
+        "pydantic-core==2.34.1"
+
+    # Install R packages
+    ## FIXME no versioning here
+    Rscript -e 'BiocManager::install(c( "dbscan", "cluster", "protoclust", "energy", "argparse", "mclust", "caret", "DataVisualizations", "FCPS", "cclust"))'
+
+    echo '. /opt/default/bin/activate' >> $SINGULARITY_ENVIRONMENT
+
+%environment
+
+    . /opt/default/bin/activate
diff --git a/envs/fcps.eb b/envs/fcps.eb
index ee3db52..692bf0b 100644
--- a/envs/fcps.eb
+++ b/envs/fcps.eb
@@ -1,6 +1,3 @@
-## largely as in https://github.com/easybuilders/easybuild-easyconfigs/commit/e9a36171c68414f933ab1afa03b32422491f0f96#diff-3f2a92ab6ab59ddaccf4bc61b59bdd3f6717b95fd019131a57f51eefc831a699
-## Caution boost easyconfig needs update  https://raw.githubusercontent.com/easybuilders/easybuild-easyconfigs/refs/heads/develop/easybuild/easyconfigs/b/Boost/Boost-1.82.0-GCC-12.3.0.eb (https://github.com/easybuilders/easybuild-easyconfigs/commit/e29210626f076e3a207f1abf3759ea124e28f8b2)
-
 easyblock = 'Bundle'
 
 name = 'fcps'
@@ -16,17 +13,13 @@ builddependencies = [('pkgconf', '1.9.5')]
 
 dependencies = [
     ('R', '4.3.2'),
-    ('Boost', '1.82.0'), 
+    ('Boost', '1.82.0'),
     ('GSL', '2.7'),
-#    ('arrow-R', '14.0.1', versionsuffix),  # required by RcisTarget
 ]
 
 exts_default_options = {
     'source_urls': [
-        'https://bioconductor.org/packages/3.18/bioc/src/contrib/',
-        'https://bioconductor.org/packages/3.18/bioc/src/contrib/Archive/%(name)s',
-        'https://bioconductor.org/packages/3.18/data/annotation/src/contrib/',
-        'https://bioconductor.org/packages/3.18/data/experiment/src/contrib/',
+       'https://bioconductor.org/packages/release/bioc/src/contrib/',
         'https://cran.r-project.org/src/contrib/Archive/%(name)s',  # package archive
         'https://cran.r-project.org/src/contrib/',  # current version of packages
         'https://cran.freestatistics.org/src/contrib',  # mirror alternative for current packages
@@ -192,13 +185,15 @@ exts_list = [
     ('cluster', '2.1.8', {
         'checksums': ['c32a462e34694c99d58da953efa74882b5427f8c5db7cb226ae15c54ce6060ca'],
     }),
-    ('graph', '1.84.1', {
-        'checksums': ['cd2a91c93c81c09d9c59853c417e8a9cdde39b0589bacdce4ca916b6ee5f45a7'],
+    ('graph', '1.86.0', {
+        'checksums': ['ac9e196dfcb43848a851ea2d339cff41f8f16c7e80e76282c8fe7b822df8f367'],
     }),
     ('mclust', '6.1.1', {
         'checksums': ['ddd7018e5e6ea7f92c7fc9872b391491b7e91c2cd89ef1dcaf4408afb5116775'],
     }),
-    ('cclust', '0.6-26'),
+    ('cclust', '0.6-26', {
+        'checksums': ['92ec3c55a1864e4e1a4706bfdef8ad00727c720213ac656c718e867286b29857'],
+    }),
     ('flowClust', '3.40.0', {
         'installopts': "--configure-args='--with-gsl=${EBROOTGSL} --enable-bundled-gsl=false'",
         'checksums': ['7e699b06e378e32144704dbec18289109980b0f5eca166180f2c30007b83e0f5'],
@@ -240,4 +235,4 @@ sanity_check_paths = {
     'dirs': ['FCPS', 'dbscan', 'energy', 'protoclust'],
 }
 
-moduleclass = 'bio'
\ No newline at end of file
+moduleclass = 'bio'
diff --git a/envs/fcps_singularity.def b/envs/fcps_singularity.def
deleted file mode 100644
index a4a615e..0000000
--- a/envs/fcps_singularity.def
+++ /dev/null
@@ -1,37 +0,0 @@
-Bootstrap: docker
-From: rocker/tidyverse:4.3.3
-
-%labels
-
-    AUTHOR izaskun.mallona@gmail.com
-
-%post
-
-    # Install python3.12
-    apt-get update
-    apt install -y build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev \
-        libreadline-dev libffi-dev libsqlite3-dev wget libbz2-dev python-is-python3 git \
-        libgsl-dev
-
-    wget https://www.python.org/ftp/python/3.12.6/Python-3.12.6.tgz
-    tar -xf Python-3.12.6.tgz
-    cd Python-3.12.*/
-    ./configure --enable-optimizations
-    make -j 4
-    make altinstall
-
-    # virtualenv
-    cd /opt
-    python3.12 -m venv "default"
-    . default/bin/activate
-
-    pip install gitpython==3.1.43 isodate pydantic-core
-
-    ## no versioning here
-    Rscript -e 'BiocManager::install(c( "dbscan", "cluster", "protoclust", "energy", "argparse", "mclust", "DataVisualizations", "FCPS", "cclust"))'
-
-    echo '. /opt/default/bin/activate' >> $SINGULARITY_ENVIRONMENT
-
-%environment
-    
-    . /opt/default/bin/activate
diff --git a/envs/r.yml b/envs/r.yml
deleted file mode 100644
index 456e139..0000000
--- a/envs/r.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-name: r_for_metrics
-channels:
-  - conda-forge
-  - nodefaults
-dependencies:
-  - conda-forge::python=3.12.6
-  - conda-forge::r-mclust
-  - conda-forge::r-caret
-  - conda-forge::r-dplyr
-  - conda-forge::r-readr
-  - conda-forge::r-argparse
-  
diff --git a/envs/r_singularity.def b/envs/r_singularity.def
deleted file mode 100644
index f1f9ec9..0000000
--- a/envs/r_singularity.def
+++ /dev/null
@@ -1,37 +0,0 @@
-Bootstrap: docker
-From: rocker/tidyverse:4.4
-
-%labels
-
-    AUTHOR izaskun.mallona@gmail.com
-
-%post
-
-    # Install python3.12
-    apt-get update
-    apt install -y build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev \
-        libreadline-dev libffi-dev libsqlite3-dev wget libbz2-dev python-is-python3 git
-
-    wget https://www.python.org/ftp/python/3.12.6/Python-3.12.6.tgz
-    tar -xf Python-3.12.6.tgz
-    cd Python-3.12.*/
-    ./configure --enable-optimizations
-    make -j 4
-    make altinstall
-
-    # virtualenv
-    cd /opt
-    python3.12 -m venv "default"
-    . default/bin/activate
-
-    pip install gitpython==3.1.43 isodate pydantic-core
-
-    # Install R packages
-    
-    Rscript -e 'BiocManager::install(c("mclust", "caret", "readr", "argparse"))'
-
-    echo '. /opt/default/bin/activate' >> $SINGULARITY_ENVIRONMENT
-
-%environment
-
-    . /opt/default/bin/activate
diff --git a/envs/rmarkdown-python.eb b/envs/rmarkdown-python.eb
new file mode 100644
index 0000000..a9edb00
--- /dev/null
+++ b/envs/rmarkdown-python.eb
@@ -0,0 +1,28 @@
+easyblock = 'Bundle'
+
+# This is a dummy bundle that installs:
+# 1. rmarkdown: an R bundle that we also package
+# 2. Python-3.12.3-GCCcore-13.3.0
+# This is a dependency for the clustering_benchmark metric collector.
+
+name = 'rmarkdown-python'
+version = '0.1.0'
+
+local_rver = '4.4.2'
+local_pyver = '3.12.3'
+versionsuffix = f'-r-{local_rver}-py-{local_pyver}'
+
+homepage = 'https://omnibenchmark.org'
+description = 'Rmarkdown bundle with specific Python dependency'
+
+toolchain = {'name': 'system', 'version': '1.0'}
+
+dependencies = [
+    ('rmarkdown', '0.1.0', f'-gfbf-2024a-r-{local_rver}'),
+    ('Python', local_pyver, '-GCCcore-13.3.0'),
+]
+
+sanity_check_paths = {
+    'files': [],
+    'dirs': ['../../rmarkdown/0.1.0-gfbf-2024a-r-4.4.2']
+}
diff --git a/envs/rmarkdown.eb b/envs/rmarkdown.eb
new file mode 100644
index 0000000..067eadd
--- /dev/null
+++ b/envs/rmarkdown.eb
@@ -0,0 +1,197 @@
+easyblock = 'Bundle'
+
+# TODO(ben): Try to use https://www.eessi.io/docs/available_software/detail/R-bundle-CRAN/
+# and build only what's left out.
+
+name = 'rmarkdown'
+version = '0.1.0'
+versionsuffix = '-r-%(rver)s'
+
+homepage = 'https://omnibenchmark.org'
+description = 'rmarkdown bundle for clustbench reports'
+
+toolchain = {'name': 'gfbf', 'version': '2024a'}
+
+dependencies = [
+    ('R', '4.4.2'),
+]
+
+exts_default_options = {
+    'source_urls': [
+        'https://cloud.r-project.org/src/contrib/',  # CRAN (cloud mirror), current version of packages
+        'https://cran.r-project.org/src/contrib/',  # CRAN, current version of packages
+        'https://cran.r-project.org/src/contrib/Archive/%(name)s',  # CRAN package archive (older versions)
+        'https://www.bioconductor.org/packages/release/bioc/src/contrib/',  # Bioconductor release
+    ],
+    'sources': ['%(name)s_%(version)s.tar.gz'],  # source tarball filename template
+}
+
+exts_defaultclass = 'RPackage'
+
+exts_list = [
+    ('rlang', '1.1.6', {
+        'checksums': ['18544c876f4e18ec554edecc308362a52fbc7e0805c4794cf59bcc4d0b57f330'],
+    }),
+    ('glue', '1.8.0', {
+        'checksums': ['c86f364ba899b8662f5da3e1a75f43ae081ab04e0d51171d052356e7ee4b72a0'],
+    }),
+    ('cli', '3.6.4', {
+        'checksums': ['0c39539ce173bcbf7abaca64e8d2c87ffec8257c144c31b793c4cf2dd9cf7620'],
+    }),
+    ('lifecycle', '1.0.4', {
+        'checksums': ['ada4d3c7e84b0c93105e888647c5754219a8334f6e1f82d5afaf83d4855b91cc'],
+    }),
+    ('vctrs', '0.6.5', {
+        'checksums': ['43167d2248fd699594044b5c8f1dbb7ed163f2d64761e08ba805b04e7ec8e402'],
+    }),
+    ('utf8', '1.2.4', {
+        'checksums': ['418f824bbd9cd868d2d8a0d4345545c62151d321224cdffca8b1ffd98a167b7d'],
+    }),
+    ('lattice', '0.22-5', {
+        'checksums': ['ba1fbe5e18a133507dca9851b7f933002bdb6d1f3ea5f410a0a441103b6da5f1'],
+    }),
+    ('pkgconfig', '2.0.3', {
+        'checksums': ['330fef440ffeb842a7dcfffc8303743f1feae83e8d6131078b5a44ff11bc3850'],
+    }),
+    ('pillar', '1.10.2', {
+        'checksums': ['2cdbe3fe1b28b62530880ab26fc3c874e0dd5060767ae1a8ee5685f65e56d645'],
+    }),
+    ('magrittr', '2.0.3', {
+        'checksums': ['a2bff83f792a1acb801bfe6330bb62724c74d5308832f2cb6a6178336ace55d2'],
+    }),
+    ('fansi', '1.0.6', {
+        'checksums': ['ea9dc690dfe50a7fad7c5eb863c157d70385512173574c56f4253b6dfe431863'],
+    }),
+    ('viridisLite', '0.4.2', {
+        'checksums': ['893f111d31deccd2cc959bc9db7ba2ce9020a2dd1b9c1c009587e449c4cce1a1'],
+    }),
+    ('RColorBrewer', '1.1-3', {
+        'checksums': ['4f42f5423c45688b39f492c7892d93f37b4541831c8ffb140364d2bd89031ac0'],
+    }),
+    ('R6', '2.6.1', {
+        'checksums': ['59c6eba8b1b912eb7e104f65053235604be853425ee67c152ac4e86a1f2073b4'],
+    }),
+    ('labeling', '0.4.3', {
+        'checksums': ['c62f4fc2cc74377d7055903c5f1913b7295f7587456fe468592738a483e264f2'],
+    }),
+    ('farver', '2.1.2', {
+        'checksums': ['528823b95daab4566137711f1c842027a952bea1b2ae6ff098e2ca512b17fe25'],
+    }),
+    ('Matrix', '1.7-3', {
+        'checksums': ['6642e9db8cddf32a051972fd5a634bf7edbdc925c5c2d139bf71e92df00fb44e'],
+    }),
+    ('nlme', '3.1-168', {
+        'checksums': ['23b78468344cb6775dee5e0d9c8133032d64f08ebaba20776508a0443a897362'],
+    }),
+    ('withr', '3.0.2', {
+        'checksums': ['0a3a05f493d275cca4bf13c8c1b95a1a4eed7f83b2493f41fde02ce3fc92c1a3'],
+    }),
+    ('tibble', '3.2.1', {
+        'checksums': ['65a72d0c557fd6e7c510d150c935ed6ced5db7d05fc20236b370f11428372131'],
+    }),
+    ('colorspace', '2.1-1', {
+        'checksums': ['e721cee5f4d6e4b0fc8eb18265e316b4f856fd3be02f0775a26032663758cd0b'],
+    }),
+    ('munsell', '0.5.1', {
+        'checksums': ['03a2fd9ac40766cded96dfe33b143d872d0aaa262a25482ce19161ca959429a6'],
+    }),
+    ('scales', '1.3.0', {
+        'checksums': ['b33e0f6b44259551ce02befd52eac53602509fbfdd903920620c658c50f35888'],
+    }),
+    ('mgcv', '1.9-1', {
+        'checksums': ['700fbc37bedd3a49505b9bc4949faee156d9cfb4f669d797d06a10a15a5bdb32'],
+    }),
+    ('MASS', '7.3-65', {
+        'checksums': ['b07ef1e3c364ce56269b4a8a7759cc9f87c876554f91293437bb578cfe38172f'],
+    }),
+    ('isoband', '0.2.7', {
+        'checksums': ['7693223343b45b86de2b5b638ff148f0dafa6d7b1237e822c5272902f79cdf61'],
+    }),
+    ('gtable', '0.3.6', {
+        'checksums': ['d305a5fa11278b649d2d8edc5288bf28009be888a42be58ff8714018e49de0ef'],
+    }),
+    ('ggplot2', '3.5.2', {
+        'checksums': ['0a30024a2ff3e569412223c8f14563ed504f3e0851de03e42d1b5f73fe1f06bf'],
+    }),
+    ('findpython', '1.0.9', {
+        'checksums': ['b6a15e0cdfcdd4b1cfc76f7e4eaad0125d4d52889711200075280e9b2a2cb7cb'],
+    }),
+    ('argparse', '2.2.5', {
+        'checksums': ['53c8a9eb51041084eb3d9c271b14ebcb32dc2f50cf16afa5c54c504a97229ea4'],
+    }),
+    (name, '2.29', {
+        'checksums': ['6662ac85316c869caad6e3b95468cad97f6eef106d47b066db8d40c05a490928'],
+    }),
+    ('generics', '0.1.3', {
+        'checksums': ['75046163bfa8b8a4f4214c1b689e796207f6447182f2e5062cf570302387d053'],
+    }),
+    ('tidyselect', '1.2.1', {
+        'checksums': ['169e97ba0bbfbcdf4a80534322751f87a04370310c40e27f04aac6525d45903c'],
+    }),
+    ('dplyr', '1.1.4', {
+        'checksums': ['cf730414d5d4ab387b4e9890a4b1df9d17a3903488e8da8df1cf2e11e44558cb'],
+    }),
+    ('tidyr', '1.3.1', {
+        'checksums': ['e820c261cb5543f572f49276a7bdc7302aa4215da4bf850b1b939a315353835d'],
+    }),
+    ('shape', '1.4.6.1', {
+        'checksums': ['43f9bd0f997fd6cf1838efd8b2509c9a6396513f4e54a20360481634affd22a4'],
+    }),
+    ('GlobalOptions', '0.1.2', {
+        'checksums': ['47890699668cfa9900a829c51f8a32e02a7a7764ad07cfac972aad66f839753e'],
+    }),
+    ('circlize', '0.4.16', {
+        'checksums': ['16dc32c7704906d13a9e5281bb396e92fb89a6b17fa5e201953240726b650b67'],
+    }),
+    ('rjson', '0.2.23', {
+        'checksums': ['55034575c854ed657e6701da278c0fdea251479624d06a963b2e58461a5f0f48'],
+    }),
+    ('GetoptLong', '1.0.5', {
+        'checksums': ['8c237986ed3dfb72d956ad865ef7768644eebf144675ad66140acfd1aca9d701'],
+    }),
+    ('cluster', '2.1.8.1', {
+        'checksums': ['4b95b78e09b17ddca72edc0bb180c753c004ed2f61c3eb12e0451ac77f441e57'],
+    }),
+    ('clue', '0.3-66', {
+        'checksums': ['aa86dd58c05635eb394c9ede0dd15a4f24af4815f299451bbc7895c0f737c2fb'],
+    }),
+    ('png', '0.1-8', {
+        'checksums': ['5a36fabb6d62ba2533d3fc4cececd07891942cfb76fe689ec0d550d08762f61c'],
+    }),
+    ('BiocGenerics', '0.54.0', {
+        'checksums': ['413d6f74cbc671147f63eefc46b718af815d6497535c2198925d9306e00c41b9'],
+    }),
+    ('S4Vectors', '0.46.0', {
+        'checksums': ['c34249c6a367a2a1e94158d9e60294f2b901e485d93717250a417569be187a40'],
+    }),
+    ('IRanges', '2.42.0', {
+        'checksums': ['0abb01ee93111c5fc678f9aa2f93d00d8d1548263cb60daa52645a6061b603fc'],
+    }),
+    ('matrixStats', '1.5.0', {
+        'checksums': ['12996c5f3e6fc202a43e1087f16a71b7fa93d7e908f512542c7ee89cf95dcc15'],
+    }),
+    ('iterators', '1.0.14', {
+        'checksums': ['cef3075a0930e1408c764e4da56bbadd4f7d14315809df8f38dd51f80ccc677b'],
+    }),
+    ('codetools', '0.2-20', {
+        'checksums': ['3be6f375ec178723ddfd559d1e8e85bfeee04a5fbaf9f53f2f844e1669fea863'],
+    }),
+    ('foreach', '1.5.2', {
+        'checksums': ['56338d8753f9f68f262cf532fd8a6d0fe25a71a2ff0107f3ce378feb926bafe4'],
+    }),
+    ('doParallel', '1.0.17', {
+        'checksums': ['b96a25ad105a654d70c7b4ca27290dc9967bc47f4668b2763927a886b178abd7'],
+    }),
+    ('ComplexHeatmap', '2.24.0', {
+        'checksums': ['2a015ad26c5a5f003ee203d77cc8d3eea5461bcf2db7ce102da1bef7db082650'],
+    }),
+]
+
+modextrapaths = {'R_LIBS_SITE': ''}
+
+sanity_check_paths = {
+    'files': [],
+    'dirs': ['argparse', 'rmarkdown', 'ggplot2', 'tidyr', 'ComplexHeatmap'],
+}
+
+moduleclass = 'bio'
diff --git a/envs/rmarkdown.yml b/envs/rmarkdown.yml
index e57969e..ed5c65e 100644
--- a/envs/rmarkdown.yml
+++ b/envs/rmarkdown.yml
@@ -7,6 +7,8 @@ dependencies:
   - conda-forge::python=3.12.6
   - conda-forge::r-argparse
   - conda-forge::r-rmarkdown
+  - conda-forge::r-cairo
+  - conda-forge::r-svglite
   - conda-forge::r-ggplot2
-  - conda-forge::r-tidyr  
+  - conda-forge::r-tidyr
   - bioconda::bioconductor-complexheatmap
diff --git a/envs/sklearn.yml b/envs/sklearn.yml
deleted file mode 100644
index 258b7ea..0000000
--- a/envs/sklearn.yml
+++ /dev/null
@@ -1,11 +0,0 @@
-name: sklearn
-channels:
-  - conda-forge
-  - nodefaults
-dependencies:
-  - conda-forge::python=3.12.6
-  - conda-forge::scikit-learn
-  - conda-forge::pip
-  - pip:
-    - "pandas"
-    - "argparse"
diff --git a/envs/sklearn_singularity.def b/envs/sklearn_singularity.def
deleted file mode 100644
index 939a3bb..0000000
--- a/envs/sklearn_singularity.def
+++ /dev/null
@@ -1,33 +0,0 @@
-Bootstrap: docker 
-From: ubuntu:jammy-20240911.1
-
-%labels
-
-    AUTHOR izaskun.mallona@gmail.com
-
-%post
-    
-    # Install python3.12
-    apt-get update
-    apt install -y build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev \
-        libreadline-dev libffi-dev libsqlite3-dev wget libbz2-dev python-is-python3 git
-
-    wget https://www.python.org/ftp/python/3.12.6/Python-3.12.6.tgz
-    tar -xf Python-3.12.6.tgz
-    cd Python-3.12.*/
-    ./configure --enable-optimizations
-    make -j 4
-    make altinstall
-
-    # virtualenv
-    cd /opt
-    python3.12 -m venv "default"
-    . default/bin/activate
-
-    pip3 install -U scikit-learn pandas argparse numpy scipy "isodate" "pydantic-core" "gitpython==3.1.43"
-
-    echo '. /opt/default/bin/activate' >> $SINGULARITY_ENVIRONMENT
-
-%environment
-
-    . /opt/default/bin/activate
diff --git a/envs/upload_to_registry.sh b/envs/upload_to_registry.sh
new file mode 100644
index 0000000..7e45e5a
--- /dev/null
+++ b/envs/upload_to_registry.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# Push the locally built SIF images to the omnibenchmark ORAS registry.
+#
+# Expects ${CLUSTBENCH_REPO}.sif and ${FCPS_REPO}.sif in the current directory.
+# Set REGISTRY_USER below to your registry account before running.
+
+# Deliberately not called USER: that variable normally holds the caller's
+# login name and is inherited by child processes.
+REGISTRY_USER=user
+REGISTRY=quay.io
+ORGANIZATION=omnibenchmark
+CLUSTBENCH_REPO=clustbench-vanilla
+CLUSTBENCH_TAG=0.1.0
+FCPS_REPO=fcps
+FCPS_TAG=0.1.0
+
+singularity registry login --username "${REGISTRY_USER}" "docker://${REGISTRY}"
+singularity push "${CLUSTBENCH_REPO}.sif" "oras://${REGISTRY}/${ORGANIZATION}/${CLUSTBENCH_REPO}:${CLUSTBENCH_TAG}"
+singularity push "${FCPS_REPO}.sif" "oras://${REGISTRY}/${ORGANIZATION}/${FCPS_REPO}:${FCPS_TAG}"
diff --git a/microbenchmark/microbench.py b/microbenchmark/microbench.py
new file mode 100644
index 0000000..6abc6ee
--- /dev/null
+++ b/microbenchmark/microbench.py
@@ -0,0 +1,95 @@
+"""
+This script exercises a few common linear algebra operations in numpy.
+It's intended mostly to gauge whether it makes sense to descend into
+compiler optimizations for the Python binary that we ship within the SIF images,
+but it can be easily repurposed for other specific microbenchmarks (e.g., numba or GPU perf gains).
+
+Be aware that here we're profiling simple operations; it would make sense to carefully
+profile the libraries of interest to see where the computational bottlenecks really are.
+
+Usage:
+
+singularity exec clustbench-vanilla.sif python3 microbench.py [repetitions]
+singularity exec clustbench-optimized.sif python3 microbench.py [repetitions]
+
+References: https://pythonspeed.com/articles/faster-python/
+"""
+import numpy as np
+import time
+import json
+from statistics import mean, stdev
+
+DEFAULT_REPETITIONS = 10
+
+def run_operation(operation, func, repetitions):
+    """Time `func` over `repetitions` calls and summarize the wall-clock timings.
+
+    Args:
+        operation: Label stored under the 'operation' key of the result.
+        func: Zero-argument callable to time.
+        repetitions: Number of timed calls; must be at least 1.
+
+    Returns:
+        Dict with 'operation', 'mean' and 'stdev' (in seconds) and 'runs'.
+
+    Raises:
+        ValueError: If `repetitions` is smaller than 1.
+    """
+    if repetitions < 1:
+        raise ValueError(f"repetitions must be >= 1, got {repetitions}")
+    timings = []
+    for _ in range(repetitions):
+        start = time.perf_counter()
+        func()
+        elapsed = time.perf_counter() - start
+        timings.append(elapsed)
+    return {
+        'operation': operation,
+        'mean': mean(timings),
+        # statistics.stdev() needs at least two samples; report 0.0 for a single run.
+        'stdev': stdev(timings) if len(timings) > 1 else 0.0,
+        'runs': repetitions
+    }
+
+def benchmark(repetitions=DEFAULT_REPETITIONS):
+    """Time a fixed set of numpy linear algebra operations and print the results as JSON.
+
+    An operation that raises numpy.linalg.LinAlgError is reported with an 'error'
+    entry instead of timings.
+
+    Args:
+        repetitions: Number of timed calls per operation.
+    """
+    np.random.seed(42)  # fixed seed: every run times the same matrices
+    size = 1000
+
+    # Create random matrices
+    A = np.random.rand(size, size)
+    B = np.random.rand(size, size)
+    C = A @ A.T  # Ensure positive definite for Cholesky
+
+    # Define operations
+    operations = [
+        ('mat_mul', lambda: np.dot(A, B)),
+        ('svd', lambda: np.linalg.svd(A)),
+        ('chol_decomp', lambda: np.linalg.cholesky(C))
+    ]
+
+    results = []
+    for operation, func in operations:
+        try:
+            result = run_operation(operation, func, repetitions)
+        except np.linalg.LinAlgError:
+            result = {
+                'operation': operation,
+                'error': 'Operation failed due to numerical instability'
+            }
+        results.append(result)
+
+    # Output results as JSON
+    print(json.dumps(results, indent=2))
+
+if __name__ == "__main__":
+    import sys
+    repetitions = int(sys.argv[1]) if len(sys.argv) > 1 else DEFAULT_REPETITIONS
+    benchmark(repetitions)
diff --git a/overrides/apptainer.yml b/overrides/apptainer.yml
new file mode 100644
index 0000000..93b6c3e
--- /dev/null
+++ b/overrides/apptainer.yml
@@ -0,0 +1,4 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+id: clustering_benchmark_apptainer_oras
+description: Clustering benchmark on Gagolewski's. Using apptainer from omnibenchmark ORAS registry.
+software_backend: apptainer
diff --git a/overrides/apptainer_optimized.yml b/overrides/apptainer_optimized.yml
new file mode 100644
index 0000000..ae4b5ad
--- /dev/null
+++ b/overrides/apptainer_optimized.yml
@@ -0,0 +1,12 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+id: clustering_benchmark_apptainer_optimized_local
+description: Clustering benchmark on Gagolewski's. Using apptainer (locally built image, optimized python)
+software_backend: apptainer
+
+software_environments:
+  clustbench:
+    apptainer: envs/clustbench-optimized.sif
+  fcps:
+    apptainer: envs/fcps.sif
+  rmarkdown:
+    apptainer: envs/fcps.sif
diff --git a/overrides/apptainer_vanilla.yml b/overrides/apptainer_vanilla.yml
new file mode 100644
index 0000000..f0d3bc4
--- /dev/null
+++ b/overrides/apptainer_vanilla.yml
@@ -0,0 +1,12 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+id: clustering_benchmark_apptainer_vanilla_local
+description: Clustering benchmark on Gagolewski's. Using apptainer (locally built image)
+software_backend: apptainer
+
+software_environments:
+  clustbench:
+    apptainer: envs/clustbench.sif
+  fcps:
+    apptainer: envs/fcps.sif
+  rmarkdown:
+    apptainer: envs/fcps.sif
diff --git a/overrides/conda.yml b/overrides/conda.yml
new file mode 100644
index 0000000..5f4a1ac
--- /dev/null
+++ b/overrides/conda.yml
@@ -0,0 +1,4 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+id: clustering_benchmark_conda
+description: Clustering benchmark on Gagolewski's. Using conda.
+software_backend: conda
diff --git a/overrides/envmodules.yml b/overrides/envmodules.yml
new file mode 100644
index 0000000..a34d58e
--- /dev/null
+++ b/overrides/envmodules.yml
@@ -0,0 +1,4 @@
+# this file has been generated automatically - DO NOT EDIT BY HAND
+id: clustering_benchmark_envmodules
+description: Clustering benchmark on Gagolewski's. Using envmodules.
+software_backend: envmodules
diff --git a/smoketest/base.yml b/smoketest/base.yml
new file mode 100644
index 0000000..db885fe
--- /dev/null
+++ b/smoketest/base.yml
@@ -0,0 +1,127 @@
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.5
+
+software_environments:
+
+  clustbench:
+    description: "clustbench on py3.12.3, default python"
+    envmodule: clustbench/0.1.0-foss-2023b
+    conda: envs/clustbench.yml
+    apptainer: oras://quay.io/omnibenchmark/clustbench-vanilla:0.1.0
+
+  fcps:
+    description: "CRAN's FCPS"
+    envmodule: fcps/1.3.4-foss-2023a-r-4.3.2
+    conda: envs/fcps.yml
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    envmodule: rmarkdown/0.1.0-gfbf-2024a-r-4.4.2
+    apptainer: oras://quay.io/omnibenchmark/fcps:0.1.0
+
+
+metric_collectors:
+  - id: plotting
+    name: "Single-backend metric collector."
+    software_environment: rmarkdown
+    repository:
+      url: https://github.com/imallona/clustering_report
+      commit: 1d6bdf5
+    inputs:
+      - metrics.scores
+    outputs:
+      - id: plotting.html
+        path: "{input}/{name}/plotting_report.html"
+
+stages:
+  - id: data
+    modules:
+      - id: clustbench
+        name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_data
+          commit: 31ac323
+
+        parameters: # comments depict the possible cardinalities and the number of curated labelsets
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] #	2	1
+    outputs:
+      - id: data.matrix
+        path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
+      - id: data.true_labels
+        path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
+
+  - id: clustering
+    modules:
+      - id: fastcluster
+        name: "fastcluster algorithm"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_fastcluster
+          commit: "45e43d3"
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: sklearn
+        name: "sklearn"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_sklearn
+          commit: "5877378"  # quoted: an all-digit short SHA would load as an integer
+        parameters:
+          - values: ["--method", "birch"]
+          - values: ["--method", "kmeans"]
+      - id: agglomerative
+        name: "agglomerative"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_agglomerative
+          commit: "5454368"  # quoted: an all-digit short SHA would load as an integer
+        parameters:
+          - values: ["--linkage", "complete"]
+          - values: ["--linkage", "ward"]
+      - id: genieclust
+        name: "genieclust"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_genieclust
+          commit: "6090043"  # quoted: an all-digit short SHA would load as an integer
+        parameters:
+          - values: ["--method", "genie", "--gini_threshold", 0.5]
+      - id: fcps
+        name: "fcps"
+        software_environment: fcps
+        repository:
+          url: https://github.com/imallona/clustbench_fcps
+          commit: "272fa5f"
+        parameters:
+          - values: ["--method", "FCPS_Minimax"]
+    inputs:
+      - entries:
+          - data.matrix
+          - data.true_labels
+    outputs:
+      - id: clustering.predicted_ks_range
+        path: "{input}/{stage}/{module}/{params}/{dataset}_ks_range.labels.gz"
+
+  - id: metrics
+    modules:
+      - id: partition_metrics
+        name: "clustbench partition metrics"
+        software_environment: clustbench
+        repository:
+          url: https://github.com/imallona/clustbench_metrics
+          commit: 8184cd4
+        parameters:
+          - values: ["--metric", "normalized_clustering_accuracy"]
+          - values: ["--metric", "adjusted_fm_score"]
+    inputs:
+      - entries:
+          - clustering.predicted_ks_range
+          - data.true_labels
+    outputs:
+      - id: metrics.scores
+        path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"