omnibenchmark · btraven00 · May 8, 2025 · Mar 17, 2025 · Mar 18, 2025 · Mar 18, 2025
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -18,7 +18,6 @@ jobs:
   run-benchmark:
     name: Run Benchmark
     runs-on: ubuntu-latest
-    ## runs-on: self-hosted
     steps:
       - name: Check out repository
         uses: actions/checkout@v4
@@ -49,7 +48,7 @@ jobs:
         shell: bash -l {0}
         run: |
           mamba install -y pip
-          pip install git+https://github.com/omnibenchmark/omnibenchmark.git@reduce_install_scope
+          pip install git+https://github.com/omnibenchmark/omnibenchmark.git@dev
 
       - name: Load benchmark cache
         id: cache-benchmark
@@ -60,16 +59,15 @@ jobs:
 
       - name: Run benchmark
         shell: bash -l {0}
-        continue-on-error: true
+        continue-on-error: false
         run: |
-          echo "y" | ob run benchmark -b Clustering.yaml --local --cores 3 --continue-on-error
+          echo "y" | ob run benchmark -b Clustering_conda_smoketest.yml --local --cores 3 --continue-on-error
 
   upload-artifact:
     name: Benchmark Artifact
     runs-on: ubuntu-latest
-    ## runs-on: self-hosted
     needs: run-benchmark
-    if: always()
+    if: github.ref == 'refs/heads/main' && github.repository_owner == 'omnibenchmark'
     steps:
       - name: Check out repository
         uses: actions/checkout@v4
@@ -100,12 +98,11 @@ jobs:
 
       - name: Deploy to GitHub Pages
         uses: actions/deploy-pages@v4
-          
+
       - name: Create Job Summary
         if: always()
         run: |
           echo "### Reports" >> $GITHUB_STEP_SUMMARY
           echo "- [Plotting Report](https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }})" >> $GITHUB_STEP_SUMMARY
           echo "### All Outputs" >> $GITHUB_STEP_SUMMARY
           echo "- [Complete Benchmark Output](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts)" >> $GITHUB_STEP_SUMMARY
-
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,10 @@
+# image build artifacts
+envs/*.sif
+
+# snakemake
+snakemake.log
+.snakemake/
+
+# vim swaps
+*.swp
+*.swo
diff --git a/Clustering.yaml b/Clustering.yaml
@@ -2,10 +2,10 @@ id: clustering_example
 description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2.
 version: 1.2
 benchmarker: "Izaskun Mallona, Daniel Incicau"
-storage: https://play.min.io
 benchmark_yaml_spec: 0.04
-storage_api: S3
-storage_bucket_name: clustering_example
+# storage: https://play.min.io
+# storage_api: S3
+# storage_bucket_name: clustering_example
 software_backend: conda
 software_environments:
   clustbench:

diff --git a/Clustering_singularity.yml → Clustering_apptainer_optimized.yml b/Clustering_singularity.yml → Clustering_apptainer_optimized.yml
@@ -1,42 +1,37 @@
-id: clustering_example_apptainer
+id: clustering_example_apptainer_optimized
 description: Clustering benchmark on Gagolewski's, true number of clusters plus minus 2.
-version: 1.4
-benchmarker: "Izaskun Mallona, Daniel Incicau"
-storage: http://omnibenchmark.org:9000
-benchmark_yaml_spec: 0.04
-storage_api: S3
-storage_bucket_name: clusteringexampleapptainer
+
+version: 1.5
+benchmarker: "Izaskun Mallona, Daniel Incicau, Ben Carrillo"
+benchmark_yaml_spec: 0.4
+
 software_backend: apptainer
+
 software_environments:
+
   clustbench:
-    description: "clustbench on py3.12.6"
-    conda: envs/clustbench.yml
-    envmodule: clustbench
-    apptainer: envs/clustbench.sif
-  sklearn:
-    description: "Daniel's on py3.12.6"
-    conda: envs/sklearn.yml
-    apptainer: envs/sklearn.sif
-    envmodule: clustbench # not true, but
-  R:
-    description: "Daniel's R with readr, dplyr, mclust, caret"
-    conda: envs/r.yml
-    apptainer: envs/r.sif
-    envmodule: fcps # not true, but
-  rmarkdown:
-    description: "R with some plotting dependencies"
-    conda: envs/rmarkdown.yml
-    apptainer: envs/r.sif # not true, but
-    envmodule: fcps # not true, but
+    description: "clustbench on py3.12.9, optimized python build"
+    conda: envs/clustbench.yml # not used
+    envmodule: na
+    apptainer: envs/clustbench-optimized.sif
+
   fcps:
     description: "CRAN's FCPS"
-    conda: envs/fcps.yml
+    conda: envs/fcps.yml # not used
+    envmodule: na
     apptainer: envs/fcps.sif
-    envmodule: fcps
+
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml # not used
+    envmodule: na
+    apptainer: envs/rmarkdown.sif
+
+
 metric_collectors:
   - id: plotting
     name: "Single-backend metric collector."
-    software_environment: "rmarkdown"
+    software_environment: rmarkdown
     repository:
       url: https://github.com/imallona/clustering_report
       commit: 1d6bdf5
@@ -45,14 +40,14 @@ metric_collectors:
     outputs:
       - id: plotting.html
         path: "{input}/{name}/plotting_report.html"
+
 stages:
-  ## clustbench data ##########################################################
 
   - id: data
     modules:
       - id: clustbench
         name: "clustbench datasets, from https://www.sciencedirect.com/science/article/pii/S0020025521010082#t0005 Table1"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_data
           commit: 366c5a2
@@ -125,16 +120,13 @@ stages:
       - id: data.true_labels
         path: "{input}/{stage}/{module}/{params}/{dataset}.labels0.gz"
 
-  ## clustbench methods (fastcluster) ###################################################################
-
   - id: clustering
     modules:
       - id: fastcluster
         name: "fastcluster algorithm"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_fastcluster
-          # url: /home/imallona/src/clustbench_fastcluster/
           commit: "45e43d3"
         parameters:
           - values: ["--linkage", "complete"]
@@ -143,12 +135,12 @@ stages:
           - values: ["--linkage", "weighted"]
           - values: ["--linkage", "median"]
           - values: ["--linkage", "centroid"]
+
       - id: sklearn
-        name: "sklearn"
-        software_environment: "clustbench"
+        name: sklearn
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_sklearn
-          #url: /home/imallona/src/clustbench_sklearn
           commit: 5877378
         parameters:
           - values: ["--method", "birch"]
@@ -166,8 +158,8 @@ stages:
           - values: ["--linkage", "complete"]
           - values: ["--linkage", "ward"]
       - id: genieclust
-        name: "genieclust"
-        software_environment: "clustbench"
+        name: genieclust
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_genieclust
           commit: 6090043
@@ -206,7 +198,7 @@ stages:
     modules:
       - id: partition_metrics
         name: "clustbench partition metrics"
-        software_environment: "clustbench"
+        software_environment: clustbench
         repository:
           url: https://github.com/imallona/clustbench_metrics
           commit: 9132d45
@@ -229,89 +221,3 @@ stages:
     outputs:
       - id: metrics.scores
         path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
-
-  # ## daniel's data ###########################################################################
-
-  # - id: danielsdata
-  #   modules:
-  #     - id: iris_manual
-  #       name: "Iris Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/iris.git
-  #         commit: 47c63f0
-  #     - id: penguins
-  #       name: "Penguins Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/penguins.git
-  #         commit: 9032478
-  #   outputs:
-  #     - id: data.features
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv"
-  #     - id: data.labels
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv"
-
-  # ## daniel's distances ########################################################################
-
-  # - id: distances
-  #   modules:
-  #     - id: D1
-  #       software_environment: "sklearn"
-  #       parameters:
-  #         - values: ["--measure", "cosine"]
-  #         - values: ["--measure", "euclidean"]
-  #         - values: ["--measure", "manhattan"]
-  #         - values: ["--measure", "chebyshev"]
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/distance.git
-  #         commit: dd99d4f
-  #   inputs:
-  #     - entries:
-  #         - data.features
-  #   outputs:
-  #     - id: distances
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv"
-
-  # ## daniel's methods ###################################################################
-
-  # - id: danielmethods
-  #   modules:
-  #     - id: kmeans
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/kmeans.git
-  #         commit: 049c8b1
-  #     - id: ward
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ward.git
-  #         commit: 976e3f3
-  #   inputs:
-  #     - entries:
-  #         - distances
-  #   outputs:
-  #     - id: methods.clusters
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv"
-
-  # ## daniel's metrics ###################################################################
-
-  # - id: danielsmetrics
-  #   modules:
-  #     - id: ari
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ari.git
-  #         commit: 72708f0
-  #     - id: accuracy
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/accuracy.git
-  #         commit: e26b32f
-  #   inputs:
-  #     - entries:
-  #         - methods.clusters
-  #         - data.labels
-  #   outputs:
-  #     - id: metrics.mapping
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt"