Create GitHub Actions for running benchmark #11

Merged: 1 commit, Mar 21, 2025
111 changes: 111 additions & 0 deletions .github/workflows/benchmark.yml
@@ -0,0 +1,111 @@
name: Run Benchmark
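
# Concurrency: PR runs share a group keyed on head_ref, while pushes fall back to the
# unique run_id; cancel-in-progress stops superseded runs in the same group.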
concurrency:
  group: ${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

permissions:
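  # required by actions/deploy-pages: pages for the deployment itself, id-token for OIDC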
  id-token: write
  pages: write

jobs:
  run-benchmark:
    name: Run Benchmark
    ## runs-on: ubuntu-latest
    runs-on: self-hosted
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Install Mambaforge
        uses: conda-incubator/setup-miniconda@v3
        with:
          miniforge-variant: Miniforge3
          use-mamba: true
          activate-environment: omnibenchmark-env
          python-version: "3.12"
          auto-update-conda: true
          channels: conda-forge

      - name: Cache environment
        id: cache-env
        uses: actions/cache@v3
        with:
          path: |
            ~/.conda/pkgs
            ~/.conda/envs/omnibenchmark-env
            ~/.cache/pip
          key: ${{ runner.os }}-conda-pip-${{ hashFiles('requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-conda-pip-

      - name: Install omnibenchmark CLI
        shell: bash -l {0}
        run: |
          mamba install -y pip
          pip install git+https://github.com/omnibenchmark/omnibenchmark.git@dev

      - name: Load benchmark cache
        id: cache-benchmark
        uses: actions/cache@v3
        with:
          path: out/
          key: benchmark-${{ runner.os }}-${{ hashFiles('Clustering.yaml') }}

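      # "y" is piped in below, presumably to auto-answer an ob confirmation prompt;
      # continue-on-error (the step option and the ob flag) lets later steps and the
      # remaining benchmark modules proceed past individual failures.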
      - name: Run benchmark
        shell: bash -l {0}
        continue-on-error: true
        run: |
          echo "y" | ob run benchmark -b Clustering.yaml --local --threads 20 --continue-on-error

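  # Publishes results even when the benchmark failed part-way: if: always() keeps this
  # job running, and out/ is restored through the same cache key used above.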
  upload-artifact:
    name: Benchmark Artifact
    ## runs-on: ubuntu-latest
    runs-on: self-hosted
    needs: run-benchmark
    if: always()
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Load cached output
        uses: actions/cache@v3
        with:
          path: out/
          key: benchmark-${{ runner.os }}-${{ hashFiles('Clustering.yaml') }}

      - name: Prepare output
        run: |
          zip -r benchmark_output.zip out/
          mkdir -p gh-pages
          cp out/plotting/plotting_report.html gh-pages/index.html

      - name: Upload zipped output
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-output
          path: benchmark_output.zip
          retention-days: 7

      - name: Upload Pages Artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: gh-pages

      - name: Deploy to GitHub Pages
        uses: actions/deploy-pages@v4

      - name: Create Job Summary
        if: always()
        run: |
          echo "### Reports" >> $GITHUB_STEP_SUMMARY
          echo "- [Plotting Report](https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }})" >> $GITHUB_STEP_SUMMARY
          echo "### All Outputs" >> $GITHUB_STEP_SUMMARY
          echo "- [Complete Benchmark Output](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts)" >> $GITHUB_STEP_SUMMARY

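Since workflow_dispatch is among the triggers, the benchmark can also be started by hand. A minimal sketch, assuming the GitHub CLI (gh) is installed and authenticated, and that a self-hosted runner is registered for this repository (both jobs use runs-on: self-hosted):

    gh workflow run benchmark.yml --ref main   # queue the workflow on the main branch
    gh run watch                               # pick the new run and follow it to completion
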
234 changes: 75 additions & 159 deletions Clustering.yaml
@@ -23,27 +23,29 @@ software_environments:
conda: envs/r.yml
apptainer: envs/r.sif
envmodule: fcps # not true, but
rmarkdown:
description: "R with some plotting dependencies"
conda: envs/rmarkdown.yml
apptainer: envs/r.sif # not true, but
envmodule: fcps # not true, but
fcps:
description: "CRAN's FCPS"
conda: envs/fcps.yml
apptainer: envs/fcps.sif
envmodule: fcps
metric_collectors:
- id: biometrics
name: "Biologically-relevant performance metrics gathering and postprocessing."
software_environment: "R"
- id: plotting
name: "Single-backend metric collector."
software_environment: "rmarkdown"
repository:
url: https://github.com/omnibenchmark-example/metric-collector.git
commit: ef4a601
url: https://github.com/imallona/clustering_report
commit: 0a4ddff
inputs:
- metrics.scores
outputs:
- id: biometrics.report.html
path: "{input}/{name}/biometrics_report.html"
- id: biometrics.tsv
path: "{input}/{name}/biometrics.tsv"
- id: plotting.html
path: "{input}/{name}/plotting_report.html"
stages:

## clustbench data ##########################################################

- id: data
@@ -55,68 +57,68 @@
url: https://github.com/imallona/clustbench_data
commit: 366c5a2
parameters: # comments depict the possible cardinalities and the number of curated labelsets
- values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1
- values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1
- values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2
- values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1
- values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1
- values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6 2
- values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1
- values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1
- values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1
- values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1
- values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6
- values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1
- values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2
- values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1
- values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1
- values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] # 2, 5 2
- values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5 2
- values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5 2
- values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] # 3, 5 2
- values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1
- values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1
- values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1
- values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1
- values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1
- values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1
- values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1
- values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5
- values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2
- values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1
- values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] # 3, 4 2
- values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3
- values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1
- values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1
- values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1
- values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1
- values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1
- values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1
- values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] # 2 1
- values: ["--dataset_generator", "uci", "--dataset_name", "wine"] # 3 1
- values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10 1
- values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1
- values: ["--dataset_generator", "wut", "--dataset_name", "cross"] # 4 1
- values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10 1
- values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1
- values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1
- values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1
- values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1
- values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1
- values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1
- values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1
- values: ["--dataset_generator", "wut", "--dataset_name", "smile"] # 4, 6 2
- values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] # 2 1
- values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] # 4 1
- values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] # 3 1
- values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1
- values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1
- values: ["--dataset_generator", "wut", "--dataset_name", "x1"] # 3 1
- values: ["--dataset_generator", "wut", "--dataset_name", "x2"] # 3 1
- values: ["--dataset_generator", "wut", "--dataset_name", "x3"] # 4 1
- values: ["--dataset_generator", "wut", "--dataset_name", "z1"] # 3 1
- values: ["--dataset_generator", "wut", "--dataset_name", "z2"] # 5 1
- values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1
- values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1
# - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1
# - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2
# - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1
# - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1
# - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6 2
# - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1
# - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1
# - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1
# - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1
# - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6
# - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1
# - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2
# - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1
# - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1
# - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] # 2, 5 2
# - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5 2
# - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5 2
# - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] # 3, 5 2
# - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1
# - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1
# - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1
# - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1
# - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1
# - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1
# - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1
# - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5
# - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2
# - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1
# - values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] # 3, 4 2
# - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3
# - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1
# - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1
# - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1
# - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1
# - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1
# - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1
# - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] # 2 1
# - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] # 3 1
# - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "cross"] # 4 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "smile"] # 4, 6 2
# - values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] # 2 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] # 4 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] # 3 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] # 3 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] # 3 1
- values: ["--dataset_generator", "wut", "--dataset_name", "x3"] # 4 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] # 3 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] # 5 1
# - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1
outputs:
- id: data.matrix
path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
@@ -226,90 +228,4 @@
- data.true_labels
outputs:
- id: metrics.scores
path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"

# ## daniel's data ###########################################################################

# - id: danielsdata
# modules:
# - id: iris_manual
# name: "Iris Dataset"
# software_environment: "sklearn"
# repository:
# url: https://github.com/omnibenchmark-example/iris.git
# commit: 47c63f0
# - id: penguins
# name: "Penguins Dataset"
# software_environment: "sklearn"
# repository:
# url: https://github.com/omnibenchmark-example/penguins.git
# commit: 9032478
# outputs:
# - id: data.features
# path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv"
# - id: data.labels
# path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv"

# ## daniel's distances ########################################################################

# - id: distances
# modules:
# - id: D1
# software_environment: "sklearn"
# parameters:
# - values: ["--measure", "cosine"]
# - values: ["--measure", "euclidean"]
# - values: ["--measure", "manhattan"]
# - values: ["--measure", "chebyshev"]
# repository:
# url: https://github.com/omnibenchmark-example/distance.git
# commit: dd99d4f
# inputs:
# - entries:
# - data.features
# outputs:
# - id: distances
# path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv"

# ## daniel's methods ###################################################################

# - id: danielmethods
# modules:
# - id: kmeans
# software_environment: "sklearn"
# repository:
# url: https://github.com/omnibenchmark-example/kmeans.git
# commit: 049c8b1
# - id: ward
# software_environment: "R"
# repository:
# url: https://github.com/omnibenchmark-example/ward.git
# commit: 976e3f3
# inputs:
# - entries:
# - distances
# outputs:
# - id: methods.clusters
# path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv"

# ## daniel's metrics ###################################################################

# - id: danielsmetrics
# modules:
# - id: ari
# software_environment: "R"
# repository:
# url: https://github.com/omnibenchmark-example/ari.git
# commit: 72708f0
# - id: accuracy
# software_environment: "R"
# repository:
# url: https://github.com/omnibenchmark-example/accuracy.git
# commit: e26b32f
# inputs:
# - entries:
# - methods.clusters
# - data.labels
# outputs:
# - id: metrics.mapping
# path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt"
path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
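
To reproduce the run locally, the same commands as in the workflow's install and run steps apply; a sketch, assuming an active conda/mamba environment with pip (only the thread count differs from the CI value of 20, adjust it to the machine):

    pip install git+https://github.com/omnibenchmark/omnibenchmark.git@dev
    ob run benchmark -b Clustering.yaml --local --threads 4 --continue-on-error

In CI the run command is additionally prefixed with echo "y" | so that any confirmation prompt is answered non-interactively.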