diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000..102df80
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,129 @@
+name: Run Benchmark
+# Runs for the same pull-request branch share a group, so a newer run cancels
+# the older one; other events fall back to the unique run id.
+concurrency:
+  group: ${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  workflow_dispatch:
+
+# Declaring any permission sets every unlisted scope to "none", so the read
+# access that actions/checkout needs has to be granted explicitly.
+permissions:
+  contents: read
+  id-token: write
+  pages: write
+
+jobs:
+  run-benchmark:
+    name: Run Benchmark
+    ## runs-on: ubuntu-latest
+    runs-on: self-hosted
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Install Mambaforge
+        uses: conda-incubator/setup-miniconda@v3
+        with:
+          miniforge-variant: Miniforge3
+          use-mamba: true
+          activate-environment: omnibenchmark-env
+          python-version: "3.12"
+          auto-update-conda: true
+          channels: conda-forge
+
+      - name: Cache environment
+        id: cache-env
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.conda/pkgs
+            ~/.conda/envs/omnibenchmark-env
+            ~/.cache/pip
+          key: ${{ runner.os }}-conda-pip-${{ hashFiles('requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-conda-pip-
+
+      - name: Install omnibenchmark CLI
+        shell: bash -l {0}
+        run: |
+          mamba install -y pip
+          pip install git+https://github.com/omnibenchmark/omnibenchmark.git@dev
+
+      # The upload-artifact job restores out/ using this same cache key, which
+      # is derived from the benchmark definition file.
+      - name: Load benchmark cache
+        id: cache-benchmark
+        uses: actions/cache@v3
+        with:
+          path: out/
+          key: benchmark-${{ runner.os }}-${{ hashFiles('Clustering.yaml') }}
+
+      - name: Run benchmark
+        shell: bash -l {0}
+        continue-on-error: true
+        run: |
+          echo "y" | ob run benchmark -b Clustering.yaml --local --threads 20 --continue-on-error
+
+  upload-artifact:
+    name: Benchmark Artifact
+    ## runs-on: ubuntu-latest
+    runs-on: self-hosted
+    needs: run-benchmark
+    if: always()
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Load cached output
+        uses: actions/cache@v3
+        with:
+          path: out/
+          key: benchmark-${{ runner.os }}-${{ hashFiles('Clustering.yaml') }}
+
+      - name: Prepare output
+        run: |
+          zip -r benchmark_output.zip out/
+          mkdir -p gh-pages
+          cp out/plotting/plotting_report.html gh-pages/index.html
+
+      - name: Upload zipped output
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-output
+          path: benchmark_output.zip
+          retention-days: 7
+
+      - name: Upload Pages Artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: gh-pages
+
+      - name: Create Job Summary
+        if: always()
+        run: |
+          echo "### Reports" >> "$GITHUB_STEP_SUMMARY"
+          echo "- [Plotting Report](https://${{ github.repository_owner }}.github.io/${{ github.event.repository.name }})" >> "$GITHUB_STEP_SUMMARY"
+          echo "### All Outputs" >> "$GITHUB_STEP_SUMMARY"
+          echo "- [Complete Benchmark Output](https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}#artifacts)" >> "$GITHUB_STEP_SUMMARY"
+
+  deploy-pages:
+    name: Deploy Report
+    runs-on: self-hosted
+    needs: upload-artifact
+    # actions/deploy-pages expects its job to target the github-pages
+    # environment. NOTE(review): this still runs for pull_request events;
+    # confirm the environment's branch rules allow that.
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/Clustering.yaml b/Clustering.yaml
index a041cec..c0b89ae 100644
--- a/Clustering.yaml
+++ b/Clustering.yaml
@@ -23,27 +23,29 @@ software_environments:
     conda: envs/r.yml
     apptainer: envs/r.sif
     envmodule: fcps # not true, but
+  rmarkdown:
+    description: "R with some plotting dependencies"
+    conda: envs/rmarkdown.yml
+    apptainer: envs/r.sif # not true, but
+    envmodule: fcps # not true, but
   fcps:
     description: "CRAN's FCPS"
     conda: envs/fcps.yml
     apptainer: envs/fcps.sif
     envmodule: fcps

 metric_collectors:
-  - id: biometrics
-    name: "Biologically-relevant performance metrics gathering and postprocessing."
-    software_environment: "R"
+  - id: plotting
+    name: "Single-backend metric collector."
+    software_environment: "rmarkdown"
     repository:
-      url: https://github.com/omnibenchmark-example/metric-collector.git
-      commit: ef4a601
+      url: https://github.com/imallona/clustering_report
+      commit: 0a4ddff
     inputs:
       - metrics.scores
     outputs:
-      - id: biometrics.report.html
-        path: "{input}/{name}/biometrics_report.html"
-      - id: biometrics.tsv
-        path: "{input}/{name}/biometrics.tsv"
+      - id: plotting.html
+        path: "{input}/{name}/plotting_report.html"

 stages:
-  ## clustbench data ##########################################################
   - id: data
@@ -55,68 +57,69 @@ stages:
           url: https://github.com/imallona/clustbench_data
           commit: 366c5a2
         parameters: # comments depict the possible cardinalities and the number of curated labelsets
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6 2
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1
-          - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1
-          - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1
-          - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6
-          - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1
-          - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2
-          - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1
-          - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1
-          - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] # 2, 5 2
-          - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5 2
-          - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5 2
-          - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] # 3, 5 2
-          - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1
-          - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1
-          - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1
-          - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1
-          - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1
-          - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1
-          - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1
-          - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5
-          - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2
-          - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1
-          - values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] # 3, 4 2
-          - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3
-          - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1
-          - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1
-          - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1
-          - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1
-          - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1
-          - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1
-          - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] # 2 1
-          - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] # 3 1
-          - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "cross"] # 4 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "smile"] # 4, 6 2
-          - values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] # 2 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] # 4 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] # 3 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] # 3 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] # 3 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "x3"] # 4 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] # 3 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] # 5 1
-          - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1
+          # NOTE(review): only "atom" and "x3" stay enabled below -- presumably to keep CI runs short; confirm.
+          - values: ["--dataset_generator", "fcps", "--dataset_name", "atom"] # 2 1
+          # - values: ["--dataset_generator", "fcps", "--dataset_name", "chainlink"] # 2 1
+          # - values: ["--dataset_generator", "fcps", "--dataset_name", "engytime"] # 2 2
+          # - values: ["--dataset_generator", "fcps", "--dataset_name", "hepta"] # 7 1
+          # - values: ["--dataset_generator", "fcps", "--dataset_name", "lsun"] # 3 1
+          # - values: ["--dataset_generator", "fcps", "--dataset_name", "target"] # 2, 6 2
+          # - values: ["--dataset_generator", "fcps", "--dataset_name", "tetra"] # 4 1
+          # - values: ["--dataset_generator", "fcps", "--dataset_name", "twodiamonds"] # 2 1
+          # - values: ["--dataset_generator", "fcps", "--dataset_name", "wingnut"] # 2 1
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "dense"] # 2 1
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "fuzzyx"] # 2, 4, 5 6
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "line"] # 2 1
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "parabolic"] # 2, 4 2
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "ring"] # 2 1
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_noisy"] # 2 1
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "ring_outliers"] # 2, 5 2
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag"] # 3, 5 2
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_noisy"] # 3, 5 2
+          # - values: ["--dataset_generator", "graves", "--dataset_name", "zigzag_outliers"] # 3, 5 2
+          # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t4_8k"] # 6 1
+          # - values: ["--dataset_generator", "other", "--dataset_name", "chameleon_t5_8k"] # 6 1
+          # - values: ["--dataset_generator", "other", "--dataset_name", "hdbscan"] # 6 1
+          # - values: ["--dataset_generator", "other", "--dataset_name", "iris"] # 3 1
+          # - values: ["--dataset_generator", "other", "--dataset_name", "iris5"] # 3 1
+          # - values: ["--dataset_generator", "other", "--dataset_name", "square"] # 2 1
+          # - values: ["--dataset_generator", "sipu", "--dataset_name", "aggregation"] # 7 1
+          # - values: ["--dataset_generator", "sipu", "--dataset_name", "compound"] # 4, 5, 6 5
+          # - values: ["--dataset_generator", "sipu", "--dataset_name", "flame"] # 2 2
+          # - values: ["--dataset_generator", "sipu", "--dataset_name", "jain"] # 2 1
+          # - values: ["--dataset_generator", "sipu", "--dataset_name", "pathbased"] # 3, 4 2
+          # - values: ["--dataset_generator", "sipu", "--dataset_name", "r15"] # 8, 9, 15 3
+          # - values: ["--dataset_generator", "sipu", "--dataset_name", "spiral"] # 3 1
+          # - values: ["--dataset_generator", "sipu", "--dataset_name", "unbalance"] # 8 1
+          # - values: ["--dataset_generator", "uci", "--dataset_name", "ecoli"] # 8 1
+          # - values: ["--dataset_generator", "uci", "--dataset_name", "ionosphere"] # 2 1
+          # - values: ["--dataset_generator", "uci", "--dataset_name", "sonar"] # 2 1
+          # - values: ["--dataset_generator", "uci", "--dataset_name", "statlog"] # 7 1
+          # - values: ["--dataset_generator", "uci", "--dataset_name", "wdbc"] # 2 1
+          # - values: ["--dataset_generator", "uci", "--dataset_name", "wine"] # 3 1
+          # - values: ["--dataset_generator", "uci", "--dataset_name", "yeast"] # 10 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "circles"] # 4 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "cross"] # 4 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "graph"] # 10 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "isolation"] # 3 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "labirynth"] # 6 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "mk1"] # 3 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "mk2"] # 2 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "mk3"] # 3 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "mk4"] # 3 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "olympic"] # 5 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "smile"] # 4, 6 2
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "stripes"] # 2 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "trajectories"] # 4 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "trapped_lovers"] # 3 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "twosplashes"] # 2 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "windows"] # 5 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "x1"] # 3 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "x2"] # 3 1
+          - values: ["--dataset_generator", "wut", "--dataset_name", "x3"] # 4 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "z1"] # 3 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "z2"] # 5 1
+          # - values: ["--dataset_generator", "wut", "--dataset_name", "z3"] # 4 1
     outputs:
       - id: data.matrix
         path: "{input}/{stage}/{module}/{params}/{dataset}.data.gz"
@@ -226,90 +229,4 @@ stages:
           - data.true_labels
     outputs:
       - id: metrics.scores
         path: "{input}/{stage}/{module}/{params}/{dataset}.scores.gz"
-
-  # ## daniel's data ###########################################################################
-
-  # - id: danielsdata
-  #   modules:
-  #     - id: iris_manual
-  #       name: "Iris Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/iris.git
-  #         commit: 47c63f0
-  #     - id: penguins
-  #       name: "Penguins Dataset"
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/penguins.git
-  #         commit: 9032478
-  #   outputs:
-  #     - id: data.features
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.features.csv"
-  #     - id: data.labels
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.labels.csv"
-
-  # ## daniel's distances ########################################################################
-
-  # - id: distances
-  #   modules:
-  #     - id: D1
-  #       software_environment: "sklearn"
-  #       parameters:
-  #         - values: ["--measure", "cosine"]
-  #         - values: ["--measure", "euclidean"]
-  #         - values: ["--measure", "manhattan"]
-  #         - values: ["--measure", "chebyshev"]
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/distance.git
-  #         commit: dd99d4f
-  #   inputs:
-  #     - entries:
-  #         - data.features
-  #   outputs:
-  #     - id: distances
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.distances.csv"
-
-  # ## daniel's methods ###################################################################
-
-  # - id: danielmethods
-  #   modules:
-  #     - id: kmeans
-  #       software_environment: "sklearn"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/kmeans.git
-  #         commit: 049c8b1
-  #     - id: ward
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ward.git
-  #         commit: 976e3f3
-  #   inputs:
-  #     - entries:
-  #         - distances
-  #   outputs:
-  #     - id: methods.clusters
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.clusters.csv"
-
-  # ## daniel's metrics ###################################################################
-
-  # - id: danielsmetrics
-  #   modules:
-  #     - id: ari
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/ari.git
-  #         commit: 72708f0
-  #     - id: accuracy
-  #       software_environment: "R"
-  #       repository:
-  #         url: https://github.com/omnibenchmark-example/accuracy.git
-  #         commit: e26b32f
-  #   inputs:
-  #     - entries:
-  #         - methods.clusters
-  #         - data.labels
-  #   outputs:
-  #     - id: metrics.mapping
-  #       path: "{input}/{stage}/{module}/{params}/{dataset}.metrics.txt"