Commit b2c82ba

[Benchmarks] remove force rebuild path, fail on error in CI, shorten CI time (#20388)
## Change in CI

### Add exit-on-failure to benchmarks CI run

The benchmark scripts already had an exit-on-failure option, but the CI job that checks their correctness did not use it. It is now enabled, so we catch more errors in CI.

### Speed up benchmarks CI

CI was running as many iterations as a normal performance measurement, which is pointless when only checking that the scripts work. The iteration count is now limited to the minimum, which cut benchmarks CI time from 1 hour to 15 minutes.

## Change in Benchmark scripts

### Disabled one failing benchmark, fixed another

Once CI started exiting on any failure, it revealed two errors in the benchmarks. One was easy to fix. The other will be fixed in the future; its benchmarks are disabled for now.

### Removed needs_rebuild argument

Whether a project uses an install dir is now defined in a single place, the GitProject constructor, rather than through the `configure` and `needs_rebuild` arguments, which was error-prone. There is no force-rebuild path any more.

The `--no-rebuild` option of devops/scripts/benchmarks/main.py is removed; the script now always behaves as it previously did with that option, which simplifies the code. Previously, `--no-rebuild` caused dependent projects (compute-benchmarks, llama.cpp, etc.) to not be rebuilt if they were already compiled at the proper version (i.e. the commit hash hardcoded in the benchmark scripts matched). This behavior is now the default and the only one: if a suite is enabled by options (`--sycl`, `--ur`, etc.), is already compiled, and its tag (e.g. the compute-benchmarks tag) matches the one hardcoded in the benchmarks, it is not rebuilt; otherwise it is rebuilt.

In CI, the workdir is simply cleaned before running main.py, so the previous behavior is preserved. During manual work, to force a rebuild of a dependent repo, remove its binary dir from the workdir.
1 parent 220a474 commit b2c82ba
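To illustrate the new default rebuild behavior described above, here is a minimal sketch of a tag-match check, assuming a hypothetical marker file written after each successful build; the actual GitProject implementation in the benchmark scripts may differ.

```python
from pathlib import Path

def needs_rebuild(build_dir: Path, expected_tag: str) -> bool:
    """Rebuild only if the project was never built in this workdir, or
    if the previously built tag differs from the hardcoded one."""
    marker = build_dir / ".built_tag"  # hypothetical marker file, for illustration
    if not build_dir.is_dir() or not marker.is_file():
        return True
    return marker.read_text().strip() != expected_tag

# After a successful build, the tag would be recorded so the next run can skip it:
# (build_dir / ".built_tag").write_text(expected_tag)
```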

File tree

15 files changed: +132 -91 lines


.github/workflows/sycl-linux-precommit.yml

Lines changed: 1 addition & 0 deletions
@@ -242,6 +242,7 @@ jobs:
       benchmark_upload_results: false
       benchmark_preset: 'Minimal'
       benchmark_dry_run: true
+      benchmark_exit_on_failure: true
       repo_ref: ${{ github.sha }}
       toolchain_artifact: ${{ needs.build.outputs.toolchain_artifact }}
       toolchain_artifact_filename: ${{ needs.build.outputs.toolchain_artifact_filename }}

.github/workflows/sycl-linux-run-tests.yml

Lines changed: 13 additions & 0 deletions
@@ -140,6 +140,12 @@ on:
         type: string
         default: 'false'
         required: False
+      benchmark_exit_on_failure:
+        description: |
+          Whether or not to fail the workflow upon a failure.
+        type: string
+        default: 'false'
+        required: False

   workflow_dispatch:
     inputs:
@@ -211,6 +217,12 @@ on:
           - "build-only"
           - "run-only"

+      benchmark_exit_on_failure:
+        type: choice
+        options:
+          - "true"
+          - "false"
+
 permissions:
   contents: read
   packages: read
@@ -358,6 +370,7 @@ jobs:
       save_name: ${{ inputs.benchmark_save_name }}
       preset: ${{ inputs.benchmark_preset }}
       dry_run: ${{ inputs.benchmark_dry_run }}
+      exit_on_failure: ${{ inputs.benchmark_exit_on_failure }}
       build_ref: ${{ inputs.repo_ref }}
     env:
       RUNNER_TAG: ${{ inputs.runner }}

.github/workflows/sycl-ur-perf-benchmarking.yml

Lines changed: 14 additions & 0 deletions
@@ -41,6 +41,12 @@ on:
         description: |
           Upload results to https://intel.github.io/llvm/benchmarks/.
         required: true
+      exit_on_failure:
+        type: string # true/false: workflow_dispatch does not support booleans
+        description: |
+          Fail benchmark script on any error. Limit number of iterations to just test correctness.
+        required: false
+        default: 'false'
       runner:
         type: string
         required: true
@@ -90,6 +96,13 @@ on:
           - false
           - true
         default: true
+      exit_on_failure:
+        description: Fail benchmark script on any error. Limit number of iterations to just test correctness.
+        type: choice
+        options:
+          - false
+          - true
+        default: false
       runner:
         description: Self-hosted runner to use for the benchmarks
         type: choice
@@ -193,6 +206,7 @@ jobs:
       benchmark_upload_results: ${{ inputs.upload_results }}
       benchmark_save_name: ${{ needs.sanitize_inputs.outputs.benchmark_save_name }}
       benchmark_preset: ${{ inputs.preset }}
+      benchmark_exit_on_failure: ${{ inputs.exit_on_failure }}
       repo_ref: ${{ needs.sanitize_inputs.outputs.build_ref }}
       toolchain_artifact: ${{ needs.build_sycl.outputs.toolchain_artifact }}
       toolchain_artifact_filename: ${{ needs.build_sycl.outputs.toolchain_artifact_filename }}

devops/actions/run-tests/benchmark/action.yml

Lines changed: 9 additions & 3 deletions
@@ -32,6 +32,9 @@ inputs:
   dry_run:
     type: string
     required: False
+  exit_on_failure:
+    type: string
+    required: False

 runs:
   using: "composite"
@@ -192,8 +195,10 @@ runs:
       sycl-ls
       echo "-----"

-      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py \
-        "$(realpath ./llvm_test_workdir)" \
+      WORKDIR="$(realpath ./llvm_test_workdir)"
+      if [ -n "$WORKDIR" ] && [ -d "$WORKDIR" ] && [[ "$WORKDIR" == *llvm_test_workdir* ]]; then rm -rf "$WORKDIR" ; fi
+
+      taskset -c "$CORES" ./devops/scripts/benchmarks/main.py "$WORKDIR" \
         --sycl "$(realpath ./toolchain)" \
         --ur "$(realpath ./ur/install)" \
         --adapter "$FORCELOAD_ADAPTER" \
@@ -204,7 +209,8 @@ runs:
         --preset "$PRESET" \
         --timestamp-override "$SAVE_TIMESTAMP" \
         --detect-version sycl,compute_runtime \
-        --flamegraph inclusive
+        ${{ inputs.exit_on_failure == 'true' && '--exit-on-failure --iterations 1' || '' }}
+      # TODO: add back: "--flamegraph inclusive" once works properly

       echo "-----"
       python3 ./devops/scripts/benchmarks/compare.py to_hist \
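The composite action above injects `--exit-on-failure --iterations 1` into the main.py invocation only when the input is set. As a rough illustration of how such flags could be consumed on the script side, here is a hedged argparse sketch; the actual option wiring in main.py may differ.

```python
# Hedged sketch of the CI-only flags; the real argparse setup in
# devops/scripts/benchmarks/main.py may differ.
import argparse

parser = argparse.ArgumentParser(description="Run the benchmark suites")
parser.add_argument(
    "--exit-on-failure",
    action="store_true",
    help="Abort the whole run on the first benchmark error (used by CI).",
)
parser.add_argument(
    "--iterations",
    type=int,
    default=None,
    help="Override per-benchmark iteration counts (CI passes 1).",
)

args = parser.parse_args(["--exit-on-failure", "--iterations", "1"])
assert args.exit_on_failure and args.iterations == 1
```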

devops/scripts/benchmarks/README.md

Lines changed: 4 additions & 4 deletions
@@ -36,6 +36,10 @@ using the built compiler located in `~/llvm/build/` and
 installed Unified Runtime in directory `~/ur_install`,
 and then **run** the benchmarks for `adapter_name` adapter.

+The scripts will try to reuse the files stored in `~/benchmarks_workdir/`.
+If binaries of any dependent projects are already built, they will not be
+rebuilt, as long as their tags match the ones specified in the benchmarks source code.
+
 >NOTE: By default `level_zero` adapter is used.

 >NOTE: Pay attention to the `--ur` parameter. It points directly to the directory where UR is installed.
@@ -48,10 +52,6 @@ $ cmake --build ~/ur_build -j $(nproc)
 $ cmake --install ~/ur_build
 ```

-### Rebuild
-The scripts will try to reuse the files stored in `~/benchmarks_workdir/`, but the benchmarks will be rebuilt every time.
-To avoid that, use `--no-rebuild` option.
-
 ## Results

 By default, the benchmark results are not stored.
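Since the `--no-rebuild` flag is gone, forcing a rebuild during manual work means deleting the project's directory from the workdir. A minimal sketch, assuming the binaries live in per-project subdirectories of `~/benchmarks_workdir/` (the exact layout is an assumption):

```python
import shutil
from pathlib import Path

# Remove one dependent project's build output so the next main.py run
# sees no previous build (or a tag mismatch) and rebuilds it from scratch.
project_dir = Path.home() / "benchmarks_workdir" / "compute-benchmarks"  # assumed layout
if project_dir.exists():
    shutil.rmtree(project_dir)
```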

devops/scripts/benchmarks/benches/benchdnn.py

Lines changed: 5 additions & 2 deletions
@@ -66,9 +66,13 @@ def setup(self) -> None:
             self.git_tag(),
             Path(options.workdir),
             "onednn",
-            force_rebuild=True,
+            use_installdir=False,
         )

+        if not self.project.needs_rebuild():
+            log.info(f"Rebuilding {self.project.name} skipped")
+            return
+
         extra_cmake_args = [
             f"-DCMAKE_PREFIX_PATH={options.sycl}",
             "-DCMAKE_CXX_COMPILER=clang++",
@@ -80,7 +84,6 @@ def setup(self) -> None:
         ]
         self.project.configure(
             extra_cmake_args,
-            install_prefix=False,
             add_sycl=True,
         )
         self.project.build(

devops/scripts/benchmarks/benches/compute.py

Lines changed: 21 additions & 13 deletions
@@ -14,6 +14,7 @@
 from git_project import GitProject
 from options import options
 from utils.result import BenchmarkMetadata, Result
+from utils.logger import log

 from .base import Benchmark, Suite, TracingType
 from .compute_metadata import ComputeMetadataGenerator
@@ -74,9 +75,13 @@ def setup(self) -> None:
             self.git_hash(),
             Path(options.workdir),
             "compute-benchmarks",
-            force_rebuild=True,
+            use_installdir=False,
         )

+        if not self.project.needs_rebuild():
+            log.info(f"Rebuilding {self.project.name} skipped")
+            return
+
         extra_args = [
             f"-DBUILD_SYCL=ON",
             f"-DSYCL_COMPILER_ROOT={options.sycl}",
@@ -96,7 +101,7 @@ def setup(self) -> None:
             f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime",
         ]

-        self.project.configure(extra_args, install_prefix=False, add_sycl=True)
+        self.project.configure(extra_args, add_sycl=True)
         self.project.build(add_sycl=True)

     def additional_metadata(self) -> dict[str, BenchmarkMetadata]:
@@ -266,12 +271,13 @@ def benchmarks(self) -> list[Benchmark]:

         # Add UR-specific benchmarks
         benches += [
-            MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 1, 1, 1, 1, 0),
-            MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 0, 1, 1, 1, 0),
-            MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 1, 0),
-            MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 0, 0),
-            MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 0),
-            MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 1),
+            # TODO: multithread_benchmark_ur fails with segfault
+            # MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 1, 1, 1, 1, 0),
+            # MemcpyExecute(self, RUNTIMES.UR, 400, 1, 102400, 10, 0, 1, 1, 1, 0),
+            # MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 1, 0),
+            # MemcpyExecute(self, RUNTIMES.UR, 100, 4, 102400, 10, 1, 1, 0, 0, 0),
+            # MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 0),
+            # MemcpyExecute(self, RUNTIMES.UR, 4096, 4, 1024, 10, 0, 1, 0, 1, 1),
             UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256, "Both"),
             UsmMemoryAllocation(self, RUNTIMES.UR, "Device", 256 * 1024, "Both"),
             UsmBatchMemoryAllocation(self, RUNTIMES.UR, "Device", 128, 256, "Both"),
@@ -342,11 +348,12 @@ def cpu_count_str(self, separator: str = "") -> str:

     def get_iters(self, run_trace: TracingType):
         """Returns the number of iterations to run for the given tracing type."""
-        return (
-            self.iterations_trace
-            if run_trace != TracingType.NONE
-            else self.iterations_regular
-        )
+        if options.exit_on_failure:
+            # we are just testing that the benchmark runs successfully
+            return 3
+        if run_trace == TracingType.NONE:
+            return self.iterations_regular
+        return self.iterations_trace

     def supported_runtimes(self) -> list[RUNTIMES]:
         """Base runtimes supported by this benchmark, can be overridden."""
@@ -764,6 +771,7 @@ def bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
             "--contents=Zeros",
             "--multiplier=1",
             "--vectorSize=1",
+            "--lws=256",
         ]
devops/scripts/benchmarks/benches/gromacs.py

Lines changed: 7 additions & 3 deletions
@@ -57,7 +57,7 @@ def setup(self) -> None:
             self.git_tag(),
             Path(options.workdir),
             "gromacs",
-            force_rebuild=True,
+            use_installdir=False,
         )

         # TODO: Detect the GPU architecture and set the appropriate flags
@@ -83,8 +83,12 @@ def setup(self) -> None:
         if options.unitrace:
             extra_args.append("-DGMX_USE_ITT=ON")

-        self.project.configure(extra_args, install_prefix=False, add_sycl=True)
-        self.project.build(add_sycl=True, ld_library=self.oneapi.ld_libraries())
+        if self.project.needs_rebuild():
+            self.project.configure(extra_args, add_sycl=True)
+            self.project.build(add_sycl=True, ld_library=self.oneapi.ld_libraries())
+        else:
+            log.info(f"Rebuilding {self.project.name} skipped")
+
         download(
             options.workdir,
             self.grappa_url(),

devops/scripts/benchmarks/benches/llamacpp.py

Lines changed: 5 additions & 1 deletion
@@ -14,6 +14,7 @@
 from options import options
 from utils.oneapi import get_oneapi
 from git_project import GitProject
+from utils.logger import log


 class LlamaCppBench(Suite):
@@ -39,7 +40,6 @@ def setup(self) -> None:
             self.git_hash(),
             Path(options.workdir),
             "llamacpp",
-            force_rebuild=True,
         )

         models_dir = Path(options.workdir, "llamacpp-models")
@@ -54,6 +54,10 @@ def setup(self) -> None:

         self.oneapi = get_oneapi()

+        if not self.project.needs_rebuild():
+            log.info(f"Rebuilding {self.project.name} skipped")
+            return
+
         extra_args = [
             f"-DGGML_SYCL=ON",
             f"-DCMAKE_C_COMPILER=clang",

devops/scripts/benchmarks/benches/syclbench.py

Lines changed: 6 additions & 2 deletions
@@ -36,9 +36,13 @@ def setup(self) -> None:
             self.git_hash(),
             Path(options.workdir),
             "sycl-bench",
-            force_rebuild=True,
+            use_installdir=False,
         )

+        if not self.project.needs_rebuild():
+            log.info(f"Rebuilding {self.project.name} skipped")
+            return
+
         extra_args = [
             f"-DCMAKE_CXX_COMPILER={options.sycl}/bin/clang++",
             f"-DCMAKE_C_COMPILER={options.sycl}/bin/clang",
@@ -53,7 +57,7 @@ def setup(self) -> None:
             f"-DCMAKE_CXX_FLAGS=-fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch={options.hip_arch}"
         ]

-        self.project.configure(extra_args, install_prefix=False, add_sycl=True)
+        self.project.configure(extra_args, add_sycl=True)
         self.project.build(add_sycl=True)

     def benchmarks(self) -> list[Benchmark]:
