NAG-DevOps · smokhov · Jun 19, 2025 · Jun 19, 2025 · Jun 21, 2025 · Jun 23, 2025
diff --git a/doc/job-management/cl.tex b/doc/job-management/cl.tex
@@ -0,0 +1,21 @@
+% OpenCL
+% -------------------------------------------------------------
+\subsection{OpenCL}
+\label{sect:cl}
+
+We have a directory of jobs using OpenCL-based examples that can
+run on both NVIDIA and AMD GPUs. The examples, including a
+benchmark, are currently aimed mostly at the AMD GPU in speed-19, but they can
+be adjusted to NVIDIA.
+
+NOTE: OpenCL, originally intended to be an open-standard alternative to CUDA,
+is usually slower than CUDA on NVIDIA devices unless manually tuned,
+but some projects may still rely on it. Other compute alternatives
+would include HIP to run CUDA code on AMD GPUs and Vulkan.
+
+\begin{itemize}
+  \item \url{https://github.com/NAG-DevOps/speed-hpc/tree/master/src/cl}
+  \item \url{https://www.khronos.org/opencl/}
+\end{itemize}
+
+% TODO: add HIP and Vulkan jobs
diff --git a/doc/job-management/job-management-main.tex b/doc/job-management/job-management-main.tex
@@ -94,6 +94,10 @@ \section{Job Management}
 % -------------------------------------------------------------
 \input{job-management/java-jobs.tex}
 
+% OpenCL Jobs
+% -------------------------------------------------------------
+\input{job-management/cl}
+
 % Rust Jobs
 % -------------------------------------------------------------
 \input{job-management/rust-jobs.tex}
@@ -104,4 +108,4 @@ \section{Job Management}
 
 % Singularity Containers
 % -------------------------------------------------------------
-\input{job-management/singularity-containers.tex}
+\input{job-management/singularity-containers.tex}
diff --git a/src/README.md b/src/README.md
@@ -55,12 +55,12 @@ This directory contains a range of job script examples. Some are basic, while ot
   - `openiss-reid-speed.sh` -- OpenISS for person re-identification. See more [here](https://github.com/NAG-DevOps/speed-hpc/tree/master/src#openiss-reid-tfk).
   - `openiss-yolo-speed.sh`, and `openiss-yolo-interactive.sh` -- OpenISS + YOLO demos; more [here](https://github.com/NAG-DevOps/speed-hpc/tree/master/src#openiss-yolov3).
   - `pytorch-multinode-multigpu.sh` -- Using Pytorch with Python virtual environment to run on multiple GPUs and nodes.
-
   - [`gpaw/`](gpaw/) -- Example job scrits for GPAW simulaptions.
   - [`jupyter/`](jupyter/) -- Hands-on examples for launching JupyterLab using Conda.
   - [`llm-examples/`](llm-examples/) -- Examples for running Large Language Models (LLMs) such as LLaMA or BERT.
   - [`pytorch-multicpu/`](pytorch-multicpu/) -- Using Pytorch with Python virtual environment to run on CPUs.
   - [`single-job-multi-mig/`](single-job-multi-mig/) -- Demonstrates how to run a single job using multiple MIGs (Multi-Instance GPU).
+  - `cl/*` -- samples to run [OpenCL](cl/) jobs from examples, tutorials, and an AMDGPU benchmark
 
 <!-- TOC --><a id="creating-environments"></a>
 # Creating Virtual Environments

diff --git a/src/cl/Makefile b/src/cl/Makefile
@@ -0,0 +1,19 @@
+# Quick makefile to run all or clean
+#
+# Targets:
+#   all   -- runs ./cl-all.sh (default target)
+#   clean -- removes slurm-*.out and bench_*.txt files and the cloned repos
+
+# Neither target names a file it creates; declaring them phony keeps
+# them working even if a file called "all" or "clean" appears here.
+.PHONY: all clean
+
+all:
+	./cl-all.sh
+
+clean:
+	rm -rf \
+		slurm-*.out bench_*.txt \
+		amd-gpu-benchmark opencl-examples opencl-tutorials
+
+# EOF
diff --git a/src/cl/README.md b/src/cl/README.md
@@ -0,0 +1,54 @@
+# OpenCL-like application examples
+
+We use some of the OpenCL examples and benchmarks as an alternative to CUDA,
+especially for AMD GPUs. This can be later extended to HIP or Vulkan jobs.
+
+Note, OpenCL is more portable, but usually slower on NVIDIA devices than
+CUDA, and CUDA to HIP can work for AMD GPUs in various environments and
+in most cases interoperate or convert OpenCL and CUDA applications.
+(Driver and hardware-dependent.)
+
+speed-19 currently has an AMD GPU. The OpenCL examples *should* also work
+on NVIDIA GPUs. The `cl` partition covers most of the GPUs including
+AMDGPU and NVIDIA.
+
+PyTorch and TensorFlow also support AMD GPUs.
+
+## Examples
+
+We leverage the following repo forks as examples for these OpenCL jobs:
+
+- https://github.com/NAG-DevOps/amd-gpu-benchmark
+- https://github.com/NAG-DevOps/OpenCL-Tutorials
+- https://github.com/NAG-DevOps/OpenCL-examples
+
+They are modified forks of the original projects with our modifications to make
+them compile and run on Speed. Some examples require graphics and interaction
+and are excluded from automated batch tests.
+
+- `cl-all.sh` launches all the below; assuming a submit node
+- `cl-bench.sh` runs `amd-gpu-benchmark`
+- `cl-examples.sh` runs a subset of `OpenCL-examples`
+- OpenCL-examples -- examples that work and are included in tests
+  - `add_numbers`
+  - `Hello_World`
+  - `mandelbrot` -- compiles and runs, prints no output
+  - `sum_array`
+  - `square_array`
+  - `waste` -- needs an argument
+- OpenCL-examples -- need work or require X11, excluded from tests:
+  - `auger` -- requires clRNG to be downloaded to compile
+  - `cf4ocl` -- requires archived `cf4ocl` to be downloaded to compile
+  - `RayTraced_Quaternion_Julia-Set_Example` -- requires X11 and the client OpenGL AMD GPU to match if X11-forwarding
+  - `N-BodySimulation` -- currently an XCode-only project
+  - `rng` -- also relies on clRNG to be downloaded to compile
+- `cl-tutorials.sh` runs a subset of `OpenCL-Tutorials`, currently 1 and 3 are included in tests
+  - `tutorial_1`
+  - `tutorial_2` -- requires graphics to display images
+  - `tutorial_3`
+  - `tutorial_4` -- needs boost with compute
+
+## References
+
+- https://www.khronos.org/opencl/ (latest OpenCL spec release is 3.0.18 of April 2025, most devices in Speed run 2.x)
+- https://en.wikipedia.org/wiki/OpenCL
diff --git a/src/cl/cl-all.sh b/src/cl/cl-all.sh
@@ -0,0 +1,25 @@
+#!/encs/bin/tcsh
+
+#
+# Runs all benchmarks and examples for convenience
+#
+# Each script below submits itself to Slurm with sbatch when it is
+# started outside of a job, so this wrapper is meant to be run from a
+# submit node.
+#
+
+# The scripts are invoked by relative path, so change to the directory
+# holding this script first; this lets it be started from anywhere.
+cd "`dirname $0`"
+
+date
+echo "Starting 3 scripts; expect 3 slurm output files and whatever else they produce."
+echo "The scripts are launched synchronously but will queue asynchronously as jobs."
+echo "They will execute sequentially though because there is a single AMD GPU."
+echo "They will execute in parallel, if adapted to NVIDIA GPUs away from speed-19."
+./cl-bench.sh
+./cl-examples.sh
+./cl-tutorials.sh
+date
+
+# EOF
diff --git a/src/cl/cl-bench.sh b/src/cl/cl-bench.sh
@@ -0,0 +1,90 @@
+#!/encs/bin/tcsh
+
+#
+# Runs an AMDGPU benchmark
+#
+# Outside of a Slurm job, with SLURM_JOB_ID unset, the script submits
+# itself with sbatch and exits. Inside the job it clones or updates the
+# benchmark repo, prints rocm-smi and clinfo output, builds with make,
+# and runs the cl-info, main, and main-cpu binaries through srun.
+#
+
+#SBATCH -J cl-bench
+#SBATCH --mem=50G
+##SBATCH -p cl
+#SBATCH -p hip
+#SBATCH -n 1
+#SBATCH -c 8
+#SBATCH -w speed-19
+#SBATCH --mail-type=ALL
+#SBATCH --gres=gpu:amdgpu:1
+#SBATCH --constraint=amdgpu
+
+echo "cl-bench START"
+date
+
+# Self-submit when not already running inside a Slurm job
+if (! $?SLURM_JOB_ID ) then
+  echo "Did not detect Slurm job ID... running on"
+  hostname
+  echo "Will spin a job to a compute node..."
+  sbatch $0
+  exit 0
+else
+  echo -n "Running on "
+  hostname
+endif
+
+set REPO = "amd-gpu-benchmark"
+set GHBASE = "https://github.com/NAG-DevOps"
+
+# Fetch the sources: shallow clone on the first run, pull afterwards
+if ( ! -d $REPO ) then
+  echo "Cloning $REPO repo..."
+  time srun git clone --depth=1 "$GHBASE"/"$REPO".git
+else
+  echo "Found $REPO already present; pulling in case of updates..."
+  pushd $REPO
+    time srun git pull --rebase --autostash
+  popd
+endif
+
+echo "$0 rocm-smi..."
+date
+rocm-smi -a
+
+echo "$0 clinfo..."
+date
+/opt/amdgpu-pro/bin/clinfo
+
+echo "$0 compiling..."
+date
+pushd $REPO
+  module load gcc/7.3/default
+  which gcc
+  gcc -v
+  # Export the loaded compilers under the conventional CC and CXX names
+  setenv CC `which gcc`
+  setenv CXX `which g++`
+  # CCX is the name exported originally, presumably a typo for CXX;
+  # kept in case a build still reads it -- TODO confirm and drop
+  setenv CCX "$CXX"
+  make
+popd
+
+echo "$0 cl-info"
+date
+time srun ./$REPO/cl-info
+
+echo "$0 main"
+date
+time srun ./$REPO/main
+#srun strace ./$REPO/main
+#strace ./$REPO/main
+
+echo "$0 main-cpu"
+date
+time srun ./$REPO/main-cpu
+
+date
+echo "cl-bench DONE"
diff --git a/src/cl/cl-examples.sh b/src/cl/cl-examples.sh
@@ -0,0 +1,83 @@
+#!/encs/bin/tcsh
+
+#
+# Runs a set of OpenCL examples from a repo
+#
+# Outside of a Slurm job, with SLURM_JOB_ID unset, the script submits
+# itself with sbatch and exits. Inside the job it clones or updates the
+# examples repo and then, for each listed example directory, executes
+# make run through srun.
+#
+
+#SBATCH -J cl-examples
+#SBATCH --mem=50G
+##SBATCH -p cl
+#SBATCH -p hip
+#SBATCH -n 1
+#SBATCH -c 8
+#SBATCH -w speed-19
+#SBATCH --mail-type=ALL
+#SBATCH --gres=gpu:amdgpu:1
+#SBATCH --constraint=amdgpu
+
+echo "cl-examples START"
+date
+
+# Self-submit when not already running inside a Slurm job
+if (! $?SLURM_JOB_ID ) then
+  echo "Did not detect Slurm job ID... running on"
+  hostname
+  echo "Will spin a job to a compute node..."
+  sbatch $0
+  exit 0
+else
+  echo -n "Running on "
+  hostname
+endif
+
+set REPO = "opencl-examples"
+set GHBASE = "https://github.com/NAG-DevOps"
+
+# Fetch the sources: shallow clone on the first run, pull afterwards
+if ( ! -d $REPO ) then
+  echo "Cloning $REPO repo..."
+  time srun git clone --depth=1 "$GHBASE"/"$REPO".git
+else
+  echo "Found $REPO already present; pulling in case of updates..."
+  pushd $REPO
+    time srun git pull --rebase --autostash
+  popd
+endif
+
+echo "$0 compiling and running..."
+date
+pushd $REPO
+  module load gcc/7.3/default
+  which gcc
+  gcc -v
+  # Export the loaded compilers under the conventional CC and CXX names
+  setenv CC `which gcc`
+  setenv CXX `which g++`
+  # CCX is the name exported originally, presumably a typo for CXX;
+  # kept in case a build still reads it -- TODO confirm and drop
+  setenv CCX "$CXX"
+
+  # Build and run every example in the list below, one directory at a time
+  foreach example ( \
+      add_numbers \
+      Hello_World \
+      mandelbrot \
+      square_array \
+      sum_array \
+      waste \
+      )
+    pushd $example
+      date
+      pwd
+      time srun make run
+    popd
+  end
+popd
+
+date
+echo "cl-examples DONE"
diff --git a/src/cl/cl-tutorials.sh b/src/cl/cl-tutorials.sh
@@ -0,0 +1,79 @@
+#!/encs/bin/tcsh
+
+#
+# Runs a set of OpenCL tutorials from a repo
+#
+# Outside of a Slurm job, with SLURM_JOB_ID unset, the script submits
+# itself with sbatch and exits. Inside the job it clones the multi_os
+# branch of the tutorials repo, or updates an existing checkout, and
+# then executes build.sh through srun in each listed tutorial directory.
+#
+
+#SBATCH -J cl-tutorials
+#SBATCH --mem=50G
+##SBATCH -p cl
+#SBATCH -p hip
+#SBATCH -n 1
+#SBATCH -c 8
+#SBATCH -w speed-19
+#SBATCH --mail-type=ALL
+#SBATCH --gres=gpu:amdgpu:1
+#SBATCH --constraint=amdgpu
+
+echo "cl-tutorials START"
+date
+
+# Self-submit when not already running inside a Slurm job
+if (! $?SLURM_JOB_ID ) then
+  echo "Did not detect Slurm job ID... running on"
+  hostname
+  echo "Will spin a job to a compute node..."
+  sbatch $0
+  exit 0
+else
+  echo -n "Running on "
+  hostname
+endif
+
+set REPO = "opencl-tutorials"
+set GHBASE = "https://github.com/NAG-DevOps"
+
+# Fetch the sources: shallow clone on the first run, pull afterwards
+if ( ! -d $REPO ) then
+  echo "Cloning $REPO repo..."
+  time srun git clone -b multi_os --depth=1 "$GHBASE"/"$REPO".git
+else
+  echo "Found $REPO already present; pulling in case of updates..."
+  pushd $REPO
+    time srun git pull --rebase --autostash
+  popd
+endif
+
+echo "$0 compiling and running..."
+date
+pushd $REPO
+  module load gcc/12.2/default
+  which gcc
+  gcc -v
+  # Export the loaded compilers under the conventional CC and CXX names
+  setenv CC `which gcc`
+  setenv CXX `which g++`
+  # CCX is the name exported originally, presumably a typo for CXX;
+  # kept in case a build still reads it -- TODO confirm and drop
+  setenv CCX "$CXX"
+
+  # Run build.sh in every tutorial listed below, pointing it at the AMD OpenCL library
+  foreach tutorial ( \
+      tutorial_1 \
+      tutorial_3 \
+      )
+    pushd $tutorial
+      date
+      pwd
+      time srun ./build.sh -DOpenCL_LIBRARY:=/opt/amdgpu-pro/lib64/libOpenCL.so
+    popd
+  end
+popd
+
+date
+echo "cl-tutorials DONE"