Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions .github/workflows/iris-tests-apptainer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ jobs:
build-apptainer-image:
runs-on: [self-hosted, mi3008x]
timeout-minutes: 90
strategy:
matrix:
rocm_version: ["6.3.1", "7.0"]

steps:
- name: Checkout repository
Expand All @@ -32,20 +35,21 @@ jobs:
mkdir -p ~/apptainer

# Build Apptainer image from definition file (only if it doesn't exist)
if [ ! -f ~/apptainer/iris-dev.sif ]; then
echo "Building new Apptainer image..."
apptainer build ~/apptainer/iris-dev.sif apptainer/iris.def
if [ ! -f ~/apptainer/iris-dev-rocm${{ matrix.rocm_version }}.sif ]; then
echo "Building new Apptainer image for ROCm ${{ matrix.rocm_version }}..."
apptainer build ~/apptainer/iris-dev-rocm${{ matrix.rocm_version }}.sif apptainer/iris-rocm${{ matrix.rocm_version }}.def
else
echo "Using existing Apptainer image"
echo "Using existing Apptainer image for ROCm ${{ matrix.rocm_version }}"
fi
run-tests:
name: ${{ matrix.ranks }}-rank Iris Test
name: ${{ matrix.ranks }}-rank Iris Test (ROCm ${{ matrix.rocm_version }})
needs: build-apptainer-image
runs-on: [self-hosted, mi3008x]
timeout-minutes: 20
strategy:
matrix:
ranks: [1, 2, 4, 8]
rocm_version: ["6.3.1", "7.0"]
max-parallel: 1

steps:
Expand All @@ -54,7 +58,7 @@ jobs:

- name: Run Iris Tests with ${{ matrix.ranks }} ranks
run: |
apptainer exec ~/apptainer/iris-dev.sif bash -c "
apptainer exec ~/apptainer/iris-dev-rocm${{ matrix.rocm_version }}.sif bash -c "
set -e # Exit on any error

# Install iris first
Expand Down
35 changes: 35 additions & 0 deletions apptainer/iris-rocm6.3.1.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.

# Apptainer definition for the Iris development image on ROCm 6.3.1.
# Starts from the ROCm PyTorch Docker image, installs a pinned Triton commit
# in editable mode, and installs rocprofiler-systems under
# /opt/rocprofiler-systems.
Bootstrap: docker
From: rocm/pytorch:rocm6.3.1_ubuntu22.04_py3.10_pytorch

%post
    # The whole setup runs in one bash -c string. set -e makes the build stop
    # at the first failing command; without it only the exit status of the
    # last command in the string would decide whether the build succeeded.
    # wget is installed explicitly because the installer download below needs
    # it and the script should not depend on the base image shipping it.
    /bin/bash -c "
    set -e
    apt-get update && apt-get install -y git wget
    export TRITON_PATH=/workspace/triton
    conda env list
    source /opt/conda/bin/activate py_3.10
    conda install -y -n py_3.10 -c conda-forge jupyter ninja cmake wheel
    git clone https://github.com/triton-lang/triton.git \$TRITON_PATH
    cd \$TRITON_PATH
    git checkout dd5823453bcc7973eabadb65f9d827c43281c434
    pip install -e .
    wget https://github.com/ROCm/rocprofiler-systems/releases/download/rocm-6.3.1/rocprofiler-systems-install.py
    python3 ./rocprofiler-systems-install.py --prefix /opt/rocprofiler-systems --rocm 6.3
    "

%environment
    # Runtime environment: Triton checkout location, ROCm paths, and the
    # py_3.10 conda environment first on PATH.
    export TRITON_PATH=/workspace/triton
    export PYTHONPATH=$TRITON_PATH/python/
    export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
    export ROCM_PATH=/opt/rocm
    export PATH=/opt/conda/envs/py_3.10/bin:/opt/rocm/bin:$PATH
    # Open MPI MCA settings: exclude the ofi MTL and select the ob1 PML.
    export OMPI_MCA_mtl="^ofi"
    export OMPI_MCA_pml="ob1"

%runscript
    # Prints a banner, activates the conda environment, then replaces the
    # shell with the command passed to the container.
    # NOTE(review): source is a bash builtin; if %runscript is executed by a
    # POSIX sh such as dash the activation line may fail. Confirm which shell
    # runs this section (PATH above already points at the py_3.10 env).
    echo "Welcome to the ROCm-aware Apptainer image!"
    source /opt/conda/bin/activate py_3.10
    exec "$@"
35 changes: 35 additions & 0 deletions apptainer/iris-rocm7.0.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.

# Apptainer definition for the Iris development image on ROCm 7.0.
# Starts from the ROCm PyTorch Docker image, installs a pinned Triton commit
# in editable mode, and installs rocprofiler-systems under
# /opt/rocprofiler-systems.
Bootstrap: docker
From: rocm/pytorch:rocm7.0_ubuntu22.04_py3.10_pytorch_release_2.8.0

%post
    # The whole setup runs in one bash -c string. set -e makes the build stop
    # at the first failing command; without it only the exit status of the
    # last command in the string would decide whether the build succeeded.
    # wget is installed explicitly because the installer download below needs
    # it and the script should not depend on the base image shipping it.
    #
    # NOTE(review): the installer script is downloaded from the rocm-7.0.0
    # release but is invoked with --rocm 6.4, while the base image is ROCm 7.0.
    # Confirm this is intentional (for example, the installer may not offer a
    # 7.0 build) or change the value to match the image.
    /bin/bash -c "
    set -e
    apt-get update && apt-get install -y git wget
    export TRITON_PATH=/workspace/triton
    conda env list
    source /opt/conda/bin/activate py_3.10
    conda install -y -n py_3.10 -c conda-forge jupyter ninja cmake wheel
    git clone https://github.com/triton-lang/triton.git \$TRITON_PATH
    cd \$TRITON_PATH
    git checkout dd5823453bcc7973eabadb65f9d827c43281c434
    pip install -e .
    wget https://github.com/ROCm/rocprofiler-systems/releases/download/rocm-7.0.0/rocprofiler-systems-install.py
    python3 ./rocprofiler-systems-install.py --prefix /opt/rocprofiler-systems --rocm 6.4
    "

%environment
    # Runtime environment: Triton checkout location, ROCm paths, and the
    # py_3.10 conda environment first on PATH.
    export TRITON_PATH=/workspace/triton
    export PYTHONPATH=$TRITON_PATH/python/
    export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH
    export ROCM_PATH=/opt/rocm
    export PATH=/opt/conda/envs/py_3.10/bin:/opt/rocm/bin:$PATH
    # Open MPI MCA settings: exclude the ofi MTL and select the ob1 PML.
    export OMPI_MCA_mtl="^ofi"
    export OMPI_MCA_pml="ob1"

%runscript
    # Prints a banner, activates the conda environment, then replaces the
    # shell with the command passed to the container.
    # NOTE(review): source is a bash builtin; if %runscript is executed by a
    # POSIX sh such as dash the activation line may fail. Confirm which shell
    # runs this section (PATH above already points at the py_3.10 env).
    echo "Welcome to the ROCm-aware Apptainer image!"
    source /opt/conda/bin/activate py_3.10
    exec "$@"
Loading