Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 51 additions & 35 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,55 +1,71 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.

FROM rocm/pytorch:rocm6.3.1_ubuntu22.04_py3.10_pytorch
FROM rocm/pytorch:rocm7.0_ubuntu22.04_py3.10_pytorch_release_2.8.0

# Use bash shell for RUN commands
SHELL ["/bin/bash", "-c"]

# Set environment variables
ENV TRITON_PATH=/opt/triton \
ROCM_PATH=/opt/rocm \
OMPI_MCA_mtl="^ofi" \
OMPI_MCA_pml="ob1"
ENV ROCM_PATH=/opt/rocm

ENV LD_LIBRARY_PATH=$ROCM_PATH/lib:$LD_LIBRARY_PATH \
PATH="$ROCM_PATH/bin:$PATH"

ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \
OMPI_ALLOW_RUN_AS_ROOT=1

# Install system packages
# Install system packages needed for Iris RDMA
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y \
git wget ninja-build cmake python3-pip python3-dev build-essential && \
rm -rf /var/lib/apt/lists/*
git wget cmake build-essential \
libibverbs-dev librdmacm-dev \
python3-pip python3-dev \
infiniband-diags \
perftest \
&& rm -rf /var/lib/apt/lists/*

# Install Python packages with pip
# Install Python packages
RUN pip3 install --upgrade pip && \
pip3 install wheel jupyter

# Clone and install Triton
WORKDIR $TRITON_PATH
RUN git clone https://github.com/triton-lang/triton.git $TRITON_PATH
RUN git checkout dd5823453bcc7973eabadb65f9d827c43281c434
RUN pip3 install -e .
ENV PYTHONPATH=$TRITON_PATH
pip3 install pybind11

# Install rocprofiler-systems
# Set working directory
WORKDIR /workspace
RUN wget https://github.com/ROCm/rocprofiler-systems/releases/download/rocm-6.3.1/rocprofiler-systems-install.py && \
python3 ./rocprofiler-systems-install.py --prefix /opt/rocprofiler-systems --rocm 6.3 && \
rm -f rocprofiler-systems-install.py

# Create entrypoint script
RUN echo '#!/bin/bash' > /entrypoint.sh && \
echo 'echo "Welcome to the ROCm-aware Docker image!"' >> /entrypoint.sh && \
echo 'if [ $# -eq 0 ]; then' >> /entrypoint.sh && \
echo ' exec /bin/bash' >> /entrypoint.sh && \
echo 'else' >> /entrypoint.sh && \
echo ' exec "$@"' >> /entrypoint.sh && \
echo 'fi' >> /entrypoint.sh && \
chmod +x /entrypoint.sh
# Generate /entrypoint.sh and mark it executable in the same layer.
#
# At container start the script prints a short banner (ROCm version, PyTorch
# version, GPUs visible to PyTorch, InfiniBand devices) and then execs the
# requested command, or an interactive bash when no command is given.
#
# Quoting notes:
#   - The script is a single-quoted printf format string, so nothing in it is
#     expanded at build time; $ROCM_PATH, $(...) and "$@" are evaluated when
#     the container starts. printf turns each \n into a newline in the file.
#   - '\'' ends the single-quoted string, emits one literal quote, and
#     reopens the string.
#   - \\n is written where the generated script needs a literal \n (tr).
#   - Continuation lines keep their leading whitespace, which the embedded
#     Python block depends on for its indentation.
#   - Lines that begin with # inside the continued RUN are Dockerfile comments
#     and are removed before the command runs, so the two section comments
#     below do not reach the generated script.
RUN printf '#!/bin/bash\n\
echo "=== Iris RDMA Development Environment ==="\n\
echo "ROCm version: $(cat "$ROCM_PATH/.info/version" 2>/dev/null || echo unknown)"\n\
echo "PyTorch version: $(python -c '\''import torch; print(torch.__version__)'\'' 2>/dev/null || echo unknown)"\n\
\n\
# GPU detection using PyTorch\n\
python -c '\''\n\
import torch\n\
if torch.cuda.is_available():\n\
    count = torch.cuda.device_count()\n\
    print(f"GPUs available: {count}")\n\
    for i in range(count):\n\
        name = torch.cuda.get_device_name(i)\n\
        print(f" GPU[{i}]: {name}")\n\
else:\n\
    print("GPUs available: 0")\n\
'\'' 2>/dev/null || echo "GPUs available: 0"\n\
\n\
# InfiniBand detection\n\
if [ -d /dev/infiniband ]; then\n\
    IB_COUNT=$(ls /dev/infiniband/uverbs* 2>/dev/null | wc -l)\n\
    echo "InfiniBand devices available: $IB_COUNT"\n\
    if [ "$IB_COUNT" -gt 0 ]; then\n\
        echo "InfiniBand device(s): $(ls /sys/class/infiniband/ 2>/dev/null | tr '\''\\n'\'' '\'' '\'')"\n\
    fi\n\
else\n\
    echo "InfiniBand devices available: 0"\n\
fi\n\
echo "======================================"\n\
if [ $# -eq 0 ]; then\n\
    exec /bin/bash\n\
else\n\
    exec "$@"\n\
fi\n' > /entrypoint.sh && \
    chmod +x /entrypoint.sh

# Set the entrypoint
ENTRYPOINT ["/bin/bash", "-c", "source /entrypoint.sh && exec bash"]
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/bin/bash"]

10 changes: 5 additions & 5 deletions docker/build.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
# Build the Iris RDMA Docker image

SCRIPT_DIR=$(dirname "$(realpath "$0")")

IMAGE_NAME=${1:-"iris-dev"}
IMAGE_NAME=${1:-"iris-rdma"}

pushd "$SCRIPT_DIR" > /dev/null

docker build -t $IMAGE_NAME .
# Build the image from this directory's Dockerfile.
# --network=host lets the build steps use the host's network stack.
# The image name is quoted so an unusual value is passed as a single argument.
echo "Building Docker image: $IMAGE_NAME"
docker build -t "$IMAGE_NAME" --network=host .

popd > /dev/null

48 changes: 38 additions & 10 deletions docker/run.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,42 @@
#!/bin/bash
# SPDX-License-Identifier: MIT
# Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
# Run Iris RDMA Docker container with InfiniBand support

IMAGE_NAME=${1:-"iris-rdma"}
WORKSPACE_DIR=$(cd "$(dirname "$0")/.." && pwd)

IMAGE_NAME=${1:-"iris-dev"}
WORKSPACE_DIR=${2:-"$(pwd)"}
echo "Starting miniQP container..."
echo " Image: $IMAGE_NAME"
echo " Workspace: $WORKSPACE_DIR"

# Auto-detect InfiniBand devices.
# Every /dev/infiniband/uverbs* node becomes its own --device argument. They
# are collected in an array so each one reaches docker as exactly one word,
# and so an empty result expands to no arguments at all.
IB_DEVICES=()
if [ -d /dev/infiniband ]; then
    for dev in /dev/infiniband/uverbs*; do
        # An unmatched glob stays literal, so confirm the path really exists.
        if [ -e "$dev" ]; then
            IB_DEVICES+=("--device=$dev")
        fi
    done
fi

# Report what was found; warn both when the directory is missing and when it
# exists but holds no uverbs nodes.
if [ "${#IB_DEVICES[@]}" -gt 0 ]; then
    echo " InfiniBand devices: ${#IB_DEVICES[@]} found"
else
    echo " Warning: No InfiniBand devices found"
fi
echo ""

# Start an interactive, self-removing container:
#   - /dev/kfd and /dev/dri plus the video group expose the GPUs
#   - IPC_LOCK and an unlimited memlock ulimit allow pinning memory
#   - the workspace is mounted at the same path inside the container and
#     used as the working directory
docker run -it --rm \
    --network=host \
    --device=/dev/kfd \
    --device=/dev/dri \
    "${IB_DEVICES[@]}" \
    --group-add video \
    --cap-add=SYS_PTRACE \
    --cap-add=IPC_LOCK \
    --security-opt seccomp=unconfined \
    -v "$WORKSPACE_DIR:$WORKSPACE_DIR" \
    -w "$WORKSPACE_DIR" \
    --shm-size=16G \
    --ulimit memlock=-1 \
    --ulimit stack=67108864 \
    "$IMAGE_NAME"

docker run -it --network=host --device=/dev/kfd\
--device=/dev/dri --group-add video\
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined\
-v "$WORKSPACE_DIR:$WORKSPACE_DIR" -w "$WORKSPACE_DIR"\
--shm-size=16G --ulimit memlock=-1\
--ulimit stack=67108864 $IMAGE_NAME
69 changes: 69 additions & 0 deletions examples/22_rdma_producer_consumer/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# 22. RDMA Producer-Consumer

Producer-consumer pattern using InfiniBand RDMA for multi-node communication.

## Overview

This example demonstrates:
- Producer Triton kernel generates data on Rank 0
- RDMA transfer from Rank 0 to Rank 1
- Consumer Triton kernel verifies data on Rank 1

## Requirements

- InfiniBand network adapter
- libibverbs-dev installed
- Iris built with RDMA support

## Architecture

```
Rank 0 (Producer)            Rank 1 (Consumer)
─────────────────            ─────────────────
producer_kernel()
  ↓ writes
GPU → CPU buffer
RDMA PUT ──────────────────→ CPU buffer
                             CPU → GPU
                             consumer_kernel()
                               ↓ verifies
                             ✓ Success
```

## Usage

### Single Node (2 GPUs)
```bash
torchrun --nproc_per_node=2 rdma_producer_consumer.py
```

### Multi-Node (2 Nodes, 1 GPU each)
```bash
# Node 0
torchrun --nnodes=2 --nproc_per_node=1 --node_rank=0 \
--master_addr=<node0_ip> --master_port=29500 \
rdma_producer_consumer.py

# Node 1
torchrun --nnodes=2 --nproc_per_node=1 --node_rank=1 \
--master_addr=<node0_ip> --master_port=29500 \
rdma_producer_consumer.py
```

## Expected Output

```
[Rank 0/2] Initialized on cuda:0
[Rank 1/2] Initialized on cuda:1
[Rank 0] Producing data
[Rank 0] First 10: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
[Rank 0] RDMA transfer to Rank 1
[Rank 0] RDMA completed
[Rank 1] Consuming data
[Rank 1] Received first 10: [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
[Rank 1] Verified: 4096/4096
[Rank 1] SUCCESS!
```
Loading
Loading