Skip to content

Commit

Permalink
separate test matrix from build matrix and expand it (retry)
Browse files Browse the repository at this point in the history
  • Loading branch information
leofang committed Dec 15, 2024
1 parent 243a46f commit 1079b39
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 109 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ on:

jobs:
build:
name: Build (${{ inputs.host-platform }}, Python "${{ inputs.python-version }}")
name: Build (${{ inputs.host-platform }}, Python ${{ inputs.python-version }})
if: ${{ github.repository_owner == 'nvidia' }}
permissions:
id-token: write # This is required for configure-aws-credentials
Expand Down Expand Up @@ -163,108 +163,3 @@ jobs:
echo "CUDA_CORE_ARTIFACTS_DIR=${CUDA_CORE_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT
echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_NAME}" >> $GITHUB_OUTPUT
echo "CUDA_BINDINGS_ARTIFACTS_DIR=${CUDA_BINDINGS_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT
# GPU test job: downloads the wheels produced by the `build` job, installs
# them, and runs the cuda_bindings and cuda_core pytest suites.
test:
# TODO: improve the name once a separate test matrix is defined
name: Test (CUDA ${{ inputs.cuda-version }})
# TODO: enable testing once win-64 GPU runners are up
# Runs only when the repository owner is 'nvidia' and the host platform
# starts with 'linux'.
if: ${{ (github.repository_owner == 'nvidia') &&
startsWith(inputs.host-platform, 'linux') }}
permissions:
id-token: write # This is required for configure-aws-credentials
contents: read # This is required for actions/checkout
# Selects the runner label from the host platform; an unmatched platform
# leaves the expression without a label (the `if:` above only admits linux).
runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') ||
(inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') }}
# Our self-hosted runners require a container
# TODO: use a different (nvidia?) container
container:
options: -u root --security-opt seccomp=unconfined --shm-size 16g
image: ubuntu:22.04
env:
# Forwards NVIDIA_VISIBLE_DEVICES from the `env` context into the container.
NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
# Depends on `build` so that `needs.build.outputs.*` (artifact names and
# directories) can be read in the steps below.
needs:
- build
steps:
# Sanity check that the GPU is usable before doing any other work.
- name: Run nvidia-smi to make sure GPU is working
shell: bash --noprofile --norc -xeuo pipefail {0}
run: nvidia-smi

# fetch-depth: 0 requests the full history rather than a shallow clone.
- name: Checkout ${{ github.event.repository.name }}
uses: actions/checkout@v4
with:
fetch-depth: 0

# Copies the four build-job outputs into $GITHUB_ENV so that later steps can
# read them through the `env` context and as shell variables.
- name: Set up test environment
shell: bash --noprofile --norc -xeuo pipefail {0}
run: |
# make outputs from the previous job as env vars
echo "CUDA_CORE_ARTIFACT_NAME=${{ needs.build.outputs.CUDA_CORE_ARTIFACT_NAME }}" >> $GITHUB_ENV
echo "CUDA_CORE_ARTIFACTS_DIR=${{ needs.build.outputs.CUDA_CORE_ARTIFACTS_DIR }}" >> $GITHUB_ENV
echo "CUDA_BINDINGS_ARTIFACT_NAME=${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }}" >> $GITHUB_ENV
echo "CUDA_BINDINGS_ARTIFACTS_DIR=${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }}" >> $GITHUB_ENV
# Fetches the bindings artifact into the directory recorded by the build job.
- name: Download bindings build artifacts
uses: actions/download-artifact@v4
with:
name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

# Debugging aid: prints the working directory and a recursive listing.
- name: Display structure of downloaded bindings artifacts
shell: bash --noprofile --norc -xeuo pipefail {0}
run: |
pwd
ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
# Fetches the core artifact into the directory recorded by the build job.
- name: Download core build artifacts
uses: actions/download-artifact@v4
with:
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

# Debugging aid: prints the working directory and a recursive listing.
- name: Display structure of downloaded core build artifacts
shell: bash --noprofile --norc -xeuo pipefail {0}
run: |
pwd
ls -lahR $CUDA_CORE_ARTIFACTS_DIR
# Installs the Python version requested by the caller.
- name: Set up Python ${{ inputs.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ inputs.python-version }}

# The cache action needs this
- name: Install zstd
shell: bash --noprofile --norc -xeuo pipefail {0}
run: |
apt update
apt install zstd
# Runs the repository-local fetch_ctk action; the step is configured to fail
# the job on a CTK cache miss instead of continuing.
# NOTE(review): the test step lists $CUDA_PATH right after this, so the
# action presumably exports it -- confirm against the action definition.
- name: Set up mini CTK
uses: ./.github/actions/fetch_ctk
continue-on-error: false
with:
host-platform: ${{ inputs.host-platform }}
cuda-version: ${{ inputs.cuda-version }}
fail-on-ctk-cache-miss: true

# Installs the bindings wheel(s), then the core wheel(s), then runs pytest
# for cuda_bindings followed by cuda_core from the checked-out repository.
- name: Run test / analysis
shell: bash --noprofile --norc -xeuo pipefail {0}
run: |
ls $CUDA_PATH
REPO_DIR=$(pwd)
cd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
pip install *.whl
cd "${CUDA_CORE_ARTIFACTS_DIR}"
pip install *.whl
cd "${REPO_DIR}/cuda_bindings"
pip install -r requirements.txt
pytest -rxXs tests/
# TODO: enable cython tests
#pytest tests/cython
cd "${REPO_DIR}/cuda_core"
pytest -rxXs tests/
37 changes: 34 additions & 3 deletions .github/workflows/ci-gh.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ on:
- "main"

jobs:
ci:
build:
strategy:
fail-fast: false
matrix:
Expand All @@ -30,9 +30,40 @@ jobs:
# Note: this is for build-time only; the test-time matrix needs to be
# defined separately.
- "12.6.2"
name: "CI"
name: "CI-BUILD"
uses:
./.github/workflows/gh-build-and-test.yml
./.github/workflows/ci-build.yml
with:
host-platform: ${{ matrix.host-platform }}
python-version: ${{ matrix.python-version }}
cuda-version: ${{ matrix.cuda-version }}
secrets: inherit

test:
strategy:
fail-fast: false
matrix:
# TODO: align host-platform names with conda convention
host-platform:
- linux-x64
- linux-aarch64
- win-x64
python-version:
- "3.13"
- "3.12"
- "3.11"
- "3.10"
- "3.9"
cuda-version:
# Note: this is for test-time only.
- "12.6.2"
- "12.0.1"
- "11.8.0"
name: "CI-TEST"
needs:
- build
uses:
./.github/workflows/ci-test.yml
with:
host-platform: ${{ matrix.host-platform }}
python-version: ${{ matrix.python-version }}
Expand Down
116 changes: 116 additions & 0 deletions .github/workflows/ci-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Reusable GPU test workflow. For one (host platform, Python, CUDA)
# combination it downloads the previously built cuda_bindings and cuda_core
# wheels, installs them, and runs both pytest suites.
on:
  workflow_call:
    inputs:
      host-platform:
        type: string
        required: true
      python-version:
        type: string
        required: true
      cuda-version:
        type: string
        required: true

jobs:
  test:
    name: Test (${{ inputs.host-platform }}, CUDA ${{ inputs.cuda-version }}, Python ${{ inputs.python-version }})
    # TODO: enable testing once win-64 GPU runners are up
    # Runs only when the repository owner is 'nvidia' and the host platform
    # starts with 'linux'.
    if: ${{ (github.repository_owner == 'nvidia') &&
            startsWith(inputs.host-platform, 'linux') }}
    permissions:
      id-token: write  # This is required for configure-aws-credentials
      contents: read  # This is required for actions/checkout
    # Selects the runner label from the host platform.
    runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') ||
                 (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') }}
    # Our self-hosted runners require a container
    # TODO: use a different (nvidia?) container
    container:
      options: -u root --security-opt seccomp=unconfined --shm-size 16g
      image: ubuntu:22.04
      env:
        # Forwards NVIDIA_VISIBLE_DEVICES from the `env` context.
        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
    steps:
      # Sanity check that the GPU is usable before doing any other work.
      - name: Run nvidia-smi to make sure GPU is working
        shell: bash --noprofile --norc -xeuo pipefail {0}
        run: nvidia-smi

      # fetch-depth: 0 requests the full history rather than a shallow clone.
      - name: Checkout ${{ github.event.repository.name }}
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up test environment
        shell: bash --noprofile --norc -xeuo pipefail {0}
        run: |
          # Make the artifact names/dirs available to later steps as env vars.
          # NOTE(review): this file is entered through `workflow_call`, and the
          # `needs` context only covers jobs that this job lists under `needs:`
          # in this file. No `build` job is defined here, so the expressions
          # below are expected to expand to empty strings.
          # TODO: pass these values in as workflow inputs or recompute them
          # here; until then the check below stops the job with a clear error
          # instead of letting it fail later at `pip install *.whl`.
          CUDA_CORE_ARTIFACT_NAME="${{ needs.build.outputs.CUDA_CORE_ARTIFACT_NAME }}"
          CUDA_CORE_ARTIFACTS_DIR="${{ needs.build.outputs.CUDA_CORE_ARTIFACTS_DIR }}"
          CUDA_BINDINGS_ARTIFACT_NAME="${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }}"
          CUDA_BINDINGS_ARTIFACTS_DIR="${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }}"
          for var in CUDA_CORE_ARTIFACT_NAME CUDA_CORE_ARTIFACTS_DIR \
                     CUDA_BINDINGS_ARTIFACT_NAME CUDA_BINDINGS_ARTIFACTS_DIR; do
            if [[ -z "${!var}" ]]; then
              echo "::error::${var} is empty; the build outputs were not propagated to this workflow"
              exit 1
            fi
            echo "${var}=${!var}" >> "$GITHUB_ENV"
          done

      - name: Download bindings build artifacts
        uses: actions/download-artifact@v4
        with:
          name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
          path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}

      # Debugging aid: prints the working directory and a recursive listing.
      - name: Display structure of downloaded bindings artifacts
        shell: bash --noprofile --norc -xeuo pipefail {0}
        run: |
          pwd
          ls -lahR "$CUDA_BINDINGS_ARTIFACTS_DIR"

      - name: Download core build artifacts
        uses: actions/download-artifact@v4
        with:
          name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
          path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}

      # Debugging aid: prints the working directory and a recursive listing.
      - name: Display structure of downloaded core build artifacts
        shell: bash --noprofile --norc -xeuo pipefail {0}
        run: |
          pwd
          ls -lahR "$CUDA_CORE_ARTIFACTS_DIR"

      - name: Set up Python ${{ inputs.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ inputs.python-version }}

      # The cache action needs this
      - name: Install zstd
        shell: bash --noprofile --norc -xeuo pipefail {0}
        run: |
          # apt-get with -y: the job has no terminal to answer a confirmation
          # prompt, and `apt` is meant for interactive use.
          apt-get update
          apt-get install -y zstd

      # Runs the repository-local fetch_ctk action; the step is configured to
      # fail the job on a CTK cache miss instead of continuing.
      - name: Set up mini CTK
        uses: ./.github/actions/fetch_ctk
        continue-on-error: false
        with:
          host-platform: ${{ inputs.host-platform }}
          cuda-version: ${{ inputs.cuda-version }}
          fail-on-ctk-cache-miss: true

      # Installs the bindings wheel(s), then the core wheel(s), then runs
      # pytest for cuda_bindings followed by cuda_core.
      - name: Run test / analysis
        shell: bash --noprofile --norc -xeuo pipefail {0}
        run: |
          ls "$CUDA_PATH"
          REPO_DIR=$(pwd)
          cd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
          pip install *.whl
          cd "${CUDA_CORE_ARTIFACTS_DIR}"
          pip install *.whl
          cd "${REPO_DIR}/cuda_bindings"
          pip install -r requirements.txt
          pytest -rxXs tests/
          # TODO: enable cython tests
          #pytest tests/cython
          cd "${REPO_DIR}/cuda_core"
          pytest -rxXs tests/

0 comments on commit 1079b39

Please sign in to comment.