From 1079b39dbbb80fce1a5d0c30637f213fc0a52c47 Mon Sep 17 00:00:00 2001
From: Leo Fang
Date: Sun, 15 Dec 2024 21:31:23 +0000
Subject: [PATCH] separate test matrix from build matrix and expand it (retry)

---
 .../{gh-build-and-test.yml => ci-build.yml}   | 107 +---------------
 .github/workflows/ci-gh.yml                   |  37 +++++-
 .github/workflows/ci-test.yml                 | 116 ++++++++++++++++++
 3 files changed, 151 insertions(+), 109 deletions(-)
 rename .github/workflows/{gh-build-and-test.yml => ci-build.yml} (62%)
 create mode 100644 .github/workflows/ci-test.yml

diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/ci-build.yml
similarity index 62%
rename from .github/workflows/gh-build-and-test.yml
rename to .github/workflows/ci-build.yml
index 5b031a24..02cdb8a8 100644
--- a/.github/workflows/gh-build-and-test.yml
+++ b/.github/workflows/ci-build.yml
@@ -13,7 +13,7 @@ on:
 
 jobs:
   build:
-    name: Build (${{ inputs.host-platform }}, Python "${{ inputs.python-version }}")
+    name: Build (${{ inputs.host-platform }}, Python ${{ inputs.python-version }})
     if: ${{ github.repository_owner == 'nvidia' }}
     permissions:
       id-token: write # This is required for configure-aws-credentials
@@ -163,108 +163,3 @@ jobs:
           echo "CUDA_CORE_ARTIFACTS_DIR=${CUDA_CORE_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT
           echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_NAME}" >> $GITHUB_OUTPUT
           echo "CUDA_BINDINGS_ARTIFACTS_DIR=${CUDA_BINDINGS_ARTIFACTS_DIR}" >> $GITHUB_OUTPUT
-
-  test:
-    # TODO: improve the name once a separate test matrix is defined
-    name: Test (CUDA ${{ inputs.cuda-version }})
-    # TODO: enable testing once win-64 GPU runners are up
-    if: ${{ (github.repository_owner == 'nvidia') &&
-            startsWith(inputs.host-platform, 'linux') }}
-    permissions:
-      id-token: write # This is required for configure-aws-credentials
-      contents: read # This is required for actions/checkout
-    runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') ||
-                 (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') }}
-    # Our self-hosted runners require a container
-    # TODO: use a different (nvidia?) container
-    container:
-      options: -u root --security-opt seccomp=unconfined --shm-size 16g
-      image: ubuntu:22.04
-      env:
-        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
-    needs:
-      - build
-    steps:
-      - name: Run nvidia-smi to make sure GPU is working
-        shell: bash --noprofile --norc -xeuo pipefail {0}
-        run: nvidia-smi
-
-      - name: Checkout ${{ github.event.repository.name }}
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Set up test environment
-        shell: bash --noprofile --norc -xeuo pipefail {0}
-        run: |
-          # make outputs from the previous job as env vars
-          echo "CUDA_CORE_ARTIFACT_NAME=${{ needs.build.outputs.CUDA_CORE_ARTIFACT_NAME }}" >> $GITHUB_ENV
-          echo "CUDA_CORE_ARTIFACTS_DIR=${{ needs.build.outputs.CUDA_CORE_ARTIFACTS_DIR }}" >> $GITHUB_ENV
-          echo "CUDA_BINDINGS_ARTIFACT_NAME=${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }}" >> $GITHUB_ENV
-          echo "CUDA_BINDINGS_ARTIFACTS_DIR=${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }}" >> $GITHUB_ENV
-
-      - name: Download bindings build artifacts
-        uses: actions/download-artifact@v4
-        with:
-          name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
-          path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
-
-      - name: Display structure of downloaded bindings artifacts
-        shell: bash --noprofile --norc -xeuo pipefail {0}
-        run: |
-          pwd
-          ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
-
-      - name: Download core build artifacts
-        uses: actions/download-artifact@v4
-        with:
-          name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
-          path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
-
-      - name: Display structure of downloaded core build artifacts
-        shell: bash --noprofile --norc -xeuo pipefail {0}
-        run: |
-          pwd
-          ls -lahR $CUDA_CORE_ARTIFACTS_DIR
-
-      - name: Set up Python ${{ inputs.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ inputs.python-version }}
-
-      # The cache action needs this
-      - name: Install zstd
-        shell: bash --noprofile --norc -xeuo pipefail {0}
-        run: |
-          apt update
-          apt install zstd
-
-      - name: Set up mini CTK
-        uses: ./.github/actions/fetch_ctk
-        continue-on-error: false
-        with:
-          host-platform: ${{ inputs.host-platform }}
-          cuda-version: ${{ inputs.cuda-version }}
-          fail-on-ctk-cache-miss: true
-
-      - name: Run test / analysis
-        shell: bash --noprofile --norc -xeuo pipefail {0}
-        run: |
-          ls $CUDA_PATH
-
-          REPO_DIR=$(pwd)
-
-          cd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
-          pip install *.whl
-
-          cd "${CUDA_CORE_ARTIFACTS_DIR}"
-          pip install *.whl
-
-          cd "${REPO_DIR}/cuda_bindings"
-          pip install -r requirements.txt
-          pytest -rxXs tests/
-          # TODO: enable cython tests
-          #pytest tests/cython
-
-          cd "${REPO_DIR}/cuda_core"
-          pytest -rxXs tests/
diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml
index b4646f0e..f861713f 100644
--- a/.github/workflows/ci-gh.yml
+++ b/.github/workflows/ci-gh.yml
@@ -11,7 +11,7 @@ on:
       - "main"
 
 jobs:
-  ci:
+  build:
     strategy:
       fail-fast: false
       matrix:
@@ -30,9 +30,40 @@ jobs:
           # Note: this is for build-time only; the test-time matrix needs to be
           # defined separately.
           - "12.6.2"
-    name: "CI"
+    name: "CI-BUILD"
     uses:
-      ./.github/workflows/gh-build-and-test.yml
+      ./.github/workflows/ci-build.yml
+    with:
+      host-platform: ${{ matrix.host-platform }}
+      python-version: ${{ matrix.python-version }}
+      cuda-version: ${{ matrix.cuda-version }}
+    secrets: inherit
+
+  test:
+    strategy:
+      fail-fast: false
+      matrix:
+        # TODO: align host-platform names with conda convention
+        host-platform:
+          - linux-x64
+          - linux-aarch64
+          - win-x64
+        python-version:
+          - "3.13"
+          - "3.12"
+          - "3.11"
+          - "3.10"
+          - "3.9"
+        cuda-version:
+          # Note: this is for test-time only.
+          - "12.6.2"
+          - "12.0.1"
+          - "11.8.0"
+    name: "CI-TEST"
+    needs:
+      - build
+    uses:
+      ./.github/workflows/ci-test.yml
     with:
       host-platform: ${{ matrix.host-platform }}
       python-version: ${{ matrix.python-version }}
diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml
new file mode 100644
index 00000000..064f7535
--- /dev/null
+++ b/.github/workflows/ci-test.yml
@@ -0,0 +1,116 @@
+on:
+  workflow_call:
+    inputs:
+      host-platform:
+        type: string
+        required: true
+      python-version:
+        type: string
+        required: true
+      cuda-version:
+        type: string
+        required: true
+
+jobs:
+  test:
+    # The job name spells out all three workflow_call inputs so each caller matrix entry is identifiable
+    name: Test (${{ inputs.host-platform }}, CUDA ${{ inputs.cuda-version }}, Python ${{ inputs.python-version }})
+    # TODO: enable testing once win-64 GPU runners are up
+    if: ${{ (github.repository_owner == 'nvidia') &&
+            startsWith(inputs.host-platform, 'linux') }}
+    permissions:
+      id-token: write # This is required for configure-aws-credentials
+      contents: read # This is required for actions/checkout
+    runs-on: ${{ (inputs.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') ||
+                 (inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') }}
+    # Our self-hosted runners require a container
+    # TODO: use a different (nvidia?) container
+    container:
+      options: -u root --security-opt seccomp=unconfined --shm-size 16g
+      image: ubuntu:22.04
+      env:
+        NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }}
+    steps:
+      - name: Run nvidia-smi to make sure GPU is working
+        shell: bash --noprofile --norc -xeuo pipefail {0}
+        run: nvidia-smi
+
+      - name: Checkout ${{ github.event.repository.name }}
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Set up test environment
+        shell: bash --noprofile --norc -xeuo pipefail {0}
+        run: |
+          # FIXME(review): this job declares no `needs` and this workflow has no `build` job, so every needs.build.outputs value below expands to an empty string
+          echo "CUDA_CORE_ARTIFACT_NAME=${{ needs.build.outputs.CUDA_CORE_ARTIFACT_NAME }}" >> $GITHUB_ENV
+          echo "CUDA_CORE_ARTIFACTS_DIR=${{ needs.build.outputs.CUDA_CORE_ARTIFACTS_DIR }}" >> $GITHUB_ENV
+          echo "CUDA_BINDINGS_ARTIFACT_NAME=${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACT_NAME }}" >> $GITHUB_ENV
+          echo "CUDA_BINDINGS_ARTIFACTS_DIR=${{ needs.build.outputs.CUDA_BINDINGS_ARTIFACTS_DIR }}" >> $GITHUB_ENV
+
+      - name: Download bindings build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
+          path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
+
+      - name: Display structure of downloaded bindings artifacts
+        shell: bash --noprofile --norc -xeuo pipefail {0}
+        run: |
+          pwd
+          ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
+
+      - name: Download core build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
+          path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
+
+      - name: Display structure of downloaded core build artifacts
+        shell: bash --noprofile --norc -xeuo pipefail {0}
+        run: |
+          pwd
+          ls -lahR $CUDA_CORE_ARTIFACTS_DIR
+
+      - name: Set up Python ${{ inputs.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ inputs.python-version }}
+
+      # The cache action needs this
+      - name: Install zstd
+        shell: bash --noprofile --norc -xeuo pipefail {0}
+        run: |
+          apt update
+          apt install -y zstd
+
+      - name: Set up mini CTK
+        uses: ./.github/actions/fetch_ctk
+        continue-on-error: false
+        with:
+          host-platform: ${{ inputs.host-platform }}
+          cuda-version: ${{ inputs.cuda-version }}
+          fail-on-ctk-cache-miss: true
+
+      - name: Run test / analysis
+        shell: bash --noprofile --norc -xeuo pipefail {0}
+        run: |
+          ls $CUDA_PATH
+
+          REPO_DIR=$(pwd)
+
+          cd "${CUDA_BINDINGS_ARTIFACTS_DIR}"
+          pip install *.whl
+
+          cd "${CUDA_CORE_ARTIFACTS_DIR}"
+          pip install *.whl
+
+          cd "${REPO_DIR}/cuda_bindings"
+          pip install -r requirements.txt
+          pytest -rxXs tests/
+          # TODO: enable cython tests
+          #pytest tests/cython
+
+          cd "${REPO_DIR}/cuda_core"
+          pytest -rxXs tests/