From 522b4939e16a2ced7cb8c0bf98a9d1c46c8f7681 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Mon, 16 Dec 2024 02:46:52 +0000 Subject: [PATCH] name cleanup + add H100 runner + install curl --- .github/actions/fetch_ctk/action.yml | 4 +-- .github/workflows/ci-gh.yml | 3 +- .github/workflows/gh-build-and-test.yml | 38 ++++++++++++++----------- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/.github/actions/fetch_ctk/action.yml b/.github/actions/fetch_ctk/action.yml index 3720a853..61bf96d7 100644 --- a/.github/actions/fetch_ctk/action.yml +++ b/.github/actions/fetch_ctk/action.yml @@ -39,7 +39,7 @@ runs: CTK_BASE_URL="https://developer.download.nvidia.com/compute/cuda/redist/" CTK_JSON_URL="$CTK_BASE_URL/redistrib_${{ inputs.cuda-version }}.json" if [[ "${{ inputs.host-platform }}" == linux* ]]; then - if [[ "${{ inputs.host-platform }}" == "linux-x64" ]]; then + if [[ "${{ inputs.host-platform }}" == "linux-64" ]]; then CTK_SUBDIR="linux-x86_64" elif [[ "${{ inputs.host-platform }}" == "linux-aarch64" ]]; then CTK_SUBDIR="linux-sbsa" @@ -47,7 +47,7 @@ runs: function extract() { tar -xvf $1 -C $CUDA_PATH --strip-components=1 } - elif [[ "${{ inputs.host-platform }}" == "win-x64" ]]; then + elif [[ "${{ inputs.host-platform }}" == "win-64" ]]; then CTK_SUBDIR="windows-x86_64" function extract() { _TEMP_DIR_=$(mktemp -d) diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml index 795d6d0a..81cf6da3 100644 --- a/.github/workflows/ci-gh.yml +++ b/.github/workflows/ci-gh.yml @@ -1,4 +1,4 @@ -name: "CI" +name: CI concurrency: group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }} @@ -12,7 +12,6 @@ on: jobs: ci: - name: "CI" uses: ./.github/workflows/gh-build-and-test.yml secrets: inherit diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 13c5feef..636da9a5 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -5,11 +5,10 @@ jobs: strategy: fail-fast: false matrix: - # TODO: align host-platform names with conda convention host-platform: - - linux-x64 + - linux-64 - linux-aarch64 - - win-x64 + - win-64 python-version: - "3.13" - "3.12" @@ -19,15 +18,15 @@ jobs: cuda-version: # Note: this is for build-time only. - "12.6.2" - name: Build (${{ matrix.host-platform }}, Python "${{ matrix.python-version }}") + name: Build (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}) if: ${{ github.repository_owner == 'nvidia' }} permissions: id-token: write # This is required for configure-aws-credentials contents: read # This is required for actions/checkout - runs-on: ${{ (matrix.host-platform == 'linux-x64' && 'linux-amd64-cpu8') || + runs-on: ${{ (matrix.host-platform == 'linux-64' && 'linux-amd64-cpu8') || (matrix.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') || - (matrix.host-platform == 'win-x64' && 'windows-2019') }} - # (matrix.host-platform == 'win-x64' && 'windows-amd64-cpu8') }} + (matrix.host-platform == 'win-64' && 'windows-2019') }} + # (matrix.host-platform == 'win-64' && 'windows-amd64-cpu8') }} outputs: BUILD_CTK_VER: ${{ steps.pass_env.outputs.CUDA_VERSION }} steps: @@ -166,13 +165,13 @@ jobs: test: strategy: fail-fast: false + # TODO: add driver version here matrix: - # TODO: align host-platform names with conda convention host-platform: - - linux-x64 + - linux-64 - linux-aarch64 # TODO: enable testing once win-64 GPU runners are up - # - win-x64 + # - win-64 python-version: - "3.13" - "3.12" @@ -184,13 +183,21 @@ jobs: - "12.6.2" - "12.0.1" - "11.8.0" - name: Test (${{ matrix.host-platform }}, CUDA ${{ matrix.cuda-version }}, Python "${{ matrix.python-version }}") + runner: + - default + include: + - host-platform: linux-64 + python-version: "3.12" + cuda-version: "12.6.2" + runner: H100 + name: Test (${{ matrix.host-platform }}, CUDA ${{ matrix.cuda-version }}, Python ${{ matrix.python-version }}) if: ${{ (github.repository_owner == 'nvidia') }} permissions: id-token: write # This is required for configure-aws-credentials contents: read # This is required for actions/checkout - runs-on: ${{ (matrix.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') || - (matrix.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') }} + runs-on: ${{ (matrix.runner == 'default' && matrix.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') || + (matrix.runner == 'default' && matrix.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') || + (matrix.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1-testing') }} # Our self-hosted runners require a container # TODO: use a different (nvidia?) container container: @@ -256,12 +263,11 @@ jobs: with: python-version: ${{ matrix.python-version }} - # The cache action needs this - - name: Install zstd + - name: Install mini CTK action dependencies shell: bash --noprofile --norc -xeuo pipefail {0} run: | apt update - apt install zstd + apt install zstd curl - name: Set up mini CTK uses: ./.github/actions/fetch_ctk