Skip to content

Commit 92cf593

Browse files
committed
name cleanup + add H100 runner + install curl
1 parent 61813fa commit 92cf593

File tree

3 files changed

+118
-98
lines changed

3 files changed

+118
-98
lines changed

.github/actions/fetch_ctk/action.yml

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,26 @@ runs:
1919
echo "CTK_CACHE_KEY=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}" >> $GITHUB_ENV
2020
echo "CTK_CACHE_FILENAME=mini-ctk-${{ inputs.cuda-version }}-${{ inputs.host-platform }}.tar.gz" >> $GITHUB_ENV
2121
22+
- name: Install dependencies
  shell: bash --noprofile --norc -xeuo pipefail {0}
  run: |
    # Ensure the curl and zstd executables are available for the steps that
    # follow. Nothing to do when both are already on PATH.
    if command -v curl >/dev/null 2>&1 && command -v zstd >/dev/null 2>&1; then
      echo "All dependencies are found. Do nothing."
      exit 0
    fi

    # Choose the privilege-escalation prefix. A plain variable is used rather
    # than an alias: bash does not expand aliases in non-interactive shells
    # unless `shopt -s expand_aliases` is set, so an aliased "SUDO" would be
    # looked up as a command and fail with "command not found".
    if command -v sudo >/dev/null 2>&1; then
      SUDO="sudo"
    elif [[ $EUID == 0 ]]; then
      # No sudo available, but we are already root, so no prefix is needed.
      SUDO=""
    else
      echo "The following operations require root access." >&2
      exit 1
    fi

    # $SUDO is intentionally left unquoted so an empty value expands to
    # nothing instead of an empty command word.
    $SUDO apt update
    $SUDO apt install -y zstd curl
41+
2242
- name: Download CTK cache
2343
id: ctk-get-cache
2444
uses: actions/cache/restore@v4
@@ -32,22 +52,22 @@ runs:
3252
if: ${{ steps.ctk-get-cache.outputs.cache-hit != 'true' }}
3353
shell: bash --noprofile --norc -xeuo pipefail {0}
3454
run: |
35-
CUDA_PATH="$(pwd)/cuda_toolkit"
55+
CUDA_PATH="./cuda_toolkit"
3656
mkdir $CUDA_PATH
3757
3858
# The binary archives (redist) are guaranteed to be updated as part of the release posting.
3959
CTK_BASE_URL="https://developer.download.nvidia.com/compute/cuda/redist/"
4060
CTK_JSON_URL="$CTK_BASE_URL/redistrib_${{ inputs.cuda-version }}.json"
4161
if [[ "${{ inputs.host-platform }}" == linux* ]]; then
42-
if [[ "${{ inputs.host-platform }}" == "linux-x64" ]]; then
62+
if [[ "${{ inputs.host-platform }}" == "linux-64" ]]; then
4363
CTK_SUBDIR="linux-x86_64"
4464
elif [[ "${{ inputs.host-platform }}" == "linux-aarch64" ]]; then
4565
CTK_SUBDIR="linux-sbsa"
4666
fi
4767
function extract() {
4868
tar -xvf $1 -C $CUDA_PATH --strip-components=1
4969
}
50-
elif [[ "${{ inputs.host-platform }}" == "win-x64" ]]; then
70+
elif [[ "${{ inputs.host-platform }}" == "win-64" ]]; then
5171
CTK_SUBDIR="windows-x86_64"
5272
function extract() {
5373
_TEMP_DIR_=$(mktemp -d)
@@ -102,12 +122,13 @@ runs:
102122
shell: bash --noprofile --norc -xeuo pipefail {0}
103123
run: |
104124
ls -l
105-
CUDA_PATH="$(pwd)/cuda_toolkit"
125+
CUDA_PATH="./cuda_toolkit"
106126
tar -xzvf $CTK_CACHE_FILENAME
107127
ls -l $CUDA_PATH
108128
if [ ! -d "$CUDA_PATH/include" ]; then
109129
exit 1
110130
fi
131+
CUDA_PATH=$(realpath ${CUDA_PATH})
111132
112133
echo "CUDA_PATH=${CUDA_PATH}" >> $GITHUB_ENV
113134
echo "${CUDA_PATH}/bin" >> $GITHUB_PATH

.github/workflows/ci-gh.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: "CI"
1+
name: CI
22

33
concurrency:
44
group: ${{ startsWith(github.ref_name, 'main') && format('unique-{0}', github.run_id) || format('ci-build-and-test-on-{0}-from-{1}', github.event_name, github.ref_name) }}
@@ -12,7 +12,6 @@ on:
1212

1313
jobs:
1414
ci:
15-
name: "CI"
1615
uses:
1716
./.github/workflows/gh-build-and-test.yml
1817
secrets: inherit

.github/workflows/gh-build-and-test.yml

Lines changed: 92 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -5,29 +5,28 @@ jobs:
55
strategy:
66
fail-fast: false
77
matrix:
8-
# TODO: align host-platform names with conda convention
98
host-platform:
10-
- linux-x64
9+
- linux-64
1110
- linux-aarch64
12-
- win-x64
11+
- win-64
1312
python-version:
14-
- "3.13"
15-
- "3.12"
16-
- "3.11"
17-
- "3.10"
13+
# - "3.13"
14+
# - "3.12"
15+
# - "3.11"
16+
# - "3.10"
1817
- "3.9"
1918
cuda-version:
2019
# Note: this is for build-time only.
2120
- "12.6.2"
22-
name: Build (${{ matrix.host-platform }}, Python "${{ matrix.python-version }}")
21+
name: Build (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }})
2322
if: ${{ github.repository_owner == 'nvidia' }}
2423
permissions:
2524
id-token: write # This is required for configure-aws-credentials
2625
contents: read # This is required for actions/checkout
27-
runs-on: ${{ (matrix.host-platform == 'linux-x64' && 'linux-amd64-cpu8') ||
26+
runs-on: ${{ (matrix.host-platform == 'linux-64' && 'linux-amd64-cpu8') ||
2827
(matrix.host-platform == 'linux-aarch64' && 'linux-arm64-cpu8') ||
29-
(matrix.host-platform == 'win-x64' && 'windows-2019') }}
30-
# (matrix.host-platform == 'win-x64' && 'windows-amd64-cpu8') }}
28+
(matrix.host-platform == 'win-64' && 'windows-2019') }}
29+
# (matrix.host-platform == 'win-64' && 'windows-amd64-cpu8') }}
3130
outputs:
3231
BUILD_CTK_VER: ${{ steps.pass_env.outputs.CUDA_VERSION }}
3332
steps:
@@ -116,47 +115,47 @@ jobs:
116115
host-platform: ${{ matrix.host-platform }}
117116
cuda-version: ${{ matrix.cuda-version }}
118117

119-
- name: Build cuda.bindings wheel
120-
uses: pypa/[email protected]
121-
env:
122-
CIBW_BUILD: ${{ env.CIBW_BUILD }}
123-
CIBW_ARCHS_LINUX: "native"
124-
CIBW_BUILD_VERBOSITY: 1
125-
# CIBW mounts the host filesystem under /host
126-
CIBW_ENVIRONMENT_LINUX: >
127-
CUDA_PATH=/host/${{ env.CUDA_PATH }}
128-
PARALLEL_LEVEL=${{ env.PARALLEL_LEVEL }}
129-
CIBW_ENVIRONMENT_WINDOWS: >
130-
CUDA_HOME="$(cygpath -w ${{ env.CUDA_PATH }})"
131-
# PARALLEL_LEVEL=${{ env.PARALLEL_LEVEL }}
132-
with:
133-
package-dir: ./cuda_bindings/
134-
output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
135-
136-
- name: List the cuda.bindings artifacts directory
137-
shell: bash --noprofile --norc -xeuo pipefail {0}
138-
run: |
139-
if [[ "${{ matrix.host-platform }}" == win* ]]; then
140-
export CHOWN=chown
141-
else
142-
export CHOWN="sudo chown"
143-
fi
144-
$CHOWN -R $(whoami) ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
145-
ls -lahR ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
146-
147-
# TODO: enable this after NVIDIA/cuda-python#297 is resolved
148-
# - name: Check cuda.bindings wheel
149-
# shell: bash --noprofile --norc -xeuo pipefail {0}
150-
# run: |
151-
# twine check ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
152-
153-
- name: Upload cuda.bindings build artifacts
154-
uses: actions/upload-artifact@v4
155-
with:
156-
name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
157-
path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
158-
if-no-files-found: error
159-
overwrite: 'true'
118+
# - name: Build cuda.bindings wheel
119+
# uses: pypa/[email protected]
120+
# env:
121+
# CIBW_BUILD: ${{ env.CIBW_BUILD }}
122+
# CIBW_ARCHS_LINUX: "native"
123+
# CIBW_BUILD_VERBOSITY: 1
124+
# # CIBW mounts the host filesystem under /host
125+
# CIBW_ENVIRONMENT_LINUX: >
126+
# CUDA_PATH=/host/${{ env.CUDA_PATH }}
127+
# PARALLEL_LEVEL=${{ env.PARALLEL_LEVEL }}
128+
# CIBW_ENVIRONMENT_WINDOWS: >
129+
# CUDA_HOME="$(cygpath -w ${{ env.CUDA_PATH }})"
130+
# # PARALLEL_LEVEL=${{ env.PARALLEL_LEVEL }}
131+
# with:
132+
# package-dir: ./cuda_bindings/
133+
# output-dir: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
134+
#
135+
# - name: List the cuda.bindings artifacts directory
136+
# shell: bash --noprofile --norc -xeuo pipefail {0}
137+
# run: |
138+
# if [[ "${{ matrix.host-platform }}" == win* ]]; then
139+
# export CHOWN=chown
140+
# else
141+
# export CHOWN="sudo chown"
142+
# fi
143+
# $CHOWN -R $(whoami) ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
144+
# ls -lahR ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
145+
#
146+
# # TODO: enable this after NVIDIA/cuda-python#297 is resolved
147+
# # - name: Check cuda.bindings wheel
148+
# # shell: bash --noprofile --norc -xeuo pipefail {0}
149+
# # run: |
150+
# # twine check ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
151+
#
152+
# - name: Upload cuda.bindings build artifacts
153+
# uses: actions/upload-artifact@v4
154+
# with:
155+
# name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
156+
# path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}/*.whl
157+
# if-no-files-found: error
158+
# overwrite: 'true'
160159

161160
- name: Pass environment variables to the next runner
162161
id: pass_env
@@ -166,31 +165,39 @@ jobs:
166165
test:
167166
strategy:
168167
fail-fast: false
168+
# TODO: add driver version here
169169
matrix:
170-
# TODO: align host-platform names with conda convention
171170
host-platform:
172-
- linux-x64
171+
- linux-64
173172
- linux-aarch64
174173
# TODO: enable testing once win-64 GPU runners are up
175-
# - win-x64
174+
# - win-64
176175
python-version:
177-
- "3.13"
178-
- "3.12"
179-
- "3.11"
180-
- "3.10"
176+
# - "3.13"
177+
# - "3.12"
178+
# - "3.11"
179+
# - "3.10"
181180
- "3.9"
182181
cuda-version:
183182
# Note: this is for test-time only.
184183
- "12.6.2"
185184
- "12.0.1"
186185
- "11.8.0"
187-
name: Test (${{ matrix.host-platform }}, CUDA ${{ matrix.cuda-version }}, Python "${{ matrix.python-version }}")
186+
runner:
187+
- default
188+
include:
189+
- host-platform: linux-64
190+
python-version: "3.12"
191+
cuda-version: "12.6.2"
192+
runner: H100
193+
name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }})
188194
if: ${{ (github.repository_owner == 'nvidia') }}
189195
permissions:
190196
id-token: write # This is required for configure-aws-credentials
191197
contents: read # This is required for actions/checkout
192-
runs-on: ${{ (matrix.host-platform == 'linux-x64' && 'linux-amd64-gpu-v100-latest-1') ||
193-
(matrix.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') }}
198+
runs-on: ${{ (matrix.runner == 'default' && matrix.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') ||
199+
(matrix.runner == 'default' && matrix.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') ||
200+
(matrix.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1-testing') }}
194201
# Our self-hosted runners require a container
195202
# TODO: use a different (nvidia?) container
196203
container:
@@ -227,42 +234,35 @@ jobs:
227234
echo "CUDA_BINDINGS_ARTIFACT_NAME=cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ needs.build.outputs.BUILD_CTK_VER }}-${{ matrix.host-platform }}-${{ github.sha }}" >> $GITHUB_ENV
228235
echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV
229236
230-
- name: Download bindings build artifacts
231-
uses: actions/download-artifact@v4
232-
with:
233-
name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
234-
path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
235-
236-
- name: Display structure of downloaded bindings artifacts
237-
shell: bash --noprofile --norc -xeuo pipefail {0}
238-
run: |
239-
pwd
240-
ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
241-
242-
- name: Download core build artifacts
243-
uses: actions/download-artifact@v4
244-
with:
245-
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
246-
path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
247-
248-
- name: Display structure of downloaded core build artifacts
249-
shell: bash --noprofile --norc -xeuo pipefail {0}
250-
run: |
251-
pwd
252-
ls -lahR $CUDA_CORE_ARTIFACTS_DIR
237+
# - name: Download bindings build artifacts
238+
# uses: actions/download-artifact@v4
239+
# with:
240+
# name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
241+
# path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
242+
#
243+
# - name: Display structure of downloaded bindings artifacts
244+
# shell: bash --noprofile --norc -xeuo pipefail {0}
245+
# run: |
246+
# pwd
247+
# ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR
248+
#
249+
# - name: Download core build artifacts
250+
# uses: actions/download-artifact@v4
251+
# with:
252+
# name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
253+
# path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
254+
#
255+
# - name: Display structure of downloaded core build artifacts
256+
# shell: bash --noprofile --norc -xeuo pipefail {0}
257+
# run: |
258+
# pwd
259+
# ls -lahR $CUDA_CORE_ARTIFACTS_DIR
253260

254261
- name: Set up Python ${{ matrix.python-version }}
255262
uses: actions/setup-python@v5
256263
with:
257264
python-version: ${{ matrix.python-version }}
258265

259-
# The cache action needs this
260-
- name: Install zstd
261-
shell: bash --noprofile --norc -xeuo pipefail {0}
262-
run: |
263-
apt update
264-
apt install zstd
265-
266266
- name: Set up mini CTK
267267
uses: ./.github/actions/fetch_ctk
268268
continue-on-error: false

0 commit comments

Comments
 (0)