diff --git a/.common-ci.yml b/.common-ci.yml index 86f91be2..737f727c 100644 --- a/.common-ci.yml +++ b/.common-ci.yml @@ -88,6 +88,13 @@ trigger-pipeline: - DRIVER_BRANCH: [535, 550] KERNEL_FLAVOR: [aws, azure, generic, nvidia, oracle] +# Define the matrix of precompiled jobs that can be run in parallel for ubuntu24.04 +.driver-versions-precompiled-ubuntu24.04: + parallel: + matrix: + - DRIVER_BRANCH: [535, 550] + KERNEL_FLAVOR: [aws, azure, generic, nvidia, oracle] + # Define the distribution targets .dist-ubuntu20.04: variables: @@ -304,3 +311,14 @@ release:staging-precompiled-ubuntu22.04: - .release:staging-precompiled needs: - image-precompiled-ubuntu22.04 + +# Precompiled Ubuntu24.04 release +release:staging-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .release:staging-precompiled + needs: + - image-precompiled-ubuntu24.04 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a46e34d3..fa5b321c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -40,7 +40,7 @@ jobs: aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_ssh_key: ${{ secrets.AWS_SSH_KEY }} - holodeck_config: "tests/holodeck.yaml" + holodeck_config: "tests/holodeck_ubuntu22.04.yaml" - name: Get public dns name id: get_public_dns_name diff --git a/.github/workflows/image.yaml b/.github/workflows/image.yaml index 0dade847..4acf8db8 100644 --- a/.github/workflows/image.yaml +++ b/.github/workflows/image.yaml @@ -21,12 +21,12 @@ on: - opened - synchronize branches: - - main - - release-* + - maini-no + # - release-* push: branches: - - main - - release-* + - main-no + # - release-* jobs: image: diff --git a/.github/workflows/precompiled.yaml b/.github/workflows/precompiled.yaml index 769ce505..0d77f691 100644 --- a/.github/workflows/precompiled.yaml +++ b/.github/workflows/precompiled.yaml @@ -15,9 +15,20 @@ # Run this workflow on a schedule name: Precompiled images +# on: +# schedule: +# - cron: '00 09 * * *' # scheduled job + on: - schedule: - - cron: '00 09 * * *' + pull_request: + types: + - opened + - synchronize + branches: + - ci-precompile-ubuntu24.04 + push: + branches: + - ci-precompile-ubuntu24.04 jobs: set-driver-version-matrix: @@ -26,6 +37,8 @@ jobs: driver_branch: ${{ steps.extract_driver_branch.outputs.driver_branch }} kernel_flavors: ${{ steps.extract_driver_branch.outputs.kernel_flavors }} dist: ${{ steps.extract_driver_branch.outputs.dist }} + BASE_TARGETS: ${{ steps.extract_driver_branch.outputs.BASE_TARGETS }} + LTS_KERNELS: ${{ steps.extract_driver_branch.outputs.LTS_KERNELS }} steps: - name: Checkout code uses: actions/checkout@v4 @@ -43,10 +56,32 @@ jobs: echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT # get ubuntu distributions - DIST=("ubuntu22.04") + DIST=("ubuntu22.04" "ubuntu24.04") dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .) echo "dist=$dist_json" >> $GITHUB_OUTPUT + # BASE_TARGET setup + declare -A MAP_BASE_TARGETS + MAP_BASE_TARGETS["ubuntu22.04"]="jammy" + MAP_BASE_TARGETS["ubuntu24.04"]="noble" + BASE_TARGETS_JSON="{" + for key in "${!MAP_BASE_TARGETS[@]}"; do + BASE_TARGETS_JSON+="\"$key\":\"${MAP_BASE_TARGETS[$key]}\"," + done + BASE_TARGETS_JSON="${BASE_TARGETS_JSON%,}}" + echo "BASE_TARGETS=$BASE_TARGETS_JSON" >> "$GITHUB_OUTPUT" + + # LTS_KERNELS env setup + declare -A MAP_LTS_KERNELS + MAP_LTS_KERNELS["ubuntu22.04"]="5.15" + MAP_LTS_KERNELS["ubuntu24.04"]="6.8" + LTS_KERNELS_JSON="{" + for key in "${!MAP_LTS_KERNELS[@]}"; do + LTS_KERNELS_JSON+="\"$key\":\"${MAP_LTS_KERNELS[$key]}\"," + done + LTS_KERNELS_JSON="${LTS_KERNELS_JSON%,}}" + echo "LTS_KERNELS=$LTS_KERNELS_JSON" >> "$GITHUB_OUTPUT" + precompiled-build-image: needs: set-driver-version-matrix runs-on: linux-amd64-cpu4 @@ -55,11 +90,16 @@ jobs: driver_branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }} flavor: ${{ fromJson(needs.set-driver-version-matrix.outputs.kernel_flavors) }} dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} + exclude: + - dist: ubuntu24.04 + driver_branch: 535 steps: - uses: actions/checkout@v4 name: Check out code - name: Calculate build vars id: vars + env: + DIST: ${{ matrix.dist }} run: | echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV @@ -70,6 +110,14 @@ jobs: echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV + BASE_TARGETS_JSON='${{ needs.set-driver-version-matrix.outputs.BASE_TARGETS }}' + BASE_TARGET=$(echo "$BASE_TARGETS_JSON" | jq -r --arg dist "$DIST" '.[$dist]') + echo "BASE_TARGET=$BASE_TARGET" >> $GITHUB_ENV + LTS_KERNELS_JSON='${{ needs.set-driver-version-matrix.outputs.LTS_KERNELS }}' + LTS_KERNEL=$(echo "$LTS_KERNELS_JSON" | jq -r --arg dist "$DIST" '.[$dist]') + echo "LTS_KERNEL=$LTS_KERNEL" >> $GITHUB_ENV + + - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx @@ -84,9 +132,10 @@ jobs: env: IMAGE_NAME: ghcr.io/nvidia/driver VERSION: ${COMMIT_SHORT_SHA} - BASE_TARGET: jammy + BASE_TARGET: ${{ env.BASE_TARGET }} + LTS_KERNEL: ${{ env.LTS_KERNEL }} run: | - make DRIVER_BRANCH=${{ matrix.driver_branch }} KERNEL_FLAVOR=${{ matrix.flavor }} build-base-${BASE_TARGET} + make DRIVER_BRANCH=${{ matrix.driver_branch }} KERNEL_FLAVOR=${{ matrix.flavor }} LTS_KERNEL=${LTS_KERNEL} build-base-${BASE_TARGET} trap "docker rm -f base-${BASE_TARGET}-${{ matrix.flavor }}" EXIT docker run -d --name base-${BASE_TARGET}-${{ matrix.flavor }} ghcr.io/nvidia/driver:base-${BASE_TARGET}-${{ matrix.flavor }}-${{ matrix.driver_branch }} @@ -105,12 +154,13 @@ jobs: source kernel_version.txt && \ make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver_branch }} build-${DIST}-${DRIVER_VERSION} - - name: Save build image as a tar + - name: Save build image and kernel version file env: DIST: ${{ matrix.dist }} PRIVATE_REGISTRY: "ghcr.io" run: | source kernel_version.txt + tar -cvf kernel-version-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar kernel_version.txt docker save "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}" \ -o ./driver-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar # set env for artifacts upload @@ -123,7 +173,14 @@ jobs: name: driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }} path: ./driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar retention-days: 1 - + + - name: Upload kernel version as an artifact + uses: actions/upload-artifact@v4 + with: + name: kernel-version-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }} + path: ./kernel-version-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar + retention-days: 1 + determine-e2e-test-matrix: runs-on: linux-amd64-cpu4 strategy: @@ -132,10 +189,6 @@ jobs: needs: - precompiled-build-image - set-driver-version-matrix - outputs: - matrix_values_not_empty: ${{ steps.set_kernel_version.outputs.matrix_values_not_empty }} - matrix_values: ${{ steps.set_kernel_version.outputs.matrix_values }} - dist: ${{ steps.set-driver-version-matrix.outputs.dist }} steps: - name: Check out code uses: actions/checkout@v4 @@ -146,19 +199,36 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Set kernel version - id: set_kernel_version + - name: Calculate build vars + id: vars env: - BASE_TARGET: "jammy" DIST: ${{ matrix.dist }} run: | - echo "matrix_values_not_empty=0" >> $GITHUB_OUTPUT + BASE_TARGETS_JSON='${{ needs.set-driver-version-matrix.outputs.BASE_TARGETS }}' + BASE_TARGET=$(echo "$BASE_TARGETS_JSON" | jq -r --arg dist "$DIST" '.[$dist]') + echo "BASE_TARGET=$BASE_TARGET" >> $GITHUB_ENV + LTS_KERNELS_JSON='${{ needs.set-driver-version-matrix.outputs.LTS_KERNELS }}' + LTS_KERNEL=$(echo "$LTS_KERNELS_JSON" | jq -r --arg dist "$DIST" '.[$dist]') + echo "LTS_KERNEL=$LTS_KERNEL" >> $GITHUB_ENV + - name: Set kernel version + env: + DIST: ${{ matrix.dist }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BASE_TARGET: ${{ env.BASE_TARGET }} + LTS_KERNEL: ${{ env.LTS_KERNEL }} + run: | kernel_flavors_json='${{ needs.set-driver-version-matrix.outputs.kernel_flavors }}' KERNEL_FLAVORS=($(echo "$kernel_flavors_json" | jq -r '.[]')) driver_branch_json='${{ needs.set-driver-version-matrix.outputs.driver_branch }}' DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]')) + # remove 535 driver branch for ubuntu24.04 + if [ "$DIST" == "ubuntu24.04" ]; then + DRIVER_BRANCHES=($(for branch in "${DRIVER_BRANCHES[@]}"; do + [[ $branch != "535" ]] && echo "$branch" + done)) + fi source ./tests/scripts/ci-precompiled-helpers.sh KERNEL_VERSIONS=($(get_kernel_versions_to_test $BASE_TARGET KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST)) if [ -z "$KERNEL_VERSIONS" ]; then @@ -166,22 +236,67 @@ jobs: echo "Skipping e2e tests" exit 0 fi + for i in "${!KERNEL_VERSIONS[@]}"; do + KERNEL_VERSIONS[$i]="${KERNEL_VERSIONS[$i]}-$DIST" + done # Convert array to JSON format and assign - echo "[]" > $GITHUB_WORKSPACE/matrix_values.json - printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > $GITHUB_WORKSPACE/matrix_values.json - echo "matrix_values=$(cat $GITHUB_WORKSPACE/matrix_values.json | jq -c .)" >> $GITHUB_OUTPUT - echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT + echo "[]" > ./matrix_values_$DIST.json + printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > ./matrix_values_$DIST.json - e2e-tests-nvidiadriver: + - name: Upload kernel matrix values as artifacts + uses: actions/upload-artifact@v4 + with: + name: matrix-values-${{ matrix.dist }} + path: ./matrix_values_${{ matrix.dist }}.json + retention-days: 1 + + collect-e2e-test-matrix: runs-on: linux-amd64-cpu4 needs: - determine-e2e-test-matrix - set-driver-version-matrix - if: ${{ needs.determine-e2e-test-matrix.outputs.matrix_values_not_empty == '1' }} + outputs: + matrix_values_not_empty: ${{ steps.set_kernel_version.outputs.matrix_values_not_empty }} + matrix_values: ${{ steps.set_kernel_version.outputs.matrix_values }} + steps: + - name: Check out code + uses: actions/checkout@v4 + - name: Set and append matrix values for ubuntu + id: set_kernel_version + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + echo "matrix_values_not_empty=0" >> $GITHUB_OUTPUT + # combined_values="[]" + kernel_versions=() + + # Read and merge kernel_version values from dist files + DIST=("ubuntu22.04" "ubuntu24.04") + for d in "${DIST[@]}"; do + artifact_name="matrix-values-${d}" + file_path="./matrix_values_${d}.json" + echo "Attempting to download artifact: $artifact_name" + if gh run download --name "$artifact_name" --dir ./; then + echo "Successfully downloaded artifact: $artifact_name" + value=$(jq -r '.[]' "$file_path") + kernel_versions+=($value) + echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT + fi + done + echo "Collected Kernel Versions: ${kernel_versions[@]}" + combined_values=$(printf '%s\n' "${kernel_versions[@]}" | jq -R . | jq -s -c . | tr -d ' \n') + echo "Combined Kernel Versions JSON: $combined_values" + echo "matrix_values=$combined_values" >> $GITHUB_OUTPUT + + e2e-tests-nvidiadriver: + runs-on: linux-amd64-cpu4 + needs: + - collect-e2e-test-matrix + - set-driver-version-matrix + if: ${{ needs.collect-e2e-test-matrix.outputs.matrix_values_not_empty == '1' }} strategy: matrix: - kernel_version: ${{ fromJson(needs.determine-e2e-test-matrix.outputs.matrix_values) }} - dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} + kernel_version: ${{ fromJson(needs.collect-e2e-test-matrix.outputs.matrix_values) }} steps: - name: Check out code uses: actions/checkout@v4 @@ -191,43 +306,63 @@ jobs: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Set and Calculate test vars + run: | + echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV + echo "${{ secrets.AWS_SSH_KEY }}" > ${{ github.workspace }}/key.pem && chmod 400 ${{ github.workspace }}/key.pem + echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV + echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV + KERNEL_VERSION="${{ matrix.kernel_version }}" + # Extract the last segment after the last dash + DIST=${KERNEL_VERSION##*-} + echo "DIST=$DIST" >> $GITHUB_ENV + KERNEL_VERSION=${KERNEL_VERSION%-*} + echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV + driver_branch_json="${{ needs.set-driver-version-matrix.outputs.driver_branch }}" + DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]')) + echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV + + # step added to skip azure e2e tests and publish the image + # FIXME -- remove step once azure kernel upgrade starts working + - name: Skip azure e2e + run: | + if [[ "${KERNEL_VERSION}" == *-azure ]]; then + echo "e2e test for azure flavor skipped, as kernel upgrade AWS => azure is not supported" + echo "SKIP_REMAINING=true" >> $GITHUB_ENV + fi + - name: Set up Holodeck + if: ${{ env.SKIP_REMAINING != 'true' }} uses: NVIDIA/holodeck@v0.2.1 env: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SSH_KEY: ${{ secrets.AWS_SSH_KEY }} + DIST: ${{ env.DIST }} with: aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_ssh_key: ${{ secrets.AWS_SSH_KEY }} - holodeck_config: "tests/holodeck.yaml" - + holodeck_config: "tests/holodeck_${{ env.DIST }}.yaml" - name: Get public dns name + if: ${{ env.SKIP_REMAINING != 'true' }} id: get_public_dns_name uses: mikefarah/yq@master with: cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml - - name: Set and Calculate test vars + - name: Calculate holodeck instance hostname + if: ${{ env.SKIP_REMAINING != 'true' }} run: | echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> $GITHUB_ENV - echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV - echo "${{ secrets.AWS_SSH_KEY }}" > ${{ github.workspace }}/key.pem && chmod 400 ${{ github.workspace }}/key.pem - echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV - echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV - KERNEL_VERSION="${{ matrix.kernel_version }}" - echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV - echo "DIST=${{ matrix.dist }}" >> $GITHUB_ENV - driver_branch_json="${{ needs.set-driver-version-matrix.outputs.driver_branch }}" - DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]')) - echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV - name: Install GitHub CLI + if: ${{ env.SKIP_REMAINING != 'true' }} run: | sudo apt-get update sudo apt-get install -y gh - name: Upgrade the kernel for Precompiled e2e test + if: ${{ env.SKIP_REMAINING != 'true' }} env: UPGRADE_KERNEL_SCRIPT: "./tests/scripts/upgrade-kernel.sh" run: | @@ -249,6 +384,7 @@ jobs: fi - name: Precompiled e2e test gpu driver validation + if: ${{ env.SKIP_REMAINING != 'true' }} env: TEST_CASE: "./tests/cases/nvidia-driver.sh" GPU_OPERATOR_OPTIONS: "--set driver.repository=${{ env.PRIVATE_REGISTRY }}/nvidia --set driver.usePrecompiled=true \ @@ -258,6 +394,12 @@ jobs: rc=0 # for precompiled driver we are setting driver branch as driver version DRIVER_BRANCHES=(${{ env.DRIVER_BRANCHES }}) + # remove 535 driver branch for ubuntu24.04 + if [ "$DIST" == "ubuntu24.04" ]; then + DRIVER_BRANCHES=($(for branch in "${DRIVER_BRANCHES[@]}"; do + [[ $branch != "535" ]] && echo "$branch" + done)) + fi for DRIVER_VERSION in "${DRIVER_BRANCHES[@]}"; do echo "Running e2e for DRIVER_VERSION=$DRIVER_VERSION" image="driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}" @@ -290,13 +432,12 @@ jobs: runs-on: linux-amd64-cpu4 needs: - set-driver-version-matrix - - determine-e2e-test-matrix + - collect-e2e-test-matrix - e2e-tests-nvidiadriver strategy: matrix: driver_branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }} - kernel_version: ${{ fromJson(needs.determine-e2e-test-matrix.outputs.matrix_values) }} - dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }} + kernel_version: ${{ fromJson(needs.collect-e2e-test-matrix.outputs.matrix_values) }} steps: - name: Check out code uses: actions/checkout@v4 @@ -310,17 +451,18 @@ jobs: - name: Set image vars run: | echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV - echo "DIST=${{ matrix.dist }}" >> $GITHUB_ENV - name: Download built image artifact + if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }} uses: actions/download-artifact@v4 with: - name: driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}-${{ env.DIST }} + name: driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }} path: ./ - name: Publish image + if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }} run: | - image_path="./driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}-${{ env.DIST }}.tar" + image_path="./driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}.tar" echo "uploading $image_path" docker load -i $image_path - docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}-${{ env.DIST }} + docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }} diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index d05de9e0..8782e343 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -120,3 +120,12 @@ image-precompiled-ubuntu22.04: extends: - .driver-versions-precompiled-ubuntu22.04 - .image-build-precompiled + +image-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + CVE_UPDATES: "curl libc6" + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .image-build-precompiled diff --git a/.nvidia-ci.yml b/.nvidia-ci.yml index e1094d9d..2aa35799 100644 --- a/.nvidia-ci.yml +++ b/.nvidia-ci.yml @@ -77,6 +77,30 @@ variables: - !reference [.image-pull-rules, rules] +.image-pull-ubuntu24.04: + # Perform for each DRIVER_VERSION + extends: + - .driver-versions + - .image-pull-generic + rules: + - if: $CI_PIPELINE_SOURCE == "schedule" + when: never + - !reference [.image-pull-rules, rules] + +image-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + PRECOMPILED: "true" + CVE_UPDATES: "curl libc6" + rules: + - when: delayed + start_in: 30 minutes + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .image-pull-generic + + .image-pull-ubuntu22.04: # Perform for each DRIVER_VERSION extends: @@ -196,6 +220,18 @@ image-rhel8: - if: $CI_PIPELINE_SOURCE == "merge_request_event" - !reference [.pipeline-trigger-rules, rules] +.scan-precompiled-ubuntu24.04: + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + PRECOMPILED: "true" + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .scan-generic + rules: + - !reference [.scan-rules-common, rules] + - when: always + .scan-precompiled-ubuntu22.04: variables: DIST: signed_ubuntu22.04 @@ -306,6 +342,26 @@ release:ngc-ubuntu22.04: - .dist-ubuntu22.04 - .driver-versions +# TODO will be enabled after QA +# release:ngc-ubuntu24.04: +# extends: +# - .release:ngc +# - .dist-ubuntu24.04 +# - .driver-versions + +# release:ngc-precompiled-ubuntu24.04: +# variables: +# DIST: signed_ubuntu24.04 +# BASE_TARGET: noble +# PRECOMPILED: "true" +# extends: +# - .driver-versions-precompiled-ubuntu24.04 +# - .release-generic +# - .release:ngc-variables +# rules: +# # Only run NGC release job on scheduled pipelines +# - if: $CI_PIPELINE_SOURCE == "schedule" + release:ngc-precompiled-ubuntu22.04: variables: DIST: signed_ubuntu22.04 @@ -433,6 +489,23 @@ release:ngc-rhel8.10: - 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"' - ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia +sign:ngc-precompiled-ubuntu24.04: + extends: + - .driver-versions-precompiled-ubuntu24.04 + - .dist-ubuntu22.04 + - .release-generic + - .release:ngc-variables + - .sign:ngc + variables: + DIST: signed_ubuntu24.04 + BASE_TARGET: noble + PRECOMPILED: "true" + needs: + - release:ngc-precompiled-ubuntu24.04 + rules: + # Only run NGC release job on scheduled pipelines + - if: $CI_PIPELINE_SOURCE == "schedule" + sign:ngc-precompiled-ubuntu22.04: extends: - .driver-versions-precompiled-ubuntu22.04 @@ -455,6 +528,9 @@ sign:ngc-ubuntu-rhel-rhcos: - .sign:ngc parallel: matrix: + - SIGN_JOB_NAME: ["ubuntu"] + VERSION: ["24.04"] + DRIVER_VERSION: ["550.127.08"] - SIGN_JOB_NAME: ["ubuntu"] VERSION: ["22.04"] DRIVER_VERSION: ["535.216.03", "550.127.08"] diff --git a/Makefile b/Makefile index 7b338960..07aae4e6 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG) ##### Public rules ##### DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 signed_ubuntu24.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS)) -BASE_FROM := jammy focal +BASE_FROM := noble jammy focal PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS)) DRIVER_PUSH_TARGETS := $(foreach push_target, $(PUSH_TARGETS), $(addprefix $(push_target)-, $(DRIVER_VERSIONS))) BUILD_TARGETS := $(patsubst %, build-%, $(DISTRIBUTIONS)) @@ -210,6 +210,7 @@ $(BASE_BUILD_TARGETS): --build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \ --build-arg DRIVER_BRANCH="$(DRIVER_BRANCH)" \ --build-arg KERNEL_FLAVOR="$(KERNEL_FLAVOR)" \ + --build-arg LTS_KERNEL="$(LTS_KERNEL)" \ --file $(DOCKERFILE) \ $(CURDIR)/base diff --git a/base/Dockerfile b/base/Dockerfile index 5b86a348..0387473f 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -1,12 +1,51 @@ +# Ubuntu 24.04 +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu24.04 AS noble + +SHELL ["/bin/bash", "-c"] + +ARG DRIVER_BRANCH +ARG KERNEL_FLAVOR +ARG LTS_KERNEL +ENV DRIVER_BRANCH=${DRIVER_BRANCH} +ENV KERNEL_FLAVOR=${KERNEL_FLAVOR} +ENV LTS_KERNEL=${LTS_KERNEL} + +# Remove cuda repository to avoid GPG errors +RUN rm -f /etc/apt/sources.list.d/cuda* + +RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections + +ENV NVIDIA_VISIBLE_DEVICES=void + +RUN apt-get update && apt-get install -y --no-install-recommends \ + apt-utils git curl && \ + rm -rf /var/lib/apt/lists/* + +RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble main universe" > /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main universe" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-updates main restricted" >> /etc/apt/sources.list && \ + echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-security main restricted" >> /etc/apt/sources.list && \ + usermod -o -u 0 -g 0 _apt + +COPY generate-ci-config /usr/local/bin/generate-ci-config + +RUN chmod +x /usr/local/bin/generate-ci-config && \ + generate-ci-config + +ENTRYPOINT ["/usr/bin/sleep","1000"] + # Ubuntu 22.04 -FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 as jammy +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 AS jammy SHELL ["/bin/bash", "-c"] ARG DRIVER_BRANCH ARG KERNEL_FLAVOR +ARG LTS_KERNEL ENV DRIVER_BRANCH=${DRIVER_BRANCH} ENV KERNEL_FLAVOR=${KERNEL_FLAVOR} +ENV LTS_KERNEL=${LTS_KERNEL} # Remove cuda repository to avoid GPG errors RUN rm -f /etc/apt/sources.list.d/cuda* @@ -34,14 +73,16 @@ RUN chmod +x /usr/local/bin/generate-ci-config && \ ENTRYPOINT ["/usr/bin/sleep","1000"] # Ubuntu 20.04 -FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu20.04 as focal +FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu20.04 AS focal SHELL ["/bin/bash", "-c"] ARG DRIVER_BRANCH ARG KERNEL_FLAVOR +ARG LTS_KERNEL ENV DRIVER_BRANCH=${DRIVER_BRANCH} ENV KERNEL_FLAVOR=${KERNEL_FLAVOR} +ENV LTS_KERNEL=${LTS_KERNEL} # Remove cuda repository to avoid GPG errors RUN rm -f /etc/apt/sources.list.d/cuda* diff --git a/tests/holodeck.yaml b/tests/holodeck_ubuntu22.04.yaml similarity index 100% rename from tests/holodeck.yaml rename to tests/holodeck_ubuntu22.04.yaml diff --git a/tests/holodeck_ubuntu24.04.yaml b/tests/holodeck_ubuntu24.04.yaml new file mode 100644 index 00000000..759c7d93 --- /dev/null +++ b/tests/holodeck_ubuntu24.04.yaml @@ -0,0 +1,32 @@ +apiVersion: holodeck.nvidia.com/v1alpha1 +kind: Environment +metadata: + name: HOLODECK_NAME + description: "end-to-end test infrastructure" +spec: + provider: aws + auth: + keyName: cnt-ci + privateKey: HOLODECK_PRIVATE_KEY + instance: + type: g4dn.xlarge + region: us-west-1 + ingressIpRanges: + - 18.190.12.32/32 + - 3.143.46.93/32 + - 52.15.119.136/32 + - 35.155.108.162/32 + - 35.162.190.51/32 + - 54.201.61.24/32 + image: + architecture: amd64 + imageId: ami-0da424eb883458071 + containerRuntime: + install: true + name: containerd + version: 1.7.22 + kubernetes: + install: true + installer: kubeadm + version: v1.30.0 + crictlVersion: v1.30.0 diff --git a/tests/scripts/ci-precompiled-helpers.sh b/tests/scripts/ci-precompiled-helpers.sh index 2efa9a75..20f7a813 100644 --- a/tests/scripts/ci-precompiled-helpers.sh +++ b/tests/scripts/ci-precompiled-helpers.sh @@ -1,6 +1,6 @@ get_kernel_versions_to_test() { if [[ "$#" -ne 4 ]]; then - echo " Error:$0 must be called with BASE_TARGET DRIVER_BRANCHES DRIVER_BRANCHES DIST" >&2 + echo " Error:$0 must be called with BASE_TARGET KERNEL_FLAVORS DRIVER_BRANCHES DIST" >&2 exit 1 fi @@ -11,10 +11,6 @@ get_kernel_versions_to_test() { kernel_versions=() for kernel_flavor in "${KERNEL_FLAVORS[@]}"; do - # FIXME -- remove if condition, once azure kernel upgrade starts working - if [[ "$kernel_flavor" == "azure" ]]; then - continue - fi for DRIVER_BRANCH in "${DRIVER_BRANCHES[@]}"; do source ./tests/scripts/findkernelversion.sh "$BASE_TARGET" "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" >&2 if [[ "$should_continue" == true ]]; then diff --git a/tests/scripts/findkernelversion.sh b/tests/scripts/findkernelversion.sh index b66aca83..8428fb4b 100755 --- a/tests/scripts/findkernelversion.sh +++ b/tests/scripts/findkernelversion.sh @@ -17,8 +17,28 @@ chmod a+x bin/regctl export PATH=$(pwd)/bin:${PATH} # calculate kernel version of latest image -regctl image get-file ghcr.io/nvidia/driver:base-${BASE_TARGET}-${KERNEL_FLAVOR}-${DRIVER_BRANCH} /var/kernel_version.txt ./kernel_version.txt -export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt) +regctl image get-file ghcr.io/nvidia/driver:base-${BASE_TARGET}-${KERNEL_FLAVOR}-${DRIVER_BRANCH} /var/kernel_version.txt ./kernel_version.txt 2>/dev/null || true +if [[ -f ./kernel_version.txt && -s ./kernel_version.txt ]]; then + # File exists and is not empty + export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt) + rm -f kernel_version.txt +else + # Define variables for artifact pattern + prefix="kernel-version-${DRIVER_BRANCH}-${LTS_KERNEL}" + suffix="${kernel_flavor}-${DIST}" + artifacts=$(gh api -X GET /repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/artifacts --jq '.artifacts[].name') + # Use a loop or a pattern to find the matching artifact dynamically + for artifact in $artifacts; do + if [[ $artifact == $prefix*-$suffix ]]; then + gh run download --name "$artifact" --dir ./ + tar -xf $artifact.tar + rm -f $artifact.tar + export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt) + rm -f kernel_version.txt + break + fi + done +fi # calculate driver tag status=0 @@ -28,3 +48,5 @@ if [[ $status -eq 0 ]]; then else export should_continue=true fi +#SHIVA +export should_continue=true