diff --git a/.common-ci.yml b/.common-ci.yml
index 86f91be2..424eba74 100644
--- a/.common-ci.yml
+++ b/.common-ci.yml
@@ -88,6 +88,13 @@ trigger-pipeline:
       - DRIVER_BRANCH: [535, 550]
         KERNEL_FLAVOR: [aws, azure, generic, nvidia, oracle]
 
+# Define the matrix of precompiled jobs that can be run in parallel for ubuntu24.04
+.driver-versions-precompiled-ubuntu24.04:
+  parallel:
+    matrix:
+      - DRIVER_BRANCH: [550]
+        KERNEL_FLAVOR: [aws, azure, generic, nvidia, oracle]
+
 # Define the distribution targets
 .dist-ubuntu20.04:
   variables:
@@ -304,3 +311,14 @@ release:staging-precompiled-ubuntu22.04:
     - .release:staging-precompiled
   needs:
     - image-precompiled-ubuntu22.04
+
+# Precompiled Ubuntu24.04 release
+release:staging-precompiled-ubuntu24.04:
+  variables:
+    DIST: signed_ubuntu24.04
+    BASE_TARGET: noble
+  extends:
+    - .driver-versions-precompiled-ubuntu24.04
+    - .release:staging-precompiled
+  needs:
+    - image-precompiled-ubuntu24.04
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index a46e34d3..fa5b321c 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -40,7 +40,7 @@ jobs:
           aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
           aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
-          holodeck_config: "tests/holodeck.yaml"
+          holodeck_config: "tests/holodeck_ubuntu22.04.yaml"
 
       - name: Get public dns name
         id: get_public_dns_name
diff --git a/.github/workflows/precompiled.yaml b/.github/workflows/precompiled.yaml
index 769ce505..0353db59 100644
--- a/.github/workflows/precompiled.yaml
+++ b/.github/workflows/precompiled.yaml
@@ -26,6 +26,10 @@ jobs:
       driver_branch: ${{ steps.extract_driver_branch.outputs.driver_branch }}
       kernel_flavors: ${{ steps.extract_driver_branch.outputs.kernel_flavors }}
       dist: ${{ steps.extract_driver_branch.outputs.dist }}
+      base_target_ubuntu22_04: ${{ steps.extract_driver_branch.outputs.base_target_ubuntu22_04 }}
+      lts_kernel_ubuntu22_04: ${{ steps.extract_driver_branch.outputs.lts_kernel_ubuntu22_04 }}
+      base_target_ubuntu24_04: ${{ steps.extract_driver_branch.outputs.base_target_ubuntu24_04 }}
+      lts_kernel_ubuntu24_04: ${{ steps.extract_driver_branch.outputs.lts_kernel_ubuntu24_04 }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -43,10 +47,16 @@ jobs:
           echo "kernel_flavors=$kernel_flavors_json" >> $GITHUB_OUTPUT
 
           # get ubuntu distributions
-          DIST=("ubuntu22.04")
+          DIST=("ubuntu22.04" "ubuntu24.04")
           dist_json=$(printf '%s\n' "${DIST[@]}" | jq -R . | jq -cs .)
           echo "dist=$dist_json" >> $GITHUB_OUTPUT
 
+          # BASE_TARGET and LTS_KERNEL setup
+          echo "base_target_ubuntu22_04=jammy" >> $GITHUB_OUTPUT
+          echo "base_target_ubuntu24_04=noble" >> $GITHUB_OUTPUT
+          echo "lts_kernel_ubuntu22_04=5.15" >> $GITHUB_OUTPUT
+          echo "lts_kernel_ubuntu24_04=6.8" >> $GITHUB_OUTPUT
+
   precompiled-build-image:
     needs: set-driver-version-matrix
     runs-on: linux-amd64-cpu4
@@ -55,11 +65,16 @@ jobs:
         driver_branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }}
         flavor: ${{ fromJson(needs.set-driver-version-matrix.outputs.kernel_flavors) }}
         dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }}
+        exclude:
+          - dist: ubuntu24.04
+            driver_branch: 535
     steps:
       - uses: actions/checkout@v4
         name: Check out code
       - name: Calculate build vars
         id: vars
+        env:
+          DIST: ${{ matrix.dist }}
         run: |
           echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
           echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
@@ -70,6 +85,11 @@ jobs:
           echo "PUSH_ON_BUILD=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
           echo "BUILD_MULTI_ARCH_IMAGES=${GENERATE_ARTIFACTS}" >> $GITHUB_ENV
 
+          VAR_NAME_BASE_TARGET=$(echo base_target_${DIST} | sed 's/\./_/g')
+          echo "VAR_NAME_BASE_TARGET=${VAR_NAME_BASE_TARGET}" >> $GITHUB_ENV
+          VAR_NAME_LTS_KERNEL=$(echo lts_kernel_${DIST} | sed 's/\./_/g')
+          echo "VAR_NAME_LTS_KERNEL=${VAR_NAME_LTS_KERNEL}" >> $GITHUB_ENV
+
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v3
       - name: Set up Docker Buildx
@@ -84,9 +104,10 @@ jobs:
         env:
           IMAGE_NAME: ghcr.io/nvidia/driver
           VERSION: ${COMMIT_SHORT_SHA}
-          BASE_TARGET: jammy
+          BASE_TARGET: ${{ needs.set-driver-version-matrix.outputs[env.VAR_NAME_BASE_TARGET] }}
+          LTS_KERNEL: ${{ needs.set-driver-version-matrix.outputs[env.VAR_NAME_LTS_KERNEL] }}
         run: |
-          make DRIVER_BRANCH=${{ matrix.driver_branch }} KERNEL_FLAVOR=${{ matrix.flavor }} build-base-${BASE_TARGET}
+          make DRIVER_BRANCH=${{ matrix.driver_branch }} KERNEL_FLAVOR=${{ matrix.flavor }} LTS_KERNEL=${LTS_KERNEL} build-base-${BASE_TARGET}
 
           trap "docker rm -f base-${BASE_TARGET}-${{ matrix.flavor }}" EXIT
           docker run -d --name base-${BASE_TARGET}-${{ matrix.flavor }} ghcr.io/nvidia/driver:base-${BASE_TARGET}-${{ matrix.flavor }}-${{ matrix.driver_branch }}
@@ -105,12 +126,13 @@ jobs:
           source kernel_version.txt && \
           make DRIVER_VERSIONS=${DRIVER_VERSIONS} DRIVER_BRANCH=${{ matrix.driver_branch }} build-${DIST}-${DRIVER_VERSION}
 
-      - name: Save build image as a tar
+      - name: Save build image and kernel version file
         env:
           DIST: ${{ matrix.dist }}
           PRIVATE_REGISTRY: "ghcr.io"
         run: |
           source kernel_version.txt
+          tar -cvf kernel-version-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar kernel_version.txt
           docker save "${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}" \
             -o ./driver-images-${{ matrix.driver_branch }}-${KERNEL_VERSION}-${DIST}.tar
           # set env for artifacts upload
@@ -123,7 +145,14 @@ jobs:
           name: driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}
           path: ./driver-images-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar
           retention-days: 1
-        
+
+      - name: Upload kernel version as an artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: kernel-version-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}
+          path: ./kernel-version-${{ matrix.driver_branch }}-${{ env.KERNEL_VERSION }}-${{ env.DIST }}.tar
+          retention-days: 1
+
   determine-e2e-test-matrix:
     runs-on: linux-amd64-cpu4
     strategy:
@@ -132,10 +161,6 @@ jobs:
     needs:
       - precompiled-build-image
       - set-driver-version-matrix
-    outputs:
-      matrix_values_not_empty: ${{ steps.set_kernel_version.outputs.matrix_values_not_empty }}
-      matrix_values: ${{ steps.set_kernel_version.outputs.matrix_values }}
-      dist: ${{ steps.set-driver-version-matrix.outputs.dist }}
     steps:
       - name: Check out code
         uses: actions/checkout@v4
@@ -146,19 +171,34 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Set kernel version
-        id: set_kernel_version
+      - name: Calculate build vars
+        id: vars
         env:
-          BASE_TARGET: "jammy"
           DIST: ${{ matrix.dist }}
         run: |
-          echo "matrix_values_not_empty=0" >> $GITHUB_OUTPUT
+          VAR_NAME_BASE_TARGET=$(echo base_target_${DIST} | sed 's/\./_/g')
+          echo "VAR_NAME_BASE_TARGET=${VAR_NAME_BASE_TARGET}" >> $GITHUB_ENV
+          VAR_NAME_LTS_KERNEL=$(echo lts_kernel_${DIST} | sed 's/\./_/g')
+          echo "VAR_NAME_LTS_KERNEL=${VAR_NAME_LTS_KERNEL}" >> $GITHUB_ENV
 
+      - name: Set kernel version
+        env:
+          DIST: ${{ matrix.dist }}
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          BASE_TARGET: ${{ needs.set-driver-version-matrix.outputs[env.VAR_NAME_BASE_TARGET] }}
+          LTS_KERNEL: ${{ needs.set-driver-version-matrix.outputs[env.VAR_NAME_LTS_KERNEL] }}
+        run: |
           kernel_flavors_json='${{ needs.set-driver-version-matrix.outputs.kernel_flavors }}'
           KERNEL_FLAVORS=($(echo "$kernel_flavors_json" | jq -r '.[]'))
 
           driver_branch_json='${{ needs.set-driver-version-matrix.outputs.driver_branch }}'
           DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
+          # remove 535 driver branch for ubuntu24.04
+          if [ "$DIST" == "ubuntu24.04" ]; then
+            DRIVER_BRANCHES=($(for branch in "${DRIVER_BRANCHES[@]}"; do
+              [[ $branch != "535" ]] && echo "$branch"
+            done))
+          fi
           source ./tests/scripts/ci-precompiled-helpers.sh
           KERNEL_VERSIONS=($(get_kernel_versions_to_test $BASE_TARGET KERNEL_FLAVORS[@] DRIVER_BRANCHES[@] $DIST))
           if [ -z "$KERNEL_VERSIONS" ]; then
@@ -167,21 +207,71 @@ jobs:
             exit 0
           fi
           # Convert array to JSON format and assign
-          echo "[]" > $GITHUB_WORKSPACE/matrix_values.json
-          printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > $GITHUB_WORKSPACE/matrix_values.json
-          echo "matrix_values=$(cat $GITHUB_WORKSPACE/matrix_values.json | jq -c .)" >> $GITHUB_OUTPUT
-          echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT
+          echo "[]" > ./matrix_values_$DIST.json
+          printf '%s\n' "${KERNEL_VERSIONS[@]}" | jq -R . | jq -s . > ./matrix_values_$DIST.json
 
-  e2e-tests-nvidiadriver:
+      - name: Upload kernel matrix values as artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: matrix-values-${{ matrix.dist }}
+          path: ./matrix_values_${{ matrix.dist }}.json
+          retention-days: 1
+
+  collect-e2e-test-matrix:
     runs-on: linux-amd64-cpu4
     needs:
       - determine-e2e-test-matrix
       - set-driver-version-matrix
-    if: ${{ needs.determine-e2e-test-matrix.outputs.matrix_values_not_empty == '1' }}
+    outputs:
+      matrix_values_not_empty: ${{ steps.set_kernel_version.outputs.matrix_values_not_empty }}
+      matrix_values: ${{ steps.set_kernel_version.outputs.matrix_values }}
+      exclude_matrix_values: ${{ steps.set_kernel_version.outputs.exclude_matrix_values }}
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+      - name: Set and append matrix values for ubuntu
+        id: set_kernel_version
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          echo "matrix_values_not_empty=0" >> $GITHUB_OUTPUT
+          # combined_values="[]"
+          kernel_versions=()
+
+          # Read and merge kernel_version values from dist files
+          dist_json='${{ needs.set-driver-version-matrix.outputs.dist }}'
+          DIST=($(echo "$dist_json" | jq -r '.[]'))
+          for d in "${DIST[@]}"; do
+            artifact_name="matrix-values-${d}"
+            file_path="./matrix_values_${d}.json"
+            echo "Attempting to download artifact: $artifact_name"
+            if gh run download --name "$artifact_name" --dir ./; then
+              echo "Successfully downloaded artifact: $artifact_name"
+              value=$(jq -r '.[]' "$file_path")
+              kernel_versions+=($value)
+              echo "matrix_values_not_empty=1" >> $GITHUB_OUTPUT
+            fi
+          done
+          echo "Collected Kernel Versions: ${kernel_versions[@]}"
+          combined_values=$(printf '%s\n' "${kernel_versions[@]}" | jq -R . | jq -s -c . | tr -d ' \n')
+          echo "Combined Kernel Versions JSON: $combined_values"
+          # FIXME -- remove once azure kernel upgrade starts working
+          exclude_combined_values=$(printf '%s\n' "${kernel_versions[@]}" | jq -R . | jq -s -c 'map(select(test("azure")))')
+          exclude_combined_values=$(echo "$exclude_combined_values" | jq -c '[.[] | {kernel_version: .}]')
+          echo "exclude_combined_values Kernel Versions : $exclude_combined_values"
+          echo "matrix_values=$combined_values" >> $GITHUB_OUTPUT
+          echo "exclude_matrix_values=$exclude_combined_values" >> $GITHUB_OUTPUT
+
+  e2e-tests-nvidiadriver:
+    runs-on: linux-amd64-cpu4
+    needs:
+      - collect-e2e-test-matrix
+      - set-driver-version-matrix
+    if: ${{ needs.collect-e2e-test-matrix.outputs.matrix_values_not_empty == '1' }}
     strategy:
       matrix:
-        kernel_version: ${{ fromJson(needs.determine-e2e-test-matrix.outputs.matrix_values) }}
-        dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }}
+        kernel_version: ${{ fromJson(needs.collect-e2e-test-matrix.outputs.matrix_values) }}
+        exclude: ${{ fromJson(needs.collect-e2e-test-matrix.outputs.exclude_matrix_values) }}
     steps:
       - name: Check out code
         uses: actions/checkout@v4
@@ -191,36 +281,42 @@ jobs:
           registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Set and Calculate test vars
+        run: |
+          echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV
+          echo "${{ secrets.AWS_SSH_KEY }}" > ${{ github.workspace }}/key.pem && chmod 400 ${{ github.workspace }}/key.pem
+          echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
+          echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV
+          KERNEL_VERSION="${{ matrix.kernel_version }}"
+          # Extract the last segment after the last dash
+          DIST=${KERNEL_VERSION##*-}
+          echo "DIST=$DIST" >> $GITHUB_ENV
+          KERNEL_VERSION=${KERNEL_VERSION%-*}
+          echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV
+          driver_branch_json="${{ needs.set-driver-version-matrix.outputs.driver_branch }}"
+          DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
+          echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV
+
       - name: Set up Holodeck
         uses: NVIDIA/holodeck@v0.2.1
         env:
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
+          DIST: ${{ env.DIST }}
         with:
           aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
           aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
           aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
-          holodeck_config: "tests/holodeck.yaml"
-
+          holodeck_config: "tests/holodeck_${{ env.DIST }}.yaml"
       - name: Get public dns name
         id: get_public_dns_name
         uses: mikefarah/yq@master
         with:
           cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml
-      - name: Set and Calculate test vars
+      - name: Calculate holodeck instance hostname
         run: |
           echo "instance_hostname=ubuntu@${{ steps.get_public_dns_name.outputs.result }}" >> $GITHUB_ENV
-          echo "private_key=${{ github.workspace }}/key.pem" >> $GITHUB_ENV
-          echo "${{ secrets.AWS_SSH_KEY }}" > ${{ github.workspace }}/key.pem && chmod 400 ${{ github.workspace }}/key.pem
-          echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
-          echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV
-          KERNEL_VERSION="${{ matrix.kernel_version }}"
-          echo "KERNEL_VERSION=$KERNEL_VERSION" >> $GITHUB_ENV
-          echo "DIST=${{ matrix.dist }}" >> $GITHUB_ENV
-          driver_branch_json="${{ needs.set-driver-version-matrix.outputs.driver_branch }}"
-          DRIVER_BRANCHES=($(echo "$driver_branch_json" | jq -r '.[]'))
-          echo "DRIVER_BRANCHES=${DRIVER_BRANCHES[*]}" >> $GITHUB_ENV
 
       - name: Install GitHub CLI
         run: |
@@ -258,6 +354,12 @@ jobs:
           rc=0
           # for precompiled driver we are setting driver branch as driver version
           DRIVER_BRANCHES=(${{ env.DRIVER_BRANCHES }})
+          # remove 535 driver branch for ubuntu24.04
+          if [ "$DIST" == "ubuntu24.04" ]; then
+            DRIVER_BRANCHES=($(for branch in "${DRIVER_BRANCHES[@]}"; do
+              [[ $branch != "535" ]] && echo "$branch"
+            done))
+          fi
           for DRIVER_VERSION in "${DRIVER_BRANCHES[@]}"; do
             echo "Running e2e for DRIVER_VERSION=$DRIVER_VERSION"
             image="driver-images-${DRIVER_VERSION}-${KERNEL_VERSION}-${DIST}"
@@ -290,13 +392,12 @@ jobs:
     runs-on: linux-amd64-cpu4
     needs:
       - set-driver-version-matrix
-      - determine-e2e-test-matrix
+      - collect-e2e-test-matrix
       - e2e-tests-nvidiadriver
     strategy:
       matrix:
         driver_branch: ${{ fromJson(needs.set-driver-version-matrix.outputs.driver_branch) }}
-        kernel_version: ${{ fromJson(needs.determine-e2e-test-matrix.outputs.matrix_values) }}
-        dist: ${{ fromJson(needs.set-driver-version-matrix.outputs.dist) }}
+        kernel_version: ${{ fromJson(needs.collect-e2e-test-matrix.outputs.matrix_values) }}
     steps:
       - name: Check out code
         uses: actions/checkout@v4
@@ -310,17 +411,18 @@ jobs:
       - name: Set image vars
         run: |
           echo "PRIVATE_REGISTRY=ghcr.io" >> $GITHUB_ENV
-          echo "DIST=${{ matrix.dist }}" >> $GITHUB_ENV
 
       - name: Download built image artifact
+        if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }}
         uses: actions/download-artifact@v4
         with:
-          name: driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}-${{ env.DIST }}
+          name: driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}
           path: ./
 
       - name: Publish image
+        if: ${{ ! (matrix.driver_branch == 535 && contains(matrix.kernel_version, 'ubuntu24.04')) }}
         run: |
-          image_path="./driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}-${{ env.DIST }}.tar"
+          image_path="./driver-images-${{ matrix.driver_branch }}-${{ matrix.kernel_version }}.tar"
           echo "uploading $image_path"
           docker load -i $image_path
-          docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}-${{ env.DIST }}
+          docker push ${PRIVATE_REGISTRY}/nvidia/driver:${{ matrix.driver_branch }}-${{ matrix.kernel_version }}
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index d05de9e0..8782e343 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -120,3 +120,12 @@ image-precompiled-ubuntu22.04:
   extends:
     - .driver-versions-precompiled-ubuntu22.04
     - .image-build-precompiled
+
+image-precompiled-ubuntu24.04:
+  variables:
+    DIST: signed_ubuntu24.04
+    BASE_TARGET: noble
+    CVE_UPDATES: "curl libc6"
+  extends:
+    - .driver-versions-precompiled-ubuntu24.04
+    - .image-build-precompiled
diff --git a/.nvidia-ci.yml b/.nvidia-ci.yml
index e1094d9d..1cb2ef82 100644
--- a/.nvidia-ci.yml
+++ b/.nvidia-ci.yml
@@ -77,6 +77,30 @@ variables:
     - !reference [.image-pull-rules, rules]
 
 
+.image-pull-ubuntu24.04:
+  # Perform for each DRIVER_VERSION
+  extends:
+    - .driver-versions
+    - .image-pull-generic
+  rules:
+    - if: $CI_PIPELINE_SOURCE == "schedule"
+      when: never
+    - !reference [.image-pull-rules, rules]
+
+image-precompiled-ubuntu24.04:
+  variables:
+    DIST: signed_ubuntu24.04
+    BASE_TARGET: noble
+    PRECOMPILED: "true"
+    CVE_UPDATES: "curl libc6"
+  rules:
+    - when: delayed
+      start_in: 30 minutes
+  extends:
+    - .driver-versions-precompiled-ubuntu24.04
+    - .image-pull-generic
+
+
 .image-pull-ubuntu22.04:
   # Perform for each DRIVER_VERSION
   extends:
@@ -196,6 +220,18 @@ image-rhel8:
     - if: $CI_PIPELINE_SOURCE == "merge_request_event"
     - !reference [.pipeline-trigger-rules, rules]
 
+.scan-precompiled-ubuntu24.04:
+  variables:
+    DIST: signed_ubuntu24.04
+    BASE_TARGET: noble
+    PRECOMPILED: "true"
+  extends:
+    - .driver-versions-precompiled-ubuntu24.04
+    - .scan-generic
+  rules:
+    - !reference [.scan-rules-common, rules]
+    - when: always
+
 .scan-precompiled-ubuntu22.04:
   variables:
     DIST: signed_ubuntu22.04
@@ -306,6 +342,26 @@ release:ngc-ubuntu22.04:
     - .dist-ubuntu22.04
     - .driver-versions
 
+# TODO will be enabled after QA
+# release:ngc-ubuntu24.04:
+#   extends:
+#     - .release:ngc
+#     - .dist-ubuntu24.04
+#     - .driver-versions
+
+# release:ngc-precompiled-ubuntu24.04:
+#   variables:
+#     DIST: signed_ubuntu24.04
+#     BASE_TARGET: noble
+#     PRECOMPILED: "true"
+#   extends:
+#     - .driver-versions-precompiled-ubuntu24.04
+#     - .release-generic
+#     - .release:ngc-variables
+#   rules:
+#     # Only run NGC release job on scheduled pipelines
+#     - if: $CI_PIPELINE_SOURCE == "schedule"
+
 release:ngc-precompiled-ubuntu22.04:
   variables:
     DIST: signed_ubuntu22.04
@@ -433,6 +489,23 @@ release:ngc-rhel8.10:
     - 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"'
     - ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia
 
+#sign:ngc-precompiled-ubuntu24.04:
+#  extends:
+#    - .driver-versions-precompiled-ubuntu24.04
+#    - .dist-ubuntu22.04
+#    - .release-generic
+#    - .release:ngc-variables
+#    - .sign:ngc
+#  variables:
+#    DIST: signed_ubuntu24.04
+#    BASE_TARGET: noble
+#    PRECOMPILED: "true"
+#  needs:
+#    - release:ngc-precompiled-ubuntu24.04
+#  rules:
+#    # Only run NGC release job on scheduled pipelines
+#    - if: $CI_PIPELINE_SOURCE == "schedule"
+
 sign:ngc-precompiled-ubuntu22.04:
   extends:
     - .driver-versions-precompiled-ubuntu22.04
@@ -455,6 +528,9 @@ sign:ngc-ubuntu-rhel-rhcos:
     - .sign:ngc
   parallel:
     matrix:
+#      - SIGN_JOB_NAME: ["ubuntu"]
+#        VERSION: ["24.04"]
+#        DRIVER_VERSION: ["550.127.08"]
       - SIGN_JOB_NAME: ["ubuntu"]
         VERSION: ["22.04"]
         DRIVER_VERSION: ["535.216.03", "550.127.08"]
diff --git a/Makefile b/Makefile
index 7b338960..07aae4e6 100644
--- a/Makefile
+++ b/Makefile
@@ -56,7 +56,7 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
 ##### Public rules #####
 DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 signed_ubuntu24.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
 PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
-BASE_FROM := jammy focal
+BASE_FROM := noble jammy focal
 PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
 DRIVER_PUSH_TARGETS := $(foreach push_target, $(PUSH_TARGETS), $(addprefix $(push_target)-, $(DRIVER_VERSIONS)))
 BUILD_TARGETS := $(patsubst %, build-%, $(DISTRIBUTIONS))
@@ -210,6 +210,7 @@ $(BASE_BUILD_TARGETS):
 				--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
 				--build-arg DRIVER_BRANCH="$(DRIVER_BRANCH)" \
 				--build-arg KERNEL_FLAVOR="$(KERNEL_FLAVOR)" \
+				--build-arg LTS_KERNEL="$(LTS_KERNEL)" \
 				--file $(DOCKERFILE) \
 				$(CURDIR)/base
 
diff --git a/base/Dockerfile b/base/Dockerfile
index 5b86a348..0387473f 100644
--- a/base/Dockerfile
+++ b/base/Dockerfile
@@ -1,12 +1,51 @@
+# Ubuntu 24.04
+FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu24.04 AS noble
+
+SHELL ["/bin/bash", "-c"]
+
+ARG DRIVER_BRANCH
+ARG KERNEL_FLAVOR
+ARG LTS_KERNEL
+ENV DRIVER_BRANCH=${DRIVER_BRANCH}
+ENV KERNEL_FLAVOR=${KERNEL_FLAVOR}
+ENV LTS_KERNEL=${LTS_KERNEL}
+
+# Remove cuda repository to avoid GPG errors
+RUN rm -f /etc/apt/sources.list.d/cuda*
+
+RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections
+
+ENV NVIDIA_VISIBLE_DEVICES=void
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    apt-utils git curl && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble main universe" > /etc/apt/sources.list && \
+    echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main universe" >> /etc/apt/sources.list && \
+    echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main universe" >> /etc/apt/sources.list && \
+    echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-updates main restricted" >> /etc/apt/sources.list && \
+    echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-security main restricted" >> /etc/apt/sources.list && \
+    usermod -o -u 0 -g 0 _apt
+
+COPY generate-ci-config /usr/local/bin/generate-ci-config
+
+RUN chmod +x /usr/local/bin/generate-ci-config && \
+    generate-ci-config
+
+ENTRYPOINT ["/usr/bin/sleep","1000"]
+
 # Ubuntu 22.04
-FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 as jammy
+FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 AS jammy
 
 SHELL ["/bin/bash", "-c"]
 
 ARG DRIVER_BRANCH
 ARG KERNEL_FLAVOR
+ARG LTS_KERNEL
 ENV DRIVER_BRANCH=${DRIVER_BRANCH}
 ENV KERNEL_FLAVOR=${KERNEL_FLAVOR}
+ENV LTS_KERNEL=${LTS_KERNEL}
 
 # Remove cuda repository to avoid GPG errors
 RUN rm -f /etc/apt/sources.list.d/cuda*
@@ -34,14 +73,16 @@ RUN chmod +x /usr/local/bin/generate-ci-config && \
 ENTRYPOINT ["/usr/bin/sleep","1000"]
 
 # Ubuntu 20.04
-FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu20.04 as focal
+FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu20.04 AS focal
 
 SHELL ["/bin/bash", "-c"]
 
 ARG DRIVER_BRANCH
 ARG KERNEL_FLAVOR
+ARG LTS_KERNEL
 ENV DRIVER_BRANCH=${DRIVER_BRANCH}
 ENV KERNEL_FLAVOR=${KERNEL_FLAVOR}
+ENV LTS_KERNEL=${LTS_KERNEL}
 
 # Remove cuda repository to avoid GPG errors
 RUN rm -f /etc/apt/sources.list.d/cuda*
diff --git a/base/generate-ci-config b/base/generate-ci-config
index 1fc863e0..5f560df5 100755
--- a/base/generate-ci-config
+++ b/base/generate-ci-config
@@ -22,7 +22,8 @@ SUPPORTED_KERNELS=$(apt-cache search linux-objects-nvidia-${DRIVER_BRANCH}-serve
 DRIVER_VERSION=$(apt-cache show nvidia-utils-${DRIVER_BRANCH}-server |grep Version |awk '{print $2}' | cut -d'-' -f1 | head -n 1)
 
 # Latest supported kernel
-SK=$(echo $SUPPORTED_KERNELS | awk '{print $NF}')
+# only consider suffix -KERNEL_FLAVOR not KERNEL_FLAVOR-* (e.g. KERNEL_FLAVOR-lowlatency)
+SK=$(echo "$SUPPORTED_KERNELS" | awk -v f="$KERNEL_FLAVOR" '$0 ~ "-" f "$" {last=$0} END{print last}')
 
 # Write to file
 echo "export KERNEL_VERSION=$SK DRIVER_VERSION=$DRIVER_VERSION DRIVER_VERSIONS=$DRIVER_VERSION" > /var/kernel_version.txt
diff --git a/tests/holodeck.yaml b/tests/holodeck_ubuntu22.04.yaml
similarity index 100%
rename from tests/holodeck.yaml
rename to tests/holodeck_ubuntu22.04.yaml
diff --git a/tests/holodeck_ubuntu24.04.yaml b/tests/holodeck_ubuntu24.04.yaml
new file mode 100644
index 00000000..759c7d93
--- /dev/null
+++ b/tests/holodeck_ubuntu24.04.yaml
@@ -0,0 +1,32 @@
+apiVersion: holodeck.nvidia.com/v1alpha1
+kind: Environment
+metadata:
+  name: HOLODECK_NAME
+  description: "end-to-end test infrastructure"
+spec:
+  provider: aws
+  auth:
+    keyName: cnt-ci
+    privateKey: HOLODECK_PRIVATE_KEY
+  instance:
+    type: g4dn.xlarge
+    region: us-west-1
+    ingressIpRanges:
+      - 18.190.12.32/32
+      - 3.143.46.93/32
+      - 52.15.119.136/32
+      - 35.155.108.162/32
+      - 35.162.190.51/32
+      - 54.201.61.24/32
+    image:
+      architecture: amd64
+      imageId: ami-0da424eb883458071
+  containerRuntime:
+    install: true
+    name: containerd
+    version: 1.7.22
+  kubernetes:
+    install: true
+    installer: kubeadm
+    version: v1.30.0
+    crictlVersion: v1.30.0
diff --git a/tests/scripts/ci-precompiled-helpers.sh b/tests/scripts/ci-precompiled-helpers.sh
index 2efa9a75..436208b0 100644
--- a/tests/scripts/ci-precompiled-helpers.sh
+++ b/tests/scripts/ci-precompiled-helpers.sh
@@ -1,6 +1,6 @@
 get_kernel_versions_to_test() {
     if [[ "$#" -ne 4 ]]; then
-        echo " Error:$0 must be called with BASE_TARGET DRIVER_BRANCHES DRIVER_BRANCHES DIST" >&2
+        echo " Error:$0 must be called with BASE_TARGET KERNEL_FLAVORS DRIVER_BRANCHES DIST" >&2
         exit 1
     fi
 
@@ -11,10 +11,6 @@ get_kernel_versions_to_test() {
 
     kernel_versions=()
     for kernel_flavor in "${KERNEL_FLAVORS[@]}"; do
-        # FIXME -- remove if condition, once azure kernel upgrade starts working
-        if [[ "$kernel_flavor" == "azure" ]]; then
-            continue
-        fi
         for DRIVER_BRANCH in "${DRIVER_BRANCHES[@]}"; do
             source ./tests/scripts/findkernelversion.sh "$BASE_TARGET" "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" >&2
             if [[ "$should_continue" == true ]]; then
@@ -26,5 +22,10 @@ get_kernel_versions_to_test() {
             kernel_versions+=("$KERNEL_VERSION")
         fi
     done
+    # Remove duplicates
+    kernel_versions=($(printf "%s\n" "${kernel_versions[@]}" | sort -u))
+    for i in "${!kernel_versions[@]}"; do
+        kernel_versions[$i]="${kernel_versions[$i]}-$DIST"
+    done
     echo "${kernel_versions[@]}"
 }
diff --git a/tests/scripts/findkernelversion.sh b/tests/scripts/findkernelversion.sh
index b66aca83..f3378acf 100755
--- a/tests/scripts/findkernelversion.sh
+++ b/tests/scripts/findkernelversion.sh
@@ -17,8 +17,30 @@ chmod a+x bin/regctl
 export PATH=$(pwd)/bin:${PATH}
 
 # calculate kernel version of latest image
-regctl image get-file ghcr.io/nvidia/driver:base-${BASE_TARGET}-${KERNEL_FLAVOR}-${DRIVER_BRANCH} /var/kernel_version.txt ./kernel_version.txt
-export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt)
+regctl image get-file ghcr.io/nvidia/driver:base-${BASE_TARGET}-${KERNEL_FLAVOR}-${DRIVER_BRANCH} /var/kernel_version.txt ./kernel_version.txt 2>/dev/null || true
+if [[ -f ./kernel_version.txt && -s ./kernel_version.txt ]]; then
+    # File exists and is not empty
+    export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt)
+    rm -f kernel_version.txt
+else
+    # Define variables for artifact pattern
+    prefix="kernel-version-${DRIVER_BRANCH}-${LTS_KERNEL}"
+    suffix="${KERNEL_FLAVOR}-${DIST}"
+    artifacts=$(gh api -X GET /repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/artifacts --jq '.artifacts[].name')
+    # Use a loop or a pattern to find the matching artifact dynamically
+    for artifact in $artifacts; do
+        # TODO remove this check once nvidia is available
+        # currently for ubuntu24.04 kernel_flavor = nvidia-lowlatency
+        if [[ $artifact == $prefix*-$suffix ]]; then
+            gh run download --name "$artifact" --dir ./
+            tar -xf $artifact.tar
+            rm -f $artifact.tar
+            export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt)
+            rm -f kernel_version.txt
+            break
+        fi
+    done
+fi
 
 # calculate driver tag
 status=0