Skip to content

Commit

Permalink
ubuntu24.04 precompile image support
Browse files Browse the repository at this point in the history
Signed-off-by: shiva kumar <[email protected]>
  • Loading branch information
tariq1890 authored and shivakunv committed Dec 3, 2024
1 parent eea136d commit 8cec607
Show file tree
Hide file tree
Showing 12 changed files with 356 additions and 53 deletions.
18 changes: 18 additions & 0 deletions .common-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ trigger-pipeline:
- DRIVER_BRANCH: [535, 550]
KERNEL_FLAVOR: [aws, azure, generic, nvidia, oracle]

# Define the matrix of precompiled jobs that can be run in parallel for ubuntu24.04
# Template meant to be pulled in through `extends:`; a job that extends it is
# expanded into one parallel job per DRIVER_BRANCH x KERNEL_FLAVOR combination.
.driver-versions-precompiled-ubuntu24.04:
parallel:
matrix:
# Only driver branch 550 is listed for ubuntu24.04.
- DRIVER_BRANCH: [550]
KERNEL_FLAVOR: [aws, azure, generic, nvidia, oracle]

# Define the distribution targets
.dist-ubuntu20.04:
variables:
Expand Down Expand Up @@ -304,3 +311,14 @@ release:staging-precompiled-ubuntu22.04:
- .release:staging-precompiled
needs:
- image-precompiled-ubuntu22.04

# Precompiled Ubuntu24.04 release
# Staging release of the signed Ubuntu 24.04 precompiled images. The parallel
# matrix comes from .driver-versions-precompiled-ubuntu24.04 and the rest of the
# job definition from .release:staging-precompiled.
release:staging-precompiled-ubuntu24.04:
variables:
DIST: signed_ubuntu24.04
BASE_TARGET: noble
extends:
- .driver-versions-precompiled-ubuntu24.04
- .release:staging-precompiled
# Depends on the image-precompiled-ubuntu24.04 job.
needs:
- image-precompiled-ubuntu24.04
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
holodeck_config: "tests/holodeck.yaml"
holodeck_config: "tests/holodeck_ubuntu22.04.yaml"

- name: Get public dns name
id: get_public_dns_name
Expand Down
184 changes: 143 additions & 41 deletions .github/workflows/precompiled.yaml

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,12 @@ image-precompiled-ubuntu22.04:
extends:
- .driver-versions-precompiled-ubuntu22.04
- .image-build-precompiled

# Build job for the signed Ubuntu 24.04 (noble) precompiled driver images.
# Runs once per entry of the .driver-versions-precompiled-ubuntu24.04 matrix,
# using the .image-build-precompiled template.
image-precompiled-ubuntu24.04:
variables:
DIST: signed_ubuntu24.04
BASE_TARGET: noble
# NOTE(review): presumably the packages to upgrade for CVE fixes during the
# image build - confirm against the consumer of CVE_UPDATES.
CVE_UPDATES: "curl libc6"
extends:
- .driver-versions-precompiled-ubuntu24.04
- .image-build-precompiled
76 changes: 76 additions & 0 deletions .nvidia-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,30 @@ variables:
- !reference [.image-pull-rules, rules]


# Template for the Ubuntu 24.04 image-pull jobs, combining the .driver-versions
# and .image-pull-generic templates.
.image-pull-ubuntu24.04:
# Perform for each DRIVER_VERSION
extends:
- .driver-versions
- .image-pull-generic
rules:
# Never run on scheduled pipelines; otherwise apply the shared pull rules.
- if: $CI_PIPELINE_SOURCE == "schedule"
when: never
- !reference [.image-pull-rules, rules]

# Image job for the signed Ubuntu 24.04 (noble) precompiled driver images in the
# internal pipeline; runs once per entry of the
# .driver-versions-precompiled-ubuntu24.04 matrix via .image-pull-generic.
image-precompiled-ubuntu24.04:
variables:
DIST: signed_ubuntu24.04
BASE_TARGET: noble
PRECOMPILED: "true"
CVE_UPDATES: "curl libc6"
rules:
# The job is always added, but its start is delayed by 30 minutes.
# NOTE(review): presumably this gives the upstream build time to publish the
# images before they are pulled - confirm.
- when: delayed
start_in: 30 minutes
extends:
- .driver-versions-precompiled-ubuntu24.04
- .image-pull-generic


.image-pull-ubuntu22.04:
# Perform for each DRIVER_VERSION
extends:
Expand Down Expand Up @@ -196,6 +220,18 @@ image-rhel8:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- !reference [.pipeline-trigger-rules, rules]

# Template for scanning the signed Ubuntu 24.04 (noble) precompiled images, once
# per entry of the .driver-versions-precompiled-ubuntu24.04 matrix.
.scan-precompiled-ubuntu24.04:
variables:
DIST: signed_ubuntu24.04
BASE_TARGET: noble
PRECOMPILED: "true"
extends:
- .driver-versions-precompiled-ubuntu24.04
- .scan-generic
rules:
# Shared scan rules are evaluated first; if none of them matches, the job
# still runs because of the final `when: always`.
- !reference [.scan-rules-common, rules]
- when: always

.scan-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -306,6 +342,26 @@ release:ngc-ubuntu22.04:
- .dist-ubuntu22.04
- .driver-versions

# TODO will be enabled after QA
# release:ngc-ubuntu24.04:
# extends:
# - .release:ngc
# - .dist-ubuntu24.04
# - .driver-versions

# release:ngc-precompiled-ubuntu24.04:
# variables:
# DIST: signed_ubuntu24.04
# BASE_TARGET: noble
# PRECOMPILED: "true"
# extends:
# - .driver-versions-precompiled-ubuntu24.04
# - .release-generic
# - .release:ngc-variables
# rules:
# # Only run NGC release job on scheduled pipelines
# - if: $CI_PIPELINE_SOURCE == "schedule"

release:ngc-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -433,6 +489,23 @@ release:ngc-rhel8.10:
- 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"'
- ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia

# Sign the precompiled Ubuntu 24.04 (noble) images on NGC, once per entry of
# the .driver-versions-precompiled-ubuntu24.04 matrix.
sign:ngc-precompiled-ubuntu24.04:
  extends:
    - .driver-versions-precompiled-ubuntu24.04
    # Use the 24.04 distribution template; this previously extended
    # .dist-ubuntu22.04, a leftover from the 22.04 job this was copied from.
    # NOTE(review): confirm .dist-ubuntu24.04 is defined in .common-ci.yml.
    - .dist-ubuntu24.04
    - .release-generic
    - .release:ngc-variables
    - .sign:ngc
  variables:
    DIST: signed_ubuntu24.04
    BASE_TARGET: noble
    PRECOMPILED: "true"
  needs:
    # release:ngc-precompiled-ubuntu24.04 is currently commented out in this
    # file (pending QA). A hard `needs` on a job that does not exist makes
    # GitLab reject the pipeline configuration, so the dependency is optional:
    # it is honoured once the release job is enabled and ignored until then.
    - job: release:ngc-precompiled-ubuntu24.04
      optional: true
  rules:
    # Only run NGC release job on scheduled pipelines
    - if: $CI_PIPELINE_SOURCE == "schedule"

sign:ngc-precompiled-ubuntu22.04:
extends:
- .driver-versions-precompiled-ubuntu22.04
Expand All @@ -455,6 +528,9 @@ sign:ngc-ubuntu-rhel-rhcos:
- .sign:ngc
parallel:
matrix:
- SIGN_JOB_NAME: ["ubuntu"]
VERSION: ["24.04"]
DRIVER_VERSION: ["550.127.08"]
- SIGN_JOB_NAME: ["ubuntu"]
VERSION: ["22.04"]
DRIVER_VERSION: ["535.216.03", "550.127.08"]
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 signed_ubuntu24.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
BASE_FROM := jammy focal
BASE_FROM := noble jammy focal
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
DRIVER_PUSH_TARGETS := $(foreach push_target, $(PUSH_TARGETS), $(addprefix $(push_target)-, $(DRIVER_VERSIONS)))
BUILD_TARGETS := $(patsubst %, build-%, $(DISTRIBUTIONS))
Expand Down Expand Up @@ -210,6 +210,7 @@ $(BASE_BUILD_TARGETS):
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg DRIVER_BRANCH="$(DRIVER_BRANCH)" \
--build-arg KERNEL_FLAVOR="$(KERNEL_FLAVOR)" \
--build-arg LTS_KERNEL="$(LTS_KERNEL)" \
--file $(DOCKERFILE) \
$(CURDIR)/base

Expand Down
45 changes: 43 additions & 2 deletions base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,51 @@
# Ubuntu 24.04
# Build stage "noble": base image used to work out the kernel and driver
# versions for Ubuntu 24.04 precompiled builds.
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu24.04 AS noble

SHELL ["/bin/bash", "-c"]

# Build arguments are re-exported as environment variables so that they are
# visible to the generate-ci-config script run further down in this stage.
ARG DRIVER_BRANCH
ARG KERNEL_FLAVOR
ARG LTS_KERNEL
ENV DRIVER_BRANCH=${DRIVER_BRANCH}
ENV KERNEL_FLAVOR=${KERNEL_FLAVOR}
ENV LTS_KERNEL=${LTS_KERNEL}

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/apt/sources.list.d/cuda*

# Make debconf non-interactive so package installation never prompts.
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

ENV NVIDIA_VISIBLE_DEVICES=void

RUN apt-get update && apt-get install -y --no-install-recommends \
apt-utils git curl && \
rm -rf /var/lib/apt/lists/*

# Replace /etc/apt/sources.list (the first echo truncates it) with amd64-only
# noble, noble-updates and noble-security entries, then give the _apt user
# uid/gid 0 (-o allows the non-unique uid).
# NOTE(review): presumably the _apt change lets apt's download step run with
# root privileges inside the container - confirm.
RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble main universe" > /etc/apt/sources.list && \
echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-updates main universe" >> /etc/apt/sources.list && \
echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ noble-security main universe" >> /etc/apt/sources.list && \
echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-updates main restricted" >> /etc/apt/sources.list && \
echo "deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu noble-security main restricted" >> /etc/apt/sources.list && \
usermod -o -u 0 -g 0 _apt

COPY generate-ci-config /usr/local/bin/generate-ci-config

# generate-ci-config is executed at image build time, so its output is baked
# into the image.
RUN chmod +x /usr/local/bin/generate-ci-config && \
generate-ci-config

# The container itself only sleeps.
ENTRYPOINT ["/usr/bin/sleep","1000"]

# Ubuntu 22.04
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 as jammy
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04 AS jammy

SHELL ["/bin/bash", "-c"]

ARG DRIVER_BRANCH
ARG KERNEL_FLAVOR
ARG LTS_KERNEL
ENV DRIVER_BRANCH=${DRIVER_BRANCH}
ENV KERNEL_FLAVOR=${KERNEL_FLAVOR}
ENV LTS_KERNEL=${LTS_KERNEL}

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/apt/sources.list.d/cuda*
Expand Down Expand Up @@ -34,14 +73,16 @@ RUN chmod +x /usr/local/bin/generate-ci-config && \
ENTRYPOINT ["/usr/bin/sleep","1000"]

# Ubuntu 20.04
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu20.04 as focal
FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu20.04 AS focal

SHELL ["/bin/bash", "-c"]

ARG DRIVER_BRANCH
ARG KERNEL_FLAVOR
ARG LTS_KERNEL
ENV DRIVER_BRANCH=${DRIVER_BRANCH}
ENV KERNEL_FLAVOR=${KERNEL_FLAVOR}
ENV LTS_KERNEL=${LTS_KERNEL}

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/apt/sources.list.d/cuda*
Expand Down
3 changes: 2 additions & 1 deletion base/generate-ci-config
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ SUPPORTED_KERNELS=$(apt-cache search linux-objects-nvidia-${DRIVER_BRANCH}-serve
DRIVER_VERSION=$(apt-cache show nvidia-utils-${DRIVER_BRANCH}-server |grep Version |awk '{print $2}' | cut -d'-' -f1 | head -n 1)

# Latest supported kernel
SK=$(echo $SUPPORTED_KERNELS | awk '{print $NF}')
# only consider suffix -KERNEL_FLAVOR not KERNEL_FLAVOR-* (e.g. KERNEL_FLAVOR-lowlatency)
SK=$(echo "$SUPPORTED_KERNELS" | awk -v f="$KERNEL_FLAVOR" '$0 ~ "-" f "$" {last=$0} END{print last}')

# Write to file
echo "export KERNEL_VERSION=$SK DRIVER_VERSION=$DRIVER_VERSION DRIVER_VERSIONS=$DRIVER_VERSION" > /var/kernel_version.txt
Expand Down
File renamed without changes.
32 changes: 32 additions & 0 deletions tests/holodeck_ubuntu24.04.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Holodeck environment definition for the Ubuntu 24.04 end-to-end tests:
# a single AWS instance with containerd and a kubeadm-installed Kubernetes.
apiVersion: holodeck.nvidia.com/v1alpha1
kind: Environment
metadata:
# NOTE(review): HOLODECK_NAME and HOLODECK_PRIVATE_KEY look like placeholders
# substituted by the CI workflow - confirm.
name: HOLODECK_NAME
description: "end-to-end test infrastructure"
spec:
provider: aws
auth:
keyName: cnt-ci
privateKey: HOLODECK_PRIVATE_KEY
instance:
type: g4dn.xlarge
region: us-west-1
# Source address ranges allowed to reach the instance.
ingressIpRanges:
- 18.190.12.32/32
- 3.143.46.93/32
- 52.15.119.136/32
- 35.155.108.162/32
- 35.162.190.51/32
- 54.201.61.24/32
image:
architecture: amd64
# NOTE(review): presumably an Ubuntu 24.04 AMI in us-west-1 - confirm.
imageId: ami-0da424eb883458071
containerRuntime:
install: true
name: containerd
version: 1.7.22
kubernetes:
install: true
installer: kubeadm
version: v1.30.0
crictlVersion: v1.30.0
11 changes: 6 additions & 5 deletions tests/scripts/ci-precompiled-helpers.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
get_kernel_versions_to_test() {
if [[ "$#" -ne 4 ]]; then
echo " Error:$0 must be called with BASE_TARGET DRIVER_BRANCHES DRIVER_BRANCHES DIST" >&2
echo " Error:$0 must be called with BASE_TARGET KERNEL_FLAVORS DRIVER_BRANCHES DIST" >&2
exit 1
fi

Expand All @@ -11,10 +11,6 @@ get_kernel_versions_to_test() {

kernel_versions=()
for kernel_flavor in "${KERNEL_FLAVORS[@]}"; do
# FIXME -- remove if condition, once azure kernel upgrade starts working
if [[ "$kernel_flavor" == "azure" ]]; then
continue
fi
for DRIVER_BRANCH in "${DRIVER_BRANCHES[@]}"; do
source ./tests/scripts/findkernelversion.sh "$BASE_TARGET" "${kernel_flavor}" "$DRIVER_BRANCH" "$DIST" >&2
if [[ "$should_continue" == true ]]; then
Expand All @@ -26,5 +22,10 @@ get_kernel_versions_to_test() {
kernel_versions+=("$KERNEL_VERSION")
fi
done
# Remove duplicates
kernel_versions=($(printf "%s\n" "${kernel_versions[@]}" | sort -u))
for i in "${!kernel_versions[@]}"; do
kernel_versions[$i]="${kernel_versions[$i]}-$DIST"
done
echo "${kernel_versions[@]}"
}
26 changes: 24 additions & 2 deletions tests/scripts/findkernelversion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,30 @@ chmod a+x bin/regctl
export PATH=$(pwd)/bin:${PATH}

# calculate kernel version of latest image
regctl image get-file ghcr.io/nvidia/driver:base-${BASE_TARGET}-${KERNEL_FLAVOR}-${DRIVER_BRANCH} /var/kernel_version.txt ./kernel_version.txt
export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt)
# Try to read /var/kernel_version.txt out of the published base image. Errors
# are discarded and tolerated (`2>/dev/null || true`) so that a missing image
# falls through to the artifact lookup below instead of aborting.
regctl image get-file ghcr.io/nvidia/driver:base-${BASE_TARGET}-${KERNEL_FLAVOR}-${DRIVER_BRANCH} /var/kernel_version.txt ./kernel_version.txt 2>/dev/null || true
if [[ -f ./kernel_version.txt && -s ./kernel_version.txt ]]; then
# File exists and is not empty
export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt)
rm -f kernel_version.txt
else
# Fallback: look for a kernel-version artifact uploaded by the current GitHub
# Actions run (GITHUB_REPOSITORY / GITHUB_RUN_ID).
# Define variables for artifact pattern
prefix="kernel-version-${DRIVER_BRANCH}-${LTS_KERNEL}"
# NOTE(review): this uses lowercase kernel_flavor while the regctl call above
# uses KERNEL_FLAVOR; presumably it relies on the loop variable of the script
# that sources this file - verify against the caller.
suffix="${kernel_flavor}-${DIST}"
artifacts=$(gh api -X GET /repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/artifacts --jq '.artifacts[].name')
# Use a loop or a pattern to find the matching artifact dynamically
for artifact in $artifacts; do
# TODO remove this check once nvidia is available
# currently for ubuntu24.04 kernel_flavor = nvidia-lowlatency
if [[ $artifact == $prefix*-$suffix ]]; then
# The first matching artifact wins: download it, unpack the tarball it
# contains and export KERNEL_VERSION from the extracted file.
gh run download --name "$artifact" --dir ./
tar -xf $artifact.tar
rm -f $artifact.tar
export $(grep -oP 'KERNEL_VERSION=[^ ]+' ./kernel_version.txt)
rm -f kernel_version.txt
break
fi
done
# If no artifact matched, KERNEL_VERSION is not set by this block.
fi

# calculate driver tag
status=0
Expand Down

0 comments on commit 8cec607

Please sign in to comment.