Skip to content

Commit e7c82d3

Browse files
nobbs authored and cdesiniotis committed
feat: add initial support for RHEL9
1 parent 0b1dc1c commit e7c82d3

File tree

15 files changed

+1620
-2
lines changed

15 files changed

+1620
-2
lines changed

.common-ci.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,12 @@ trigger-pipeline:
8787
- DRIVER_BRANCH: [525, 535]
8888
KERNEL_FLAVOR: [generic, nvidia, aws, azure]
8989

90+
# Define the driver versions for jobs that can be run in parallel for rhel9
91+
.driver-versions-rhel9:
92+
parallel:
93+
matrix:
94+
- DRIVER_VERSION: [525.147.05, 535.154.05]
95+
9096
# Define the distribution targets
9197
.dist-ubuntu20.04:
9298
variables:
@@ -102,6 +108,10 @@ trigger-pipeline:
102108
variables:
103109
DIST: rhel8
104110

111+
.dist-rhel9:
112+
variables:
113+
DIST: rhel9
114+
105115
.dist-centos7:
106116
variables:
107117
DIST: centos7
@@ -167,6 +177,14 @@ trigger-pipeline:
167177
rules:
168178
- if: $CI_PIPELINE_SOURCE != "schedule"
169179

180+
.release-rhel9:
181+
# Perform for each DRIVER_VERSION
182+
extends:
183+
- .release-generic
184+
- .driver-versions-rhel9
185+
rules:
186+
- if: $CI_PIPELINE_SOURCE != "schedule"
187+
170188
.release:
171189
# Perform for each DRIVER_VERSION
172190
extends:
@@ -196,6 +214,15 @@ trigger-pipeline:
196214
OUT_REGISTRY: "${CI_REGISTRY}"
197215
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/driver"
198216

217+
.release:staging-rhel9:
218+
extends:
219+
- .release-rhel9
220+
variables:
221+
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
222+
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
223+
OUT_REGISTRY: "${CI_REGISTRY}"
224+
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/driver"
225+
199226
# Define an external release step that pushes an image to an external repository.
200227
.release:external:
201228
extends:
@@ -244,6 +271,13 @@ release:staging-rhel8:
244271
needs:
245272
- image-rhel8
246273

274+
release:staging-rhel9:
275+
extends:
276+
- .release:staging-rhel9
277+
- .dist-rhel9
278+
needs:
279+
- image-rhel9
280+
247281
release:staging-centos7:
248282
extends:
249283
- .release:staging

.gitlab-ci.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,16 @@ include:
4949
rules:
5050
- if: $CI_PIPELINE_SOURCE != "schedule"
5151

52+
# Define the image build targets
53+
.image-build-rhel9:
54+
# Perform for each DRIVER_VERSION
55+
extends:
56+
- .driver-versions-rhel9
57+
- .image-build-generic
58+
rules:
59+
- if: $CI_PIPELINE_SOURCE != "schedule"
60+
61+
5262
image-ubuntu20.04:
5363
extends:
5464
- .image-build
@@ -64,6 +74,11 @@ image-rhel8:
6474
- .image-build
6575
- .dist-rhel8
6676

77+
image-rhel9:
78+
extends:
79+
- .image-build-rhel9
80+
- .dist-rhel9
81+
6782
image-centos7:
6883
extends:
6984
- .image-build

.nvidia-ci.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,11 @@ image-rhel8:
115115
- .image-pull
116116
- .dist-rhel8
117117

118+
image-rhel9:
119+
extends:
120+
- .image-pull
121+
- .dist-rhel9
122+
118123
image-centos7:
119124
extends:
120125
- .image-pull
@@ -259,6 +264,22 @@ scan-rhel8-arm64:
259264
needs:
260265
- image-rhel8
261266

267+
scan-rhel9-amd64:
268+
extends:
269+
- .scan
270+
- .dist-rhel9
271+
- .platform-amd64
272+
needs:
273+
- image-rhel9
274+
275+
scan-rhel9-arm64:
276+
extends:
277+
- .scan
278+
- .dist-rhel9
279+
- .platform-arm64
280+
needs:
281+
- image-rhel9
282+
262283
scan-centos7-amd64:
263284
extends:
264285
- .scan

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST)
5454
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
5555

5656
##### Public rules #####
57-
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 centos7 flatcar fedora36 sles15.3 precompiled_rhcos
57+
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 centos7 flatcar fedora36 sles15.3 precompiled_rhcos
5858
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
5959
BASE_FROM := jammy focal
6060
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))

ci/localbuild.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ driver_container_build_rhel()
6464
{
6565
driver_container_build_simple "rhel7"
6666
driver_container_build_simple "rhel8"
67+
driver_container_build_simple "rhel9"
6768
}
6869

6970
list_all_containers()

ci/run.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ latest_rhel_kernel() {
6666
"yum install -y yum-utils &> /dev/null && repoquery kernel-headers \
6767
| cut -d ':' -f 2 \
6868
| tail -n 1"
69-
elif [[ "${1}" -eq 8 ]]; then
69+
elif [[ "${1}" -eq 8 || "${1}" -eq 9 ]]; then
7070
docker run --rm centos:"${1}" /bin/bash -c\
7171
"dnf repoquery -q --latest-limit 1 kernel-headers \
7272
| cut -d ':' -f 2 | head -n 1"
@@ -187,6 +187,7 @@ done
187187

188188
build "rhel7" "${CONTAINER_VERSION}-rhel7" "$(mk_short_version rhel7)" ""
189189
build "rhel8" "${CONTAINER_VERSION}-rhel8" "$(mk_short_version rhel8)" ""
190+
build "rhel9" "${CONTAINER_VERSION}-rhel9" "$(mk_short_version rhel9)" ""
190191

191192
# Add rhcos tags
192193
docker pull "${REGISTRY}:${CONTAINER_VERSION}-rhel8"

rhel9/Dockerfile

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
ARG CUDA_VERSION
2+
FROM nvidia/cuda:${CUDA_VERSION}-base-ubi9 as build
3+
4+
ARG TARGETARCH
5+
6+
SHELL ["/bin/bash", "-c"]
7+
8+
RUN dnf install -y git wget
9+
10+
ENV GOLANG_VERSION=1.22.0
11+
12+
# download appropriate binary based on the target architecture for multi-arch builds
13+
RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \
14+
curl https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz \
15+
| tar -C /usr/local -xz
16+
17+
ENV PATH /usr/local/go/bin:$PATH
18+
19+
WORKDIR /work
20+
21+
RUN git clone https://gitlab.com/nvidia/container-images/driver && \
22+
cd driver/vgpu/src && \
23+
go build -o vgpu-util && \
24+
mv vgpu-util /work
25+
26+
FROM nvidia/cuda:${CUDA_VERSION}-base-ubi9
27+
28+
ARG TARGETARCH
29+
ENV TARGETARCH=$TARGETARCH
30+
31+
SHELL ["/bin/bash", "-c"]
32+
33+
#ARG BASE_URL=http://us.download.nvidia.com/XFree86/Linux-x86_64
34+
ARG BASE_URL=https://us.download.nvidia.com/tesla
35+
ARG DRIVER_VERSION
36+
ENV DRIVER_VERSION=$DRIVER_VERSION
37+
38+
# Arg to indicate if driver type is either of passthrough/baremetal or vgpu
39+
ARG DRIVER_TYPE=passthrough
40+
ENV DRIVER_TYPE=$DRIVER_TYPE
41+
ARG VGPU_LICENSE_SERVER_TYPE=NLS
42+
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
43+
# Disable vGPU version compatibility check by default
44+
ARG DISABLE_VGPU_VERSION_CHECK=true
45+
ENV DISABLE_VGPU_VERSION_CHECK=$DISABLE_VGPU_VERSION_CHECK
46+
# Avoid dependency of container-toolkit for driver container
47+
ENV NVIDIA_VISIBLE_DEVICES=void
48+
49+
ADD install.sh /tmp/
50+
51+
RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
52+
OS_ARCH=${TARGETARCH/amd64/x86_64} && OS_ARCH=${OS_ARCH/arm64/sbsa} && \
53+
curl -fsSL "https://developer.download.nvidia.com/compute/cuda/repos/rhel9/$OS_ARCH/D42D0685.pub" | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
54+
echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -
55+
56+
RUN sh /tmp/install.sh depinstall && \
57+
curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
58+
curl -fsSL -o /usr/local/bin/extract-vmlinux https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux && \
59+
chmod +x /usr/local/bin/donkey /usr/local/bin/extract-vmlinux && \
60+
ln -s /sbin/ldconfig /sbin/ldconfig.real
61+
62+
ADD drivers drivers/
63+
64+
# Fetch the installer automatically for passthrough/baremetal types
65+
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
66+
cd drivers && \
67+
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64} && \
68+
curl -fSsl -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
69+
chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run; fi
70+
71+
# Install fabric-manager packages
72+
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$TARGETARCH" != "arm64" ]; then \
73+
versionArray=(${DRIVER_VERSION//./ }); \
74+
DRIVER_BRANCH=${versionArray[0]}; \
75+
dnf module enable -y nvidia-driver:${DRIVER_BRANCH} && \
76+
dnf install -y nvidia-fabric-manager-${DRIVER_VERSION}-1 libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi
77+
78+
COPY nvidia-driver /usr/local/bin
79+
COPY ocp_dtk_entrypoint /usr/local/bin
80+
COPY common.sh /usr/local/bin
81+
82+
COPY --from=build /work/vgpu-util /usr/local/bin
83+
84+
WORKDIR /drivers
85+
86+
ARG PUBLIC_KEY=empty
87+
COPY ${PUBLIC_KEY} kernel/pubkey.x509
88+
89+
ARG PRIVATE_KEY
90+
ARG KERNEL_VERSION=latest
91+
92+
LABEL io.k8s.display-name="NVIDIA Driver Container"
93+
LABEL name="NVIDIA Driver Container"
94+
LABEL vendor="NVIDIA"
95+
LABEL version="${DRIVER_VERSION}"
96+
LABEL release="N/A"
97+
LABEL summary="Provision the NVIDIA driver through containers"
98+
LABEL description="See summary"
99+
100+
# Add NGC DL license from the CUDA image
101+
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
102+
103+
# Install / upgrade packages here that are required to resolve CVEs
104+
ARG CVE_UPDATES
105+
RUN if [ -n "${CVE_UPDATES}" ]; then \
106+
yum update -y ${CVE_UPDATES} && \
107+
rm -rf /var/cache/yum/*; \
108+
fi
109+
110+
# Remove cuda repository to avoid GPG errors
111+
RUN rm -f /etc/yum.repos.d/cuda.repo
112+
113+
ENTRYPOINT ["nvidia-driver", "init"]

0 commit comments

Comments
 (0)