Skip to content

Commit

Permalink
amazon linux 2023 support
Browse files Browse the repository at this point in the history
Signed-off-by: shiva kumar <[email protected]>
  • Loading branch information
shivakunv committed Oct 29, 2024
1 parent c610352 commit d179a93
Show file tree
Hide file tree
Showing 13 changed files with 1,017 additions and 2 deletions.
27 changes: 27 additions & 0 deletions .common-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ trigger-pipeline:
matrix:
- DRIVER_VERSION: [535.216.01, 550.127.05]

# Define the driver versions for jobs that can be run in parallel for amzn2023
.driver-versions-amzn2023:
parallel:
matrix:
- DRIVER_VERSION: [535.216.01, 560.35.03]

# Define the matrix of precompiled jobs that can be run in parallel for ubuntu22.04
.driver-versions-precompiled-ubuntu22.04:
parallel:
Expand All @@ -105,6 +111,10 @@ trigger-pipeline:
DIST: ubuntu22.04
CVE_UPDATES: "openssl"

.dist-amzn2023:
variables:
DIST: amzn2023

.dist-rhel8:
variables:
DIST: rhel8
Expand Down Expand Up @@ -174,6 +184,14 @@ trigger-pipeline:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

.release-amzn2023:
# Perform for each DRIVER_VERSION
extends:
- .release-generic
- .driver-versions-amzn2023
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

.release-rhel9:
# Perform for each DRIVER_VERSION
extends:
Expand Down Expand Up @@ -211,6 +229,15 @@ trigger-pipeline:
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/driver"

.release:staging-amzn2023:
extends:
- .release-amzn2023
variables:
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/driver"

.release:staging-rhel9:
extends:
- .release-rhel9
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ jobs:
driver:
- 535.216.01
- 550.127.05
- 560.35.03
dist:
- ubuntu20.04
- ubuntu22.04
- amzn2023
- rhel8
ispr:
- ${{github.event_name == 'pull_request'}}
Expand All @@ -49,6 +51,15 @@ jobs:
- ispr: true
dist: ubuntu20.04
driver: 550.127.05
- ispr: true
dist: ubuntu20.04
driver: 560.35.03
- ispr: true
dist: ubuntu22.04
driver: 560.35.03
- ispr: true
dist: amzn2023
driver: 535.216.01
fail-fast: false
steps:
- uses: actions/checkout@v4
Expand Down
14 changes: 14 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ include:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

# Define the image build targets
.image-build-amzn2023:
# Perform for each DRIVER_VERSION
extends:
- .driver-versions-amzn2023
- .image-build-generic
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

# Define the image build targets
.image-build-rhel9:
# Perform for each DRIVER_VERSION
Expand All @@ -69,6 +78,11 @@ image-ubuntu22.04:
- .image-build-ubuntu22.04
- .dist-ubuntu22.04

image-amzn2023:
extends:
- .image-build-amzn2023
- .dist-amzn2023

image-rhel8:
extends:
- .image-build
Expand Down
35 changes: 35 additions & 0 deletions .nvidia-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,12 @@ image-rhel8:
- .image-pull
- .dist-rhel8

image-amzn2023:
extends:
- .image-pull
- .dist-amzn2023
- .driver-versions-amzn2023

# The .scan step forms the base of the image scan operation performed before releasing
# images.
.scan-generic:
Expand Down Expand Up @@ -184,6 +190,18 @@ image-rhel8:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- !reference [.pipeline-trigger-rules, rules]

.scan-amzn2023:
# Repeat for each DRIVER_VERSION
extends:
- .driver-versions-amzn2023
- .scan-generic
rules:
- !reference [.scan-rules-common, rules]
- if: $CI_PIPELINE_SOURCE == "schedule"
when: never
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- !reference [.pipeline-trigger-rules, rules]

.scan-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -229,6 +247,14 @@ scan-ubuntu22.04-arm64:
needs:
- image-ubuntu22.04

scan-amzn2023-amd64:
extends:
- .scan-amzn2023
- .dist-amzn2023
- .platform-amd64
needs:
- image-amzn2023

scan-precompiled-ubuntu22.04-amd64:
variables:
PLATFORM: linux/amd64
Expand Down Expand Up @@ -278,6 +304,12 @@ release:ngc-ubuntu22.04:
- .dist-ubuntu22.04
- .driver-versions-ubuntu22.04

release:ngc-amzn2023:
extends:
- .release:ngc
- .dist-amzn2023
- .driver-versions-amzn2023

release:ngc-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -439,3 +471,6 @@ sign:ngc-ubuntu-rhel-rhcos:
- SIGN_JOB_NAME: ["rhcos"]
VERSION: ["4.12","4.13","4.14","4.15", "4.16", "4.17"]
DRIVER_VERSION: ["535.216.01", "550.127.05"]
- SIGN_JOB_NAME: ["amzn"]
VERSION: ["2023"]
DRIVER_VERSION: ["550.127.05", "560.35.03"]
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)

##### Public rules #####
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 amzn2023 signed_ubuntu20.04 signed_ubuntu22.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
BASE_FROM := jammy focal
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
Expand Down
99 changes: 99 additions & 0 deletions amzn2023/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
FROM nvcr.io/nvidia/cuda:12.6.2-base-amzn2023 AS build

ARG TARGETARCH

SHELL ["/bin/bash", "-c"]

RUN dnf update -y && \
dnf install -y \
gcc \
gcc-c++ \
make \
ca-certificates \
git \
tar && \
dnf clean all

ENV GOLANG_VERSION=1.23.2

# download appropriate binary based on the target architecture for multi-arch builds
RUN curl https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${TARGETARCH}.tar.gz \
| tar -C /usr/local -xz

ENV PATH /usr/local/bin:$PATH
ENV PATH /usr/local/go/bin:$PATH

WORKDIR /work

RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
cd driver/vgpu/src && \
go build -o vgpu-util && \
mv vgpu-util /work

FROM nvcr.io/nvidia/cuda:12.6.2-base-amzn2023

SHELL ["/bin/bash", "-c"]

ARG BASE_URL=https://us.download.nvidia.com/tesla
ARG TARGETARCH
ENV TARGETARCH=$TARGETARCH
ENV DRIVER_ARCH=${TARGETARCH/amd64/x86_64}
ARG DRIVER_VERSION
ENV DRIVER_VERSION=$DRIVER_VERSION

# Arg to indicate if driver type is either of passthrough(baremetal) or vgpu
ARG DRIVER_TYPE=passthrough
ENV DRIVER_TYPE=$DRIVER_TYPE
ARG DRIVER_BRANCH=550
ENV DRIVER_BRANCH=$DRIVER_BRANCH
ARG VGPU_LICENSE_SERVER_TYPE=NLS
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
# Enable vGPU version compability check by default
ARG DISABLE_VGPU_VERSION_CHECK=true
ENV DISABLE_VGPU_VERSION_CHECK=$DISABLE_VGPU_VERSION_CHECK
ENV NVIDIA_VISIBLE_DEVICES=void

RUN echo "TARGETARCH=$TARGETARCH"

ADD install.sh /tmp

RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/${DRIVER_ARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict - && \
curl -fsSL -o /etc/yum.repos.d/cuda-amzn2023.repo https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/${DRIVER_ARCH}/cuda-amzn2023.repo

RUN dnf clean all && dnf update -y && dnf install -y shadow-utils
RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
chmod +x /usr/local/bin/donkey

COPY nvidia-driver /usr/local/bin
COPY --from=build /work/vgpu-util /usr/local/bin

RUN curl -fsSL -o /usr/local/bin/extract-vmlinux https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux && \
chmod +x /usr/local/bin/extract-vmlinux

ADD drivers drivers/

# Fetch the installer automatically for passthrough/baremetal types
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
cd drivers && \
/tmp/install.sh download_installer; fi

RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$TARGETARCH" != "arm64" ]; then \
dnf update -y && \
dnf install -y \
nvidia-fabric-manager-${DRIVER_VERSION}-1 \
libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi

WORKDIR /drivers

ARG PUBLIC_KEY=empty
COPY ${PUBLIC_KEY} kernel/pubkey.x509

RUN dnf clean all

# Add NGC DL license from the CUDA image
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

ENTRYPOINT ["nvidia-driver", "init"]
3 changes: 3 additions & 0 deletions amzn2023/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# AmazonLinux2 [![build status](https://gitlab.com/nvidia/driver/badges/master/build.svg)](https://gitlab.com/nvidia/driver/commits/master)

See https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers-(Beta)
1 change: 1 addition & 0 deletions amzn2023/drivers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Folder for downloading vGPU drivers and dependent metadata files
Empty file added amzn2023/empty
Empty file.
60 changes: 60 additions & 0 deletions amzn2023/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env bash

set -eu

download_installer () {
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && curl -fSsl -O $BASE_URL/$DRIVER_VERSION/NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run && \
chmod +x NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run;
}

dep_install () {
if [ "$TARGETARCH" = "amd64" ]; then
dnf update -y && dnf install -y \
gcc \
make \
glibc-devel \
ca-certificates \
kmod \
file \
elfutils-libelf-devel \
libglvnd-devel \
shadow-utils \
util-linux \
tar \
rpm-build \
dnf-utils \
pkgconfig \
nvidia-container-runtime \
nvidia-container-toolkit \
libnvidia-container-tools \
libnvidia-container1 && \
dnf clean all
fi
}

repo_setup () {
if [ "$TARGETARCH" = "amd64" ]; then
echo "[nvidia]
name=Main Repository
baseurl=https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/$DRIVER_ARCH
gpgcheck=1
enabled=1
gpgkey=https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/$DRIVER_ARCH/D42D0685.pub" > /etc/yum.repos.d/nvidia.repo && \
usermod -o -u 0 -g 0 nobody
else
echo "TARGETARCH doesn't match a known arch target"
exit 1
fi
}

if [ "$1" = "reposetup" ]; then
repo_setup
elif [ "$1" = "depinstall" ]; then
dep_install
elif [ "$1" = "download_installer" ]; then
download_installer
else
echo "Unknown function: $1"
exit 1
fi

Loading

0 comments on commit d179a93

Please sign in to comment.