Skip to content

Commit

Permalink
amazon linux 2023 support
Browse files Browse the repository at this point in the history
Signed-off-by: shiva kumar <[email protected]>
  • Loading branch information
shivakunv committed Oct 31, 2024
1 parent 3e34c94 commit 400da5c
Show file tree
Hide file tree
Showing 12 changed files with 1,013 additions and 1 deletion.
21 changes: 21 additions & 0 deletions .common-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ trigger-pipeline:
DIST: ubuntu22.04
CVE_UPDATES: "openssl"

# Distribution selector for Amazon Linux 2023: jobs extending this
# template inherit DIST=amzn2023.
.dist-amzn2023:
  variables:
    DIST: "amzn2023"

.dist-rhel8:
variables:
DIST: rhel8
Expand Down Expand Up @@ -162,6 +166,14 @@ trigger-pipeline:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

# Release template for the amzn2023 images; combines the generic release
# job with the driver-version settings from .driver-versions.
.release-amzn2023:
  # Perform for each DRIVER_VERSION
  extends:
    - .release-generic
    - .driver-versions
  rules:
    # Matches every pipeline source except scheduled pipelines.
    - if: '$CI_PIPELINE_SOURCE != "schedule"'

.release-rhel9:
# Perform for each DRIVER_VERSION
extends:
Expand Down Expand Up @@ -199,6 +211,15 @@ trigger-pipeline:
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/driver"

# Staging release for amzn2023: pushes to the project's own GitLab
# container registry under <CI_REGISTRY_IMAGE>/staging/driver.
.release:staging-amzn2023:
  extends:
    - .release-amzn2023
  variables:
    OUT_REGISTRY_USER: '${CI_REGISTRY_USER}'
    OUT_REGISTRY_TOKEN: '${CI_REGISTRY_PASSWORD}'
    OUT_REGISTRY: '${CI_REGISTRY}'
    OUT_IMAGE_NAME: '${CI_REGISTRY_IMAGE}/staging/driver'

.release:staging-rhel9:
extends:
- .release-rhel9
Expand Down
16 changes: 16 additions & 0 deletions .github/workflows/image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ jobs:
dist:
- ubuntu20.04
- ubuntu22.04
- amzn2023
- rhel8
ispr:
- ${{github.event_name == 'pull_request'}}
Expand All @@ -50,6 +51,21 @@ jobs:
- ispr: true
dist: ubuntu20.04
driver: 550.127.05
- ispr: true
dist: ubuntu20.04
driver: 560.35.03
- ispr: true
dist: ubuntu22.04
driver: 560.35.03
- ispr: true
dist: amzn2023
driver: 535.216.01
- ispr: true
dist: amzn2023
driver: 550.127.05
- ispr: true
dist: amzn2023
driver: 560.35.03
fail-fast: false
steps:
- uses: actions/checkout@v4
Expand Down
14 changes: 14 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ include:
rules:
- if: $CI_PIPELINE_SOURCE != "schedule"

# Image build template for the amzn2023 target.
.image-build-amzn2023:
  # Perform for each DRIVER_VERSION
  extends:
    - .driver-versions
    - .image-build-generic
  rules:
    # Matches every pipeline source except scheduled pipelines.
    - if: '$CI_PIPELINE_SOURCE != "schedule"'

# Define the image build targets
.image-build-rhel9:
# Perform for each DRIVER_VERSION
Expand All @@ -69,6 +78,11 @@ image-ubuntu22.04:
- .image-build-ubuntu22.04
- .dist-ubuntu22.04

# Concrete image build job for Amazon Linux 2023: the amzn2023 build
# template plus the DIST selector.
image-amzn2023:
  extends:
    - .image-build-amzn2023
    - .dist-amzn2023

image-rhel8:
extends:
- .image-build
Expand Down
21 changes: 21 additions & 0 deletions .nvidia-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,18 @@ image-rhel8:
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
- !reference [.pipeline-trigger-rules, rules]

# Scan template for the amzn2023 images.
.scan-amzn2023:
  # Repeat for each DRIVER_VERSION
  extends:
    - .driver-versions
    - .scan-generic
  rules:
    # Shared scan rules are evaluated first.
    - !reference [.scan-rules-common, rules]
    # Never run on scheduled pipelines.
    - if: '$CI_PIPELINE_SOURCE == "schedule"'
      when: never
    # Run on merge request pipelines.
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
    # Otherwise fall through to the common pipeline trigger rules.
    - !reference [.pipeline-trigger-rules, rules]

.scan-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -278,6 +290,12 @@ release:ngc-ubuntu22.04:
- .dist-ubuntu22.04
- .driver-versions

# NGC release job for Amazon Linux 2023: the NGC release template with
# DIST=amzn2023 and the driver-version settings.
release:ngc-amzn2023:
  extends:
    - .release:ngc
    - .dist-amzn2023
    - .driver-versions

release:ngc-precompiled-ubuntu22.04:
variables:
DIST: signed_ubuntu22.04
Expand Down Expand Up @@ -439,3 +457,6 @@ sign:ngc-ubuntu-rhel-rhcos:
- SIGN_JOB_NAME: ["rhcos"]
VERSION: ["4.12","4.13","4.14","4.15", "4.16", "4.17"]
DRIVER_VERSION: ["535.216.01", "550.127.05", "565.57.01"]
- SIGN_JOB_NAME: ["amzn"]
VERSION: ["2023"]
DRIVER_VERSION: ["565.57.01"]
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(OUT_DIST)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)

##### Public rules #####
# Distribution names known to the build, kept in alphabetical order.
# Each entry yields a matching push-<dist> name in PUSH_TARGETS.
DISTRIBUTIONS := amzn2023 flatcar fedora36 precompiled_rhcos rhel8 rhel9 signed_ubuntu20.04 signed_ubuntu22.04 sles15.3 ubuntu18.04 ubuntu20.04 ubuntu22.04
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
BASE_FROM := jammy focal
Expand Down
111 changes: 111 additions & 0 deletions amzn2023/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# ---- Build stage: compiles the vgpu-util helper binary with Go ----
FROM nvcr.io/nvidia/cuda:12.6.2-base-amzn2023 AS build

# Target architecture of the build; selects the matching Go tarball below.
ARG TARGETARCH

SHELL ["/bin/bash", "-c"]

# Remove cuda repository to avoid GPG errors
RUN rm -f /etc/yum.repos.d/cuda*

# Toolchain needed to fetch and build the Go sources.
RUN dnf update -y && dnf makecache && \
    dnf install -y \
        gcc \
        gcc-c++ \
        make \
        ca-certificates \
        git \
        tar && \
    dnf clean all && rm -rf /var/cache/yum/*

ENV GOLANG_VERSION=1.23.2

# Download the Go binary matching the target architecture (multi-arch builds).
# -f makes curl fail on HTTP errors instead of piping an error page into tar,
# -L follows redirects, and pipefail propagates a curl failure through the pipe.
RUN set -o pipefail && \
    curl -fsSL https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${TARGETARCH}.tar.gz \
    | tar -C /usr/local -xz

# Resulting search order: /usr/local/go/bin, then /usr/local/bin, then the base PATH.
ENV PATH=/usr/local/go/bin:/usr/local/bin:$PATH

WORKDIR /work

# Build vgpu-util from the upstream repository and leave the binary in /work,
# where the runtime stage copies it from.
# NOTE(review): this clones the default branch head rather than using the
# local build context, so the result is not pinned to this commit — confirm
# that is intended.
RUN git clone --depth 1 https://github.com/NVIDIA/gpu-driver-container driver && \
    cd driver/vgpu/src && \
    go build -o vgpu-util && \
    mv vgpu-util /work

# ---- Runtime stage: the driver container image ----
FROM nvcr.io/nvidia/cuda:12.6.2-base-amzn2023

SHELL ["/bin/bash", "-c"]

# Base URL the driver installer is downloaded from (used by install.sh).
ARG BASE_URL=https://us.download.nvidia.com/tesla
ARG TARGETARCH
ENV TARGETARCH=$TARGETARCH
# Same as TARGETARCH except that amd64 is spelled x86_64.
ENV DRIVER_ARCH=${TARGETARCH/amd64/x86_64}
ARG DRIVER_VERSION
ENV DRIVER_VERSION=$DRIVER_VERSION

# Arg to indicate if driver type is either of passthrough(baremetal) or vgpu
ARG DRIVER_TYPE=passthrough
ENV DRIVER_TYPE=$DRIVER_TYPE
# Driver branch, used below to build the fabric manager / nscq package names.
ARG DRIVER_BRANCH=560
ENV DRIVER_BRANCH=$DRIVER_BRANCH
ARG VGPU_LICENSE_SERVER_TYPE=NLS
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
# The vGPU version compatibility check is disabled by default
# (DISABLE_VGPU_VERSION_CHECK=true); pass false at build time to enable it.
ARG DISABLE_VGPU_VERSION_CHECK=true
ENV DISABLE_VGPU_VERSION_CHECK=$DISABLE_VGPU_VERSION_CHECK
ENV NVIDIA_VISIBLE_DEVICES=void

RUN echo "TARGETARCH=$TARGETARCH"

# Plain local file: COPY is sufficient, none of ADD's extra behavior is needed.
COPY install.sh /tmp/

# Install the NVIDIA repository signing key (verified against a pinned
# SHA-256) and the CUDA repository definition for this architecture.
RUN NVIDIA_GPGKEY_SUM=d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87 && \
    curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/${DRIVER_ARCH}/D42D0685.pub | sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA && \
    echo "$NVIDIA_GPGKEY_SUM /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict - && \
    curl -fsSL -o /etc/yum.repos.d/cuda.repo https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/${DRIVER_ARCH}/cuda-amzn2023.repo

# shadow-utils is installed before install.sh runs.
# NOTE(review): presumably because reposetup calls usermod — confirm.
RUN dnf clean all && dnf makecache && dnf update -y && dnf install -y shadow-utils
# NOTE(review): reposetup appears to rewrite /etc/yum.repos.d/cuda.repo,
# replacing the file fetched above — confirm that is intended.
RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
    curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
    chmod +x /usr/local/bin/donkey

COPY nvidia-driver /usr/local/bin
COPY --from=build /work/vgpu-util /usr/local/bin

# NOTE(review): extract-vmlinux is fetched from the kernel tree's master
# branch, so its content is unpinned and may change between builds.
RUN curl -fsSL -o /usr/local/bin/extract-vmlinux https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux && \
    chmod +x /usr/local/bin/extract-vmlinux

COPY drivers drivers/

# Fetch the installer automatically for passthrough/baremetal types
RUN if [ "$DRIVER_TYPE" != "vgpu" ]; then \
        cd drivers && \
        /tmp/install.sh download_installer; fi

# Check for nvidia-fabric-manager or nvidia-fabricmanager availability and install.
# Skipped for vgpu builds and for arm64. The package name depends on which
# naming scheme the repository offers for this DRIVER_VERSION; the build
# fails when neither name is available.
RUN if [ "$DRIVER_TYPE" != "vgpu" ] && [ "$TARGETARCH" != "arm64" ]; then \
        FABRIC_PACKAGE=""; \
        if dnf list nvidia-fabric-manager-${DRIVER_VERSION}-1 &>/dev/null; then \
            FABRIC_PACKAGE="nvidia-fabric-manager-${DRIVER_VERSION}-1"; \
        elif dnf list nvidia-fabricmanager-${DRIVER_BRANCH}-${DRIVER_VERSION}-1 &>/dev/null; then \
            FABRIC_PACKAGE="nvidia-fabricmanager-${DRIVER_BRANCH}-${DRIVER_VERSION}-1"; \
        else \
            echo "Error: No suitable package found for fabric manager version ${DRIVER_VERSION}"; \
            exit 1; \
        fi; \
        dnf install -y "$FABRIC_PACKAGE" libnvidia-nscq-${DRIVER_BRANCH}-${DRIVER_VERSION}-1; fi

WORKDIR /drivers

# PUBLIC_KEY names a file in the build context; the default "empty" copies
# a placeholder file to kernel/pubkey.x509.
ARG PUBLIC_KEY=empty
COPY ${PUBLIC_KEY} kernel/pubkey.x509

# Remove cuda repository to avoid GPG errors
# clean cache
# Add NGC DL license from the CUDA image
RUN rm -f /etc/yum.repos.d/cuda* && dnf clean all && \
    mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

ENTRYPOINT ["nvidia-driver", "init"]
3 changes: 3 additions & 0 deletions amzn2023/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Amazon Linux 2023 [![build status](https://gitlab.com/nvidia/driver/badges/master/build.svg)](https://gitlab.com/nvidia/driver/commits/master)

See https://github.com/NVIDIA/nvidia-docker/wiki/Driver-containers-(Beta)
1 change: 1 addition & 0 deletions amzn2023/drivers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Folder for downloading vGPU drivers and dependent metadata files
Empty file added amzn2023/empty
Empty file.
58 changes: 58 additions & 0 deletions amzn2023/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/usr/bin/env bash

set -eux

# Download the NVIDIA .run installer for DRIVER_VERSION into the current
# directory and mark it executable.
# Reads TARGETARCH, BASE_URL and DRIVER_VERSION from the environment.
download_installer () {
    # The installer file name uses x86_64 where TARGETARCH says amd64.
    DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && \
    # -L follows redirects; the previous "-l" is curl's --list-only option.
    curl -fsSL -O "${BASE_URL}/${DRIVER_VERSION}/NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" && \
    chmod +x "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run"
}

# Install the packages needed to build and install the driver.
# Only acts when TARGETARCH is amd64; for any other value it does nothing
# and returns success.
dep_install () {
    if [ "$TARGETARCH" = "amd64" ]; then
        dnf update -y && dnf install -y \
            gcc \
            make \
            glibc-devel \
            ca-certificates \
            kmod \
            file \
            elfutils-libelf-devel \
            libglvnd-devel \
            shadow-utils \
            util-linux \
            tar \
            rpm-build \
            dnf-utils \
            pkgconfig && \
        dnf clean all && \
        # Also clear the dnf cache directory; the yum path is kept for
        # parity with the previous cleanup.
        rm -rf /var/cache/dnf/* /var/cache/yum/*
    fi
}

# Write the CUDA repository definition to /etc/yum.repos.d/cuda.repo and
# reassign the "nobody" account to uid 0 / gid 0.
# Only amd64 is supported; any other TARGETARCH prints a message and exits 1.
repo_setup () {
    if [ "$TARGETARCH" = "amd64" ]; then
        # Use DRIVER_ARCH when the caller exported it, otherwise derive it
        # here; a bare $DRIVER_ARCH would abort under `set -u` when unset.
        local driver_arch=${DRIVER_ARCH:-${TARGETARCH/amd64/x86_64}}
        echo "[cuda-amzn2023-x86_64]
name=cuda-amzn2023-x86_64
baseurl=https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/$driver_arch
enabled=1
gpgcheck=1
gpgkey=https://developer.download.nvidia.com/compute/cuda/repos/amzn2023/$driver_arch/D42D0685.pub" > /etc/yum.repos.d/cuda.repo && \
        # -o allows the non-unique uid 0.
        # NOTE(review): the reason for remapping "nobody" is not evident here.
        usermod -o -u 0 -g 0 nobody
    else
        echo "TARGETARCH doesn't match a known arch target"
        exit 1
    fi
}

# Entry point: run the function named by the first argument.
# "${1:-}" keeps a missing argument from tripping `set -u`, so it reaches
# the "Unknown function" branch instead of an unbound-variable abort.
case "${1:-}" in
    reposetup)
        repo_setup
        ;;
    depinstall)
        dep_install
        ;;
    download_installer)
        download_installer
        ;;
    *)
        echo "Unknown function: ${1:-}"
        exit 1
        ;;
esac

Loading

0 comments on commit 400da5c

Please sign in to comment.