Skip to content

Commit

Permalink
[Ubuntu24.04] Install the driver in a single step
Browse files Browse the repository at this point in the history
Signed-off-by: Tariq Ibrahim <[email protected]>
  • Loading branch information
tariq1890 committed Nov 22, 2024
1 parent eea136d commit 2a1f2b2
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 145 deletions.
4 changes: 1 addition & 3 deletions ubuntu24.04/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,7 @@ ADD install.sh /tmp
RUN apt-key del 7fa2af80 && OS_ARCH=${TARGETARCH/amd64/x86_64} && OS_ARCH=${OS_ARCH/arm64/sbsa} && \
apt-key adv --fetch-keys "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${OS_ARCH}/3bf863cc.pub"

RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
chmod +x /usr/local/bin/donkey
RUN /tmp/install.sh depinstall

COPY nvidia-driver /usr/local/bin

Expand Down
169 changes: 27 additions & 142 deletions ubuntu24.04/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -120,71 +120,22 @@ _kernel_requires_package() {
return 0
}

# Compile the kernel modules, optionally sign them, and generate a precompiled package for use by the nvidia-installer.
_create_driver_package() (
local pkg_name="nvidia-modules-${KERNEL_VERSION%-*}${PACKAGE_TAG:+-${PACKAGE_TAG}}"
local nvidia_sign_args=""
local nvidia_modeset_sign_args=""
local nvidia_uvm_sign_args=""

trap "make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION}/build clean > /dev/null" EXIT

echo "Compiling NVIDIA driver kernel modules..."
cd /usr/src/nvidia-${DRIVER_VERSION}/${KERNEL_TYPE}

# This is required as currently GPU driver installer doesn't expect headers in x86_64 folder, but only in either default
# or kernel-version folder.
_link_ofa_kernel() (
if _gpu_direct_rdma_enabled; then
ln -s /run/mellanox/drivers/usr/src/ofa_kernel /usr/src/
# If the arch directory exists (MOFED >= 5.5), create a symlink as expected by the GPU driver installer.
# This is required as currently GPU driver installer doesn't expect headers in x86_64 folder, but only in either default or kernel-version folder.
# ls -ltr /usr/src/ofa_kernel/
# lrwxrwxrwx 1 root root 36 Dec 8 20:10 default -> /etc/alternatives/ofa_kernel_headers
# drwxr-xr-x 4 root root 4096 Dec 8 20:14 x86_64
# lrwxrwxrwx 1 root root 44 Dec 9 19:05 5.4.0-90-generic -> /usr/src/ofa_kernel/x86_64/5.4.0-90-generic/
if [[ -d /run/mellanox/drivers/usr/src/ofa_kernel/$DRIVER_ARCH/`uname -r` ]]; then
if [[ ! -e /usr/src/ofa_kernel/`uname -r` ]]; then
ln -s /run/mellanox/drivers/usr/src/ofa_kernel/$DRIVER_ARCH/`uname -r` /usr/src/ofa_kernel/
if [[ -d /run/mellanox/drivers/usr/src/ofa_kernel/$DRIVER_ARCH/$(uname -r) ]]; then
if [[ ! -e /usr/src/ofa_kernel/$(uname -r) ]]; then
ln -s /run/mellanox/drivers/usr/src/ofa_kernel/$DRIVER_ARCH/$(uname -r) /usr/src/ofa_kernel/
fi
fi
fi

export IGNORE_CC_MISMATCH=1
make -s -j ${MAX_THREADS} SYSSRC=/lib/modules/${KERNEL_VERSION}/build nv-linux.o nv-modeset-linux.o > /dev/null

echo "Relinking NVIDIA driver kernel modules..."
rm -f nvidia.ko nvidia-modeset.ko
ld -d -r -o nvidia.ko ./nv-linux.o ./nvidia/nv-kernel.o_binary
ld -d -r -o nvidia-modeset.ko ./nv-modeset-linux.o ./nvidia-modeset/nv-modeset-kernel.o_binary

if [ -n "${PRIVATE_KEY}" ]; then
echo "Signing NVIDIA driver kernel modules..."
donkey get ${PRIVATE_KEY} sh -c "PATH=${PATH}:/usr/src/linux-headers-${KERNEL_VERSION}/scripts && \
sign-file sha512 \$DONKEY_FILE pubkey.x509 nvidia.ko nvidia.ko.sign && \
sign-file sha512 \$DONKEY_FILE pubkey.x509 nvidia-modeset.ko nvidia-modeset.ko.sign && \
sign-file sha512 \$DONKEY_FILE pubkey.x509 nvidia-uvm.ko"
nvidia_sign_args="--linked-module nvidia.ko --signed-module nvidia.ko.sign"
nvidia_modeset_sign_args="--linked-module nvidia-modeset.ko --signed-module nvidia-modeset.ko.sign"
nvidia_uvm_sign_args="--signed"
fi

echo "Building NVIDIA driver package ${pkg_name}..."
../mkprecompiled --pack ${pkg_name} --description ${KERNEL_VERSION} \
--proc-mount-point /lib/modules/${KERNEL_VERSION}/proc \
--driver-version ${DRIVER_VERSION} \
--kernel-interface nv-linux.o \
--linked-module-name nvidia.ko \
--core-object-name nvidia/nv-kernel.o_binary \
${nvidia_sign_args} \
--target-directory . \
--kernel-interface nv-modeset-linux.o \
--linked-module-name nvidia-modeset.ko \
--core-object-name nvidia-modeset/nv-modeset-kernel.o_binary \
${nvidia_modeset_sign_args} \
--target-directory . \
--kernel-module nvidia-uvm.ko \
${nvidia_uvm_sign_args} \
--target-directory .
mkdir -p precompiled
mv ${pkg_name} precompiled
)

_assert_nvswitch_system() {
Expand Down Expand Up @@ -420,18 +371,29 @@ _unload_driver() {
# Install the NVIDIA driver for the target kernel using nvidia-installer.
#
# Globals (read): DRIVER_VERSION, KERNEL_VERSION, KERNEL_TYPE, DRIVER_ARCH,
#                 ACCEPT_LICENSE
#
# NOTE(review): this body contains two nvidia-installer invocations: a
# kernel-module-only install run from /usr/src/nvidia-${DRIVER_VERSION}, and a
# full "one step" install run from the unpacked .run directory. This looks like
# the before/after lines of a change shown together -- confirm which one is
# meant to remain.
_install_driver() {
# Extra flags appended to the installer command line(s) below.
local install_args=()

echo "Installing NVIDIA driver kernel modules..."
cd /usr/src/nvidia-${DRIVER_VERSION}
# Remove the existing "video" module directory for the target kernel, using
# whichever of the two layouts is present (presumably to clear previously
# installed modules -- confirm).
if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then
rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video
else
rm -rf /lib/modules/${KERNEL_VERSION}/video
fi

# Pass --accept-license only when ACCEPT_LICENSE is exactly "yes".
if [ "${ACCEPT_LICENSE}" = "yes" ]; then
install_args+=("--accept-license")
fi
# ${install_args[@]+"${install_args[@]}"} expands to nothing when the array is
# empty and to its individually quoted elements otherwise.
nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+"${install_args[@]}"}

# Install the NVIDIA driver in one step
# The .run file is invoked with -x and the next command changes into a
# directory of the same name, after which the bundled ./nvidia-installer is run
# non-interactively (--silent, --ui=none). All X-related paths are pointed at
# /tmp/null. The .run path is relative, so it is resolved against the current
# working directory at this point.
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
./nvidia-installer --silent \
--ui=none \
--no-drm \
--no-nouveau-check \
--no-nvidia-modprobe \
--no-rpms \
--no-backup \
--no-check-for-alternate-installs \
--no-libglx-indirect \
--no-install-libglvnd \
--x-prefix=/tmp/null \
--x-module-path=/tmp/null \
--x-library-path=/tmp/null \
--x-sysconfig-path=/tmp/null \
-m=${KERNEL_TYPE} \
${install_args[@]+"${install_args[@]}"}
}

# Mount the driver rootfs into the run directory with the exception of sysfs.
Expand Down Expand Up @@ -524,26 +486,6 @@ init() {
_find_vgpu_driver_version || exit 1
fi

# Install the userspace components and copy the kernel module sources.
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
./nvidia-installer --silent \
--no-kernel-module \
--no-nouveau-check \
--no-nvidia-modprobe \
--no-rpms \
--no-backup \
--no-check-for-alternate-installs \
--no-libglx-indirect \
--no-install-libglvnd \
--x-prefix=/tmp/null \
--x-module-path=/tmp/null \
--x-library-path=/tmp/null \
--x-sysconfig-path=/tmp/null && \
mkdir -p /usr/src/nvidia-${DRIVER_VERSION} && \
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-${DRIVER_VERSION} && \
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-${DRIVER_VERSION}/.manifest

echo -e "\n========== NVIDIA Software Installer ==========\n"
echo -e "Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION}\n"

Expand All @@ -565,7 +507,7 @@ init() {
_update_package_cache
_resolve_kernel_version || exit 1
_install_prerequisites
_create_driver_package
_link_ofa_kernel
#_remove_prerequisites
#_cleanup_package_cache
fi
Expand All @@ -583,63 +525,6 @@ init() {
exit 0
}

# Entry point for the "update" command: prepare the driver sources and, when
# the resolved kernel requires it, build a precompiled driver package.
#
# Globals (read): PID_FILE, DRIVER_TYPE, DRIVER_ARCH, DRIVER_VERSION,
#                 KERNEL_TYPE, KERNEL_VERSION
# Outputs:        progress messages on stdout; when another live process holds
#                 the lock on PID_FILE, stdout/stderr are additionally appended
#                 to that process's fd 1 / fd 2.
# Exits:          0 on completion; 1 if the kernel version cannot be resolved
#                 or one of the trapped signals is received.
update() {
    local pid

    # Keep a copy of stderr on fd 3 so it can be restored after probing
    # PID_FILE with errors silenced.
    exec 3>&2
    if exec 2> /dev/null 4< "${PID_FILE}"; then
        # A failed non-blocking lock means some process holds PID_FILE. If the
        # PID recorded in it is alive, tee our output into that process's
        # stdout/stderr as well as our own.
        if ! flock -n 4 && read -r pid <&4 && kill -0 "${pid}"; then
            exec > >(tee -a "/proc/${pid}/fd/1")
            exec 2> >(tee -a "/proc/${pid}/fd/2" >&3)
        else
            exec 2>&3
        fi
        exec 4>&-
    fi
    exec 3>&-

    # The vGPU driver version is chosen dynamically at runtime, so modules are
    # pre-compiled only for non-vGPU driver types.
    if [ "${DRIVER_TYPE}" != "vgpu" ]; then
        # Install the userspace components and copy the kernel module sources,
        # unless /usr/src/nvidia-<version> already contains mkprecompiled.
        if [ ! -e "/usr/src/nvidia-${DRIVER_VERSION}/mkprecompiled" ]; then
            # The final sed keeps the first 8 lines of .manifest and, from
            # line 9 onwards, only entries starting with "kernel" or "LICENSE".
            sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}.run" -x && \
                cd "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}" && \
                ./nvidia-installer --silent \
                    --no-kernel-module \
                    --no-nouveau-check \
                    --no-nvidia-modprobe \
                    --no-rpms \
                    --no-backup \
                    --no-check-for-alternate-installs \
                    --no-libglx-indirect \
                    --no-install-libglvnd \
                    --x-prefix=/tmp/null \
                    --x-module-path=/tmp/null \
                    --x-library-path=/tmp/null \
                    --x-sysconfig-path=/tmp/null && \
                mkdir -p "/usr/src/nvidia-${DRIVER_VERSION}" && \
                mv LICENSE mkprecompiled "${KERNEL_TYPE}" "/usr/src/nvidia-${DRIVER_VERSION}" && \
                sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > "/usr/src/nvidia-${DRIVER_VERSION}/.manifest"
        fi
    fi

    echo -e "\n========== NVIDIA Software Updater ==========\n"
    echo -e "Starting update of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION}\n"

    trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM

    _update_package_cache
    _resolve_kernel_version || exit 1
    _install_prerequisites
    # Only build a package when the helper reports this kernel requires one.
    if _kernel_requires_package; then
        _create_driver_package
    fi
    _remove_prerequisites
    _cleanup_package_cache

    echo "Done"
    exit 0
}

# Wait for MOFED drivers to be loaded and load nvidia-peermem whenever it gets unloaded during MOFED driver updates
reload_nvidia_peermem() {
if [ "$USE_HOST_MOFED" = "true" ]; then
Expand Down

0 comments on commit 2a1f2b2

Please sign in to comment.