Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PERSIST_DRIVER update #43

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ubuntu22.04/clear_folders.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#! /bin/bash
#
# Remove everything the nvidia-driver container persists on the host when
# PERSIST_DRIVER is enabled: the version cache file, the overlay upper
# directories (lib, usr) and the overlay work directories.
#
# Environment:
#   INSTALL_DIR  root of the persisted driver install
#                (default: /opt/nvidia/driver, the same default nvidia-driver uses)

install_dir="${INSTALL_DIR:-/opt/nvidia/driver}"

for entry in cache driver-workdir lib lib64-workdir usr usr-bin-workdir; do
    # -f keeps the cleanup re-runnable when an entry is already gone;
    # ${install_dir:?} aborts instead of ever expanding to "/<entry>".
    sudo rm -rf -- "${install_dir:?}/${entry}"
done
130 changes: 112 additions & 18 deletions ubuntu22.04/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ NVIDIA_UVM_MODULE_PARAMS=()
# Per-module parameter lists; they are passed as arguments when the
# corresponding kernel module is loaded in _load_driver.
NVIDIA_MODESET_MODULE_PARAMS=()
NVIDIA_PEERMEM_MODULE_PARAMS=()
# Mandatory: the script aborts with this message when TARGETARCH is unset or empty.
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
# Root of the persisted driver install. It holds the overlay upper/work
# directories and the "cache" file recording the installed versions.
INSTALL_DIR=${INSTALL_DIR:-"/opt/nvidia/driver"}
# When "true", init runs restart_check and the kernel modules are loaded
# with insmod from INSTALL_DIR instead of modprobe. Defaults to "false".
PERSIST_DRIVER="${PERSIST_DRIVER:-false}"

# Put the persisted binaries first on PATH.
export PATH="${INSTALL_DIR}/usr/bin:${PATH}"

# Selects which kernel module flavour is handed to nvidia-installer via -m:
# "kernel-open" when open kernel modules are enabled, "kernel" otherwise.
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
Expand Down Expand Up @@ -68,19 +72,27 @@ _install_prerequisites() (

trap "rm -rf ${tmp_dir}" EXIT
cd ${tmp_dir}

if [ "${PERSIST_DRIVER}" = false ]; then
rm -rf /lib/modules/${KERNEL_VERSION}
fi

rm -rf /lib/modules/${KERNEL_VERSION}
#rm -rf /lib/modules/${KERNEL_VERSION}
mkdir -p /lib/modules/${KERNEL_VERSION}/proc

echo "Installing Linux kernel headers..."
apt-get -qq install --no-install-recommends linux-headers-${KERNEL_VERSION} > /dev/null

echo "Installing Linux kernel module files..."
apt-get -qq download linux-image-${KERNEL_VERSION} && dpkg -x linux-image*.deb .
{ apt-get -qq download linux-modules-${KERNEL_VERSION} && dpkg -x linux-modules*.deb . || true; } 2> /dev/null
mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION}
mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION}
depmod ${KERNEL_VERSION}
if [ "${PERSIST_DRIVER}" = false ]; then
mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION}
mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION}
depmod ${KERNEL_VERSION}
fi
#mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION}
#mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION}
#depmod ${KERNEL_VERSION}

echo "Generating Linux kernel version string..."

Expand Down Expand Up @@ -286,19 +298,35 @@ _load_driver() {
fi

echo "Loading ipmi and i2c_core kernel modules..."
modprobe -a i2c_core ipmi_msghandler ipmi_devintf
if [ "${PERSIST_DRIVER}" = false ]; then
modprobe -a i2c_core ipmi_msghandler ipmi_devintf
fi

echo "Loading NVIDIA driver kernel modules..."
set -o xtrace +o nounset
modprobe nvidia "${NVIDIA_MODULE_PARAMS[@]}"
modprobe nvidia-uvm "${NVIDIA_UVM_MODULE_PARAMS[@]}"
modprobe nvidia-modeset "${NVIDIA_MODESET_MODULE_PARAMS[@]}"

if [ "${PERSIST_DRIVER}" = true ]; then
if [[ ! -e /sys/module/nvidia ]]; then
insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia.ko" "${NVIDIA_MODULE_PARAMS[@]}"
insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-uvm.ko" "${NVIDIA_UVM_MODULE_PARAMS[@]}"
insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-modeset.ko" "${NVIDIA_MODESET_MODULE_PARAMS[@]}"
fi
else
modprobe nvidia "${NVIDIA_MODULE_PARAMS[@]}"
modprobe nvidia-uvm "${NVIDIA_UVM_MODULE_PARAMS[@]}"
modprobe nvidia-modeset "${NVIDIA_MODESET_MODULE_PARAMS[@]}"
fi

set +o xtrace -o nounset

if _gpu_direct_rdma_enabled; then
echo "Loading NVIDIA Peer Memory kernel module..."
set -o xtrace +o nounset
modprobe nvidia-peermem "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}"
if [ "${PERSIST_DRIVER}" = true ]; then
insmod ${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-peermem.ko "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}"
else
modprobe nvidia-peermem "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}"
fi
set +o xtrace -o nounset
fi

Expand All @@ -315,8 +343,11 @@ _load_driver() {
fi

echo "Starting nvidia-gridd.."
LD_LIBRARY_PATH=/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd

if [ "${PERSIST_DRIVER}" = true ]; then
LD_LIBRARY_PATH=${INSTALL_DIR}/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd
else
LD_LIBRARY_PATH=/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd
fi
# Start virtual topology daemon
_start_vgpu_topology_daemon
fi
Expand Down Expand Up @@ -422,15 +453,26 @@ _install_driver() {

echo "Installing NVIDIA driver kernel modules..."
cd /usr/src/nvidia-${DRIVER_VERSION}
if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then
rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video
else
rm -rf /lib/modules/${KERNEL_VERSION}/video

if [ "${PERSIST_DRIVER}" = false ]; then
if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then
rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video
else
rm -rf /lib/modules/${KERNEL_VERSION}/video
fi
fi

if [ "${ACCEPT_LICENSE}" = "yes" ]; then
install_args+=("--accept-license")
fi

if [ "${PERSIST_DRIVER}" = true ]; then
mkdir -p /lib/modules/`uname -r`/kernel/drivers/video
mkdir -p ${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video
mkdir -p ${INSTALL_DIR}/driver-workdir
mount -t overlay -o lowerdir=/lib/modules/`uname -r`/kernel/drivers/video,upperdir=${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video,workdir=${INSTALL_DIR}/driver-workdir none /lib/modules/`uname -r`/kernel/drivers/video
fi

nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+"${install_args[@]}"}
}

Expand Down Expand Up @@ -519,11 +561,49 @@ _start_vgpu_topology_daemon() {
nvidia-topologyd
}

# Set up the persisted-driver overlays and, when the cached install matches
# the requested driver and kernel versions, load the driver without
# reinstalling it.
#
# Globals read:
#   INSTALL_DIR     root of the persisted install (overlay upper/work dirs, cache file)
#   DRIVER_VERSION  driver version requested for this container
#   KERNEL_VERSION  target kernel version (falls back to `uname -r` when empty)
#   DRIVER_ARCH     multiarch prefix of the library directory (defaults to x86_64)
#   PATH            gets ${INSTALL_DIR}/usr/bin prepended if not already present
#
# Returns 0 when no matching install is cached, with the overlays mounted so
# the caller can continue with a regular install. When a matching install is
# cached this function never returns: it loads the driver and blocks until a
# termination signal arrives, then exits the script.
#
# TODO: verify the call from init works end to end.
# TODO: revisit how the kernel version is checked.
restart_check() {
    local arch="${DRIVER_ARCH:-x86_64}"
    local lib_dir="/usr/lib/${arch}-linux-gnu"
    local cache_file="${INSTALL_DIR}/cache"
    local kernel
    local reuse=false
    local sleep_pid

    kernel="${KERNEL_VERSION:-$(uname -r)}"

    # Avoid stacking a duplicate entry when PATH was already extended.
    case ":${PATH}:" in
        *":${INSTALL_DIR}/usr/bin:"*) ;;
        *) export PATH="${INSTALL_DIR}/usr/bin:${PATH}" ;;
    esac

    # The cache is written as exact KEY=VALUE lines under INSTALL_DIR, so
    # compare whole lines as fixed strings. A regex/substring match would
    # accept "535.10" against a cached "535.104.05", and an empty version
    # would match anything.
    if [[ -f "${cache_file}" ]] \
        && grep -Fxq -- "DRIVER_VERSION=${DRIVER_VERSION}" "${cache_file}" \
        && grep -Fxq -- "KERNEL_VERSION=${kernel}" "${cache_file}"; then
        reuse=true
        echo "Container install exists. Re-using the installation and loading the driver"
    fi

    # Both paths need the same overlays. mkdir -p is a no-op when the
    # directories survive from a previous run.
    mkdir -p "${INSTALL_DIR}${lib_dir}" "${INSTALL_DIR}/lib64-workdir" \
        "${INSTALL_DIR}/usr/bin" "${INSTALL_DIR}/usr-bin-workdir"

    mount -t overlay \
        -o "lowerdir=${lib_dir},upperdir=${INSTALL_DIR}${lib_dir},workdir=${INSTALL_DIR}/lib64-workdir" \
        none "${lib_dir}"
    mount -t overlay \
        -o "lowerdir=/usr/bin,upperdir=${INSTALL_DIR}/usr/bin,workdir=${INSTALL_DIR}/usr-bin-workdir" \
        none /usr/bin

    if [[ "${reuse}" != true ]]; then
        return 0
    fi

    # Same failure handling as the regular install path in init.
    _load_driver || exit 1

    echo "Done, now waiting for signal"
    sleep infinity &
    sleep_pid=$!
    # Double quotes on purpose: the sleep PID is baked into the handler now.
    trap "echo 'Caught signal'; _shutdown && { kill ${sleep_pid}; exit 0; }" HUP INT QUIT PIPE TERM
    trap - EXIT
    while true; do wait "${sleep_pid}" || continue; done
    exit 0
}

init() {
if [ "${DRIVER_TYPE}" = "vgpu" ]; then
_find_vgpu_driver_version || exit 1
fi

if [ "${PERSIST_DRIVER}" = true ]; then
restart_check
fi

# Install the userspace components and copy the kernel module sources.
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
Expand Down Expand Up @@ -566,15 +646,17 @@ init() {
_resolve_kernel_version || exit 1
_install_prerequisites
_create_driver_package
#_remove_prerequisites
#_cleanup_package_cache
fi

_install_driver
_load_driver || exit 1
_mount_rootfs
_write_kernel_update_hook

echo "DRIVER_VERSION=${DRIVER_VERSION}" > ${INSTALL_DIR}/cache
echo "KERNEL_VERSION=${KERNEL_VERSION}" >> ${INSTALL_DIR}/cache
# todo: add a checksum for kernel driver parameters as well

echo "Done, now waiting for signal"
sleep infinity &
trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM
Expand All @@ -596,6 +678,15 @@ update() {
fi
exec 3>&-

mkdir -p ${INSTALL_DIR}/usr/lib/x86_64-linux-gnu
mkdir -p ${INSTALL_DIR}/lib64-workdir

mkdir -p ${INSTALL_DIR}/usr/bin
mkdir -p ${INSTALL_DIR}/usr-bin-workdir

mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=${INSTALL_DIR}/usr/lib/x86_64-linux-gnu,workdir=${INSTALL_DIR}/lib64-workdir none /usr/lib/x86_64-linux-gnu
mount -t overlay -o lowerdir=/usr/bin,upperdir=${INSTALL_DIR}/usr/bin,workdir=${INSTALL_DIR}/usr-bin-workdir none /usr/bin

# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
# only non-vgpu driver types
if [ "${DRIVER_TYPE}" != "vgpu" ]; then
Expand Down Expand Up @@ -636,6 +727,9 @@ update() {
_remove_prerequisites
_cleanup_package_cache

echo "DRIVER_VERSION=${DRIVER_VERSION}" > ${INSTALL_DIR}/cache
echo "KERNEL_VERSION=${KERNEL_VERSION}" >> ${INSTALL_DIR}/cache

echo "Done"
exit 0
}
Expand Down