diff --git a/ubuntu22.04/clear_folders.sh b/ubuntu22.04/clear_folders.sh
new file mode 100644
index 00000000..0e36984e
--- /dev/null
+++ b/ubuntu22.04/clear_folders.sh
@@ -0,0 +1,8 @@
+#! /bin/bash
+
+sudo rm -r /opt/nvidia/driver/cache
+sudo rm -r /opt/nvidia/driver/driver-workdir
+sudo rm -r /opt/nvidia/driver/lib
+sudo rm -r /opt/nvidia/driver/lib64-workdir
+sudo rm -r /opt/nvidia/driver/usr
+sudo rm -r /opt/nvidia/driver/usr-bin-workdir
diff --git a/ubuntu22.04/nvidia-driver b/ubuntu22.04/nvidia-driver
index aedeeea2..cd5c02f1 100755
--- a/ubuntu22.04/nvidia-driver
+++ b/ubuntu22.04/nvidia-driver
@@ -15,6 +15,10 @@ NVIDIA_UVM_MODULE_PARAMS=()
 NVIDIA_MODESET_MODULE_PARAMS=()
 NVIDIA_PEERMEM_MODULE_PARAMS=()
 TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
+INSTALL_DIR=${INSTALL_DIR:-"/opt/nvidia/driver"}
+PERSIST_DRIVER="${PERSIST_DRIVER:-false}"
+
+export PATH="${INSTALL_DIR}/usr/bin:${PATH}"
 
 OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
 [[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
@@ -68,8 +72,10 @@ _install_prerequisites() (
 
     trap "rm -rf ${tmp_dir}" EXIT
     cd ${tmp_dir}
 
-    rm -rf /lib/modules/${KERNEL_VERSION}
+    if [ "${PERSIST_DRIVER}" = false ]; then
+        rm -rf /lib/modules/${KERNEL_VERSION}
+    fi
     mkdir -p /lib/modules/${KERNEL_VERSION}/proc
 
     echo "Installing Linux kernel headers..."
@@ -77,10 +83,12 @@ _install_prerequisites() (
 
     echo "Installing Linux kernel module files..."
     apt-get -qq download linux-image-${KERNEL_VERSION} && dpkg -x linux-image*.deb .
     { apt-get -qq download linux-modules-${KERNEL_VERSION} && dpkg -x linux-modules*.deb . || true; } 2> /dev/null
-    mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION}
-    mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION}
-    depmod ${KERNEL_VERSION}
+    if [ "${PERSIST_DRIVER}" = false ]; then
+        mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION}
+        mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION}
+        depmod ${KERNEL_VERSION}
+    fi
 
     echo "Generating Linux kernel version string..."
 
@@ -286,19 +294,35 @@ _load_driver() {
     fi
 
     echo "Loading ipmi and i2c_core kernel modules..."
-    modprobe -a i2c_core ipmi_msghandler ipmi_devintf
+    if [ "${PERSIST_DRIVER}" = false ]; then
+        modprobe -a i2c_core ipmi_msghandler ipmi_devintf
+    fi
 
     echo "Loading NVIDIA driver kernel modules..."
     set -o xtrace +o nounset
-    modprobe nvidia "${NVIDIA_MODULE_PARAMS[@]}"
-    modprobe nvidia-uvm "${NVIDIA_UVM_MODULE_PARAMS[@]}"
-    modprobe nvidia-modeset "${NVIDIA_MODESET_MODULE_PARAMS[@]}"
+
+    if [ "${PERSIST_DRIVER}" = true ]; then
+        if [[ ! -e /sys/module/nvidia ]]; then
+            insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia.ko" "${NVIDIA_MODULE_PARAMS[@]}"
+            insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-uvm.ko" "${NVIDIA_UVM_MODULE_PARAMS[@]}"
+            insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-modeset.ko" "${NVIDIA_MODESET_MODULE_PARAMS[@]}"
+        fi
+    else
+        modprobe nvidia "${NVIDIA_MODULE_PARAMS[@]}"
+        modprobe nvidia-uvm "${NVIDIA_UVM_MODULE_PARAMS[@]}"
+        modprobe nvidia-modeset "${NVIDIA_MODESET_MODULE_PARAMS[@]}"
+    fi
+
     set +o xtrace -o nounset
 
     if _gpu_direct_rdma_enabled; then
         echo "Loading NVIDIA Peer Memory kernel module..."
         set -o xtrace +o nounset
-        modprobe nvidia-peermem "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}"
+        if [ "${PERSIST_DRIVER}" = true ]; then
+            insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-peermem.ko" "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}"
+        else
+            modprobe nvidia-peermem "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}"
+        fi
         set +o xtrace -o nounset
     fi
 
@@ -315,8 +339,11 @@ _load_driver() {
         fi
 
         echo "Starting nvidia-gridd.."
-        LD_LIBRARY_PATH=/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd
-
+        if [ "${PERSIST_DRIVER}" = true ]; then
+            LD_LIBRARY_PATH=${INSTALL_DIR}/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd
+        else
+            LD_LIBRARY_PATH=/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd
+        fi
         # Start virtual topology daemon
         _start_vgpu_topology_daemon
     fi
@@ -422,15 +449,26 @@ _install_driver() {
 
     echo "Installing NVIDIA driver kernel modules..."
     cd /usr/src/nvidia-${DRIVER_VERSION}
-    if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then
-        rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video
-    else
-        rm -rf /lib/modules/${KERNEL_VERSION}/video
+
+    if [ "${PERSIST_DRIVER}" = false ]; then
+        if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then
+            rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video
+        else
+            rm -rf /lib/modules/${KERNEL_VERSION}/video
+        fi
     fi
 
     if [ "${ACCEPT_LICENSE}" = "yes" ]; then
         install_args+=("--accept-license")
     fi
 
+    if [ "${PERSIST_DRIVER}" = true ]; then
+        # Overlay the module directory so that files written there end up in
+        # the upper directory under ${INSTALL_DIR}.
+        local module_dir="/lib/modules/$(uname -r)/kernel/drivers/video"
+        mkdir -p "${module_dir}" "${INSTALL_DIR}${module_dir}" "${INSTALL_DIR}/driver-workdir"
+        mount -t overlay -o "lowerdir=${module_dir},upperdir=${INSTALL_DIR}${module_dir},workdir=${INSTALL_DIR}/driver-workdir" none "${module_dir}"
+    fi
+
     nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+"${install_args[@]}"}
 }
@@ -519,11 +557,65 @@ _start_vgpu_topology_daemon() {
     nvidia-topologyd
 }
 
+# Overlay-mount the persistent userspace directories under ${INSTALL_DIR} on
+# top of the system library and binary directories, creating them if needed.
+_mount_persist_overlays() {
+    local lib_dir="/usr/lib/${DRIVER_ARCH}-linux-gnu"
+
+    mkdir -p "${INSTALL_DIR}${lib_dir}" "${INSTALL_DIR}/lib64-workdir"
+    mkdir -p "${INSTALL_DIR}/usr/bin" "${INSTALL_DIR}/usr-bin-workdir"
+
+    mount -t overlay -o "lowerdir=${lib_dir},upperdir=${INSTALL_DIR}${lib_dir},workdir=${INSTALL_DIR}/lib64-workdir" none "${lib_dir}"
+    mount -t overlay -o "lowerdir=/usr/bin,upperdir=${INSTALL_DIR}/usr/bin,workdir=${INSTALL_DIR}/usr-bin-workdir" none /usr/bin
+}
+
+# Record the installed driver and kernel versions so that restart_check can
+# recognise this installation on the next start.
+_write_install_cache() {
+    echo "DRIVER_VERSION=${DRIVER_VERSION}" > "${INSTALL_DIR}/cache"
+    echo "KERNEL_VERSION=${KERNEL_VERSION}" >> "${INSTALL_DIR}/cache"
+    # TODO: add a checksum for kernel driver parameters as well
+}
+
+# Prepare the persistent overlay mounts and, when the cache file records the
+# current driver and kernel versions, re-use that installation: load the
+# driver and wait for a signal instead of returning to init().
+restart_check() {
+    # TODO: verify the call from init() and rework the kernel version check.
+    local cache_file="${INSTALL_DIR}/cache"
+
+    _mount_persist_overlays
+
+    # Compare whole lines literally so that "." in a version is not treated as
+    # a wildcard and one version cannot match as a substring of another.
+    if [ ! -f "${cache_file}" ] ||
+       ! grep -qxF "DRIVER_VERSION=${DRIVER_VERSION}" "${cache_file}" ||
+       ! grep -qxF "KERNEL_VERSION=${KERNEL_VERSION}" "${cache_file}"; then
+        return 0
+    fi
+
+    echo "Container install exists. Re-using the installation and loading the driver"
+    # NOTE(review): init() calls _mount_rootfs and _write_kernel_update_hook
+    # before waiting for a signal; confirm whether this path needs them too.
+    _load_driver
+
+    echo "Done, now waiting for signal"
+    sleep infinity &
+    trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM
+    trap - EXIT
+    while true; do wait $! || continue; done
+    exit 0
+}
+
 init() {
     if [ "${DRIVER_TYPE}" = "vgpu" ]; then
         _find_vgpu_driver_version || exit 1
     fi
 
+    if [ "${PERSIST_DRIVER}" = true ]; then
+        restart_check
+    fi
+
     # Install the userspace components and copy the kernel module sources.
     sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
         cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
@@ -566,8 +658,6 @@ init() {
         _resolve_kernel_version || exit 1
         _install_prerequisites
         _create_driver_package
-        #_remove_prerequisites
-        #_cleanup_package_cache
     fi
 
     _install_driver
@@ -575,6 +665,10 @@ init() {
     _mount_rootfs
     _write_kernel_update_hook
 
+    if [ "${PERSIST_DRIVER}" = true ]; then
+        _write_install_cache
+    fi
+
     echo "Done, now waiting for signal"
     sleep infinity &
     trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM
@@ -596,6 +690,10 @@ update() {
     fi
     exec 3>&-
 
+    if [ "${PERSIST_DRIVER}" = true ]; then
+        _mount_persist_overlays
+    fi
+
     # vgpu driver version is choosen dynamically during runtime, so pre-compile modules for
     # only non-vgpu driver types
     if [ "${DRIVER_TYPE}" != "vgpu" ]; then
@@ -636,6 +734,10 @@ update() {
     _remove_prerequisites
     _cleanup_package_cache
 
+    if [ "${PERSIST_DRIVER}" = true ]; then
+        _write_install_cache
+    fi
+
     echo "Done"
     exit 0
 }