From b91870491e3aa8ec14850ad81583c813ec4ddee3 Mon Sep 17 00:00:00 2001 From: Arjun Gadiyar Date: Fri, 7 Jun 2024 21:26:10 +0000 Subject: [PATCH 1/6] container restarts --- ubuntu22.04/nvidia-driver | 110 +++++++++++++++++++++++++++++--------- 1 file changed, 85 insertions(+), 25 deletions(-) diff --git a/ubuntu22.04/nvidia-driver b/ubuntu22.04/nvidia-driver index aedeeea2..eec6011f 100755 --- a/ubuntu22.04/nvidia-driver +++ b/ubuntu22.04/nvidia-driver @@ -15,6 +15,9 @@ NVIDIA_UVM_MODULE_PARAMS=() NVIDIA_MODESET_MODULE_PARAMS=() NVIDIA_PEERMEM_MODULE_PARAMS=() TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"} +INSTALL_DIR=${INSTALL_DIR:-"/opt/nvidia/driver"} + +export PATH="${INSTALL_DIR}/usr/bin:${PATH}" OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false} [[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel @@ -73,14 +76,14 @@ _install_prerequisites() ( mkdir -p /lib/modules/${KERNEL_VERSION}/proc echo "Installing Linux kernel headers..." - apt-get -qq install --no-install-recommends linux-headers-${KERNEL_VERSION} > /dev/null + #apt-get -qq install --no-install-recommends linux-headers-${KERNEL_VERSION} > /dev/null echo "Installing Linux kernel module files..." apt-get -qq download linux-image-${KERNEL_VERSION} && dpkg -x linux-image*.deb . - { apt-get -qq download linux-modules-${KERNEL_VERSION} && dpkg -x linux-modules*.deb . || true; } 2> /dev/null - mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION} - mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION} - depmod ${KERNEL_VERSION} + #{ apt-get -qq download linux-modules-${KERNEL_VERSION} && dpkg -x linux-modules*.deb . || true; } 2> /dev/null + #mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION} + #mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION} + #depmod ${KERNEL_VERSION} echo "Generating Linux kernel version string..." @@ -290,15 +293,19 @@ _load_driver() { echo "Loading NVIDIA driver kernel modules..." set -o xtrace +o nounset - modprobe nvidia "${NVIDIA_MODULE_PARAMS[@]}" - modprobe nvidia-uvm "${NVIDIA_UVM_MODULE_PARAMS[@]}" - modprobe nvidia-modeset "${NVIDIA_MODESET_MODULE_PARAMS[@]}" + + if [[ ! -e /sys/module/nvidia ]]; then + insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia.ko" "${NVIDIA_MODULE_PARAMS[@]}" + insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-uvm.ko" "${NVIDIA_UVM_MODULE_PARAMS[@]}" + insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-modeset.ko" "${NVIDIA_MODESET_MODULE_PARAMS[@]}" + fi + set +o xtrace -o nounset if _gpu_direct_rdma_enabled; then echo "Loading NVIDIA Peer Memory kernel module..." set -o xtrace +o nounset - modprobe nvidia-peermem "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}" + insmod ${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-peermem.ko "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}" set +o xtrace -o nounset fi @@ -315,7 +322,7 @@ _load_driver() { fi echo "Starting nvidia-gridd.." - LD_LIBRARY_PATH=/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd + LD_LIBRARY_PATH=${INSTALL_DIR}/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd # Start virtual topology daemon _start_vgpu_topology_daemon @@ -422,26 +429,33 @@ _install_driver() { echo "Installing NVIDIA driver kernel modules..." cd /usr/src/nvidia-${DRIVER_VERSION} - if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then - rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video - else - rm -rf /lib/modules/${KERNEL_VERSION}/video - fi + #if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then + #rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video + #else + #rm -rf /lib/modules/${KERNEL_VERSION}/video + #fi if [ "${ACCEPT_LICENSE}" = "yes" ]; then install_args+=("--accept-license") fi + + mkdir -p /lib/modules/`uname -r`/kernel/drivers/video + mkdir -p ${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video + mkdir -p ${INSTALL_DIR}/driver-workdir + + mount -t overlay -o lowerdir=/lib/modules/`uname -r`/kernel/drivers/video,upperdir=${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video,workdir=${INSTALL_DIR}/driver-workdir none /lib/modules/`uname -r`/kernel/drivers/video + nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+"${install_args[@]}"} } # Mount the driver rootfs into the run directory with the exception of sysfs. -_mount_rootfs() { - echo "Mounting NVIDIA driver rootfs..." - mount --make-runbindable /sys - mount --make-private /sys - mkdir -p ${RUN_DIR}/driver - mount --rbind / ${RUN_DIR}/driver -} +#_mount_rootfs() { +# echo "Mounting NVIDIA driver rootfs..." +# mount --make-runbindable /sys +# mount --make-private /sys +# mkdir -p ${RUN_DIR}/driver +# mount --rbind / ${RUN_DIR}/driver +#} # Unmount the driver rootfs from the run directory. _unmount_rootfs() { @@ -523,6 +537,48 @@ init() { if [ "${DRIVER_TYPE}" = "vgpu" ]; then _find_vgpu_driver_version || exit 1 fi + + export PATH="${INSTALL_DIR}/usr/bin:${PATH}" + + first_line="" + second_line="" + if [ -e "/opt/nvidia/driver/cache" ]; then + first_line=$(head -n 1 "/opt/nvidia/driver/cache") + second_line=$(sed -n '2p' "/opt/nvidia/driver/cache") + echo "$first_line" + echo "$second_line" + fi + + if [ "$first_line" = "DRIVER_VERSION=550.54.15" ] && [ "$second_line" = "KERNEL_VERSION=5.15.0-102-generic" ]; then + #if [ -f "${INSTALL_DIR}/cache" ]; then + echo "Container install exists. Re-using the installation and loading the driver" + + # _load_driver + #mkdir -p /lib/modules/`uname -r`/kernel/drivers/video + + mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=${INSTALL_DIR}/usr/lib/x86_64-linux-gnu,workdir=${INSTALL_DIR}/lib64-workdir none /usr/lib/x86_64-linux-gnu + mount -t overlay -o lowerdir=/usr/bin,upperdir=${INSTALL_DIR}/usr/bin,workdir=${INSTALL_DIR}/usr-bin-workdir none /usr/bin + #mount -t overlay -o lowerdir=/lib/modules/`uname -r`/kernel/drivers/video,upperdir=${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video,workdir=${INSTALL_DIR}/driver-workdir none /lib/modules/`uname -r`/kernel/drivers/video + + _load_driver + + echo "Done, now waiting for signal" + sleep infinity & + trap "echo 'Caught signal'; _shutdown && { kill $!; exit 0; }" HUP INT QUIT PIPE TERM + trap - EXIT + while true; do wait $! || continue; done + exit 0 + return + fi + + mkdir -p ${INSTALL_DIR}/usr/lib/x86_64-linux-gnu + mkdir -p ${INSTALL_DIR}/lib64-workdir + + mkdir -p ${INSTALL_DIR}/usr/bin + mkdir -p ${INSTALL_DIR}/usr-bin-workdir + + mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=${INSTALL_DIR}/usr/lib/x86_64-linux-gnu,workdir=${INSTALL_DIR}/lib64-workdir none /usr/lib/x86_64-linux-gnu + mount -t overlay -o lowerdir=/usr/bin,upperdir=${INSTALL_DIR}/usr/bin,workdir=${INSTALL_DIR}/usr-bin-workdir none /usr/bin # Install the userspace components and copy the kernel module sources. sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \ @@ -558,7 +614,7 @@ init() { trap "_shutdown" EXIT _unload_driver || exit 1 - _unmount_rootfs + #_unmount_rootfs if _kernel_requires_package; then _update_ca_certificates @@ -572,8 +628,12 @@ init() { _install_driver _load_driver || exit 1 - _mount_rootfs - _write_kernel_update_hook + #_mount_rootfs + #_write_kernel_update_hook + + echo "DRIVER_VERSION=${DRIVER_VERSION}" > ${INSTALL_DIR}/cache + echo "KERNEL_VERSION=${KERNEL_VERSION}" >> ${INSTALL_DIR}/cache + # todo: add a checksum for kernel driver parameters as well echo "Done, now waiting for signal" sleep infinity & From aa2d5c3c2d3de2d65865d025ad981ecabf738a7b Mon Sep 17 00:00:00 2001 From: Arjun Gadiyar Date: Fri, 7 Jun 2024 23:22:29 +0000 Subject: [PATCH 2/6] removed comments --- ubuntu22.04/clear_folders.sh | 8 ++++++++ ubuntu22.04/nvidia-driver | 24 +++--------------------- 2 files changed, 11 insertions(+), 21 deletions(-) create mode 100644 ubuntu22.04/clear_folders.sh diff --git a/ubuntu22.04/clear_folders.sh b/ubuntu22.04/clear_folders.sh new file mode 100644 index 00000000..0e36984e --- /dev/null +++ b/ubuntu22.04/clear_folders.sh @@ -0,0 +1,8 @@ +#! /bin/bash + +sudo rm -r /opt/nvidia/driver/cache +sudo rm -r /opt/nvidia/driver/driver-workdir +sudo rm -r /opt/nvidia/driver/lib +sudo rm -r /opt/nvidia/driver/lib64-workdir +sudo rm -r /opt/nvidia/driver/usr +sudo rm -r /opt/nvidia/driver/usr-bin-workdir diff --git a/ubuntu22.04/nvidia-driver b/ubuntu22.04/nvidia-driver index eec6011f..b4bfd5a0 100755 --- a/ubuntu22.04/nvidia-driver +++ b/ubuntu22.04/nvidia-driver @@ -80,10 +80,6 @@ _install_prerequisites() ( echo "Installing Linux kernel module files..." apt-get -qq download linux-image-${KERNEL_VERSION} && dpkg -x linux-image*.deb . - #{ apt-get -qq download linux-modules-${KERNEL_VERSION} && dpkg -x linux-modules*.deb . || true; } 2> /dev/null - #mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION} - #mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION} - #depmod ${KERNEL_VERSION} echo "Generating Linux kernel version string..." @@ -289,7 +285,7 @@ _load_driver() { fi echo "Loading ipmi and i2c_core kernel modules..." - modprobe -a i2c_core ipmi_msghandler ipmi_devintf + #modprobe -a i2c_core ipmi_msghandler ipmi_devintf echo "Loading NVIDIA driver kernel modules..." set -o xtrace +o nounset @@ -429,11 +425,6 @@ _install_driver() { echo "Installing NVIDIA driver kernel modules..." cd /usr/src/nvidia-${DRIVER_VERSION} - #if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then - #rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video - #else - #rm -rf /lib/modules/${KERNEL_VERSION}/video - #fi if [ "${ACCEPT_LICENSE}" = "yes" ]; then install_args+=("--accept-license") @@ -549,16 +540,12 @@ init() { echo "$second_line" fi - if [ "$first_line" = "DRIVER_VERSION=550.54.15" ] && [ "$second_line" = "KERNEL_VERSION=5.15.0-102-generic" ]; then - #if [ -f "${INSTALL_DIR}/cache" ]; then + if grep -q "DRIVER_VERSION=550.54.15" /opt/nvidia/driver/cache && grep -q "KERNEL_VERSION=5.15.0-102-generic" /opt/nvidia/driver/cache; then + #if [ "$first_line" = "DRIVER_VERSION=550.54.15" ] && [ "$second_line" = "KERNEL_VERSION=5.15.0-102-generic" ]; then echo "Container install exists. Re-using the installation and loading the driver" - # _load_driver - #mkdir -p /lib/modules/`uname -r`/kernel/drivers/video - mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=${INSTALL_DIR}/usr/lib/x86_64-linux-gnu,workdir=${INSTALL_DIR}/lib64-workdir none /usr/lib/x86_64-linux-gnu mount -t overlay -o lowerdir=/usr/bin,upperdir=${INSTALL_DIR}/usr/bin,workdir=${INSTALL_DIR}/usr-bin-workdir none /usr/bin - #mount -t overlay -o lowerdir=/lib/modules/`uname -r`/kernel/drivers/video,upperdir=${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video,workdir=${INSTALL_DIR}/driver-workdir none /lib/modules/`uname -r`/kernel/drivers/video _load_driver @@ -614,7 +601,6 @@ init() { trap "_shutdown" EXIT _unload_driver || exit 1 - #_unmount_rootfs if _kernel_requires_package; then _update_ca_certificates @@ -622,14 +608,10 @@ init() { _resolve_kernel_version || exit 1 _install_prerequisites _create_driver_package - #_remove_prerequisites - #_cleanup_package_cache fi _install_driver _load_driver || exit 1 - #_mount_rootfs - #_write_kernel_update_hook echo "DRIVER_VERSION=${DRIVER_VERSION}" > ${INSTALL_DIR}/cache echo "KERNEL_VERSION=${KERNEL_VERSION}" >> ${INSTALL_DIR}/cache From 0daabe1650681c8775445f87ee937b1019d56c4f Mon Sep 17 00:00:00 2001 From: Arjun Gadiyar Date: Wed, 12 Jun 2024 10:58:12 -0700 Subject: [PATCH 3/6] in progress changes --- ubuntu22.04/nvidia-driver | 51 ++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/ubuntu22.04/nvidia-driver b/ubuntu22.04/nvidia-driver index b4bfd5a0..e6f0022a 100755 --- a/ubuntu22.04/nvidia-driver +++ b/ubuntu22.04/nvidia-driver @@ -440,13 +440,13 @@ _install_driver() { } # Mount the driver rootfs into the run directory with the exception of sysfs. -#_mount_rootfs() { -# echo "Mounting NVIDIA driver rootfs..." -# mount --make-runbindable /sys -# mount --make-private /sys -# mkdir -p ${RUN_DIR}/driver -# mount --rbind / ${RUN_DIR}/driver -#} +_mount_rootfs() { + echo "Mounting NVIDIA driver rootfs..." + mount --make-runbindable /sys + mount --make-private /sys + mkdir -p ${RUN_DIR}/driver + mount --rbind / ${RUN_DIR}/driver +} # Unmount the driver rootfs from the run directory. _unmount_rootfs() { @@ -524,30 +524,19 @@ _start_vgpu_topology_daemon() { nvidia-topologyd } -init() { - if [ "${DRIVER_TYPE}" = "vgpu" ]; then - _find_vgpu_driver_version || exit 1 - fi - +restart_check() { export PATH="${INSTALL_DIR}/usr/bin:${PATH}" - first_line="" - second_line="" - if [ -e "/opt/nvidia/driver/cache" ]; then - first_line=$(head -n 1 "/opt/nvidia/driver/cache") - second_line=$(sed -n '2p' "/opt/nvidia/driver/cache") - echo "$first_line" - echo "$second_line" - fi + #VERIFY THAT THE INIT CALL TO METHOD WORKS + #FIX THIS - NEED TO CHANGE KERNEL VERSION CHECK - if grep -q "DRIVER_VERSION=550.54.15" /opt/nvidia/driver/cache && grep -q "KERNEL_VERSION=5.15.0-102-generic" /opt/nvidia/driver/cache; then - #if [ "$first_line" = "DRIVER_VERSION=550.54.15" ] && [ "$second_line" = "KERNEL_VERSION=5.15.0-102-generic" ]; then + if grep -q "${DRIVER_VERSION}" /opt/nvidia/driver/cache && grep -q "KERNEL_VERSION=5.15.0-102-generic" /opt/nvidia/driver/cache; then echo "Container install exists. Re-using the installation and loading the driver" - mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=${INSTALL_DIR}/usr/lib/x86_64-linux-gnu,workdir=${INSTALL_DIR}/lib64-workdir none /usr/lib/x86_64-linux-gnu + mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=${INSTALL_DIR}/usr/lib/x86_64-linux-gnu,workdir=${INSTALL_DIR}/lib64-workdir none /usr/lib/x86_64-linux-gnu mount -t overlay -o lowerdir=/usr/bin,upperdir=${INSTALL_DIR}/usr/bin,workdir=${INSTALL_DIR}/usr-bin-workdir none /usr/bin - - _load_driver + + _load_driver echo "Done, now waiting for signal" sleep infinity & @@ -567,6 +556,15 @@ init() { mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=${INSTALL_DIR}/usr/lib/x86_64-linux-gnu,workdir=${INSTALL_DIR}/lib64-workdir none /usr/lib/x86_64-linux-gnu mount -t overlay -o lowerdir=/usr/bin,upperdir=${INSTALL_DIR}/usr/bin,workdir=${INSTALL_DIR}/usr-bin-workdir none /usr/bin +} + +init() { + if [ "${DRIVER_TYPE}" = "vgpu" ]; then + _find_vgpu_driver_version || exit 1 + fi + + + # Install the userspace components and copy the kernel module sources. sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \ cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \ @@ -601,6 +599,7 @@ init() { trap "_shutdown" EXIT _unload_driver || exit 1 + _unmount_rootfs if _kernel_requires_package; then _update_ca_certificates @@ -612,6 +611,8 @@ init() { _install_driver _load_driver || exit 1 + _mount_rootfs + _write_kernel_update_hook echo "DRIVER_VERSION=${DRIVER_VERSION}" > ${INSTALL_DIR}/cache echo "KERNEL_VERSION=${KERNEL_VERSION}" >> ${INSTALL_DIR}/cache From 24a4338cc843f125c4f289f8c885ebe1239e1094 Mon Sep 17 00:00:00 2001 From: Arjun Gadiyar Date: Wed, 12 Jun 2024 22:12:58 +0000 Subject: [PATCH 4/6] Nvidia-ctk and separated out changes --- ubuntu22.04/nvidia-driver | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/ubuntu22.04/nvidia-driver b/ubuntu22.04/nvidia-driver index e6f0022a..df7087b0 100755 --- a/ubuntu22.04/nvidia-driver +++ b/ubuntu22.04/nvidia-driver @@ -16,6 +16,7 @@ NVIDIA_MODESET_MODULE_PARAMS=() NVIDIA_PEERMEM_MODULE_PARAMS=() TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"} INSTALL_DIR=${INSTALL_DIR:-"/opt/nvidia/driver"} +RESTARTS_ENABLED="${RESTARTS_ENABLED:-false}" export PATH="${INSTALL_DIR}/usr/bin:${PATH}" @@ -71,15 +72,27 @@ _install_prerequisites() ( trap "rm -rf ${tmp_dir}" EXIT cd ${tmp_dir} + + if [ "${RESTARTS_ENABLED}" = false ]; then + rm -rf /lib/modules/${KERNEL_VERSION} + fi - rm -rf /lib/modules/${KERNEL_VERSION} + #rm -rf /lib/modules/${KERNEL_VERSION} mkdir -p /lib/modules/${KERNEL_VERSION}/proc echo "Installing Linux kernel headers..." - #apt-get -qq install --no-install-recommends linux-headers-${KERNEL_VERSION} > /dev/null + apt-get -qq install --no-install-recommends linux-headers-${KERNEL_VERSION} > /dev/null echo "Installing Linux kernel module files..." apt-get -qq download linux-image-${KERNEL_VERSION} && dpkg -x linux-image*.deb . + if [ "${RESTARTS_ENABLED}" = false ]; then + mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION} + mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION} + depmod ${KERNEL_VERSION} + fi + #mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION} + #mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION} + #depmod ${KERNEL_VERSION} echo "Generating Linux kernel version string..." @@ -530,7 +543,7 @@ restart_check() { #VERIFY THAT THE INIT CALL TO METHOD WORKS #FIX THIS - NEED TO CHANGE KERNEL VERSION CHECK - if grep -q "${DRIVER_VERSION}" /opt/nvidia/driver/cache && grep -q "KERNEL_VERSION=5.15.0-102-generic" /opt/nvidia/driver/cache; then + if grep -q "${DRIVER_VERSION}" /opt/nvidia/driver/cache && grep -q "${KERNEL_VERSION}" /opt/nvidia/driver/cache; then echo "Container install exists. Re-using the installation and loading the driver" mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=${INSTALL_DIR}/usr/lib/x86_64-linux-gnu,workdir=${INSTALL_DIR}/lib64-workdir none /usr/lib/x86_64-linux-gnu @@ -562,8 +575,10 @@ init() { if [ "${DRIVER_TYPE}" = "vgpu" ]; then _find_vgpu_driver_version || exit 1 fi - - + + if [ "${RESTARTS_ENABLED}" = true ]; then + restart_check + fi # Install the userspace components and copy the kernel module sources. sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \ From d384432d717d79e04e2e929646095781f0816cd4 Mon Sep 17 00:00:00 2001 From: Arjun Gadiyar Date: Wed, 12 Jun 2024 23:13:31 +0000 Subject: [PATCH 5/6] container restart final changes --- ubuntu22.04/nvidia-driver | 54 ++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/ubuntu22.04/nvidia-driver b/ubuntu22.04/nvidia-driver index df7087b0..e3fc95c7 100755 --- a/ubuntu22.04/nvidia-driver +++ b/ubuntu22.04/nvidia-driver @@ -298,15 +298,23 @@ _load_driver() { fi echo "Loading ipmi and i2c_core kernel modules..." - #modprobe -a i2c_core ipmi_msghandler ipmi_devintf + if [ "${RESTARTS_ENABLED}" = false ]; then + modprobe -a i2c_core ipmi_msghandler ipmi_devintf + fi echo "Loading NVIDIA driver kernel modules..." set -o xtrace +o nounset - - if [[ ! -e /sys/module/nvidia ]]; then - insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia.ko" "${NVIDIA_MODULE_PARAMS[@]}" - insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-uvm.ko" "${NVIDIA_UVM_MODULE_PARAMS[@]}" - insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-modeset.ko" "${NVIDIA_MODESET_MODULE_PARAMS[@]}" + + if [ "${RESTARTS_ENABLED}" = true ]; then + if [[ ! -e /sys/module/nvidia ]]; then + insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia.ko" "${NVIDIA_MODULE_PARAMS[@]}" + insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-uvm.ko" "${NVIDIA_UVM_MODULE_PARAMS[@]}" + insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-modeset.ko" "${NVIDIA_MODESET_MODULE_PARAMS[@]}" + fi + else + modprobe nvidia "${NVIDIA_MODULE_PARAMS[@]}" + modprobe nvidia-uvm "${NVIDIA_UVM_MODULE_PARAMS[@]}" + modprobe nvidia-modeset "${NVIDIA_MODESET_MODULE_PARAMS[@]}" fi set +o xtrace -o nounset @@ -314,7 +322,11 @@ _load_driver() { if _gpu_direct_rdma_enabled; then echo "Loading NVIDIA Peer Memory kernel module..." set -o xtrace +o nounset - insmod ${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-peermem.ko "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}" + if [ "${RESTARTS_ENABLED}" = true ]; then + insmod ${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-peermem.ko "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}" + else + modprobe nvidia-peermem "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}" + fi set +o xtrace -o nounset fi @@ -331,8 +343,11 @@ _load_driver() { fi echo "Starting nvidia-gridd.." - LD_LIBRARY_PATH=${INSTALL_DIR}/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd - + if [ "${RESTARTS_ENABLED}" = true ]; then + LD_LIBRARY_PATH=${INSTALL_DIR}/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd + else + LD_LIBRARY_PATH=/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd + fi # Start virtual topology daemon _start_vgpu_topology_daemon fi @@ -439,15 +454,24 @@ _install_driver() { echo "Installing NVIDIA driver kernel modules..." cd /usr/src/nvidia-${DRIVER_VERSION} + if [ "${RESTARTS_ENABLED}" = false ]; then + if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then + rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video + else + rm -rf /lib/modules/${KERNEL_VERSION}/video + fi + fi + if [ "${ACCEPT_LICENSE}" = "yes" ]; then install_args+=("--accept-license") fi - - mkdir -p /lib/modules/`uname -r`/kernel/drivers/video - mkdir -p ${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video - mkdir -p ${INSTALL_DIR}/driver-workdir - - mount -t overlay -o lowerdir=/lib/modules/`uname -r`/kernel/drivers/video,upperdir=${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video,workdir=${INSTALL_DIR}/driver-workdir none /lib/modules/`uname -r`/kernel/drivers/video + + if [ "${RESTARTS_ENABLED}" = true ]; then + mkdir -p /lib/modules/`uname -r`/kernel/drivers/video + mkdir -p ${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video + mkdir -p ${INSTALL_DIR}/driver-workdir + mount -t overlay -o lowerdir=/lib/modules/`uname -r`/kernel/drivers/video,upperdir=${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video,workdir=${INSTALL_DIR}/driver-workdir none /lib/modules/`uname -r`/kernel/drivers/video + fi nvidia-installer --kernel-module-only --no-drm --ui=none --no-nouveau-check -m=${KERNEL_TYPE} ${install_args[@]+"${install_args[@]}"} } From e085737d822cdaf89e34c41f33f9ed626a5a6b26 Mon Sep 17 00:00:00 2001 From: Arjun Date: Wed, 3 Jul 2024 21:16:41 +0000 Subject: [PATCH 6/6] updated PERSIST_DRIVER --- ubuntu22.04/nvidia-driver | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/ubuntu22.04/nvidia-driver b/ubuntu22.04/nvidia-driver index e3fc95c7..cd5c02f1 100755 --- a/ubuntu22.04/nvidia-driver +++ b/ubuntu22.04/nvidia-driver @@ -16,7 +16,7 @@ NVIDIA_MODESET_MODULE_PARAMS=() NVIDIA_PEERMEM_MODULE_PARAMS=() TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"} INSTALL_DIR=${INSTALL_DIR:-"/opt/nvidia/driver"} -RESTARTS_ENABLED="${RESTARTS_ENABLED:-false}" +PERSIST_DRIVER="${PERSIST_DRIVER:-false}" export PATH="${INSTALL_DIR}/usr/bin:${PATH}" @@ -73,7 +73,7 @@ _install_prerequisites() ( trap "rm -rf ${tmp_dir}" EXIT cd ${tmp_dir} - if [ "${RESTARTS_ENABLED}" = false ]; then + if [ "${PERSIST_DRIVER}" = false ]; then rm -rf /lib/modules/${KERNEL_VERSION} fi @@ -85,7 +85,7 @@ _install_prerequisites() ( echo "Installing Linux kernel module files..." apt-get -qq download linux-image-${KERNEL_VERSION} && dpkg -x linux-image*.deb . - if [ "${RESTARTS_ENABLED}" = false ]; then + if [ "${PERSIST_DRIVER}" = false ]; then mv lib/modules/${KERNEL_VERSION}/modules.* /lib/modules/${KERNEL_VERSION} mv lib/modules/${KERNEL_VERSION}/kernel /lib/modules/${KERNEL_VERSION} depmod ${KERNEL_VERSION} @@ -298,14 +298,14 @@ _load_driver() { fi echo "Loading ipmi and i2c_core kernel modules..." - if [ "${RESTARTS_ENABLED}" = false ]; then + if [ "${PERSIST_DRIVER}" = false ]; then modprobe -a i2c_core ipmi_msghandler ipmi_devintf fi echo "Loading NVIDIA driver kernel modules..." set -o xtrace +o nounset - if [ "${RESTARTS_ENABLED}" = true ]; then + if [ "${PERSIST_DRIVER}" = true ]; then if [[ ! -e /sys/module/nvidia ]]; then insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia.ko" "${NVIDIA_MODULE_PARAMS[@]}" insmod "${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-uvm.ko" "${NVIDIA_UVM_MODULE_PARAMS[@]}" @@ -322,7 +322,7 @@ _load_driver() { if _gpu_direct_rdma_enabled; then echo "Loading NVIDIA Peer Memory kernel module..." set -o xtrace +o nounset - if [ "${RESTARTS_ENABLED}" = true ]; then + if [ "${PERSIST_DRIVER}" = true ]; then insmod ${INSTALL_DIR}/lib/modules/$(uname -r)/kernel/drivers/video/nvidia-peermem.ko "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}" else modprobe nvidia-peermem "${NVIDIA_PEERMEM_MODULE_PARAMS[@]}" @@ -343,7 +343,7 @@ _load_driver() { fi echo "Starting nvidia-gridd.." - if [ "${RESTARTS_ENABLED}" = true ]; then + if [ "${PERSIST_DRIVER}" = true ]; then LD_LIBRARY_PATH=${INSTALL_DIR}/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd else LD_LIBRARY_PATH=/usr/lib/$DRIVER_ARCH-linux-gnu/nvidia/gridd nvidia-gridd @@ -454,7 +454,7 @@ _install_driver() { echo "Installing NVIDIA driver kernel modules..." cd /usr/src/nvidia-${DRIVER_VERSION} - if [ "${RESTARTS_ENABLED}" = false ]; then + if [ "${PERSIST_DRIVER}" = false ]; then if [ -d /lib/modules/${KERNEL_VERSION}/kernel/drivers/video ]; then rm -rf /lib/modules/${KERNEL_VERSION}/kernel/drivers/video else @@ -466,7 +466,7 @@ _install_driver() { install_args+=("--accept-license") fi - if [ "${RESTARTS_ENABLED}" = true ]; then + if [ "${PERSIST_DRIVER}" = true ]; then mkdir -p /lib/modules/`uname -r`/kernel/drivers/video mkdir -p ${INSTALL_DIR}/lib/modules/`uname -r`/kernel/drivers/video mkdir -p ${INSTALL_DIR}/driver-workdir @@ -600,7 +600,7 @@ init() { _find_vgpu_driver_version || exit 1 fi - if [ "${RESTARTS_ENABLED}" = true ]; then + if [ "${PERSIST_DRIVER}" = true ]; then restart_check fi @@ -678,6 +678,15 @@ update() { fi exec 3>&- + mkdir -p ${INSTALL_DIR}/usr/lib/x86_64-linux-gnu + mkdir -p ${INSTALL_DIR}/lib64-workdir + + mkdir -p ${INSTALL_DIR}/usr/bin + mkdir -p ${INSTALL_DIR}/usr-bin-workdir + + mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=${INSTALL_DIR}/usr/lib/x86_64-linux-gnu,workdir=${INSTALL_DIR}/lib64-workdir none /usr/lib/x86_64-linux-gnu + mount -t overlay -o lowerdir=/usr/bin,upperdir=${INSTALL_DIR}/usr/bin,workdir=${INSTALL_DIR}/usr-bin-workdir none /usr/bin + # vgpu driver version is choosen dynamically during runtime, so pre-compile modules for # only non-vgpu driver types if [ "${DRIVER_TYPE}" != "vgpu" ]; then @@ -718,6 +727,9 @@ update() { _remove_prerequisites _cleanup_package_cache + echo "DRIVER_VERSION=${DRIVER_VERSION}" > ${INSTALL_DIR}/cache + echo "KERNEL_VERSION=${KERNEL_VERSION}" >> ${INSTALL_DIR}/cache + echo "Done" exit 0 }