Skip to content

Commit

Permalink
fix precompiled driver container failures when enabling OpenRM
Browse files Browse the repository at this point in the history
Signed-off-by: Tariq Ibrahim <[email protected]>
  • Loading branch information
tariq1890 committed Dec 12, 2024
1 parent 6b78622 commit 070e32e
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 12 deletions.
7 changes: 1 addition & 6 deletions ubuntu22.04/precompiled/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,9 @@ RUN if [ -n "${CVE_UPDATES}" ]; then \
fi

# update pkg cache and download (without installing) pkgs for userspace driver libs
RUN apt-get update && apt-get install -y --no-install-recommends nvidia-driver-${DRIVER_BRANCH}-server \
RUN apt-get update && apt-get install -y --download-only --no-install-recommends nvidia-driver-${DRIVER_BRANCH}-server \
nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
libnvidia-nscq-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 && \
apt-get purge -y \
libnvidia-egl-wayland1 \
nvidia-dkms-${DRIVER_BRANCH}-server \
nvidia-kernel-source-${DRIVER_BRANCH}-server \
xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server && \
rm -rf /var/lib/apt/lists/*;

# update pkg cache and download pkgs for driver module installation during runtime.
Expand Down
34 changes: 34 additions & 0 deletions ubuntu22.04/precompiled/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,25 @@ _load_driver() {
echo "Parsing kernel module parameters..."
_get_module_params

local nv_fw_search_path="$RUN_DIR/driver/lib/firmware"
local set_fw_path="true"
local fw_path_config_file="/sys/module/firmware_class/parameters/path"
for param in "${NVIDIA_MODULE_PARAMS[@]}"; do
if [[ "$param" == "NVreg_EnableGpuFirmware=0" ]]; then
set_fw_path="false"
fi
done

if [[ "$set_fw_path" == "true" ]]; then
echo "Configuring the following firmware search path in '$fw_path_config_file': $nv_fw_search_path"
if [[ ! -z $(grep '[^[:space:]]' $fw_path_config_file) ]]; then
echo "WARNING: A search path is already configured in $fw_path_config_file"
echo " Retaining the current configuration"
else
echo -n "$nv_fw_search_path" > $fw_path_config_file || echo "WARNING: Failed to configure firmware search path"
fi
fi

echo "Loading ipmi and i2c_core kernel modules..."
modprobe -a i2c_core ipmi_msghandler ipmi_devintf

Expand All @@ -124,6 +143,10 @@ _load_driver() {

DRIVER_VERSION=$(nvidia-smi -q | grep "Driver Version" | awk -F: '{print $2}' | xargs)
if _assert_nvswitch_system; then
echo "Installing NVIDIA fabric manager and libnvidia NSCQ packages..."
apt-get install -y --no-install-recommends \
nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
libnvidia-nscq-${DRIVER_BRANCH}=${DRIVER_VERSION}-1

echo "Starting NVIDIA fabric manager daemon..."
nv-fabricmanager -c /usr/share/nvidia/nvswitch/fabricmanager.cfg
Expand Down Expand Up @@ -211,6 +234,17 @@ _unload_driver() {

# Link and install the kernel modules from precompiled packages
_install_driver() {
# Install necessary driver userspace packages
apt-get install -y --no-install-recommends nvidia-driver-${DRIVER_BRANCH}-server

# Uninstall unnecessary packages installed as a part of the nvidia-driver-${DRIVER_BRANCH}-server package
apt-get purge -y \
libnvidia-egl-wayland1 \
nvidia-dkms-${DRIVER_BRANCH}-server \
nvidia-kernel-source-${DRIVER_BRANCH}-server \
xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server

# Now install the precompiled kernel module packages signed by Canonical
if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then
echo "Installing Open NVIDIA driver kernel modules..."
apt-get install --no-install-recommends -y \
Expand Down
7 changes: 1 addition & 6 deletions ubuntu24.04/precompiled/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,9 @@ RUN if [ -n "${CVE_UPDATES}" ]; then \
fi

# update pkg cache and download (without installing) pkgs for userspace driver libs
RUN apt-get update && apt-get install -y --no-install-recommends nvidia-driver-${DRIVER_BRANCH}-server \
RUN apt-get update && apt-get install -y --download-only --no-install-recommends nvidia-driver-${DRIVER_BRANCH}-server \
nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
libnvidia-nscq-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 && \
apt-get purge -y \
libnvidia-egl-wayland1 \
nvidia-dkms-${DRIVER_BRANCH}-server \
nvidia-kernel-source-${DRIVER_BRANCH}-server \
xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server && \
rm -rf /var/lib/apt/lists/*;

# update pkg cache and download pkgs for driver module installation during runtime.
Expand Down
34 changes: 34 additions & 0 deletions ubuntu24.04/precompiled/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,25 @@ _load_driver() {
echo "Parsing kernel module parameters..."
_get_module_params

local nv_fw_search_path="$RUN_DIR/driver/lib/firmware"
local set_fw_path="true"
local fw_path_config_file="/sys/module/firmware_class/parameters/path"
for param in "${NVIDIA_MODULE_PARAMS[@]}"; do
if [[ "$param" == "NVreg_EnableGpuFirmware=0" ]]; then
set_fw_path="false"
fi
done

if [[ "$set_fw_path" == "true" ]]; then
echo "Configuring the following firmware search path in '$fw_path_config_file': $nv_fw_search_path"
if [[ ! -z $(grep '[^[:space:]]' $fw_path_config_file) ]]; then
echo "WARNING: A search path is already configured in $fw_path_config_file"
echo " Retaining the current configuration"
else
echo -n "$nv_fw_search_path" > $fw_path_config_file || echo "WARNING: Failed to configure firmware search path"
fi
fi

echo "Loading ipmi and i2c_core kernel modules..."
modprobe -a i2c_core ipmi_msghandler ipmi_devintf

Expand All @@ -124,6 +143,10 @@ _load_driver() {

DRIVER_VERSION=$(nvidia-smi -q | grep "Driver Version" | awk -F: '{print $2}' | xargs)
if _assert_nvswitch_system; then
echo "Installing NVIDIA fabric manager and libnvidia NSCQ packages..."
apt-get install -y --no-install-recommends \
nvidia-fabricmanager-${DRIVER_BRANCH}=${DRIVER_VERSION}-1 \
libnvidia-nscq-${DRIVER_BRANCH}=${DRIVER_VERSION}-1

echo "Starting NVIDIA fabric manager daemon..."
nv-fabricmanager -c /usr/share/nvidia/nvswitch/fabricmanager.cfg
Expand Down Expand Up @@ -211,6 +234,17 @@ _unload_driver() {

# Link and install the kernel modules from precompiled packages
_install_driver() {
# Install necessary driver userspace packages
apt-get install -y --no-install-recommends nvidia-driver-${DRIVER_BRANCH}-server

# Uninstall unnecessary packages installed as a part of the nvidia-driver-${DRIVER_BRANCH}-server package
apt-get purge -y \
libnvidia-egl-wayland1 \
nvidia-dkms-${DRIVER_BRANCH}-server \
nvidia-kernel-source-${DRIVER_BRANCH}-server \
xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server

# Now install the precompiled kernel module packages signed by Canonical
if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then
echo "Installing Open NVIDIA driver kernel modules..."
apt-get install --no-install-recommends -y \
Expand Down

0 comments on commit 070e32e

Please sign in to comment.