Skip to content

Commit

Permalink
Merge branch 'rhel-kernel-version' into 'main'
Browse files Browse the repository at this point in the history
extract RHEL VERSION string from the RHEL kernel version

See merge request nvidia/container-images/driver!292
  • Loading branch information
tariq1890 committed Feb 27, 2024
2 parents f3670ff + c0a9c24 commit 6dcacbb
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 38 deletions.
59 changes: 37 additions & 22 deletions rhel8/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ NVIDIA_PEERMEM_MODULE_PARAMS=()
# Required: the expansion fails with "Missing TARGETARCH env" when TARGETARCH is unset or empty.
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
# Defaults to "false" when not supplied by the environment.
USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
# Value handed to dnf via --releasever; when left empty, _resolve_rhel_version
# sets it to RHEL_VERSION.
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
# Defaults to empty; _resolve_rhel_version assigns it from the kernel version,
# or from RHEL_MAJOR_VERSION when the kernel version cannot be parsed.
RHEL_VERSION=${RHEL_VERSION:-""}
# Fallback used when the RHEL version cannot be derived from the kernel version,
# and as the last-resort --releasever for "dnf makecache".
RHEL_MAJOR_VERSION=8

OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
Expand Down Expand Up @@ -43,27 +45,29 @@ _cleanup_package_cache() {
fi
}

_resolve_rhel_version() {
if [ -f /host-etc/os-release ]; then
echo "Resolving RHEL version..."
local version=""
local id=$(cat /host-etc/os-release | grep ^ID= | awk -F= '{print $2}' | sed -e 's/^"//' -e 's/"$//')
if [ "${id}" = "rhcos" ]; then
version=$(cat /host-etc/os-release | grep RHEL_VERSION | awk -F= '{print $2}' | sed -e 's/^"//' -e 's/"$//')
elif [ "${id}" = "rhel" ]; then
version=$(cat /host-etc/os-release | grep VERSION_ID | awk -F= '{print $2}' | sed -e 's/^"//' -e 's/"$//')
fi
if [ -z "${version}" ]; then
echo "Could not resolve RHEL version" >&2
return 1
fi
RHEL_VERSION="${version}"
echo "Proceeding with RHEL version ${RHEL_VERSION}"
# Derives RHEL_VERSION ("<major>.<minor>") from the "el<major>_<minor>" tag
# embedded in KERNEL_VERSION.
# Example: kernel version 4.18.0-513.9.1.el8_9 yields RHEL_VERSION "8.9".
# Globals:  KERNEL_VERSION (read), RHEL_VERSION (written only on success)
# Outputs:  a status line on stdout on success, an error line on stderr on failure
# Returns:  0 when the version was resolved, 1 otherwise
_get_rhel_version_from_kernel() {
    # Kept in a variable so the pattern is used as a regex, not a quoted literal.
    local el_tag_regex='el([0-9]+)_([0-9]+)'
    # Matching and extracting in one step means a string without an "el" tag
    # can never slip through as a version.
    if [[ "${KERNEL_VERSION:-}" =~ ${el_tag_regex} ]]; then
        RHEL_VERSION="${BASH_REMATCH[1]}.${BASH_REMATCH[2]}"
        echo "RHEL VERSION successfully resolved from kernel: ${RHEL_VERSION}"
        return 0
    fi
    echo "Unable to resolve RHEL version from kernel version" >&2
    return 1
}

# set dnf release version as rhel version by default
if [[ -z "${DNF_RELEASEVER}" ]]; then
DNF_RELEASEVER="${RHEL_VERSION}"
fi
# Populates RHEL_VERSION and, when it is still empty, DNF_RELEASEVER.
# Globals:  RHEL_MAJOR_VERSION (read), RHEL_VERSION (written), DNF_RELEASEVER (written if empty)
# Returns:  always 0
_resolve_rhel_version() {
    # Prefer the version parsed from the kernel; otherwise use the major version.
    if ! _get_rhel_version_from_kernel; then
        RHEL_VERSION="${RHEL_MAJOR_VERSION}"
    fi
    # An explicitly provided dnf release version takes precedence over the resolved one.
    [[ -n "${DNF_RELEASEVER}" ]] || DNF_RELEASEVER="${RHEL_VERSION}"
    return 0
}
Expand Down Expand Up @@ -112,6 +116,17 @@ _install_prerequisites() (
dnf config-manager --set-disabled rhel-8-for-$DRIVER_ARCH-baseos-eus-rpms || true
fi

# try with EUS disabled, if it does not work, then try just major version
if ! dnf makecache --releasever=${DNF_RELEASEVER}; then
# If pointing to DNF_RELEASEVER does not work, we point to the RHEL_MAJOR_VERSION as a last resort
if ! dnf makecache --releasever=${RHEL_MAJOR_VERSION}; then
echo "FATAL: failed to update the dnf metadata cache after multiple attempts with releasevers ${DNF_RELEASEVER}, ${RHEL_MAJOR_VERSION}"
exit 1
else
DNF_RELEASEVER=${RHEL_MAJOR_VERSION}
fi
fi

echo "Installing Linux kernel headers..."
dnf -q -y --releasever=${DNF_RELEASEVER} install kernel-headers-${KERNEL_VERSION} kernel-devel-${KERNEL_VERSION} > /dev/null
ln -s /usr/src/kernels/${KERNEL_VERSION} /lib/modules/${KERNEL_VERSION}/build
Expand Down Expand Up @@ -656,7 +671,7 @@ update() {
fi
exec 3>&-

# vgpu driver version is choosen dynamically during runtime, so pre-compile modules for
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
# only non-vgpu driver types
if [ "${DRIVER_TYPE}" != "vgpu" ]; then
# Install the userspace components and copy the kernel module sources.
Expand Down Expand Up @@ -717,7 +732,7 @@ reload_nvidia_peermem() {
exit 1
}

# probe by gpu-opertor for liveness/startup checks for nvidia-peermem module to be loaded when MOFED drivers are ready
# probe by gpu-operator for liveness/startup checks for nvidia-peermem module to be loaded when MOFED drivers are ready
probe_nvidia_peermem() {
if lsmod | grep mlx5_core > /dev/null 2>&1; then
if [ ! -f /sys/module/nvidia_peermem/refcnt ]; then
Expand Down
42 changes: 26 additions & 16 deletions rhel8/precompiled/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ NVIDIA_MODESET_MODULE_PARAMS=()
NVIDIA_PEERMEM_MODULE_PARAMS=()
# Required: the expansion fails with "Missing TARGETARCH env" when TARGETARCH is unset or empty.
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
# Defaults to "false" when not supplied by the environment.
USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
# Defaults to empty; when left empty, _resolve_rhel_version sets it to RHEL_VERSION.
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
# Defaults to empty; _resolve_rhel_version assigns it from the kernel version,
# or from RHEL_MAJOR_VERSION when the kernel version cannot be parsed.
RHEL_VERSION=${RHEL_VERSION:-""}
# Fallback used when the RHEL version cannot be derived from the kernel version.
RHEL_MAJOR_VERSION=8

DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
echo "DRIVER_ARCH is $DRIVER_ARCH"
Expand All @@ -39,22 +42,29 @@ _cleanup_package_cache() {
fi
}

# Derives RHEL_VERSION ("<major>.<minor>") from the "el<major>_<minor>" tag
# embedded in KERNEL_VERSION.
# Example: kernel version 4.18.0-513.9.1.el8_9 yields RHEL_VERSION "8.9".
# Globals:  KERNEL_VERSION (read), RHEL_VERSION (written only on success)
# Outputs:  a status line on stdout on success, an error line on stderr on failure
# Returns:  0 when the version was resolved, 1 otherwise
_get_rhel_version_from_kernel() {
    # Kept in a variable so the pattern is used as a regex, not a quoted literal.
    local el_tag_regex='el([0-9]+)_([0-9]+)'
    # Matching and extracting in one step means a string without an "el" tag
    # can never slip through as a version.
    if [[ "${KERNEL_VERSION:-}" =~ ${el_tag_regex} ]]; then
        RHEL_VERSION="${BASH_REMATCH[1]}.${BASH_REMATCH[2]}"
        echo "RHEL VERSION successfully resolved from kernel: ${RHEL_VERSION}"
        return 0
    fi
    echo "Unable to resolve RHEL version from kernel version" >&2
    return 1
}

_resolve_rhel_version() {
if [ -f /host-etc/os-release ]; then
echo "Resolving RHEL version..."
local version=""
local id=$(cat /host-etc/os-release | grep ^ID= | awk -F= '{print $2}' | sed -e 's/^"//' -e 's/"$//')
if [ "${id}" = "rhcos" ]; then
version=$(cat /host-etc/os-release | grep RHEL_VERSION | awk -F= '{print $2}' | sed -e 's/^"//' -e 's/"$//')
elif [ "${id}" = "rhel" ]; then
version=$(cat /host-etc/os-release | grep VERSION_ID | awk -F= '{print $2}' | sed -e 's/^"//' -e 's/"$//')
fi
if [ -z "${version}" ]; then
echo "Could not resolve RHEL version" >&2
return 1
fi
RHEL_VERSION="${version}"
echo "Proceeding with RHEL version ${RHEL_VERSION}"
_get_rhel_version_from_kernel || RHEL_VERSION="${RHEL_MAJOR_VERSION}"
# set dnf release version as rhel version by default
if [[ -z "${DNF_RELEASEVER}" ]]; then
DNF_RELEASEVER="${RHEL_VERSION}"
fi
return 0
}
Expand Down Expand Up @@ -413,7 +423,7 @@ reload_nvidia_peermem() {
exit 1
}

# probe by gpu-opertor for liveness/startup checks for nvidia-peermem module to be loaded when MOFED drivers are ready
# probe by gpu-operator for liveness/startup checks for nvidia-peermem module to be loaded when MOFED drivers are ready
probe_nvidia_peermem() {
if lsmod | grep mlx5_core > /dev/null 2>&1; then
if [ ! -f /sys/module/nvidia_peermem/refcnt ]; then
Expand Down

0 comments on commit 6dcacbb

Please sign in to comment.