From 19ec7da8108011ca85a7fa89db76aa2a70f7fcc0 Mon Sep 17 00:00:00 2001 From: Lujie Duan Date: Fri, 29 Nov 2024 13:56:09 -0500 Subject: [PATCH] [Testing] Fix RL9 Nvidia driver issue due to RL9 new release (#1839) --- .../applications/dcgm/centos_rhel/install | 45 +++++++++++-------- .../applications/dcgmv1/centos_rhel/install | 43 +++++++++++------- .../applications/nvml/centos_rhel/install | 42 ++++++++++------- 3 files changed, 78 insertions(+), 52 deletions(-) diff --git a/integration_test/third_party_apps_test/applications/dcgm/centos_rhel/install b/integration_test/third_party_apps_test/applications/dcgm/centos_rhel/install index 9831b116f8..43ad713267 100644 --- a/integration_test/third_party_apps_test/applications/dcgm/centos_rhel/install +++ b/integration_test/third_party_apps_test/applications/dcgm/centos_rhel/install @@ -1,6 +1,6 @@ set -e source /etc/os-release -VERSION_ID=${VERSION_ID%%.*} +MAJOR_VERSION_ID=${VERSION_ID%%.*} verify_driver() { # Verify NVIDIA driver: @@ -18,24 +18,32 @@ install_cuda_from_runfile() { # Remove existing installation before using the runfile remove_cuda_package remove_driver_package + # For Rocky Linux 9: when a new OS version becomes available, the default + # repo setting (/etc/yum.repos.d/rocky.repo) will automatically point to the + # new version's repo. This is problematic since the new OS is not available + # right away on GCE. Set up the matched repo to install the correct + # kernel-devel-$(uname -r) + # Not needed for RL8 since 8.10 is already the last RL8 release. + if [[ $ID == rocky && "${MAJOR_VERSION_ID}" == 9 ]]; then + cat <