Add a useOpenKernelModules option to the ClusterPolicy driver spec (api/v1 DriverSpec) and to the NVIDIADriver spec (api/v1alpha1 NVIDIADriverSpec).

What the hunks below do:
  * api/v1/clusterpolicy_types.go adds the optional *bool field and DriverSpec.OpenKernelModulesEnabled(), which returns false when the field is unset.
  * api/v1alpha1/nvidiadriver_types.go adds the same optional *bool field to NVIDIADriverSpec (no accessor is added in this patch).
  * Both zz_generated.deepcopy.go files copy the new pointer field.
  * controllers/object_controls.go sets OPEN_KERNEL_MODULES_ENABLED="true" on the driver container when OpenKernelModulesEnabled() returns true.
  * manifests/state-driver/0500_daemonset.yaml emits the same variable inside a template "if" on .Driver.Spec.UseOpenKernelModules.
  * The bundle/, config/crd/bases/ and deployments/gpu-operator/crds/ YAML hunks add the boolean property to each copy of the two CRD schemas.

NOTE(review): the template condition is applied to the field itself rather than to a dereferenced value. If .Driver.Spec is the NVIDIADriverSpec changed in this patch, the field is a *bool, and Go's text/template treats every non-nil pointer as true, so an explicit "useOpenKernelModules: false" would still emit the variable. Confirm the type of .Driver.Spec and how the renderer evaluates it before merging.

diff --git a/api/v1/clusterpolicy_types.go b/api/v1/clusterpolicy_types.go index 2f6a8c1d1..4e4a6ef5f 100644 --- a/api/v1/clusterpolicy_types.go +++ b/api/v1/clusterpolicy_types.go @@ -457,6 +457,12 @@ type DriverSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" UsePrecompiled *bool `json:"usePrecompiled,omitempty"` + // UseOpenKernelModules indicates if the open GPU kernel modules should be used + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable use of open GPU kernel modules" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + UseOpenKernelModules *bool `json:"useOpenKernelModules,omitempty"` + // Enabled indicates if deployment of NVIDIA Driver through operator is enabled // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable NVIDIA Driver deployment through GPU Operator" @@ -1755,6 +1761,15 @@ func (d *DriverSpec) UsePrecompiledDrivers() bool { return *d.UsePrecompiled } +// OpenKernelModulesEnabled returns true if driver install is enabled using open GPU kernel modules +func (d *DriverSpec) OpenKernelModulesEnabled() bool { + if d.UseOpenKernelModules == nil { + // default is false if not specified by user + return false + } + return *d.UseOpenKernelModules +} + // IsEnabled returns true if device-plugin is enabled(default) through gpu-operator func (p *DevicePluginSpec) IsEnabled() bool { if p.Enabled == nil { diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 6b58447c1..28b108edd 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -589,6 +589,11 @@ func (in *DriverSpec) DeepCopyInto(out *DriverSpec) { 
*out = new(bool) **out = **in } + if in.UseOpenKernelModules != nil { + in, out := &in.UseOpenKernelModules, &out.UseOpenKernelModules + *out = new(bool) + **out = **in + } if in.Enabled != nil { in, out := &in.Enabled, &out.Enabled *out = new(bool) diff --git a/api/v1alpha1/nvidiadriver_types.go b/api/v1alpha1/nvidiadriver_types.go index 201c0d42d..91deefb25 100644 --- a/api/v1alpha1/nvidiadriver_types.go +++ b/api/v1alpha1/nvidiadriver_types.go @@ -51,6 +51,12 @@ type NVIDIADriverSpec struct { // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="usePrecompiled is an immutable field. Please create a new NvidiaDriver resource instead when you want to change this setting." UsePrecompiled *bool `json:"usePrecompiled,omitempty"` + // UseOpenKernelModules indicates if the open GPU kernel modules should be used + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Enable use of open GPU kernel modules" + // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.x-descriptors="urn:alm:descriptor:com.tectonic.ui:booleanSwitch" + UseOpenKernelModules *bool `json:"useOpenKernelModules,omitempty"` + // NVIDIA Driver container startup probe settings StartupProbe *ContainerProbeSpec `json:"startupProbe,omitempty"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 68702b9f3..75e8a3f70 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -273,6 +273,11 @@ func (in *NVIDIADriverSpec) DeepCopyInto(out *NVIDIADriverSpec) { *out = new(bool) **out = **in } + if in.UseOpenKernelModules != nil { + in, out := &in.UseOpenKernelModules, &out.UseOpenKernelModules + *out = new(bool) + **out = **in + } if in.StartupProbe != nil { in, out := &in.StartupProbe, &out.StartupProbe *out = new(ContainerProbeSpec) diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml 
b/bundle/manifests/nvidia.com_clusterpolicies.yaml index ae1e1e246..b46ae27f9 100644 --- a/bundle/manifests/nvidia.com_clusterpolicies.yaml +++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml @@ -939,6 +939,10 @@ spec: description: UseNvidiaDriverCRD indicates if the deployment of NVIDIA Driver is managed by the NVIDIADriver CRD type type: boolean + useOpenKernelModules: + description: UseOpenKernelModules indicates if the open GPU kernel + modules should be used + type: boolean usePrecompiled: description: UsePrecompiled indicates if deployment of NVIDIA Driver using pre-compiled modules is enabled diff --git a/bundle/manifests/nvidia.com_nvidiadrivers.yaml b/bundle/manifests/nvidia.com_nvidiadrivers.yaml index f5907eb3a..659748f1c 100644 --- a/bundle/manifests/nvidia.com_nvidiadrivers.yaml +++ b/bundle/manifests/nvidia.com_nvidiadrivers.yaml @@ -558,6 +558,10 @@ spec: minimum: 1 type: integer type: object + useOpenKernelModules: + description: UseOpenKernelModules indicates if the open GPU kernel + modules should be used + type: boolean usePrecompiled: description: UsePrecompiled indicates if deployment of NVIDIA Driver using pre-compiled modules is enabled diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml index ae1e1e246..b46ae27f9 100644 --- a/config/crd/bases/nvidia.com_clusterpolicies.yaml +++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml @@ -939,6 +939,10 @@ spec: description: UseNvidiaDriverCRD indicates if the deployment of NVIDIA Driver is managed by the NVIDIADriver CRD type type: boolean + useOpenKernelModules: + description: UseOpenKernelModules indicates if the open GPU kernel + modules should be used + type: boolean usePrecompiled: description: UsePrecompiled indicates if deployment of NVIDIA Driver using pre-compiled modules is enabled diff --git a/config/crd/bases/nvidia.com_nvidiadrivers.yaml b/config/crd/bases/nvidia.com_nvidiadrivers.yaml index f5907eb3a..659748f1c 100644 --- 
a/config/crd/bases/nvidia.com_nvidiadrivers.yaml +++ b/config/crd/bases/nvidia.com_nvidiadrivers.yaml @@ -558,6 +558,10 @@ spec: minimum: 1 type: integer type: object + useOpenKernelModules: + description: UseOpenKernelModules indicates if the open GPU kernel + modules should be used + type: boolean usePrecompiled: description: UsePrecompiled indicates if deployment of NVIDIA Driver using pre-compiled modules is enabled diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 45a6aec16..c8fa5e337 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -148,6 +148,8 @@ const ( PodControllerRevisionHashLabelKey = "controller-revision-hash" // DefaultCCModeEnvName is the name of the envvar for configuring default CC mode on all compatible GPUs on the node DefaultCCModeEnvName = "DEFAULT_CC_MODE" + // OpenKernelModulesEnabledEnvName is the name of the driver-container envvar for enabling open GPU kernel module support + OpenKernelModulesEnabledEnvName = "OPEN_KERNEL_MODULES_ENABLED" ) // ContainerProbe defines container probe types @@ -2859,12 +2861,16 @@ func transformDriverContainer(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicy if len(config.Driver.Args) > 0 { driverContainer.Args = config.Driver.Args } - // set/append environment variables for exporter container + // set/append environment variables for driver container if len(config.Driver.Env) > 0 { for _, env := range config.Driver.Env { setContainerEnv(driverContainer, env.Name, env.Value) } } + if config.Driver.OpenKernelModulesEnabled() { + setContainerEnv(driverContainer, OpenKernelModulesEnabledEnvName, "true") + } + // set container probe timeouts if config.Driver.StartupProbe != nil { setContainerProbe(driverContainer, config.Driver.StartupProbe, Startup) diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml index ae1e1e246..b46ae27f9 100644 --- 
a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies_crd.yaml @@ -939,6 +939,10 @@ spec: description: UseNvidiaDriverCRD indicates if the deployment of NVIDIA Driver is managed by the NVIDIADriver CRD type type: boolean + useOpenKernelModules: + description: UseOpenKernelModules indicates if the open GPU kernel + modules should be used + type: boolean usePrecompiled: description: UsePrecompiled indicates if deployment of NVIDIA Driver using pre-compiled modules is enabled diff --git a/deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml b/deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml index f5907eb3a..659748f1c 100644 --- a/deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml @@ -558,6 +558,10 @@ spec: minimum: 1 type: integer type: object + useOpenKernelModules: + description: UseOpenKernelModules indicates if the open GPU kernel + modules should be used + type: boolean usePrecompiled: description: UsePrecompiled indicates if deployment of NVIDIA Driver using pre-compiled modules is enabled diff --git a/manifests/state-driver/0500_daemonset.yaml b/manifests/state-driver/0500_daemonset.yaml index 1be175f4b..724ee0f5e 100644 --- a/manifests/state-driver/0500_daemonset.yaml +++ b/manifests/state-driver/0500_daemonset.yaml @@ -174,6 +174,10 @@ spec: # always use runc for driver containers - name: NVIDIA_VISIBLE_DEVICES value: void + {{- if .Driver.Spec.UseOpenKernelModules }} + - name: OPEN_KERNEL_MODULES_ENABLED + value: "true" + {{- end }} {{- if and (.Openshift) (.Runtime.OpenshiftVersion) }} - name: OPENSHIFT_VERSION value: {{ .Runtime.OpenshiftVersion | quote }}