From 297cc12a25a29ceee56849b3ae50438f8cc73e21 Mon Sep 17 00:00:00 2001 From: Vishesh Tanksale Date: Thu, 27 Jun 2024 23:47:14 +0000 Subject: [PATCH] Adding transformation for kata-manager daemonset for supporting CRI-O Signed-off-by: Vishesh Tanksale Co-authored-by: Christopher Desiniotis --- controllers/object_controls.go | 83 ++++++++++------------ controllers/transforms_test.go | 126 +++++++++++++++++++++++++++------ 2 files changed, 144 insertions(+), 65 deletions(-) diff --git a/controllers/object_controls.go b/controllers/object_controls.go index a287dd780..376302fb7 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -1238,15 +1238,43 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n // configure runtime runtime := n.runtime.String() - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "RUNTIME", runtime) + err = transformForRuntime(obj, config, runtime, "nvidia-container-toolkit-ctr") + if err != nil { + return fmt.Errorf("error transforming toolkit daemonset : %w", err) + } + + // Update CRI-O hooks path to use default path for non OCP cases + if n.openshift == "" && n.runtime == gpuv1.CRIO { + for index, volume := range obj.Spec.Template.Spec.Volumes { + if volume.Name == "crio-hooks" { + obj.Spec.Template.Spec.Volumes[index].HostPath.Path = "/usr/share/containers/oci/hooks.d" + } + } + } + return nil +} + +func transformForRuntime(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, runtime string, containerName string) error { + var mainContainer *corev1.Container + for i, ctr := range obj.Spec.Template.Spec.Containers { + if ctr.Name == containerName { + mainContainer = &obj.Spec.Template.Spec.Containers[i] + break + } + } + if mainContainer == nil { + return fmt.Errorf("failed to find main container %q", containerName) + } + + setContainerEnv(mainContainer, "RUNTIME", runtime) if runtime == gpuv1.Containerd.String() { // Set the runtime class name that is to be configured for containerd - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_RUNTIME_CLASS", getRuntimeClass(config)) + setContainerEnv(mainContainer, "CONTAINERD_RUNTIME_CLASS", getRuntimeClass(config)) } // setup mounts for runtime config file - runtimeConfigFile, err := getRuntimeConfigFile(&(obj.Spec.Template.Spec.Containers[0]), runtime) + runtimeConfigFile, err := getRuntimeConfigFile(mainContainer, runtime) if err != nil { return fmt.Errorf("error getting path to runtime config file: %v", err) } @@ -1262,19 +1290,19 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n configEnvvarName = "CRIO_CONFIG" } - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), configEnvvarName, DefaultRuntimeConfigTargetDir+sourceConfigFileName) + setContainerEnv(mainContainer, configEnvvarName, DefaultRuntimeConfigTargetDir+sourceConfigFileName) volMountConfigName := fmt.Sprintf("%s-config", runtime) volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir} - obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountConfig) + mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountConfig) configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeConfigFile), Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}} obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol) // setup mounts for runtime socket file - runtimeSocketFile, err := getRuntimeSocketFile(&(obj.Spec.Template.Spec.Containers[0]), runtime) + runtimeSocketFile, err := getRuntimeSocketFile(mainContainer, runtime) if err != nil { - return fmt.Errorf("error getting path to runtime socket: %v", err) + return fmt.Errorf("error getting path to runtime socket: %w", err) } if runtimeSocketFile != "" { sourceSocketFileName := path.Base(runtimeSocketFile) @@ -1285,24 +1313,15 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n } else if runtime == gpuv1.Docker.String() { socketEnvvarName = "DOCKER_SOCKET" } - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), socketEnvvarName, DefaultRuntimeSocketTargetDir+sourceSocketFileName) + setContainerEnv(mainContainer, socketEnvvarName, DefaultRuntimeSocketTargetDir+sourceSocketFileName) volMountSocketName := fmt.Sprintf("%s-socket", runtime) volMountSocket := corev1.VolumeMount{Name: volMountSocketName, MountPath: DefaultRuntimeSocketTargetDir} - obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountSocket) + mainContainer.VolumeMounts = append(mainContainer.VolumeMounts, volMountSocket) socketVol := corev1.Volume{Name: volMountSocketName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeSocketFile)}}} obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, socketVol) } - - // Update CRI-O hooks path to use default path for non OCP cases - if n.openshift == "" && n.runtime == gpuv1.CRIO { - for index, volume := range obj.Spec.Template.Spec.Volumes { - if volume.Name == "crio-hooks" { - obj.Spec.Template.Spec.Volumes[index].HostPath.Path = "/usr/share/containers/oci/hooks.d" - } - } - } return nil } @@ -1827,34 +1846,10 @@ func TransformKataManager(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec // setup mounts for runtime config file runtime := n.runtime.String() - runtimeConfigFile, err := getRuntimeConfigFile(&(obj.Spec.Template.Spec.Containers[0]), runtime) + err = transformForRuntime(obj, config, runtime, "nvidia-kata-manager") if err != nil { - return fmt.Errorf("error getting path to runtime config file: %v", err) + return fmt.Errorf("error transforming kata-manager daemonset : %w", err) } - sourceConfigFileName := path.Base(runtimeConfigFile) - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_CONFIG", filepath.Join(DefaultRuntimeConfigTargetDir, sourceConfigFileName)) - - volMountConfigName := fmt.Sprintf("%s-config", runtime) - volMountConfig := corev1.VolumeMount{Name: volMountConfigName, MountPath: DefaultRuntimeConfigTargetDir} - obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountConfig) - - configVol := corev1.Volume{Name: volMountConfigName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeConfigFile), Type: newHostPathType(corev1.HostPathDirectoryOrCreate)}}} - obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, configVol) - - // setup mounts for runtime socket file - runtimeSocketFile, err := getRuntimeSocketFile(&(obj.Spec.Template.Spec.Containers[0]), runtime) - if err != nil { - return fmt.Errorf("error getting path to runtime socket: %v", err) - } - sourceSocketFileName := path.Base(runtimeSocketFile) - setContainerEnv(&(obj.Spec.Template.Spec.Containers[0]), "CONTAINERD_SOCKET", filepath.Join(DefaultRuntimeSocketTargetDir, sourceSocketFileName)) - - volMountSocketName := fmt.Sprintf("%s-socket", runtime) - volMountSocket := corev1.VolumeMount{Name: volMountSocketName, MountPath: DefaultRuntimeSocketTargetDir} - obj.Spec.Template.Spec.Containers[0].VolumeMounts = append(obj.Spec.Template.Spec.Containers[0].VolumeMounts, volMountSocket) - - socketVol := corev1.Volume{Name: volMountSocketName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(runtimeSocketFile)}}} - obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, socketVol) // Compute hash of kata manager config and add an annotation with the value. // If the kata config changes, a new revision of the daemonset will be diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go index cd8d397c1..9e2185296 100644 --- a/controllers/transforms_test.go +++ b/controllers/transforms_test.go @@ -17,12 +17,15 @@ package controllers import ( + "path/filepath" "testing" "github.com/stretchr/testify/require" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" ) // Daemonset is a DaemonSet wrapper used for testing @@ -40,10 +43,10 @@ func NewDaemonset() Daemonset { Template: corev1.PodTemplateSpec{ Spec: corev1.PodSpec{ InitContainers: []corev1.Container{ - {Name: "foo", Image: "foo"}, + {Name: "initCtr", Image: "initCtrImage"}, }, Containers: []corev1.Container{ - {Name: "foo", Image: "foo"}, + {Name: "mainCtr", Image: "mainCtrImage"}, }, }, }, @@ -52,12 +55,13 @@ func NewDaemonset() Daemonset { return Daemonset{ds} } -func (d Daemonset) WithHostPathVolume(name string, path string) Daemonset { +func (d Daemonset) WithHostPathVolume(name string, path string, hostPathType *corev1.HostPathType) Daemonset { volume := corev1.Volume{ Name: name, VolumeSource: corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: path, + Type: hostPathType, }, }, } @@ -65,6 +69,33 @@ func (d Daemonset) WithHostPathVolume(name string, path string) Daemonset { return d } +func (d Daemonset) WithVolumeMount(name string, path string, containerName string) Daemonset { + var ctr *corev1.Container + for i, c := range d.Spec.Template.Spec.InitContainers { + if c.Name == containerName { + ctr = &d.Spec.Template.Spec.InitContainers[i] + break + } + } + for i, c := range d.Spec.Template.Spec.Containers { + if c.Name == containerName { + ctr = &d.Spec.Template.Spec.Containers[i] + break + } + } + + if ctr == nil { + return d + } + + volumeMount := corev1.VolumeMount{ + Name: name, + MountPath: path, + } + ctr.VolumeMounts = append(ctr.VolumeMounts, volumeMount) + return d +} + func (d Daemonset) WithEnvVar(name string, value string) Daemonset { for index := range d.Spec.Template.Spec.InitContainers { ctr := &d.Spec.Template.Spec.InitContainers[index] @@ -77,6 +108,16 @@ func (d Daemonset) WithEnvVar(name string, value string) Daemonset { return d } +func (d Daemonset) WithEnvVarForCtr(name string, value string, containerName string) Daemonset { + for index, c := range d.Spec.Template.Spec.Containers { + if c.Name == containerName { + ctr := &d.Spec.Template.Spec.Containers[index] + ctr.Env = append(ctr.Env, corev1.EnvVar{Name: name, Value: value}) + } + } + return d +} + func (d Daemonset) WithInitContainer(container corev1.Container) Daemonset { d.Spec.Template.Spec.InitContainers = append(d.Spec.Template.Spec.InitContainers, container) return d @@ -101,39 +142,39 @@ func TestTransformForHostRoot(t *testing.T) { description: "empty host root is a no-op", hostRoot: "", input: NewDaemonset(). - WithHostPathVolume(hostRootVolumeName, "/"). - WithHostPathVolume(hostDevCharVolumeName, "/"), + WithHostPathVolume(hostRootVolumeName, "/", nil). + WithHostPathVolume(hostDevCharVolumeName, "/", nil), expectedOutput: NewDaemonset(). - WithHostPathVolume(hostRootVolumeName, "/"). - WithHostPathVolume(hostDevCharVolumeName, "/"), + WithHostPathVolume(hostRootVolumeName, "/", nil). + WithHostPathVolume(hostDevCharVolumeName, "/", nil), }, { description: "custom host root with host-root and host-dev-char volumes", hostRoot: "/custom-root", input: NewDaemonset(). - WithHostPathVolume(hostRootVolumeName, "/"). - WithHostPathVolume(hostDevCharVolumeName, "/"), + WithHostPathVolume(hostRootVolumeName, "/", nil). + WithHostPathVolume(hostDevCharVolumeName, "/", nil), expectedOutput: NewDaemonset(). - WithHostPathVolume(hostRootVolumeName, "/custom-root"). - WithHostPathVolume(hostDevCharVolumeName, "/custom-root/dev/char"). + WithHostPathVolume(hostRootVolumeName, "/custom-root", nil). + WithHostPathVolume(hostDevCharVolumeName, "/custom-root/dev/char", nil). WithEnvVar(HostRootEnvName, "/custom-root"), }, { description: "custom host root with host-root volume", hostRoot: "/custom-root", input: NewDaemonset(). - WithHostPathVolume(hostRootVolumeName, "/"), + WithHostPathVolume(hostRootVolumeName, "/", nil), expectedOutput: NewDaemonset(). - WithHostPathVolume(hostRootVolumeName, "/custom-root"). + WithHostPathVolume(hostRootVolumeName, "/custom-root", nil). WithEnvVar(HostRootEnvName, "/custom-root"), }, { description: "custom host root with host-dev-char volume", hostRoot: "/custom-root", input: NewDaemonset(). - WithHostPathVolume(hostDevCharVolumeName, "/"), + WithHostPathVolume(hostDevCharVolumeName, "/", nil), expectedOutput: NewDaemonset(). - WithHostPathVolume(hostDevCharVolumeName, "/custom-root/dev/char"), + WithHostPathVolume(hostDevCharVolumeName, "/custom-root/dev/char", nil), }, } @@ -163,7 +204,7 @@ func TestTransformForDriverInstallDir(t *testing.T) { description: "empty driverInstallDir is a no-op", driverInstallDir: "", input: NewDaemonset(). - WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver"). + WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver", nil). WithInitContainer( corev1.Container{ Name: "driver-validation", @@ -172,7 +213,7 @@ func TestTransformForDriverInstallDir(t *testing.T) { }, }), expectedOutput: NewDaemonset(). - WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver"). + WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver", nil). WithInitContainer( corev1.Container{ Name: "driver-validation", @@ -185,15 +226,15 @@ func TestTransformForDriverInstallDir(t *testing.T) { description: "custom driverInstallDir with driver-install-dir volume", driverInstallDir: "/custom-root", input: NewDaemonset(). - WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver"), + WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver", nil), expectedOutput: NewDaemonset(). - WithHostPathVolume(driverInstallDirVolumeName, "/custom-root"), + WithHostPathVolume(driverInstallDirVolumeName, "/custom-root", nil), }, { description: "custom driverInstallDir with driver-install-dir volume and driver-validation initContainer", driverInstallDir: "/custom-root", input: NewDaemonset(). - WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver"). + WithHostPathVolume(driverInstallDirVolumeName, "/run/nvidia/driver", nil). WithInitContainer( corev1.Container{ Name: "driver-validation", @@ -202,7 +243,7 @@ func TestTransformForDriverInstallDir(t *testing.T) { }, }), expectedOutput: NewDaemonset(). - WithHostPathVolume(driverInstallDirVolumeName, "/custom-root"). + WithHostPathVolume(driverInstallDirVolumeName, "/custom-root", nil). WithInitContainer( corev1.Container{ Name: "driver-validation", @@ -224,3 +265,46 @@ func TestTransformForDriverInstallDir(t *testing.T) { }) } } + +func TestTransformForRuntime(t *testing.T) { + testCases := []struct { + description string + runtime gpuv1.Runtime + input Daemonset + expectedOutput Daemonset + }{ + { + description: "containerd", + runtime: gpuv1.Containerd, + input: NewDaemonset(), + expectedOutput: NewDaemonset(). + WithHostPathVolume("containerd-config", filepath.Dir(DefaultContainerdConfigFile), newHostPathType(corev1.HostPathDirectoryOrCreate)). + WithHostPathVolume("containerd-socket", filepath.Dir(DefaultContainerdSocketFile), nil). + WithVolumeMount("containerd-config", DefaultRuntimeConfigTargetDir, "mainCtr"). + WithVolumeMount("containerd-socket", DefaultRuntimeSocketTargetDir, "mainCtr"). + WithEnvVarForCtr("RUNTIME", gpuv1.Containerd.String(), "mainCtr"). + WithEnvVarForCtr("CONTAINERD_RUNTIME_CLASS", DefaultRuntimeClass, "mainCtr"). + WithEnvVarForCtr("CONTAINERD_CONFIG", filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultContainerdConfigFile)), "mainCtr"). + WithEnvVarForCtr("CONTAINERD_SOCKET", filepath.Join(DefaultRuntimeSocketTargetDir, filepath.Base(DefaultContainerdSocketFile)), "mainCtr"), + }, + { + description: "crio", + runtime: gpuv1.CRIO, + input: NewDaemonset(), + expectedOutput: NewDaemonset(). + WithHostPathVolume("crio-config", filepath.Dir(DefaultCRIOConfigFile), newHostPathType(corev1.HostPathDirectoryOrCreate)). + WithVolumeMount("crio-config", DefaultRuntimeConfigTargetDir, "mainCtr"). + WithEnvVarForCtr("RUNTIME", gpuv1.CRIO.String(), "mainCtr"). + WithEnvVarForCtr("CRIO_CONFIG", filepath.Join(DefaultRuntimeConfigTargetDir, filepath.Base(DefaultCRIOConfigFile)), "mainCtr"), + }, + } + + cp := &gpuv1.ClusterPolicySpec{Operator: gpuv1.OperatorSpec{RuntimeClass: DefaultRuntimeClass}} + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + err := transformForRuntime(tc.input.DaemonSet, cp, tc.runtime.String(), "mainCtr") + require.NoError(t, err) + require.EqualValues(t, tc.expectedOutput, tc.input) + }) + } +}