From 40378cabd37b2946b23e8dda31ca43648da6de62 Mon Sep 17 00:00:00 2001 From: Moshe Levi Date: Thu, 29 Jun 2023 10:22:32 +0300 Subject: [PATCH 1/4] add support for Dynamic Resource Allocation Signed-off-by: Moshe Levi --- pkg/kubeletclient/kubeletclient.go | 41 +++++++++++--- pkg/kubeletclient/kubeletclient_test.go | 71 ++++++++++++++++++++++--- 2 files changed, 97 insertions(+), 15 deletions(-) diff --git a/pkg/kubeletclient/kubeletclient.go b/pkg/kubeletclient/kubeletclient.go index d0438f012..60876403f 100644 --- a/pkg/kubeletclient/kubeletclient.go +++ b/pkg/kubeletclient/kubeletclient.go @@ -21,6 +21,7 @@ import ( "net/url" "os" "path/filepath" + "strings" "time" "golang.org/x/net/context" @@ -137,19 +138,45 @@ func (rc *kubeletClient) GetPodResourceMap(pod *v1.Pod) (map[string]*types.Resou for _, pr := range rc.resources { if pr.Name == name && pr.Namespace == ns { for _, cnt := range pr.Containers { - for _, dev := range cnt.Devices { - if rInfo, ok := resourceMap[dev.ResourceName]; ok { - rInfo.DeviceIDs = append(rInfo.DeviceIDs, dev.DeviceIds...) - } else { - resourceMap[dev.ResourceName] = &types.ResourceInfo{DeviceIDs: dev.DeviceIds} - } - } + rc.getDevicePluginResources(cnt.Devices, resourceMap) + rc.getDRAResources(cnt.DynamicResources, resourceMap) } } } return resourceMap, nil } +func (rc *kubeletClient) getDevicePluginResources(devices []*podresourcesapi.ContainerDevices, resourceMap map[string]*types.ResourceInfo) { + for _, dev := range devices { + if rInfo, ok := resourceMap[dev.ResourceName]; ok { + rInfo.DeviceIDs = append(rInfo.DeviceIDs, dev.DeviceIds...) 
+ } else { + resourceMap[dev.ResourceName] = &types.ResourceInfo{DeviceIDs: dev.DeviceIds} + } + } +} + +func (rc *kubeletClient) getDRAResources(dynamicResources []*podresourcesapi.DynamicResource, resourceMap map[string]*types.ResourceInfo) { + for _, dynamicResource := range dynamicResources { + var deviceIDs []string + for _, claimResource := range dynamicResource.ClaimResources { + for _, cdiDevice := range claimResource.CDIDevices { + res := strings.Split(cdiDevice.Name, "=") + if len(res) == 2 { + deviceIDs = append(deviceIDs, res[1]) + } else { + logging.Errorf("GetPodResourceMap: Invalid CDI format") + } + } + } + if rInfo, ok := resourceMap[dynamicResource.ClassName]; ok { + rInfo.DeviceIDs = append(rInfo.DeviceIDs, deviceIDs...) + } else { + resourceMap[dynamicResource.ClassName] = &types.ResourceInfo{DeviceIDs: deviceIDs} + } + } +} + func hasKubeletAPIEndpoint(url *url.URL) bool { // Check for kubelet resource API socket file if _, err := os.Stat(url.Path); err != nil { diff --git a/pkg/kubeletclient/kubeletclient_test.go b/pkg/kubeletclient/kubeletclient_test.go index 668055176..f8c493eee 100644 --- a/pkg/kubeletclient/kubeletclient_test.go +++ b/pkg/kubeletclient/kubeletclient_test.go @@ -60,10 +60,6 @@ func (m *fakeResourceServer) Get(_ context.Context, _ *podresourcesapi.GetPodRes } func (m *fakeResourceServer) List(_ context.Context, _ *podresourcesapi.ListPodResourcesRequest) (*podresourcesapi.ListPodResourcesResponse, error) { - podName := "pod-name" - podNamespace := "pod-namespace" - containerName := "container-name" - devs := []*podresourcesapi.ContainerDevices{ { ResourceName: "resource", @@ -71,18 +67,49 @@ func (m *fakeResourceServer) List(_ context.Context, _ *podresourcesapi.ListPodR }, } + cdiDevices := []*podresourcesapi.CDIDevice{ + { + Name: "cdi-kind=cdi-resource", + }, + } + + claimsResource := []*podresourcesapi.ClaimResource{ + { + CDIDevices: cdiDevices, + }, + } + + dynamicResources := []*podresourcesapi.DynamicResource{ + { + 
ClassName: "resource-class", + ClaimName: "resource-claim", + ClaimNamespace: "dynamic-resource-pod-namespace", + ClaimResources: claimsResource, + }, + } + resp := &podresourcesapi.ListPodResourcesResponse{ PodResources: []*podresourcesapi.PodResources{ { - Name: podName, - Namespace: podNamespace, + Name: "pod-name", + Namespace: "pod-namespace", Containers: []*podresourcesapi.ContainerResources{ { - Name: containerName, + Name: "container-name", Devices: devs, }, }, }, + { + Name: "dynamic-resource-pod-name", + Namespace: "dynamic-resource-pod-namespace", + Containers: []*podresourcesapi.ContainerResources{ + { + Name: "dynamic-resource-container-name", + DynamicResources: dynamicResources, + }, + }, + }, }, } return resp, nil @@ -188,7 +215,7 @@ var _ = Describe("Kubelet resource endpoint data read operations", func() { }) }) Context("GetPodResourceMap() with valid pod name and namespace", func() { - It("should return no error", func() { + It("should return no error with device plugin resource", func() { podUID := k8sTypes.UID("970a395d-bb3b-11e8-89df-408d5c537d23") fakePod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -216,6 +243,34 @@ var _ = Describe("Kubelet resource endpoint data read operations", func() { Expect(resourceMap).To(Equal(outputRMap)) }) + It("should return no error with dynamic resource", func() { + podUID := k8sTypes.UID("9f94e27b-4233-43d6-bd10-f73b4de6f456") + fakePod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dynamic-resource-pod-name", + Namespace: "dynamic-resource-pod-namespace", + UID: podUID, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "dynamic-resource-container-name", + }, + }, + }, + } + client, err := getKubeletClient(testKubeletSocket) + Expect(err).NotTo(HaveOccurred()) + + outputRMap := map[string]*mtypes.ResourceInfo{ + "resource-class": {DeviceIDs: []string{"cdi-resource"}}, + } + resourceMap, err := client.GetPodResourceMap(fakePod) + Expect(err).NotTo(HaveOccurred()) + 
Expect(resourceMap).ShouldNot(BeNil()) + Expect(resourceMap).To(Equal(outputRMap)) + }) + It("should return an error with garbage socket value", func() { u, err := url.Parse("/badfilepath!?//") Expect(err).NotTo(HaveOccurred()) From 202533cf1dc69624549a291c148f5a63d78ebc72 Mon Sep 17 00:00:00 2001 From: Moshe Levi Date: Mon, 6 Nov 2023 13:24:40 +0200 Subject: [PATCH 2/4] support for Dynamic Resource Allocation doc update Signed-off-by: Moshe Levi --- docs/how-to-use.md | 121 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 117 insertions(+), 4 deletions(-) diff --git a/docs/how-to-use.md b/docs/how-to-use.md index 566275c4b..07f02254f 100644 --- a/docs/how-to-use.md +++ b/docs/how-to-use.md @@ -511,7 +511,7 @@ spec: EOF ``` -We can then create a pod which uses the `default-route` key in the JSON formatted `k8s.v1.cni.cncf.io/networks` annotation. +We can then create a pod which uses the `default-route` key in the JSON formatted `k8s.v1.cni.cncf.io/networks` annotation. ``` cat < Date: Mon, 18 Mar 2024 09:57:00 +0100 Subject: [PATCH 3/4] Add DRA Integration E2E test Signed-off-by: Vasilis Remmas --- .github/workflows/kind-e2e.yml | 4 ++ .gitignore | 1 + e2e/get_tools.sh | 1 + e2e/setup_cluster.sh | 14 +++++ e2e/templates/dra-integration.yml.j2 | 49 +++++++++++++++++ e2e/templates/multus-daemonset-thick.yml.j2 | 6 +++ e2e/test-dra-integration.sh | 59 +++++++++++++++++++++ 7 files changed, 134 insertions(+) create mode 100644 e2e/templates/dra-integration.yml.j2 create mode 100755 e2e/test-dra-integration.sh diff --git a/.github/workflows/kind-e2e.yml b/.github/workflows/kind-e2e.yml index 72539edda..ebe92e1bb 100644 --- a/.github/workflows/kind-e2e.yml +++ b/.github/workflows/kind-e2e.yml @@ -85,6 +85,10 @@ jobs: working-directory: ./e2e run: ./test-default-route1.sh + - name: Test DRA integration + working-directory: ./e2e + run: ./test-dra-integration.sh + - name: Export kind logs if: always() run: | diff --git a/.gitignore b/.gitignore index 
186b9dfa0..af074ab8c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ bin/ e2e/bin/ e2e/yamls/ +e2e/repos/ # GOPATH created by the build script gopath/ diff --git a/e2e/get_tools.sh b/e2e/get_tools.sh index fbc5b4057..c8dbf292e 100755 --- a/e2e/get_tools.sh +++ b/e2e/get_tools.sh @@ -13,3 +13,4 @@ curl -Lo ./bin/koko https://github.com/redhat-nfvpe/koko/releases/download/v0.83 chmod +x ./bin/koko curl -Lo ./bin/jq https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 chmod +x ./bin/jq +wget -qO- https://get.helm.sh/helm-v3.14.3-linux-amd64.tar.gz | tar xvzf - --strip-components=1 -C ./bin linux-amd64/helm diff --git a/e2e/setup_cluster.sh b/e2e/setup_cluster.sh index 40286628a..6963405bc 100755 --- a/e2e/setup_cluster.sh +++ b/e2e/setup_cluster.sh @@ -34,7 +34,21 @@ nodes: nodeRegistration: kubeletExtraArgs: pod-manifest-path: "/etc/kubernetes/manifests/" + feature-gates: "DynamicResourceAllocation=true,KubeletPodResourcesDynamicResources=true" - role: worker +# Required by DRA Integration +## +featureGates: + DynamicResourceAllocation: true +runtimeConfig: + "api/alpha": "true" +containerdConfigPatches: +# Enable CDI as described in +# https://github.com/container-orchestrated-devices/container-device-interface#containerd-configuration +- |- + [plugins."io.containerd.grpc.v1.cri"] + enable_cdi = true +## EOF # load multus image from container host to kind node diff --git a/e2e/templates/dra-integration.yml.j2 b/e2e/templates/dra-integration.yml.j2 new file mode 100644 index 000000000..33334d900 --- /dev/null +++ b/e2e/templates/dra-integration.yml.j2 @@ -0,0 +1,49 @@ +--- +apiVersion: resource.k8s.io/v1alpha2 +kind: ResourceClaimTemplate +metadata: + name: gpu.example.com +spec: + spec: + resourceClassName: gpu.example.com +--- +apiVersion: "k8s.cni.cncf.io/v1" +kind: NetworkAttachmentDefinition +metadata: + name: dra-net + annotations: + k8s.v1.cni.cncf.io/resourceName: gpu.example.com +spec: + config: '{ + "cniVersion": "{{ CNI_VERSION }}", 
+ "plugins": [{ + "name": "mynet", + "type": "dummy", + "ipam": { + "type": "host-local", + "subnet": "10.1.2.0/24" + } + }] + }' +--- +apiVersion: v1 +kind: Pod +metadata: + name: dra-integration + labels: + app: dra-integration + annotations: + k8s.v1.cni.cncf.io/networks: default/dra-net +spec: + containers: + - name: ctr0 + image: ubuntu:22.04 + command: ["bash", "-c"] + args: ["export; sleep 9999"] + resources: + claims: + - name: gpu + resourceClaims: + - name: gpu + source: + resourceClaimTemplateName: gpu.example.com diff --git a/e2e/templates/multus-daemonset-thick.yml.j2 b/e2e/templates/multus-daemonset-thick.yml.j2 index 035304188..d839cd9e6 100644 --- a/e2e/templates/multus-daemonset-thick.yml.j2 +++ b/e2e/templates/multus-daemonset-thick.yml.j2 @@ -158,6 +158,9 @@ spec: - name: multus-daemon-config mountPath: /etc/cni/net.d/multus.d readOnly: true + - name: kubelet-pod-resources + mountPath: /var/lib/kubelet/pod-resources + readOnly: true env: - name: MULTUS_NODE_NAME valueFrom: @@ -187,6 +190,9 @@ spec: - name: cnibin hostPath: path: /opt/cni/bin + - name: kubelet-pod-resources + hostPath: + path: /var/lib/kubelet/pod-resources - name: multus-daemon-config configMap: name: multus-daemon-config diff --git a/e2e/test-dra-integration.sh b/e2e/test-dra-integration.sh new file mode 100755 index 000000000..997996cb4 --- /dev/null +++ b/e2e/test-dra-integration.sh @@ -0,0 +1,59 @@ +#!/bin/sh +set -o errexit + +export PATH=${PATH}:./bin + +# This test is using an example implementation of a DRA driver. This driver is mocking GPU resources. In our test we +# don't care about what these resources are. We want to ensure that such resource is correctly passed in the Pod using +# Multus configurations. A couple of notes: +# - We explicitly don't pin the revision of the dra-example-driver to a specific commit to ensure that the integration +# continues to work even when the dra-example-driver is updated (which may also indicate API changes on the DRA). 
+# - The chart and the latest image are not published anywhere, therefore we have to build locally. This leads to slower +# e2e suite runs. +echo "installing dra-example-driver" +repo_path="repos/dra-example-driver" + +rm -rf $repo_path || true +git clone https://github.com/kubernetes-sigs/dra-example-driver.git ${repo_path} +${repo_path}/demo/build-driver.sh +KIND_CLUSTER_NAME=kind ${repo_path}/demo/scripts/load-driver-image-into-kind.sh +chart_path=${repo_path}/deployments/helm/dra-example-driver/ +overriden_values_path=${chart_path}/overriden_values.yaml + +# With the thick plugin, in kind, the primary network on the control plane is not always working as expected. The pods +# sometimes are not able to communicate with the control plane and the error looks like this: +# failed to list *v1alpha2.PodSchedulingContext: Get "https://10.96.0.1:443/apis/resource.k8s.io/v1alpha2/podschedulingcontexts?limit=500&resourceVersion=0": dial tcp 10.96.0.1:443: connect: no route to host +# We override the values here to schedule the controller on the worker nodes where the network is working as expected. +cat <<EOF >> ${overriden_values_path} +controller: + nodeSelector: null + tolerations: null +EOF + +helm install \ + -n dra-example-driver \ + --create-namespace \ + -f ${overriden_values_path} \ + dra-example-driver \ + ${chart_path} + +echo "installing testing pods" +kubectl create -f yamls/dra-integration.yml +kubectl wait --for=condition=ready -l app=dra-integration --timeout=300s pod + +echo "check dra-integration pod for DRA injected environment variable" + +# We can validate that the resource is correctly injected by checking an environment variable this dra driver is injecting +# in the Pod. +# https://github.com/kubernetes-sigs/dra-example-driver/blob/be2b8b1db47b8c757440e955ce5ced88c23bfe86/cmd/dra-example-kubeletplugin/cdi.go#L71C20-L71C44 +env_variable=$(kubectl exec dra-integration -- bash -c "echo \$DRA_RESOURCE_DRIVER_NAME | grep gpu.resource.example.com") +if [ $? 
-eq 0 ];then + echo "dra-integration pod has DRA injected environment variable" +else + echo "dra-integration pod doesn't have DRA injected environment variable" + exit 1 +fi + +echo "cleanup resources" +kubectl delete -f yamls/dra-integration.yml +helm uninstall -n dra-example-driver dra-example-driver From c9d411c2c2658c7a3be30223fa05a7d2a59d0174 Mon Sep 17 00:00:00 2001 From: Vasilis Remmas Date: Mon, 13 May 2024 13:43:45 +0200 Subject: [PATCH 4/4] Add warning in docs that DRA is alpha and in preview Signed-off-by: Vasilis Remmas --- docs/how-to-use.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/docs/how-to-use.md b/docs/how-to-use.md index 07f02254f..78ffabc9a 100644 --- a/docs/how-to-use.md +++ b/docs/how-to-use.md @@ -637,9 +637,15 @@ If you wish to have auto configuration use the `readinessindicatorfile` in the c ### Run pod with network annotation and Dynamic Resource Allocation driver -Dynamic Resource Allocation is alternative mechanism to device plugin which allow to requests pod and container resources. The feature is alpha in k8s 1.27. +> :warning: Dynamic Resource Allocation (DRA) is [currently an alpha feature](https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/), +> and is subject to change. Please consider this functionality as a preview. The architecture and usage of DRA in +> Multus CNI may change in the future as this technology matures. -The following sections describe how to use DRA with multus and Nvidia DRA driver. Other DRA networking driver vendors should follow similar concepts to make use of multus DRA support. +Dynamic Resource Allocation is an alternative mechanism to device plugins which allows requesting pod and container +resources. + +The following sections describe how to use DRA with multus and NVIDIA DRA driver. Other DRA networking driver vendors +should follow similar concepts to make use of multus DRA support. 
#### Prerequisite @@ -650,9 +656,10 @@ The following sections describe how to use DRA with multus and Nvidia DRA driver #### Install DRA driver -The current example uses Nvidia DRA driver for networking. This DRA driver is not publicly available. +The current example uses NVIDIA DRA driver for networking. This DRA driver is not publicly available. An alternative to +this DRA driver is available at [dra-example-driver](https://github.com/kubernetes-sigs/dra-example-driver). -#### Create dynamic resource class with nvidia network DRA driver +#### Create dynamic resource class with NVIDIA network DRA driver The `ResourceClass` defines the resource pool of `sf-pool-1`.