From 92ad2afed9172a38f707548865f3f47553622d6c Mon Sep 17 00:00:00 2001 From: Tobias Giese Date: Mon, 25 Nov 2024 10:33:20 +0100 Subject: [PATCH] Get rid of kubectl dependency in container image We should not use the kubectl binary inside our containers because of potential CVEs. To get rid of the binary we can use the crdutil from the k8s-operator-libs. Signed-off-by: Tobias Giese --- Makefile | 5 + cmd/apply-crds/main.go | 27 +++ .../gpu-operator/templates/cleanup_crd.yaml | 3 +- .../gpu-operator/templates/upgrade_crd.yaml | 11 +- deployments/gpu-operator/values.yaml | 1 + docker/Dockerfile | 9 +- go.mod | 11 +- go.sum | 12 +- .../k8s-operator-libs/pkg/crdutil/README.md | 61 ++++++ .../k8s-operator-libs/pkg/crdutil/crdutil.go | 205 ++++++++++++++++++ vendor/k8s.io/client-go/util/retry/OWNERS | 4 + vendor/k8s.io/client-go/util/retry/util.go | 105 +++++++++ vendor/modules.txt | 9 +- 13 files changed, 437 insertions(+), 26 deletions(-) create mode 100644 cmd/apply-crds/main.go create mode 100644 vendor/github.com/NVIDIA/k8s-operator-libs/pkg/crdutil/README.md create mode 100644 vendor/github.com/NVIDIA/k8s-operator-libs/pkg/crdutil/crdutil.go create mode 100644 vendor/k8s.io/client-go/util/retry/OWNERS create mode 100644 vendor/k8s.io/client-go/util/retry/util.go diff --git a/Makefile b/Makefile index ee7eaabcf..e08eb1b8a 100644 --- a/Makefile +++ b/Makefile @@ -86,6 +86,11 @@ gpu-operator: CGO_ENABLED=0 GOOS=$(GOOS) \ go build -ldflags "-s -w -X $(VERSION_PKG).gitCommit=$(GIT_COMMIT) -X $(VERSION_PKG).version=$(VERSION)" -o gpu-operator ./cmd/gpu-operator/... +# Build apply-crds binary +apply-crds: + CGO_ENABLED=0 GOOS=$(GOOS) \ + go build -ldflags "-s -w -X $(VERSION_PKG).gitCommit=$(GIT_COMMIT) -X $(VERSION_PKG).version=$(VERSION)" -o apply-crds ./cmd/apply-crds/... + # Run against the configured Kubernetes cluster in ~/.kube/config run: generate check manifests go run ./cmd/gpu-operator/... 
diff --git a/cmd/apply-crds/main.go b/cmd/apply-crds/main.go new file mode 100644 index 000000000..1e4daec89 --- /dev/null +++ b/cmd/apply-crds/main.go @@ -0,0 +1,27 @@ +/* + * Copyright (c), NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Package main uses the crdutil package to ensure CRDs are created +// or updated in the cluster during Helm chart installation. +package main + +import ( + "github.com/NVIDIA/k8s-operator-libs/pkg/crdutil" +) + +func main() { + crdutil.EnsureCRDsCmd() +} diff --git a/deployments/gpu-operator/templates/cleanup_crd.yaml b/deployments/gpu-operator/templates/cleanup_crd.yaml index fd0c1b799..44f8e5f24 100644 --- a/deployments/gpu-operator/templates/cleanup_crd.yaml +++ b/deployments/gpu-operator/templates/cleanup_crd.yaml @@ -32,11 +32,12 @@ spec: {{- end }} containers: - name: cleanup-crd - image: {{ include "gpu-operator.fullimage" . 
}} + image: {{ .Values.operator.imageCleanupCRD }} imagePullPolicy: {{ .Values.operator.imagePullPolicy }} command: - /bin/sh - -c + args: - > kubectl delete clusterpolicy cluster-policy; kubectl delete crd clusterpolicies.nvidia.com; diff --git a/deployments/gpu-operator/templates/upgrade_crd.yaml b/deployments/gpu-operator/templates/upgrade_crd.yaml index 6552558af..8811c2887 100644 --- a/deployments/gpu-operator/templates/upgrade_crd.yaml +++ b/deployments/gpu-operator/templates/upgrade_crd.yaml @@ -83,13 +83,12 @@ spec: image: {{ include "gpu-operator.fullimage" . }} imagePullPolicy: {{ .Values.operator.imagePullPolicy }} command: - - /bin/sh - - -c - - > - kubectl apply -f /opt/gpu-operator/nvidia.com_clusterpolicies.yaml; - kubectl apply -f /opt/gpu-operator/nvidia.com_nvidiadrivers.yaml; + - /usr/bin/apply-crds + args: + - --crds-file=/opt/gpu-operator/nvidia.com_clusterpolicies.yaml + - --crds-file=/opt/gpu-operator/nvidia.com_nvidiadrivers.yaml {{- if .Values.nfd.enabled }} - kubectl apply -f /opt/gpu-operator/nfd-api-crds.yaml; + - --crds-file=/opt/gpu-operator/nfd-api-crds.yaml {{- end }} restartPolicy: OnFailure {{- end }} diff --git a/deployments/gpu-operator/values.yaml b/deployments/gpu-operator/values.yaml index a9f68bf59..2a58ef0cb 100644 --- a/deployments/gpu-operator/values.yaml +++ b/deployments/gpu-operator/values.yaml @@ -77,6 +77,7 @@ operator: use_ocp_driver_toolkit: false # cleanup CRD on chart un-install cleanupCRD: false + imageCleanupCRD: bitnami/kubectl:1.31 # upgrade CRD on chart upgrade, requires --disable-openapi-validation flag # to be passed during helm upgrade. upgradeCRD: true diff --git a/docker/Dockerfile b/docker/Dockerfile index 9678b5a2a..9f9f8bb82 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -53,7 +53,7 @@ COPY *.mk . 
# Build ARG VERSION="unknown" ARG GIT_COMMIT="unknown" -RUN make gpu-operator +RUN make gpu-operator apply-crds FROM nvcr.io/nvidia/cuda:12.6.2-base-ubi9 @@ -76,6 +76,7 @@ LABEL vsc-ref=${GIT_COMMIT} WORKDIR / COPY --from=builder /workspace/gpu-operator /usr/bin/ +COPY --from=builder /workspace/apply-crds /usr/bin/ RUN mkdir -p /opt/gpu-operator/manifests COPY assets /opt/gpu-operator/ @@ -83,12 +84,6 @@ COPY manifests /opt/gpu-operator/manifests RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE COPY hack/must-gather.sh /usr/bin/gather -# Install must-gather dependency: `kubectl` -ARG TARGETARCH -RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/${OS_ARCH}/kubectl -RUN chmod +x ./kubectl -RUN mv ./kubectl /usr/local/bin - # Add CRD resource into the image for helm upgrades COPY deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml /opt/gpu-operator/nvidia.com_clusterpolicies.yaml COPY deployments/gpu-operator/crds/nvidia.com_nvidiadrivers.yaml /opt/gpu-operator/nvidia.com_nvidiadrivers.yaml diff --git a/go.mod b/go.mod index c306ecd65..c589af668 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/Masterminds/sprig/v3 v3.3.0 github.com/NVIDIA/go-nvlib v0.7.0 github.com/NVIDIA/k8s-kata-manager v0.2.2 - github.com/NVIDIA/k8s-operator-libs v0.0.0-20240826221728-249ba446fa35 + github.com/NVIDIA/k8s-operator-libs v0.0.0-20241120073822-1ad8938d7274 github.com/NVIDIA/nvidia-container-toolkit v1.17.2 github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc github.com/go-logr/logr v1.4.2 @@ -169,10 +169,10 @@ require ( gopkg.in/yaml.v3 v3.0.1 // indirect helm.sh/helm/v3 v3.16.1 // indirect k8s.io/apiserver v0.31.2 // indirect - k8s.io/cli-runtime v0.31.1 // indirect + k8s.io/cli-runtime v0.31.2 // indirect 
k8s.io/component-base v0.31.2 // indirect k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect - k8s.io/kubectl v0.31.0 // indirect + k8s.io/kubectl v0.31.2 // indirect k8s.io/utils v0.0.0-20240921022957-49e7df575cb6 // indirect oras.land/oras-go v1.2.5 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect @@ -180,3 +180,8 @@ require ( sigs.k8s.io/kustomize/kyaml v0.17.1 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) + +// DROP BEFORE MERGE! +// Implements https://github.com/NVIDIA/k8s-operator-libs/pull/58 +// This is only for testing. +replace github.com/NVIDIA/k8s-operator-libs => github.com/tobiasgiese/k8s-operator-libs v0.0.0-20241125092837-e8a080621717 diff --git a/go.sum b/go.sum index 3c2dda11a..d9c5e51a8 100644 --- a/go.sum +++ b/go.sum @@ -28,8 +28,6 @@ github.com/NVIDIA/go-nvlib v0.7.0 h1:Z/J7skMdLbTiHvomKVsGYsttfQMZj5FwNYIFXhZ4i/c github.com/NVIDIA/go-nvlib v0.7.0/go.mod h1:9UrsLGx/q1OrENygXjOuM5Ey5KCtiZhbvBlbUIxtGWY= github.com/NVIDIA/k8s-kata-manager v0.2.2 h1:+xVIp4yLfCjZ31Dfrm9LOKo4T47b4g+DV6XkwAqalns= github.com/NVIDIA/k8s-kata-manager v0.2.2/go.mod h1:UGjGQUcpXTegwyOc5IwcyLTzPKwO9lOIkqw/qUzk8Q0= -github.com/NVIDIA/k8s-operator-libs v0.0.0-20240826221728-249ba446fa35 h1:w9DXPTJCq9k2PVpdBQJrWE4vAmZcFaSHKLpM/xos9WI= -github.com/NVIDIA/k8s-operator-libs v0.0.0-20240826221728-249ba446fa35/go.mod h1:sw6XRI5wq0Q+nSgaWa1Pyo/ZKxQebc70x6VIznDAxtM= github.com/NVIDIA/nvidia-container-toolkit v1.17.2 h1:iE6PK9SQH3HyDrOolu27xn3CJgURR3bDtnbfFrxdML8= github.com/NVIDIA/nvidia-container-toolkit v1.17.2/go.mod h1:R6bNf6ca0IjjACa0ncKGvsrx6zSjsgz8QkFyBDk5szU= github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d h1:UrqY+r/OJnIp5u0s1SbQ8dVfLCZJsnvazdBP5hS4iRs= @@ -351,6 +349,8 @@ github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.9.0 
h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tobiasgiese/k8s-operator-libs v0.0.0-20241125092837-e8a080621717 h1:456lFgNispD2ff9fpni9sYB3838p14O30zN0cyoeFmI= +github.com/tobiasgiese/k8s-operator-libs v0.0.0-20241125092837-e8a080621717/go.mod h1:g8DW2t4Vit91uLdqCxsjKbKYrwCdb/oB9q/YOXdUjmQ= github.com/ulikunitz/xz v0.5.12 h1:37Nm15o69RwBkXM0J6A5OlE67RZTfzUxTj8fB3dfcsc= github.com/ulikunitz/xz v0.5.12/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14= github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= @@ -488,8 +488,8 @@ k8s.io/apimachinery v0.31.2 h1:i4vUt2hPK56W6mlT7Ry+AO8eEsyxMD1U44NR22CLTYw= k8s.io/apimachinery v0.31.2/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= k8s.io/apiserver v0.31.2 h1:VUzOEUGRCDi6kX1OyQ801m4A7AUPglpsmGvdsekmcI4= k8s.io/apiserver v0.31.2/go.mod h1:o3nKZR7lPlJqkU5I3Ove+Zx3JuoFjQobGX1Gctw6XuE= -k8s.io/cli-runtime v0.31.1 h1:/ZmKhmZ6hNqDM+yf9s3Y4KEYakNXUn5sod2LWGGwCuk= -k8s.io/cli-runtime v0.31.1/go.mod h1:pKv1cDIaq7ehWGuXQ+A//1OIF+7DI+xudXtExMCbe9U= +k8s.io/cli-runtime v0.31.2 h1:7FQt4C4Xnqx8V1GJqymInK0FFsoC+fAZtbLqgXYVOLQ= +k8s.io/cli-runtime v0.31.2/go.mod h1:XROyicf+G7rQ6FQJMbeDV9jqxzkWXTYD6Uxd15noe0Q= k8s.io/client-go v0.31.2 h1:Y2F4dxU5d3AQj+ybwSMqQnpZH9F30//1ObxOKlTI9yc= k8s.io/client-go v0.31.2/go.mod h1:NPa74jSVR/+eez2dFsEIHNa+3o09vtNaWwWwb1qSxSs= k8s.io/component-base v0.31.2 h1:Z1J1LIaC0AV+nzcPRFqfK09af6bZ4D1nAOpWsy9owlA= @@ -498,8 +498,8 @@ k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= -k8s.io/kubectl v0.31.0 h1:kANwAAPVY02r4U4jARP/C+Q1sssCcN/1p9Nk+7BQKVg= -k8s.io/kubectl 
v0.31.0/go.mod h1:pB47hhFypGsaHAPjlwrNbvhXgmuAr01ZBvAIIUaI8d4= +k8s.io/kubectl v0.31.2 h1:gTxbvRkMBwvTSAlobiTVqsH6S8Aa1aGyBcu5xYLsn8M= +k8s.io/kubectl v0.31.2/go.mod h1:EyASYVU6PY+032RrTh5ahtSOMgoDRIux9V1JLKtG5xM= k8s.io/utils v0.0.0-20240921022957-49e7df575cb6 h1:MDF6h2H/h4tbzmtIKTuctcwZmY0tY9mD9fNT47QO6HI= k8s.io/utils v0.0.0-20240921022957-49e7df575cb6/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= oras.land/oras-go v1.2.5 h1:XpYuAwAb0DfQsunIyMfeET92emK8km3W4yEzZvUbsTo= diff --git a/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/crdutil/README.md b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/crdutil/README.md new file mode 100644 index 000000000..15aa4bd44 --- /dev/null +++ b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/crdutil/README.md @@ -0,0 +1,61 @@ +# CRD Apply Tool + +This tool is designed to help deploy and manage Custom Resource Definitions (CRDs) in a Kubernetes cluster. +It applies all CRDs found in specified directories, providing a solution to some of the limitations of Helm when it comes to managing CRDs. + +## Motivation + +While Helm is commonly used for managing Kubernetes resources, it has certain restrictions with CRDs: + +- CRDs placed in Helm's top-level `crds/` directory are not updated on upgrades or rollbacks. +- Placing CRDs in Helm’s `templates/` directory is not entirely safe, as deletions and upgrades of CRDs are not always handled properly. + +This tool offers a more reliable way to apply CRDs, ensuring they are created or updated as needed. + +## Features + +- **Apply CRDs from multiple directories**: Allows specifying multiple directories containing CRD YAML manifests. +- **Recursive directory search**: Walks through each specified directory to find and apply all YAML files. +- **Safe update mechanism**: Checks if a CRD already exists; if so, it updates it with the latest version. +- **Handles multiple YAML documents**: Supports files containing multiple CRD documents separated by YAML document delimiters. 
+ +## Usage + +Compile and run the tool by providing the `-crds-dir` flag with paths to the directories containing the CRD YAML files: + +```bash +go build -o crd-apply-tool +./crd-apply-tool -crds-dir /path/to/crds1 -crds-dir /path/to/crds2 +``` + +In a Helm pre-install hook it can look like: + +```yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: upgrade-crd + annotations: + "helm.sh/hook": pre-install,pre-upgrade + "helm.sh/hook-weight": "1" + "helm.sh/hook-delete-policy": hook-succeeded,before-hook-creation +spec: + template: + metadata: + name: upgrade-crd + spec: + containers: + - name: upgrade-crd + image: path-to-your/crd-apply-image + imagePullPolicy: IfNotPresent + command: + - /apply-crds + args: + - --crds-dir=/crds/operator +``` + +> Note: the image must contain all your CRDs in e.g. the `/crds/operator` directory. + +## Flags + +- `-crds-dir`: Specifies a directory path that contains the CRD manifests in YAML format. This flag can be provided multiple times to apply CRDs from multiple directories. At least one of `-crds-dir` or `-crds-file` (the path of a single file with CRD manifests, which can also be provided multiple times) must be given. diff --git a/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/crdutil/crdutil.go b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/crdutil/crdutil.go new file mode 100644 index 000000000..7d519d74c --- /dev/null +++ b/vendor/github.com/NVIDIA/k8s-operator-libs/pkg/crdutil/crdutil.go @@ -0,0 +1,205 @@ +/* +Copyright 2024 NVIDIA CORPORATION & AFFILIATES + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package crdutil + +import ( + "context" + "flag" + "fmt" + "io" + "log" + "os" + "path/filepath" + "strings" + + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" + v1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/util/yaml" + "k8s.io/client-go/util/retry" + ctrl "sigs.k8s.io/controller-runtime" +) + +type StringList []string + +func (s *StringList) String() string { + return strings.Join(*s, ", ") +} + +func (s *StringList) Set(value string) error { + *s = append(*s, value) + return nil +} + +var ( + crdsDir StringList + crds StringList +) + +func initFlags() { + flag.Var(&crdsDir, "crds-dir", "Path to the directory containing the CRD manifests") + flag.Var(&crds, "crds-file", "Single CRDs file with CRD manifests to apply") + flag.Parse() + + if len(crdsDir) == 0 && len(crds) == 0 { + log.Fatalf("CRDs directory or single CRDs are required") + } + + for _, crdDir := range crdsDir { + if _, err := os.Stat(crdDir); os.IsNotExist(err) { + log.Fatalf("CRDs directory %s does not exist", crdDir) + } + } + + for _, crd := range crds { + if _, err := os.Stat(crd); os.IsNotExist(err) { + log.Fatalf("CRD file %s does not exist", crd) + } + } +} + +// EnsureCRDsCmd reads each YAML file in the given directories and each given CRDs file, splits it into documents, and applies each CRD to the cluster. +// At least one of the parameters --crds-dir (directory containing the CRD manifests) or --crds-file (single file with CRD manifests) is required. 
+func EnsureCRDsCmd() { + ctx := context.Background() + + initFlags() + + config, err := ctrl.GetConfig() + if err != nil { + log.Fatalf("Failed to get Kubernetes config: %v", err) + } + + client, err := clientset.NewForConfig(config) + if err != nil { + log.Fatalf("Failed to create API extensions client: %v", err) + } + + if err := walkCrdsDir(ctx, client.ApiextensionsV1().CustomResourceDefinitions()); err != nil { + log.Fatalf("Failed to apply CRDs: %v", err) + } + + if err := applyCrdFiles(ctx, client.ApiextensionsV1().CustomResourceDefinitions()); err != nil { + log.Fatalf("Failed to apply CRDs: %v", err) + } +} + +// walkCrdsDir recursively walks every configured CRDs directory and applies each .yaml file found. +func walkCrdsDir(ctx context.Context, crdClient v1.CustomResourceDefinitionInterface) error { + for _, crdDir := range crdsDir { + // Walk the directory recursively and apply each YAML file. + err := filepath.Walk(crdDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() || filepath.Ext(path) != ".yaml" { + return nil + } + + log.Printf("Apply CRDs from file: %s", path) + if err := applyCRDsFromFile(ctx, crdClient, path); err != nil { + return fmt.Errorf("apply CRD %s: %w", path, err) + } + return nil + }) + if err != nil { + return fmt.Errorf("walk the path %s: %w", crdDir, err) + } + } + return nil +} + +func applyCrdFiles(ctx context.Context, crdClient v1.CustomResourceDefinitionInterface) error { + for _, crdFile := range crds { + log.Printf("Apply CRDs from file: %s", crdFile) + if err := applyCRDsFromFile(ctx, crdClient, crdFile); err != nil { + return fmt.Errorf("apply CRD %s: %w", crdFile, err) + } + } + return nil +} + +// applyCRDsFromFile reads a YAML file, splits it into documents, and applies each CRD to the cluster. 
+func applyCRDsFromFile(ctx context.Context, crdClient v1.CustomResourceDefinitionInterface, filePath string) error { + file, err := os.Open(filePath) + if err != nil { + return fmt.Errorf("open file %q: %w", filePath, err) + } + defer file.Close() + + // Create a decoder that reads multiple YAML documents. + decoder := yaml.NewYAMLOrJSONDecoder(file, 4096) + var crdsToApply []*apiextensionsv1.CustomResourceDefinition + for { + crd := &apiextensionsv1.CustomResourceDefinition{} + if err := decoder.Decode(crd); err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("decode YAML: %w", err) + } + if crd.GetObjectKind().GroupVersionKind().Kind != "CustomResourceDefinition" { + log.Printf("Skipping non-CRD object %s", crd.GetName()) + continue + } + crdsToApply = append(crdsToApply, crd) + } + + // Apply each CRD separately. + for _, crd := range crdsToApply { + err := wait.ExponentialBackoffWithContext(ctx, retry.DefaultBackoff, func(context.Context) (bool, error) { + if err := applyCRD(ctx, crdClient, crd); err != nil { + log.Printf("Failed to apply CRD %s: %v", crd.Name, err) + return false, nil + } + return true, nil + }) + if err != nil { + return fmt.Errorf("apply CRD %s: %w", crd.Name, err) + } + } + return nil +} + +// applyCRD creates the CRD if it is not found in the cluster, updates it if it exists, and returns any other lookup error. +func applyCRD( + ctx context.Context, + crdClient v1.CustomResourceDefinitionInterface, + crd *apiextensionsv1.CustomResourceDefinition, +) error { + // Check if CRD already exists in cluster and create if not found. + curCRD, err := crdClient.Get(ctx, crd.Name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + log.Printf("Create CRD %s", crd.Name) + if _, err = crdClient.Create(ctx, crd, metav1.CreateOptions{}); err != nil { + return fmt.Errorf("create CRD %s: %w", crd.Name, err) + } + return nil + } else if err != nil { + return fmt.Errorf("get CRD %s: %w", crd.Name, err) + } + log.Printf("Update CRD %s", crd.Name) + // Set resource version to update an existing CRD. 
+ crd.SetResourceVersion(curCRD.GetResourceVersion()) + if _, err = crdClient.Update(ctx, crd, metav1.UpdateOptions{}); err != nil { + return fmt.Errorf("update CRD %s: %w", crd.Name, err) + } + return nil +} diff --git a/vendor/k8s.io/client-go/util/retry/OWNERS b/vendor/k8s.io/client-go/util/retry/OWNERS new file mode 100644 index 000000000..75736b5aa --- /dev/null +++ b/vendor/k8s.io/client-go/util/retry/OWNERS @@ -0,0 +1,4 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +reviewers: + - caesarxuchao diff --git a/vendor/k8s.io/client-go/util/retry/util.go b/vendor/k8s.io/client-go/util/retry/util.go new file mode 100644 index 000000000..0c6e504a6 --- /dev/null +++ b/vendor/k8s.io/client-go/util/retry/util.go @@ -0,0 +1,105 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package retry + +import ( + "time" + + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/util/wait" +) + +// DefaultRetry is the recommended retry for a conflict where multiple clients +// are making changes to the same resource. +var DefaultRetry = wait.Backoff{ + Steps: 5, + Duration: 10 * time.Millisecond, + Factor: 1.0, + Jitter: 0.1, +} + +// DefaultBackoff is the recommended backoff for a conflict where a client +// may be attempting to make an unrelated modification to a resource under +// active management by one or more controllers. 
+var DefaultBackoff = wait.Backoff{ + Steps: 4, + Duration: 10 * time.Millisecond, + Factor: 5.0, + Jitter: 0.1, +} + +// OnError allows the caller to retry fn in case the error returned by fn is retriable +// according to the provided function. backoff defines the maximum retries and the wait +// interval between two retries. +func OnError(backoff wait.Backoff, retriable func(error) bool, fn func() error) error { + var lastErr error + err := wait.ExponentialBackoff(backoff, func() (bool, error) { + err := fn() + switch { + case err == nil: + return true, nil + case retriable(err): + lastErr = err + return false, nil + default: + return false, err + } + }) + if err == wait.ErrWaitTimeout { + err = lastErr + } + return err +} + +// RetryOnConflict is used to make an update to a resource when you have to worry about +// conflicts caused by other code making unrelated updates to the resource at the same +// time. fn should fetch the resource to be modified, make appropriate changes to it, try +// to update it, and return (unmodified) the error from the update function. On a +// successful update, RetryOnConflict will return nil. If the update function returns a +// "Conflict" error, RetryOnConflict will wait some amount of time as described by +// backoff, and then try again. On a non-"Conflict" error, or if it retries too many times +// and gives up, RetryOnConflict will return an error to the caller. +// +// err := retry.RetryOnConflict(retry.DefaultRetry, func() error { +// // Fetch the resource here; you need to refetch it on every try, since +// // if you got a conflict on the last update attempt then you need to get +// // the current version before making your own changes. 
+// pod, err := c.Pods("mynamespace").Get(name, metav1.GetOptions{}) +// if err != nil { +// return err +// } +// +// // Make whatever updates to the resource are needed +// pod.Status.Phase = v1.PodFailed +// +// // Try to update +// _, err = c.Pods("mynamespace").UpdateStatus(pod) +// // You have to return err itself here (not wrapped inside another error) +// // so that RetryOnConflict can identify it correctly. +// return err +// }) +// if err != nil { +// // May be conflict if max retries were hit, or may be something unrelated +// // like permissions or a network error +// return err +// } +// ... +// +// TODO: Make Backoff an interface? +func RetryOnConflict(backoff wait.Backoff, fn func() error) error { + return OnError(backoff, errors.IsConflict, fn) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 043958a76..c3c59fa4d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -40,10 +40,11 @@ github.com/NVIDIA/go-nvlib/pkg/pciids # github.com/NVIDIA/k8s-kata-manager v0.2.2 ## explicit; go 1.22.0 github.com/NVIDIA/k8s-kata-manager/api/v1alpha1/config -# github.com/NVIDIA/k8s-operator-libs v0.0.0-20240826221728-249ba446fa35 +# github.com/NVIDIA/k8s-operator-libs v0.0.0-20241120073822-1ad8938d7274 => github.com/tobiasgiese/k8s-operator-libs v0.0.0-20241125092837-e8a080621717 ## explicit; go 1.22.0 github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1 github.com/NVIDIA/k8s-operator-libs/pkg/consts +github.com/NVIDIA/k8s-operator-libs/pkg/crdutil github.com/NVIDIA/k8s-operator-libs/pkg/upgrade # github.com/NVIDIA/nvidia-container-toolkit v1.17.2 ## explicit; go 1.20 @@ -940,7 +941,7 @@ k8s.io/apimachinery/third_party/forked/golang/reflect # k8s.io/apiserver v0.31.2 ## explicit; go 1.22.0 k8s.io/apiserver/pkg/endpoints/deprecation -# k8s.io/cli-runtime v0.31.1 +# k8s.io/cli-runtime v0.31.2 ## explicit; go 1.22.0 k8s.io/cli-runtime/pkg/genericclioptions k8s.io/cli-runtime/pkg/genericiooptions @@ -1234,6 +1235,7 @@ 
k8s.io/client-go/util/flowcontrol k8s.io/client-go/util/homedir k8s.io/client-go/util/jsonpath k8s.io/client-go/util/keyutil +k8s.io/client-go/util/retry k8s.io/client-go/util/watchlist k8s.io/client-go/util/workqueue # k8s.io/component-base v0.31.2 @@ -1260,7 +1262,7 @@ k8s.io/kube-openapi/pkg/spec3 k8s.io/kube-openapi/pkg/util/proto k8s.io/kube-openapi/pkg/util/proto/validation k8s.io/kube-openapi/pkg/validation/spec -# k8s.io/kubectl v0.31.0 +# k8s.io/kubectl v0.31.2 ## explicit; go 1.22.0 k8s.io/kubectl/pkg/cmd/util k8s.io/kubectl/pkg/drain @@ -1442,3 +1444,4 @@ sigs.k8s.io/structured-merge-diff/v4/value sigs.k8s.io/yaml sigs.k8s.io/yaml/goyaml.v2 sigs.k8s.io/yaml/goyaml.v3 +# github.com/NVIDIA/k8s-operator-libs => github.com/tobiasgiese/k8s-operator-libs v0.0.0-20241125092837-e8a080621717