From da48c304648ec45956c97ed66fc1af5ea578fa6f Mon Sep 17 00:00:00 2001 From: adrianc Date: Mon, 2 Sep 2024 13:11:06 +0300 Subject: [PATCH] Add docs - Add Readme to project with basic install and usage - Add API documentation - Add Helm chart documentation Signed-off-by: adrianc --- Makefile | 25 + README.md | 143 +++++- api/v1alpha1/doc.go | 20 + api/v1alpha1/nodemaintenance_types.go | 2 +- .../maintenance-operator-chart/Chart.yaml | 2 +- .../maintenance-operator-chart/README.md | 32 ++ .../maintenance-operator-chart/values.yaml | 31 +- docs/api-reference.md | 439 ++++++++++++++++++ hack/api-docs/config.json | 28 ++ hack/api-docs/templates/members.tpl | 48 ++ hack/api-docs/templates/pkg.tpl | 49 ++ hack/api-docs/templates/type.tpl | 82 ++++ 12 files changed, 892 insertions(+), 9 deletions(-) create mode 100644 api/v1alpha1/doc.go create mode 100644 deployment/maintenance-operator-chart/README.md create mode 100644 docs/api-reference.md create mode 100644 hack/api-docs/config.json create mode 100644 hack/api-docs/templates/members.tpl create mode 100644 hack/api-docs/templates/pkg.tpl create mode 100644 hack/api-docs/templates/type.tpl diff --git a/Makefile b/Makefile index 95b0751..d2d361c 100644 --- a/Makefile +++ b/Makefile @@ -221,6 +221,18 @@ golangci-lint: curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(shell dirname $(GOLANGCI_LINT)) $(GOLANGCI_LINT_VERSION) ;\ } +GEN_CRD_API_REFERENCE_DOCS = $(LOCALBIN)/gen-crd-api-reference-docs +.PHONY: gen-crd-api-reference-docs ## Download gen-crd-api-reference-docs locally if necessary +gen-crd-api-reference-docs: $(GEN_CRD_API_REFERENCE_DOCS) +$(GEN_CRD_API_REFERENCE_DOCS): | $(LOCALBIN) + @ GOBIN=$(LOCALBIN) go install github.com/ahmetb/gen-crd-api-reference-docs@latest + +HELM_DOCS = $(LOCALBIN)/helm-docs +HELM_DOCS_VERSION ?= v1.14.2 +.PHONY: helm-docs ## Download helm-docs locally if necessary +helm-docs: $(HELM_DOCS) +$(HELM_DOCS): | $(LOCALBIN) + @ 
GOBIN=$(LOCALBIN) go install github.com/norwoodj/helm-docs/cmd/helm-docs@$(HELM_DOCS_VERSION) ##@ General # The help target prints out all targets with their descriptions organized @@ -281,6 +293,19 @@ lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes generate-mocks: mockery ## generate mock objects PATH=$(LOCALBIN):$(PATH) go generate ./... + +.PHONY: generate-api-docs +generate-api-docs: gen-crd-api-reference-docs ## generate api documentation + $(GEN_CRD_API_REFERENCE_DOCS) -api-dir=./api/v1alpha1 -config=${CURDIR}/hack/api-docs/config.json \ + -template-dir=${CURDIR}/hack/api-docs/templates -out-file=$(BUILDDIR)/api-reference.html + $(CONTAINER_TOOL) run --rm --volume "`pwd`:/data:Z" pandoc/minimal -f html -t markdown_strict \ + --columns 200 /data/build/api-reference.html -o /data/docs/api-reference.md + rm $(BUILDDIR)/api-reference.html + +.PHONY: generate-helm-docs +generate-helm-docs: helm-docs ## generate helm documentation + cd deployment/maintenance-operator-chart && $(HELM_DOCS) + ##@ Build .PHONY: build diff --git a/README.md b/README.md index 9b02769..41c0c09 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,144 @@ [![CodeQL](https://github.com/Mellanox/maintenance-operator/actions/workflows/codeql.yml/badge.svg)](https://github.com/Mellanox/maintenance-operator/actions/workflows/codeql.yml) [![Image push](https://github.com/Mellanox/maintenance-operator/actions/workflows/image-push-main.yml/badge.svg?event=push)](https://github.com/Mellanox/maintenance-operator/actions/workflows/image-push-main.yml) -# Nvidia Maintenance Operator -coordinates node maintenance operations in K8s cluster +# NVIDIA Maintenance Operator -> __NOTE__: This project is currently under active development. +NVIDIA Maintenance Operator provides Kubernetes API(Custom Resource Definition) to allow node maintenance operators in K8s cluster +in a coordinated manner. 
It performs some common operations to prepare a node for maintenance such as cordoning +the node as well as draining it. + +Users/Consumers can request to perform maintenance on a node by creating NodeMaintenance Custom Resource(CR). +The operator will then reconcile NodeMaintenance CRs. At high level this is the reconcile flow: + +1. Scheduling - schedule NodeMaintenance to be processed by the operator, taking into account constraints + such as the maximal allowed parallel operations. +2. Node preparation for maintenance such as cordon and draining of the node +3. Mark NodeMaintenance as Ready (via condition) +4. Cleanup on deletion of NodeMaintenance such as node uncordon + +## Deployment + +### Prerequisites + +* Kubernetes cluster + +### Helm + +#### Deploy latest from project sources + +```bash +# Clone project +git clone https://github.com/Mellanox/maintenance-operator.git ; cd maintenance-operator + +# Install Operator +helm install -n maintenance-operator --create-namespace --set operator.image.tag=latest maintenance-operator ./deployment/maintenance-operator-chart + +# View deployed resources +kubectl -n maintenance-operator get all +``` + +#### Deploy last release from OCI repo + +```bash +helm install -n maintenance-operator --create-namespace maintenance-operator oci://ghcr.io/mellanox/maintenance-operator-chart +``` + +### Kustomize (for development) + +```bash +# clone project +git clone https://github.com/Mellanox/maintenance-operator.git ; cd maintenance-operator + +# build image +IMG=harbor.mellanox.com/cloud-orchestration-dev/adrianc/maintenance-operator:latest make docker-build + +# push image +IMG=harbor.mellanox.com/cloud-orchestration-dev/adrianc/maintenance-operator:latest make docker-push + +# deploy +IMG=harbor.mellanox.com/cloud-orchestration-dev/adrianc/maintenance-operator:latest make deploy + +# undeploy +make undeploy +``` + +## CRDs + +### MaintenanceOperatorConfig + +The MaintenanceOperatorConfig CRD is used for operator runtime 
configuration + +for more information refer to [api-reference](docs/api-reference.md) + +#### Example MaintenanceOperatorConfig + +```yaml +apiVersion: maintenance.nvidia.com/v1alpha1 +kind: MaintenanceOperatorConfig +metadata: + name: default + namespace: maintenance-operator +spec: + logLevel: info + maxParallelOperations: 4 +``` + +In this example we configure the following for the operator: + +* Log level (`logLevel`) is set to `info` +* The max number of parallel maintenance operations (`maxParallelOperations`) is set to `4` + +### NodeMaintenance + +The NodeMaintenance CRD is used to request to perform a maintenance operation on a specific K8s node. +In addition, it specifies which common (K8s related) operations need to happen in order to prepare a node for maintenance. + +Once the node is ready for maintenance the operator will set `Ready` condition in `status` field to `True`. +After maintenance operation was done by the requestor, NodeMaintenance CR should be deleted to finish the maintenance operation. + +for more information refer to [api-reference](docs/api-reference.md) + +#### Example NodeMaintenance + +```yaml +apiVersion: maintenance.nvidia.com/v1alpha1 +kind: NodeMaintenance +metadata: + name: my-maintenance-operation + namespace: default +spec: + requestorID: some.one.acme.com + nodeName: worker-01 + cordon: true + waitForPodCompletion: + podSelector: "app=important" + timeoutSeconds: 0 + drainSpec: + force: true + podSelector: "" + timeoutSeconds: 0 + deleteEmptyDir: true + podEvictionFilters: + - byResourceNameRegex: nvidia.com/gpu-* + - byResourceNameRegex: nvidia.com/rdma* + +``` + +In this example we request to perform maintenance for node `worker-01`. + +the following steps will occur before the node is marked as ready for maintenance: + +1. cordon of `worker-01` node +2. waiting for pods with `app: important` label to finish +3. draining of `worker-01` with the provided `drainSpec` + 1. 
force draining of pods even if they dont belong to a controller + 2. allow draining of pods with emptyDir mount + 3. only drain pods that consume either `nvidia.com/gpu-*`, `nvidia.com/rdma*` resources + +once the node is ready for maintenance `Ready` condition will be `True` + +```bash +$ kubectl get nodemaintenances.maintenance.nvidia.com -A +NAME NODE REQUESTOR READY PHASE FAILED +my-maintenance-operation worker-01 some.one.acme.com True Ready +``` diff --git a/api/v1alpha1/doc.go b/api/v1alpha1/doc.go new file mode 100644 index 0000000..4f301ba --- /dev/null +++ b/api/v1alpha1/doc.go @@ -0,0 +1,20 @@ +/* + 2024 NVIDIA CORPORATION & AFFILIATES + + Licensed under the Apache License, Version 2.0 (the License); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an AS IS BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +// Package v1alpha1 contains API Schema definitions for the maintenance.nvidia.com v1alpha1 API group +// +kubebuilder:object:generate=true +// +groupName=maintenance.nvidia.com +package v1alpha1 diff --git a/api/v1alpha1/nodemaintenance_types.go b/api/v1alpha1/nodemaintenance_types.go index d61913b..6fa4d64 100644 --- a/api/v1alpha1/nodemaintenance_types.go +++ b/api/v1alpha1/nodemaintenance_types.go @@ -122,7 +122,7 @@ type WaitForPodCompletionSpec struct { // DrainSpec describes configuration for node drain during automatic upgrade type DrainSpec struct { - // Force indicates if force draining is allowed + // Force draining even if there are pods that do not declare a controller // +kubebuilder:validation:Optional // +kubebuilder:default:=false Force bool `json:"force,omitempty"` diff --git a/deployment/maintenance-operator-chart/Chart.yaml b/deployment/maintenance-operator-chart/Chart.yaml index 8ebbc68..8b73ff4 100644 --- a/deployment/maintenance-operator-chart/Chart.yaml +++ b/deployment/maintenance-operator-chart/Chart.yaml @@ -3,4 +3,4 @@ name: maintenance-operator-chart description: Maintenance Operator Helm Chart type: application version: 0.0.1 -appVersion: "v0.0.1-main" +appVersion: "latest" diff --git a/deployment/maintenance-operator-chart/README.md b/deployment/maintenance-operator-chart/README.md new file mode 100644 index 0000000..d778a54 --- /dev/null +++ b/deployment/maintenance-operator-chart/README.md @@ -0,0 +1,32 @@ +# maintenance-operator-chart + +![Version: 0.0.1](https://img.shields.io/badge/Version-0.0.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: latest](https://img.shields.io/badge/AppVersion-latest-informational?style=flat-square) + +Maintenance Operator Helm Chart + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| imagePullSecrets | list | `[]` | image pull secrets for the 
operator | +| metricsService | object | `{"ports":[{"name":"https","port":8443,"protocol":"TCP","targetPort":"https"}],"type":"ClusterIP"}` | metrics service configurations | +| operator.admissionController.certificates.certManager.enable | bool | `true` | use cert-manager for certificates | +| operator.admissionController.certificates.certManager.generateSelfSigned | bool | `true` | generate self-signed certificiates with cert-manager | +| operator.admissionController.certificates.custom.enable | bool | `false` | enable custom certificates using secrets | +| operator.admissionController.certificates.secretNames.operator | string | `"operator-webhook-cert"` | secret name containing certificates for the operator admission controller | +| operator.admissionController.enable | bool | `true` | enable admission controller of the operator | +| operator.affinity | object | `{"nodeAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"preference":{"matchExpressions":[{"key":"node-role.kubernetes.io/master","operator":"Exists"}]},"weight":1},{"preference":{"matchExpressions":[{"key":"node-role.kubernetes.io/control-plane","operator":"Exists"}]},"weight":1}]}}` | node affinity for the operator | +| operator.image.repository | string | `"ghcr.io/mellanox/maintenance-operator"` | repository to use for the operator image | +| operator.image.tag | string | `nil` | image tag to use for the operator image | +| operator.nodeSelector | object | `{}` | node selector for the operator | +| operator.replicas | int | `1` | operator deployment number of repplicas | +| operator.resources | object | `{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}}` | specify resource requests and limits for the operator | +| operator.serviceAccount.annotations | object | `{}` | set annotations for the operator service account | +| operator.tolerations | list | 
`[{"effect":"NoSchedule","key":"node-role.kubernetes.io/master","operator":"Exists"},{"effect":"NoSchedule","key":"node-role.kubernetes.io/control-plane","operator":"Exists"}]` | toleration for the operator | +| operatorConfig | object | `{"logLevel":"info","maxNodeMaintenanceTimeSeconds":null,"maxParallelOperations":null,"maxUnavailable":null}` | operator configuration values. fields here correspond to fields in MaintenanceOperatorConfig CR | +| operatorConfig.logLevel | string | `"info"` | log level configuration | +| operatorConfig.maxNodeMaintenanceTimeSeconds | string | `nil` | max time for node maintenance | +| operatorConfig.maxParallelOperations | string | `nil` | max number of parallel operations | +| operatorConfig.maxUnavailable | string | `nil` | max number of unavailable nodes | +| webhookService | object | `{"ports":[{"port":443,"protocol":"TCP","targetPort":9443}],"type":"ClusterIP"}` | webhook service configurations | + diff --git a/deployment/maintenance-operator-chart/values.yaml b/deployment/maintenance-operator-chart/values.yaml index 1177072..685b615 100644 --- a/deployment/maintenance-operator-chart/values.yaml +++ b/deployment/maintenance-operator-chart/values.yaml @@ -1,7 +1,10 @@ operator: image: + # -- repository to use for the operator image repository: ghcr.io/mellanox/maintenance-operator - #tag: latest + # -- image tag to use for the operator image + tag: null + # -- toleration for the operator tolerations: - key: "node-role.kubernetes.io/master" operator: "Exists" @@ -9,7 +12,9 @@ operator: - key: "node-role.kubernetes.io/control-plane" operator: "Exists" effect: "NoSchedule" + # -- node selector for the operator nodeSelector: {} + # -- node affinity for the operator affinity: nodeAffinity: preferredDuringSchedulingIgnoredDuringExecution: @@ -23,6 +28,7 @@ operator: matchExpressions: - key: "node-role.kubernetes.io/control-plane" operator: Exists + # -- specify resource requests and limits for the operator resources: limits: cpu: 500m 
@@ -30,18 +36,25 @@ operator: requests: cpu: 10m memory: 64Mi + # -- operator deployment number of repplicas replicas: 1 serviceAccount: + # -- set annotations for the operator service account annotations: {} admissionController: + # -- enable admission controller of the operator enable: true certificates: secretNames: + # -- secret name containing certificates for the operator admission controller operator: "operator-webhook-cert" certManager: + # -- use cert-manager for certificates enable: true + # -- generate self-signed certificiates with cert-manager generateSelfSigned: true custom: + # -- enable custom certificates using secrets enable: false # operator: # caCrt: | @@ -60,13 +73,21 @@ operator: # ... # -----END EC PRIVATE KEY----- +# -- operator configuration values. fields here correspond to fields in MaintenanceOperatorConfig CR operatorConfig: + # -- log level configuration logLevel: info -# maxParallelOperations: nil -# maxUnavailable: nil -# maxNodeMaintenanceTimeSeconds: 1600 + # operatorConfig.maxParallelOperations -- max number of parallel operations + maxParallelOperations: null + # -- max number of unavailable nodes + maxUnavailable: null + # -- max time for node maintenance + maxNodeMaintenanceTimeSeconds: null +# -- image pull secrets for the operator imagePullSecrets: [] + +# -- metrics service configurations metricsService: ports: - name: https @@ -74,6 +95,8 @@ metricsService: protocol: TCP targetPort: https type: ClusterIP + +# -- webhook service configurations webhookService: ports: - port: 443 diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 0000000..89c3be1 --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1,439 @@ +Packages: + +- [maintenance.nvidia.com/v1alpha1](#maintenance.nvidia.com%2fv1alpha1) + +## maintenance.nvidia.com/v1alpha1 + +Package v1alpha1 contains API Schema definitions for the maintenance.nvidia.com v1alpha1 API group + +Resource Types: + +### DrainSpec + +(*Appears 
on:*[NodeMaintenanceSpec](#maintenance.nvidia.com/v1alpha1.NodeMaintenanceSpec)) + +DrainSpec describes configuration for node drain during automatic upgrade + + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
force
+bool

Force draining even if there are pods that do not declare a controller

podSelector
+string

PodSelector specifies a label selector to filter pods on the node that need to be drained. For more details on label selectors, see: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors

timeoutSeconds
+int32

TimeoutSeconds specifies the length of time in seconds to wait before giving up drain, zero means infinite

deleteEmptyDir
+bool

DeleteEmptyDir indicates whether the drain should continue even if there are pods using emptyDir (local data that will be deleted when the node is drained)

podEvictionFilters
+[]PodEvictionFiterEntry

PodEvictionFilters specifies filters for pods that need to undergo eviction during drain. if specified, only pods that match PodEvictionFilters will be evicted during drain operation. if +unspecified, all non-daemonset pods will be evicted. logical OR is performed between filter entries. logical AND is performed within different filters in a filter entry.

+ +### DrainStatus + +(*Appears on:*[NodeMaintenanceStatus](#maintenance.nvidia.com/v1alpha1.NodeMaintenanceStatus)) + +DrainStatus represents the status of draining for the node + + ++++ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
totalPods
+int32

TotalPods is the number of pods on the node at the time NodeMaintenance started draining

evictionPods
+int32

EvictionPods is the total number of pods that need to be evicted at the time NodeMaintenance started draining

drainProgress
+int32

DrainProgress represents the draining progress as percentage

waitForEviction
+[]string

WaitForEviction is the list of namespaced named pods that need to be evicted

+ +### MaintenanceOperatorConfig + +MaintenanceOperatorConfig is the Schema for the maintenanceoperatorconfigs API + + ++++ + + + + + + + + + + + + + + + + +
FieldDescription
metadata
+Kubernetes meta/v1.ObjectMeta
Refer to the Kubernetes API documentation for the fields of the metadata field.
spec
+MaintenanceOperatorConfigSpec

+
+ ++++ + + + + + + + + + + + + + + + + + + +
maxParallelOperations
+k8s.io/apimachinery/pkg/util/intstr.IntOrString

MaxParallelOperations indicates the maximal number of nodes that can undergo maintenance at a given time. 0 means no limit. value can be an absolute number (ex: 5) or a percentage of total nodes in +the cluster (ex: 10%). absolute number is calculated from percentage by rounding up. defaults to 1. The actual number of nodes that can undergo maintenance may be lower depending on the value of +MaintenanceOperatorConfigSpec.MaxUnavailable.

maxUnavailable
+k8s.io/apimachinery/pkg/util/intstr.IntOrString

MaxUnavailable is the maximum number of nodes that can become unavailable in the cluster. value can be an absolute number (ex: 5) or a percentage of total nodes in the cluster (ex: 10%). +absolute number is calculated from percentage by rounding up. by default, unset. new nodes will not be processed if the number of unavailable nodes would exceed this value.

logLevel
+OperatorLogLevel

LogLevel is the operator logging level

maxNodeMaintenanceTimeSeconds
+int32

MaxNodeMaintenanceTimeSeconds is the time from when a NodeMaintenance is marked as ready (phase: Ready) until the NodeMaintenance is considered stale and removed by the operator. should be less +than idle time for any autoscaler that is running. defaults to 1600 seconds (about 27 minutes).

+ +### MaintenanceOperatorConfigSpec + +(*Appears on:*[MaintenanceOperatorConfig](#maintenance.nvidia.com/v1alpha1.MaintenanceOperatorConfig)) + +MaintenanceOperatorConfigSpec defines the desired state of MaintenanceOperatorConfig + + ++++ + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
maxParallelOperations
+k8s.io/apimachinery/pkg/util/intstr.IntOrString

MaxParallelOperations indicates the maximal number of nodes that can undergo maintenance at a given time. 0 means no limit. value can be an absolute number (ex: 5) or a percentage of total nodes in +the cluster (ex: 10%). absolute number is calculated from percentage by rounding up. defaults to 1. The actual number of nodes that can undergo maintenance may be lower depending on the value of +MaintenanceOperatorConfigSpec.MaxUnavailable.

maxUnavailable
+k8s.io/apimachinery/pkg/util/intstr.IntOrString

MaxUnavailable is the maximum number of nodes that can become unavailable in the cluster. value can be an absolute number (ex: 5) or a percentage of total nodes in the cluster (ex: 10%). +absolute number is calculated from percentage by rounding up. by default, unset. new nodes will not be processed if the number of unavailable nodes would exceed this value.

logLevel
+OperatorLogLevel

LogLevel is the operator logging level

maxNodeMaintenanceTimeSeconds
+int32

MaxNodeMaintenanceTimeSeconds is the time from when a NodeMaintenance is marked as ready (phase: Ready) until the NodeMaintenance is considered stale and removed by the operator. should be less +than idle time for any autoscaler that is running. defaults to 1600 seconds (about 27 minutes).

+ +### NodeMaintenance + +NodeMaintenance is the Schema for the nodemaintenances API + + ++++ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
metadata
+Kubernetes meta/v1.ObjectMeta
Refer to the Kubernetes API documentation for the fields of the metadata field.
spec
+NodeMaintenanceSpec

+
+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + +
requestorID
+string

RequestorID MUST follow domain name notation format (https://tools.ietf.org/html/rfc1035#section-2.3.1) It MUST be 63 characters +or less, beginning and ending with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), dots (.), and alphanumerics between. caller SHOULD NOT create multiple objects with same requestorID and +nodeName. This field identifies the requestor of the operation.

additionalRequestors
+[]string

AdditionalRequestors is a set of additional requestor IDs which are using the same NodeMaintenance request. addition or removal of requestor IDs to this list MUST be made with update operation +(and retry on failure) which will replace the entire list.

nodeName
+string

NodeName is the name of the node that the maintenance operation will be performed on. creation fails if the node object does not exist (webhook)

cordon
+bool

Cordon if set, marks node as unschedulable during maintenance operation

waitForPodCompletion
+WaitForPodCompletionSpec

WaitForPodCompletion specifies pods via selector to wait for completion before performing drain operation. if not provided, will not wait for pods to complete

drainSpec
+DrainSpec

DrainSpec specifies how a node will be drained. if not provided, no draining will be performed.

status
+NodeMaintenanceStatus
+ +### NodeMaintenanceSpec + +(*Appears on:*[NodeMaintenance](#maintenance.nvidia.com/v1alpha1.NodeMaintenance)) + +NodeMaintenanceSpec defines the desired state of NodeMaintenance + + ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldDescription
requestorID
+string

RequestorID MUST follow domain name notation format (https://tools.ietf.org/html/rfc1035#section-2.3.1) It MUST be 63 characters +or less, beginning and ending with an alphanumeric character ([a-z0-9A-Z]) with dashes (-), dots (.), and alphanumerics between. caller SHOULD NOT create multiple objects with same requestorID and +nodeName. This field identifies the requestor of the operation.

additionalRequestors
+[]string

AdditionalRequestors is a set of additional requestor IDs which are using the same NodeMaintenance request. addition or removal of requestor IDs to this list MUST be made with update operation +(and retry on failure) which will replace the entire list.

nodeName
+string

NodeName is the name of the node that the maintenance operation will be performed on. creation fails if the node object does not exist (webhook)

cordon
+bool

Cordon if set, marks node as unschedulable during maintenance operation

waitForPodCompletion
+WaitForPodCompletionSpec

WaitForPodCompletion specifies pods via selector to wait for completion before performing drain operation. if not provided, will not wait for pods to complete

drainSpec
+DrainSpec

DrainSpec specifies how a node will be drained. if not provided, no draining will be performed.

+ +### NodeMaintenanceStatus + +(*Appears on:*[NodeMaintenance](#maintenance.nvidia.com/v1alpha1.NodeMaintenance)) + +NodeMaintenanceStatus defines the observed state of NodeMaintenance + + ++++ + + + + + + + + + + + + + + + + + + + + +
FieldDescription
conditions
+[]Kubernetes meta/v1.Condition

Conditions represents observations of NodeMaintenance current state

waitForCompletion
+[]string

WaitForCompletion is the list of namespaced named pods that we wait to complete

drain
+DrainStatus

Drain represents the drain status of the node

+ +### OperatorLogLevel (`string` alias) + +(*Appears on:*[MaintenanceOperatorConfigSpec](#maintenance.nvidia.com/v1alpha1.MaintenanceOperatorConfigSpec)) + +OperatorLogLevel is the operator log level. one of: \[“debug”, “info”, “error”\] + +### PodEvictionFiterEntry + +(*Appears on:*[DrainSpec](#maintenance.nvidia.com/v1alpha1.DrainSpec)) + +PodEvictionFiterEntry defines filters for Pod evictions during drain operation + + ++++ + + + + + + + + + + + + +
FieldDescription
byResourceNameRegex
+string

ByResourceNameRegex filters pods by the name of the resources they consume using regex.

+ +### WaitForPodCompletionSpec + +(*Appears on:*[NodeMaintenanceSpec](#maintenance.nvidia.com/v1alpha1.NodeMaintenanceSpec)) + +WaitForPodCompletionSpec describes the configuration for waiting on pods completion + + ++++ + + + + + + + + + + + + + + + + +
FieldDescription
podSelector
+string

PodSelector specifies a label selector for the pods to wait for completion. For more details on label selectors, see: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors

timeoutSeconds
+int32

TimeoutSeconds specifies the length of time in seconds to wait before giving up on pod termination, zero means infinite

+ +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +*Generated with `gen-crd-api-reference-docs` on git commit `7ade571`.* diff --git a/hack/api-docs/config.json b/hack/api-docs/config.json new file mode 100644 index 0000000..12a4e45 --- /dev/null +++ b/hack/api-docs/config.json @@ -0,0 +1,28 @@ +{ + "hideMemberFields": [ + "TypeMeta" + ], + "hideTypePatterns": [ + "ParseError$", + "List$" + ], + "externalPackages": [ + { + "typeMatchPrefix": "^k8s\\.io/apimachinery/pkg/util/intstr\\.IntOrString$", + "docsURLTemplate": "https://pkg.go.dev/k8s.io/apimachinery/pkg/util/intstr#IntOrString" + }, + { + "typeMatchPrefix": "^k8s\\.io/apimachinery/pkg/apis/meta/v1\\.Duration$", + "docsURLTemplate": "https://pkg.go.dev/k8s.io/apimachinery/pkg/apis/meta/v1#Duration" + }, + { + "typeMatchPrefix": "^k8s\\.io/(api|apimachinery/pkg/apis)/", + "docsURLTemplate": "https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#{{lower .TypeIdentifier}}-{{arrIndex .PackageSegments -1}}-{{arrIndex .PackageSegments -2}}" + } + ], + "typeDisplayNamePrefixOverrides": { + "k8s.io/api/": "Kubernetes ", + "k8s.io/apimachinery/pkg/apis/": "Kubernetes " + }, + "markdownDisabled": false +} diff --git a/hack/api-docs/templates/members.tpl b/hack/api-docs/templates/members.tpl new file mode 100644 index 0000000..448ce2f --- /dev/null +++ b/hack/api-docs/templates/members.tpl @@ -0,0 +1,48 @@ +{{ define "members" }} + +{{ range .Members }} +{{ if not (hiddenMember .)}} + + + {{ fieldName . }}
+ + {{ if linkForType .Type }} + + {{ typeDisplayName .Type }} + + {{ else }} + {{ typeDisplayName .Type }} + {{ end }} + + + + {{ if fieldEmbedded . }} +

+ (Members of {{ fieldName . }} are embedded into this type.) +

+ {{ end}} + + {{ if isOptionalMember .}} + (Optional) + {{ end }} + + {{ safe (renderComments .CommentLines) }} + + {{ if and (eq (.Type.Name.Name) "ObjectMeta") }} + Refer to the Kubernetes API documentation for the fields of the + metadata field. + {{ end }} + + {{ if or (eq (fieldName .) "spec") }} +
+
+ + {{ template "members" .Type }} +
+ {{ end }} + + +{{ end }} +{{ end }} + +{{ end }} \ No newline at end of file diff --git a/hack/api-docs/templates/pkg.tpl b/hack/api-docs/templates/pkg.tpl new file mode 100644 index 0000000..aacf6e0 --- /dev/null +++ b/hack/api-docs/templates/pkg.tpl @@ -0,0 +1,49 @@ +{{ define "packages" }} + +{{ with .packages}} +

Packages:

+ +{{ end}} + +{{ range .packages }} +

+ {{- packageDisplayName . -}} +

+ + {{ with (index .GoPackages 0 )}} + {{ with .DocComments }} +
+ {{ safe (renderComments .) }} +
+ {{ end }} + {{ end }} + + Resource Types: + + + {{ range (visibleTypes (sortedTypes .Types))}} + {{ template "type" . }} + {{ end }} +
+{{ end }} + +

+ Generated with gen-crd-api-reference-docs + {{ with .gitCommit }} on git commit {{ . }}{{end}}. +

+ +{{ end }} \ No newline at end of file diff --git a/hack/api-docs/templates/type.tpl b/hack/api-docs/templates/type.tpl new file mode 100644 index 0000000..9558611 --- /dev/null +++ b/hack/api-docs/templates/type.tpl @@ -0,0 +1,82 @@ + +{{ define "type" }} + +

+ {{- .Name.Name }} + {{ if eq .Kind "Alias" }}({{.Underlying}} alias){{ end -}} +

+{{ with (typeReferences .) }} +

+ (Appears on: + {{- $prev := "" -}} + {{- range . -}} + {{- if $prev -}}, {{ end -}} + {{- $prev = . -}} + {{ typeDisplayName . }} + {{- end -}} + ) +

+{{ end }} + +
+ {{ safe (renderComments .CommentLines) }} +
+ +{{ with (constantsOfType .) }} + + + + + + + + + {{- range . -}} + + {{- /* + renderComments implicitly creates a

element, so we + add one to the display name as well to make the contents + of the two cells align evenly. + */ -}} +

+ + + {{- end -}} + +
ValueDescription

{{ typeDisplayName . }}

{{ safe (renderComments .CommentLines) }}
+{{ end }} + +{{ if .Members }} + + + + + + + + + {{ if isExportedType . }} + + + + + + + + + {{ end }} + {{ template "members" .}} + +
FieldDescription
+ apiVersion
+ string
+ + {{apiGroup .}} + +
+ kind
+ string +
{{.Name.Name}}
+{{ end }} + +{{ end }}