From 8c4e1a4a3e06a47ccbb97b74cc47b127d10d78e8 Mon Sep 17 00:00:00 2001 From: Paul Farver Date: Tue, 6 Aug 2024 14:06:17 +0200 Subject: [PATCH] COO-261: Support tolerations and nodeselector in Monitoringstack (#540) * feat: support tolerations and nodeselectors * chore: generate resources * chore: update bundle --------- Co-authored-by: Simon Pasquier --- .../monitoring.rhobs_monitoringstacks.yaml | 44 ++++++++++ ...bility-operator.clusterserviceversion.yaml | 2 +- .../monitoring.rhobs_monitoringstacks.yaml | 44 ++++++++++ docs/api.md | 81 +++++++++++++++++++ pkg/apis/monitoring/v1alpha1/types.go | 11 ++- .../v1alpha1/zz_generated.deepcopy.go | 14 ++++ .../monitoring-stack/alertmanager.go | 2 + .../monitoring/monitoring-stack/components.go | 11 ++- 8 files changed, 202 insertions(+), 7 deletions(-) diff --git a/bundle/manifests/monitoring.rhobs_monitoringstacks.yaml b/bundle/manifests/monitoring.rhobs_monitoringstacks.yaml index cb0156bb..94f98b2b 100644 --- a/bundle/manifests/monitoring.rhobs_monitoringstacks.yaml +++ b/bundle/manifests/monitoring.rhobs_monitoringstacks.yaml @@ -108,6 +108,11 @@ spec: type: object type: object x-kubernetes-map-type: atomic + nodeSelector: + additionalProperties: + type: string + description: Define node selector for Monitoring Stack Pods. + type: object prometheusConfig: default: replicas: 2 @@ -1340,6 +1345,45 @@ spec: and must match the regular expression `[0-9]+(ms|s|m|h|d|w|y)` (milliseconds seconds minutes hours days weeks years). pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ type: string + tolerations: + description: Define tolerations for Monitoring Stack Pods. + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. 
+ When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array type: object status: description: |- diff --git a/bundle/manifests/observability-operator.clusterserviceversion.yaml b/bundle/manifests/observability-operator.clusterserviceversion.yaml index d75c74f3..5385474e 100644 --- a/bundle/manifests/observability-operator.clusterserviceversion.yaml +++ b/bundle/manifests/observability-operator.clusterserviceversion.yaml @@ -42,7 +42,7 @@ metadata: categories: Monitoring certified: "false" containerImage: observability-operator:0.3.4 - createdAt: "2024-07-29T15:10:24Z" + createdAt: "2024-08-06T11:27:09Z" description: A Go based Kubernetes operator to setup and manage highly available Monitoring Stack using Prometheus, Alertmanager and Thanos Querier. 
operators.operatorframework.io/builder: operator-sdk-v1.34.1 diff --git a/deploy/crds/common/monitoring.rhobs_monitoringstacks.yaml b/deploy/crds/common/monitoring.rhobs_monitoringstacks.yaml index 893b846b..6be43846 100644 --- a/deploy/crds/common/monitoring.rhobs_monitoringstacks.yaml +++ b/deploy/crds/common/monitoring.rhobs_monitoringstacks.yaml @@ -108,6 +108,11 @@ spec: type: object type: object x-kubernetes-map-type: atomic + nodeSelector: + additionalProperties: + type: string + description: Define node selector for Monitoring Stack Pods. + type: object prometheusConfig: default: replicas: 2 @@ -1340,6 +1345,45 @@ spec: and must match the regular expression `[0-9]+(ms|s|m|h|d|w|y)` (milliseconds seconds minutes hours days weeks years). pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ type: string + tolerations: + description: Define tolerations for Monitoring Stack Pods. + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists and Equal. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. 
+ type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array type: object status: description: |- diff --git a/docs/api.md b/docs/api.md index c6b72b55..c1c5cc1c 100644 --- a/docs/api.md +++ b/docs/api.md @@ -115,6 +115,13 @@ To monitor everything, set to empty map selector. E.g. namespaceSelector: {}. To monitor resources in the namespace where Monitoring Stack was created in, set to null. E.g. namespaceSelector:.
false + + nodeSelector + map[string]string + + Define node selector for Monitoring Stack Pods.
+ + false prometheusConfig object @@ -152,6 +159,13 @@ and must match the regular expression `[0-9]+(ms|s|m|h|d|w|y)` (milliseconds sec Default: 120h
false + + tolerations + []object + + Define tolerations for Monitoring Stack Pods.
+ + false @@ -2680,6 +2694,73 @@ inside a container.
+### MonitoringStack.spec.tolerations[index] +[↩ Parent](#monitoringstackspec) + + + +The pod this Toleration is attached to tolerates any taint that matches +the triple <key,value,effect> using the matching operator <operator>. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionRequired
effectstring + Effect indicates the taint effect to match. Empty means match all taint effects. +When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
+
false
keystring + Key is the taint key that the toleration applies to. Empty means match all taint keys. +If the key is empty, operator must be Exists; this combination means to match all values and all keys.
+
false
operatorstring + Operator represents a key's relationship to the value. +Valid operators are Exists and Equal. Defaults to Equal. +Exists is equivalent to wildcard for value, so that a pod can +tolerate all taints of a particular category.
+
false
tolerationSecondsinteger + TolerationSeconds represents the period of time the toleration (which must be +of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, +it is not set, which means tolerate the taint forever (do not evict). Zero and +negative values will be treated as 0 (evict immediately) by the system.
+
+ Format: int64
+
false
valuestring + Value is the taint value the toleration matches to. +If the operator is Exists, the value should be empty, otherwise just a regular string.
+
false
+ + ### MonitoringStack.status [↩ Parent](#monitoringstack) diff --git a/pkg/apis/monitoring/v1alpha1/types.go b/pkg/apis/monitoring/v1alpha1/types.go index e8f4e30e..5aa6f940 100644 --- a/pkg/apis/monitoring/v1alpha1/types.go +++ b/pkg/apis/monitoring/v1alpha1/types.go @@ -80,6 +80,14 @@ type MonitoringStackSpec struct { // +kubebuilder:default={requests:{cpu: "100m", memory: "256Mi"}, limits:{memory: "512Mi", cpu: "500m"}} Resources corev1.ResourceRequirements `json:"resources,omitempty"` + // Define tolerations for Monitoring Stack Pods. + // +optional + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + + // Define node selector for Monitoring Stack Pods. + // +optional + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + // Define prometheus config // +optional // +kubebuilder:default={replicas: 2} @@ -267,5 +275,4 @@ type ThanosQuerierSpec struct { // ThanosQuerierStatus defines the observed state of ThanosQuerier. // It should always be reconstructable from the state of the cluster and/or outside world. 
-type ThanosQuerierStatus struct { -} +type ThanosQuerierStatus struct{} diff --git a/pkg/apis/monitoring/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/monitoring/v1alpha1/zz_generated.deepcopy.go index 78e24035..3d661a2e 100644 --- a/pkg/apis/monitoring/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/monitoring/v1alpha1/zz_generated.deepcopy.go @@ -131,6 +131,20 @@ func (in *MonitoringStackSpec) DeepCopyInto(out *MonitoringStackSpec) { (*in).DeepCopyInto(*out) } in.Resources.DeepCopyInto(&out.Resources) + if in.Tolerations != nil { + in, out := &in.Tolerations, &out.Tolerations + *out = make([]corev1.Toleration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } if in.PrometheusConfig != nil { in, out := &in.PrometheusConfig, &out.PrometheusConfig *out = new(PrometheusConfig) diff --git a/pkg/controllers/monitoring/monitoring-stack/alertmanager.go b/pkg/controllers/monitoring/monitoring-stack/alertmanager.go index 063f8680..2e2c0a39 100644 --- a/pkg/controllers/monitoring/monitoring-stack/alertmanager.go +++ b/pkg/controllers/monitoring/monitoring-stack/alertmanager.go @@ -42,6 +42,8 @@ func newAlertmanager( Replicas: &replicas, ServiceAccountName: rbacResourceName, AlertmanagerConfigSelector: resourceSelector, + NodeSelector: ms.Spec.NodeSelector, + Tolerations: ms.Spec.Tolerations, Affinity: &corev1.Affinity{ PodAntiAffinity: &corev1.PodAntiAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ diff --git a/pkg/controllers/monitoring/monitoring-stack/components.go b/pkg/controllers/monitoring/monitoring-stack/components.go index 1b5a68ed..ac225b0e 100644 --- a/pkg/controllers/monitoring/monitoring-stack/components.go +++ b/pkg/controllers/monitoring/monitoring-stack/components.go @@ -15,9 +15,11 @@ import ( 
"github.com/rhobs/observability-operator/pkg/reconciler" ) -const AdditionalScrapeConfigsSelfScrapeKey = "self-scrape-config" -const PrometheusUserFSGroupID = int64(65534) -const AlertmanagerUserFSGroupID = int64(65535) +const ( + AdditionalScrapeConfigsSelfScrapeKey = "self-scrape-config" + PrometheusUserFSGroupID = int64(65534) + AlertmanagerUserFSGroupID = int64(65535) +) func stackComponentReconcilers( ms *stack.MonitoringStack, @@ -152,6 +154,8 @@ func newPrometheus( ProbeNamespaceSelector: ms.Spec.NamespaceSelector, ScrapeConfigSelector: prometheusSelector, ScrapeConfigNamespaceSelector: ms.Spec.NamespaceSelector, + NodeSelector: ms.Spec.NodeSelector, + Tolerations: ms.Spec.Tolerations, Affinity: &corev1.Affinity{ PodAntiAffinity: &corev1.PodAntiAffinity{ RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{ @@ -327,7 +331,6 @@ func newThanosSidecarService(ms *stack.MonitoringStack, instanceSelectorKey stri Labels: objectLabels(name, ms.Name, instanceSelectorKey, instanceSelectorValue), }, Spec: corev1.ServiceSpec{ - // NOTE: Setting this to "None" makes a "headless service" (no virtual // IP), which is useful when direct endpoint connections are preferred // and proxying is not required.