From 1f7053340ada73e530c843fcd047357cb24c0ca0 Mon Sep 17 00:00:00 2001 From: Matthieu Huin Date: Mon, 27 Nov 2023 22:29:00 +0100 Subject: [PATCH] Refactor podmonitor and disk usage alerting Create only one podmonitor for zookeeper, mariadb and gitserver (nodepool and zuul will be handled in a followup). This also enables simplifying the disk usage alert rule; create only one instead of one per component. Change-Id: I68c660562fb4d4d02e30fc137616f3a26f312919 --- controllers/git_server.go | 34 ------------------ controllers/libs/monitoring/monitoring.go | 8 ++--- controllers/mariadb.go | 33 ------------------ controllers/softwarefactory_controller.go | 42 ++++++++++++++++++++--- controllers/utils.go | 29 ++++++++++++++++ controllers/zookeeper.go | 34 ------------------ 6 files changed, 71 insertions(+), 109 deletions(-) diff --git a/controllers/git_server.go b/controllers/git_server.go index 45bcc596..391ee8db 100644 --- a/controllers/git_server.go +++ b/controllers/git_server.go @@ -11,7 +11,6 @@ import ( "strconv" "strings" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" sfv1 "github.com/softwarefactory-project/sf-operator/api/v1" "github.com/softwarefactory-project/sf-operator/controllers/libs/base" "github.com/softwarefactory-project/sf-operator/controllers/libs/conds" @@ -21,7 +20,6 @@ import ( "gopkg.in/yaml.v3" appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) const GitServerIdent = "git-server" @@ -33,36 +31,6 @@ const gsPiMountPath = "/entry" //go:embed static/git-server/update-system-config.sh var preInitScriptTemplate string -func (r *SFController) ensureGitServerPodMonitor() bool { - selector := metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "sf", - "run": GitServerIdent, - }, - } - nePort := sfmonitoring.GetTruncatedPortName(GitServerIdent, sfmonitoring.NodeExporterPortNameSuffix) - desiredGSPodmonitor := 
sfmonitoring.MkPodMonitor(GitServerIdent+"-monitor", r.ns, []string{nePort}, selector) - // add annotations so we can handle lifecycle - annotations := map[string]string{ - "version": "1", - } - desiredGSPodmonitor.ObjectMeta.Annotations = annotations - currentGSpm := monitoringv1.PodMonitor{} - if !r.GetM(desiredGSPodmonitor.Name, &currentGSpm) { - r.CreateR(&desiredGSPodmonitor) - return false - } else { - if !utils.MapEquals(&currentGSpm.ObjectMeta.Annotations, &annotations) { - r.log.V(1).Info("Git Server PodMonitor configuration changed, updating...") - currentGSpm.Spec = desiredGSPodmonitor.Spec - currentGSpm.ObjectMeta.Annotations = annotations - r.UpdateR(&currentGSpm) - return false - } - } - return true -} - // This function creates dummy connections to be used during the config-check func makeZuulConnectionConfig(spec *sfv1.ZuulSpec) string { var sb strings.Builder @@ -246,8 +214,6 @@ func (r *SFController) DeployGitServer() bool { svc := base.MkServicePod(GitServerIdent, r.ns, GitServerIdent+"-0", []int32{gsGitPort}, gsGitPortName) r.EnsureService(&svc) - r.ensureGitServerPodMonitor() - isStatefulset := r.IsStatefulSetReady(&current) conds.UpdateConditions(&r.cr.Status.Conditions, GitServerIdent, isStatefulset) diff --git a/controllers/libs/monitoring/monitoring.go b/controllers/libs/monitoring/monitoring.go index d2c00199..3e8aad58 100644 --- a/controllers/libs/monitoring/monitoring.go +++ b/controllers/libs/monitoring/monitoring.go @@ -62,12 +62,12 @@ func MkNodeExporterSideCarContainer(serviceName string, volumeMounts []apiv1.Vol func MkDiskUsageRuleGroup(ns string, componentIdent string) monitoringv1.RuleGroup { // Create some default, interesting alerts diskFullAnnotations := map[string]string{ - "description": componentIdent + ": mountpoint {{ $labels.mountpoint }} on pod {{ $labels.pod }} has {{ $value | humanize1024 }}% free space left.", - "summary": componentIdent + " out of disk", + "description": "Mountpoint {{ $labels.mountpoint }} on pod {{ $labels.pod }} has {{ 
$value | humanize1024 }}% free space left.", + "summary": "{{ $labels.pod }}:{{ $labels.mountpoint }} out of disk", } diskFull3daysAnnotations := map[string]string{ - "description": componentIdent + ": mountpoint {{ $labels.mountpoint }} on pod {{ $labels.pod }} has {{ $value | humanize1024 }}% free space left and is expected to fill up in less than three days.", - "summary": componentIdent + " running out of disk", + "description": "Mountpoint {{ $labels.mountpoint }} on pod {{ $labels.pod }} has {{ $value | humanize1024 }}% free space left and is expected to fill up in less than three days.", + "summary": "{{ $labels.pod }}:{{ $labels.mountpoint }} running out of disk", } diskFull := MkPrometheusAlertRule( "OutOfDiskNow", diff --git a/controllers/mariadb.go b/controllers/mariadb.go index 4fd66240..1572e073 100644 --- a/controllers/mariadb.go +++ b/controllers/mariadb.go @@ -11,7 +11,6 @@ import ( "strconv" "github.com/go-sql-driver/mysql" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "github.com/softwarefactory-project/sf-operator/controllers/libs/base" "github.com/softwarefactory-project/sf-operator/controllers/libs/conds" logging "github.com/softwarefactory-project/sf-operator/controllers/libs/logging" @@ -73,36 +72,6 @@ func createLogForwarderSidecar(r *SFController, annotations map[string]string) ( return volume, sidecar } -func (r *SFController) ensureMariaDBPodMonitor() bool { - selector := metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "sf", - "run": MariaDBIdent, - }, - } - nePort := sfmonitoring.GetTruncatedPortName(MariaDBIdent, sfmonitoring.NodeExporterPortNameSuffix) - desiredDBPodmonitor := sfmonitoring.MkPodMonitor(MariaDBIdent+"-monitor", r.ns, []string{nePort}, selector) - // add annotations so we can handle lifecycle - annotations := map[string]string{ - "version": "1", - } - desiredDBPodmonitor.ObjectMeta.Annotations = annotations - currentDBpm := monitoringv1.PodMonitor{} - if 
!r.GetM(desiredDBPodmonitor.Name, &currentDBpm) { - r.CreateR(&desiredDBPodmonitor) - return false - } else { - if !utils.MapEquals(&currentDBpm.ObjectMeta.Annotations, &annotations) { - r.log.V(1).Info("MariaDB PodMonitor configuration changed, updating...") - currentDBpm.Spec = desiredDBPodmonitor.Spec - currentDBpm.ObjectMeta.Annotations = annotations - r.UpdateR(&currentDBpm) - return false - } - } - return true -} - func (r *SFController) CreateDBInitContainer(username string, password string, dbname string) apiv1.Container { c := "CREATE DATABASE IF NOT EXISTS " + dbname + " CHARACTER SET utf8 COLLATE utf8_general_ci; " g := "GRANT ALL PRIVILEGES ON " + dbname + ".* TO '" + username + "'@'%' IDENTIFIED BY '${USER_PASSWORD}' WITH GRANT OPTION; FLUSH PRIVILEGES;" @@ -279,8 +248,6 @@ func (r *SFController) DeployMariadb() bool { } } - r.ensureMariaDBPodMonitor() - isReady := r.IsStatefulSetReady(&current) && zuulDBSecret.Data != nil conds.UpdateConditions(&r.cr.Status.Conditions, MariaDBIdent, isReady) diff --git a/controllers/softwarefactory_controller.go b/controllers/softwarefactory_controller.go index f174425d..e459a0d6 100644 --- a/controllers/softwarefactory_controller.go +++ b/controllers/softwarefactory_controller.go @@ -129,13 +129,31 @@ func (r *SFController) cleanup() { Name: BuildLogsHttpdPortName, }, }) + + // clean up old podmonitors if they exist. 
Remove after next release + currentZKpm := monitoringv1.PodMonitor{} + if r.GetM(ZookeeperIdent+"-monitor", &currentZKpm) { + r.DeleteR(&currentZKpm) + } + currentDBpm := monitoringv1.PodMonitor{} + if r.GetM(MariaDBIdent+"-monitor", &currentDBpm) { + r.DeleteR(&currentDBpm) + } + currentGSpm := monitoringv1.PodMonitor{} + if r.GetM(GitServerIdent+"-monitor", &currentGSpm) { + r.DeleteR(&currentGSpm) + } } func (r *SFController) Step() sfv1.SoftwareFactoryStatus { r.cleanup() - DURuleGroups := []monitoringv1.RuleGroup{} + DURuleGroups := []monitoringv1.RuleGroup{ + sfmonitoring.MkDiskUsageRuleGroup(r.ns, "sf"), + } + monitoredPorts := []string{} + selectorRunList := []string{} services := map[string]bool{} services["Zuul"] = false @@ -154,17 +172,20 @@ func (r *SFController) Step() sfv1.SoftwareFactoryStatus { // The git server service is needed to store system jobs (config-check and config-update) services["GitServer"] = r.DeployGitServer() if services["GitServer"] { - DURuleGroups = append(DURuleGroups, sfmonitoring.MkDiskUsageRuleGroup(r.ns, GitServerIdent)) + monitoredPorts = append(monitoredPorts, sfmonitoring.GetTruncatedPortName(GitServerIdent, sfmonitoring.NodeExporterPortNameSuffix)) + selectorRunList = append(selectorRunList, GitServerIdent) } services["MariaDB"] = r.DeployMariadb() if services["MariaDB"] { - DURuleGroups = append(DURuleGroups, sfmonitoring.MkDiskUsageRuleGroup(r.ns, MariaDBIdent)) + monitoredPorts = append(monitoredPorts, sfmonitoring.GetTruncatedPortName(MariaDBIdent, sfmonitoring.NodeExporterPortNameSuffix)) + selectorRunList = append(selectorRunList, MariaDBIdent) } services["Zookeeper"] = r.DeployZookeeper() if services["Zookeeper"] { - DURuleGroups = append(DURuleGroups, sfmonitoring.MkDiskUsageRuleGroup(r.ns, ZookeeperIdent)) + monitoredPorts = append(monitoredPorts, sfmonitoring.GetTruncatedPortName(ZookeeperIdent, sfmonitoring.NodeExporterPortNameSuffix)) + selectorRunList = append(selectorRunList, ZookeeperIdent) } if services["MariaDB"] && services["Zookeeper"] 
&& services["GitServer"] { @@ -186,7 +207,20 @@ func (r *SFController) Step() sfv1.SoftwareFactoryStatus { } } + podMonitorSelector := metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": "sf", + }, + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "run", + Operator: metav1.LabelSelectorOpIn, + Values: selectorRunList, + }, + }, + } // TODO? we could add this to the readiness computation. + r.EnsureSFPodMonitor(monitoredPorts, podMonitorSelector) r.EnsureDiskUsagePromRule(DURuleGroups) r.log.V(1).Info(messageInfo(r, services)) diff --git a/controllers/utils.go b/controllers/utils.go index e90a8645..6eea6559 100644 --- a/controllers/utils.go +++ b/controllers/utils.go @@ -710,3 +710,32 @@ func (r *SFController) EnsureDiskUsagePromRule(ruleGroups []monitoringv1.RuleGro } return true } + +func (r *SFController) EnsureSFPodMonitor(ports []string, selector metav1.LabelSelector) bool { + desiredPodMonitor := sfmonitoring.MkPodMonitor("sf-monitor", r.ns, ports, selector) + // add annotations so we can handle lifecycle + var portsChecksumable string + sort.Strings(ports) + for _, port := range ports { + portsChecksumable += port + " " + } + annotations := map[string]string{ + "version": "1", + "ports": utils.Checksum([]byte(portsChecksumable)), + } + desiredPodMonitor.ObjectMeta.Annotations = annotations + currentPodMonitor := monitoringv1.PodMonitor{} + if !r.GetM(desiredPodMonitor.Name, &currentPodMonitor) { + r.CreateR(&desiredPodMonitor) + return false + } else { + if !utils.MapEquals(&currentPodMonitor.ObjectMeta.Annotations, &annotations) { + r.log.V(1).Info("SF PodMonitor configuration changed, updating...") + currentPodMonitor.Spec = desiredPodMonitor.Spec + currentPodMonitor.ObjectMeta.Annotations = annotations + r.UpdateR(&currentPodMonitor) + return false + } + } + return true +} diff --git a/controllers/zookeeper.go b/controllers/zookeeper.go index 10cf6c9f..3dcfcdc7 100644 --- a/controllers/zookeeper.go +++ b/controllers/zookeeper.go @@ -8,7 +8,6 @@ 
import ( "strconv" certv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" - monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" "github.com/softwarefactory-project/sf-operator/controllers/libs/base" "github.com/softwarefactory-project/sf-operator/controllers/libs/cert" "github.com/softwarefactory-project/sf-operator/controllers/libs/conds" @@ -17,7 +16,6 @@ import ( "github.com/softwarefactory-project/sf-operator/controllers/libs/utils" appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) //go:embed static/zookeeper/ok.sh @@ -50,36 +48,6 @@ const zkServerPort = 2888 const ZookeeperIdent = "zookeeper" const zkPIMountPath = "/config-scripts" -func (r *SFController) ensureZookeeperPodMonitor() bool { - selector := metav1.LabelSelector{ - MatchLabels: map[string]string{ - "app": "sf", - "run": ZookeeperIdent, - }, - } - nePort := sfmonitoring.GetTruncatedPortName(ZookeeperIdent, sfmonitoring.NodeExporterPortNameSuffix) - desiredZKPodmonitor := sfmonitoring.MkPodMonitor(ZookeeperIdent+"-monitor", r.ns, []string{nePort}, selector) - // add annotations so we can handle lifecycle - annotations := map[string]string{ - "version": "1", - } - desiredZKPodmonitor.ObjectMeta.Annotations = annotations - currentZKpm := monitoringv1.PodMonitor{} - if !r.GetM(desiredZKPodmonitor.Name, &currentZKpm) { - r.CreateR(&desiredZKPodmonitor) - return false - } else { - if !utils.MapEquals(&currentZKpm.ObjectMeta.Annotations, &annotations) { - r.log.V(1).Info("Zookeeper PodMonitor configuration changed, updating...") - currentZKpm.Spec = desiredZKPodmonitor.Spec - currentZKpm.ObjectMeta.Annotations = annotations - r.UpdateR(&currentZKpm) - return false - } - } - return true -} - func createZKLogForwarderSidecar(r *SFController, annotations map[string]string) (apiv1.Volume, apiv1.Container) { fbForwarderConfig := make(map[string]string) @@ -246,8 +214,6 @@ func (r *SFController) DeployZookeeper() bool { 
pvcReadiness := r.reconcileExpandPVC(ZookeeperIdent+"-data-"+ZookeeperIdent+"-0", r.cr.Spec.Zookeeper.Storage) - r.ensureZookeeperPodMonitor() - isReady := r.IsStatefulSetReady(&current) && pvcReadiness conds.UpdateConditions(&r.cr.Status.Conditions, ZookeeperIdent, isReady)