From 4e3d35f12170ebb5d37faf8f46619cf3020aee18 Mon Sep 17 00:00:00 2001 From: Matthieu Huin Date: Fri, 4 Oct 2024 17:27:23 +0200 Subject: [PATCH] fluent bit: configure sidecars for buffering, backpressure Set up a dedicated empty dir for buffering on the filesystem (buffered data will be dropped beyond 100MB). Set the maximum number of chunks to a conservative value (designed to take at most 50% of the container's memory request). This should prevent pods from being OOMKilled when something goes wrong with log forwarding. Change-Id: If6abbea0c03b70f9213455572bcc1d416173e2c5 --- controllers/libs/base/static/images.yaml | 6 ++--- controllers/libs/logging/logging.go | 13 +++++++--- controllers/mariadb.go | 12 ++++----- controllers/nodepool.go | 12 ++++----- .../mariadb/fluentbit/fluent-bit.conf.tmpl | 23 +++++++++++------ .../nodepool/fluentbit/fluent-bit.conf.tmpl | 25 ++++++++++++------- .../static/zookeeper/fluent-bit.conf.tmpl | 23 +++++++++++------ controllers/zookeeper.go | 12 ++++----- 8 files changed, 77 insertions(+), 49 deletions(-) diff --git a/controllers/libs/base/static/images.yaml b/controllers/libs/base/static/images.yaml index 2468d31..b1c0548 100644 --- a/controllers/libs/base/static/images.yaml +++ b/controllers/libs/base/static/images.yaml @@ -61,7 +61,7 @@ images: version: v0.27.1 - name: fluentbit container: cr.fluentbit.io/fluent/fluent-bit - version: "2.1.10" + version: "3.1.9" - name: fluentbit-debug - container: cr.fluentbit.io/fluent/fluent-bit-debug - version: "2.1.10" + container: cr.fluentbit.io/fluent/fluent-bit + version: "3.1.9-debug" diff --git a/controllers/libs/logging/logging.go b/controllers/libs/logging/logging.go index 2a0e74d..7522485 100644 --- a/controllers/libs/logging/logging.go +++ b/controllers/libs/logging/logging.go @@ -117,7 +117,7 @@ func SetupLogForwarding(serviceName string, forwarderSpec *v1.FluentBitForwarder } } -func CreateFluentBitSideCarContainer(serviceName string, extraLabels []FluentBitLabel, volumeMounts 
[]apiv1.VolumeMount, debug bool) apiv1.Container { +func CreateFluentBitSideCarContainer(serviceName string, extraLabels []FluentBitLabel, volumeMounts []apiv1.VolumeMount, debug bool) (apiv1.Container, apiv1.Volume) { var img = base.FluentBitImage(debug) container := base.MkContainer("fluentbit", img) container.Env = CreateForwarderEnvVars(serviceName, extraLabels) @@ -127,7 +127,14 @@ func CreateFluentBitSideCarContainer(serviceName string, extraLabels []FluentBit ContainerPort: 2020, }, } + // Note that the empty dir will be lost at restart. The idea is really to + // only provide buffering to prevent OOM killing of the pod. + storageEmptyDir := base.MkEmptyDirVolume(serviceName + "-fb-buf") + storageVolumeMount := apiv1.VolumeMount{ + Name: serviceName + "-fb-buf", + MountPath: "/buffer-storage/", + } container.Ports = ports - container.VolumeMounts = volumeMounts - return container + container.VolumeMounts = append(volumeMounts, storageVolumeMount) + return container, storageEmptyDir } diff --git a/controllers/mariadb.go b/controllers/mariadb.go index 29a57f0..05d216b 100644 --- a/controllers/mariadb.go +++ b/controllers/mariadb.go @@ -44,7 +44,7 @@ type ZuulDBOpts struct { Params map[string]string } -func createLogForwarderSidecar(r *SFController, annotations map[string]string) (apiv1.Volume, apiv1.Container) { +func createLogForwarderSidecar(r *SFController, annotations map[string]string) ([]apiv1.Volume, apiv1.Container) { fbForwarderConfig := make(map[string]string) var loggingParams = logging.CreateForwarderConfigTemplateParams("mariadb", r.cr.Spec.FluentBitLogForwarding) @@ -74,10 +74,10 @@ func createLogForwarderSidecar(r *SFController, annotations map[string]string) ( if r.cr.Spec.FluentBitLogForwarding.Debug != nil { fluentbitDebug = *r.cr.Spec.FluentBitLogForwarding.Debug } - sidecar := logging.CreateFluentBitSideCarContainer(MariaDBIdent, []logging.FluentBitLabel{}, volumeMounts, fluentbitDebug) + sidecar, storageEmptyDir := 
logging.CreateFluentBitSideCarContainer(MariaDBIdent, []logging.FluentBitLabel{}, volumeMounts, fluentbitDebug) annotations["mariadb-fluent-bit.conf"] = utils.Checksum([]byte(fbForwarderConfig["fluent-bit.conf"])) annotations["mariadb-fluent-bit-image"] = sidecar.Image - return volume, sidecar + return []apiv1.Volume{volume, storageEmptyDir}, sidecar } func (r *SFController) CreateDBInitContainer(username string, password string, dbname string) apiv1.Container { @@ -267,14 +267,14 @@ GRANT ALL ON *.* TO root@'%%' WITH GRANT OPTION;`, } annotations := map[string]string{ - "serial": "5", + "serial": "6", "image": base.MariaDBImage(), "limits": limitstr, } if r.cr.Spec.FluentBitLogForwarding != nil { - fbVolume, fbSidecar := createLogForwarderSidecar(r, annotations) + fbVolumes, fbSidecar := createLogForwarderSidecar(r, annotations) sts.Spec.Template.Spec.Containers = append(sts.Spec.Template.Spec.Containers, fbSidecar) - sts.Spec.Template.Spec.Volumes = append(sts.Spec.Template.Spec.Volumes, fbVolume) + sts.Spec.Template.Spec.Volumes = append(sts.Spec.Template.Spec.Volumes, fbVolumes...) 
} statsExporter := sfmonitoring.MkNodeExporterSideCarContainer(MariaDBIdent, volumeMountsStatsExporter) diff --git a/controllers/nodepool.go b/controllers/nodepool.go index 706fc98..bbb8d79 100644 --- a/controllers/nodepool.go +++ b/controllers/nodepool.go @@ -93,7 +93,7 @@ var nodepoolFluentBitLabels = []logging.FluentBitLabel{ }, } -func createImageBuildLogForwarderSidecar(r *SFController, annotations map[string]string) (apiv1.Volume, apiv1.Container) { +func createImageBuildLogForwarderSidecar(r *SFController, annotations map[string]string) ([]apiv1.Volume, apiv1.Container) { fbForwarderConfig := make(map[string]string) var loggingParams = logging.CreateForwarderConfigTemplateParams("diskimage-builder", r.cr.Spec.FluentBitLogForwarding) @@ -125,11 +125,11 @@ func createImageBuildLogForwarderSidecar(r *SFController, annotations map[string fluentbitDebug = *r.cr.Spec.FluentBitLogForwarding.Debug } builderFluentBitLabels := append(nodepoolFluentBitLabels, logging.FluentBitLabel{Key: "CONTAINER", Value: BuilderIdent}) - sidecar := logging.CreateFluentBitSideCarContainer("diskimage-builder", builderFluentBitLabels, volumeMounts, fluentbitDebug) + sidecar, storageEmptyDir := logging.CreateFluentBitSideCarContainer("diskimage-builder", builderFluentBitLabels, volumeMounts, fluentbitDebug) annotations["dib-fluent-bit.conf"] = utils.Checksum([]byte(fbForwarderConfig["fluent-bit.conf"])) annotations["dib-fluent-bit-parser"] = utils.Checksum([]byte(fbForwarderConfig["parsers.conf"])) annotations["dib-fluent-bit-image"] = sidecar.Image - return volume, sidecar + return []apiv1.Volume{volume, storageEmptyDir}, sidecar } @@ -524,7 +524,7 @@ func (r *SFController) DeployNodepoolBuilder(statsdExporterVolume apiv1.Volume, "statsd_mapping": utils.Checksum([]byte(nodepoolStatsdMappingConfig)), "image": base.NodepoolBuilderImage(), "nodepool-providers-secrets": getSecretsVersion(providersSecrets, providerSecretsExists), - "serial": "16", + "serial": "17", 
"corporate-ca-certs-version": getCMVersion(corporateCM, corporateCMExists), } @@ -583,9 +583,9 @@ func (r *SFController) DeployNodepoolBuilder(statsdExporterVolume apiv1.Volume, extraLoggingEnvVars := logging.SetupLogForwarding("nodepool-builder", r.cr.Spec.FluentBitLogForwarding, nodepoolFluentBitLabels, annotations) nb.Spec.Template.Spec.Containers[0].Env = append(nb.Spec.Template.Spec.Containers[0].Env, extraLoggingEnvVars...) if r.cr.Spec.FluentBitLogForwarding != nil { - fbVolume, fbSidecar := createImageBuildLogForwarderSidecar(r, annotations) + fbVolumes, fbSidecar := createImageBuildLogForwarderSidecar(r, annotations) nb.Spec.Template.Spec.Containers = append(nb.Spec.Template.Spec.Containers, fbSidecar) - nb.Spec.Template.Spec.Volumes = append(nb.Spec.Template.Spec.Volumes, fbVolume) + nb.Spec.Template.Spec.Volumes = append(nb.Spec.Template.Spec.Volumes, fbVolumes...) } nb.Spec.Template.ObjectMeta.Annotations = annotations diff --git a/controllers/static/mariadb/fluentbit/fluent-bit.conf.tmpl b/controllers/static/mariadb/fluentbit/fluent-bit.conf.tmpl index ab5016c..004686f 100644 --- a/controllers/static/mariadb/fluentbit/fluent-bit.conf.tmpl +++ b/controllers/static/mariadb/fluentbit/fluent-bit.conf.tmpl @@ -1,7 +1,12 @@ [SERVICE] - http_server On - http_port 2020 - log_level {{ .LoggingParams.LogLevel }} + http_server On + http_port 2020 + log_level {{ .LoggingParams.LogLevel }} + storage.path /buffer-storage + storage.sync normal + # A chunk is ~2MB, so this value is 128Mi / 4, where 128Mi is the default container profile's mem request + # TODO compute this automatically from code + storage.max_chunks_up 32 [INPUT] name tail tag mariadb.error @@ -10,6 +15,7 @@ refresh_interval 5 read_from_head True db /watch/mariadb_fluentbit.db + storage.type filesystem [FILTER] name modify match * @@ -45,9 +51,10 @@ {{ end }} {{- if .LoggingParams.ForwardInputConfig.InUse }} [OUTPUT] - name forward - match * - tag {{ .LoggingParams.Tag }} - host {{ 
.LoggingParams.ForwardInputConfig.Host }} - port {{ .LoggingParams.ForwardInputConfig.Port }} + name forward + match * + tag {{ .LoggingParams.Tag }} + host {{ .LoggingParams.ForwardInputConfig.Host }} + port {{ .LoggingParams.ForwardInputConfig.Port }} + storage.total_limit_size 100M {{ end }} diff --git a/controllers/static/nodepool/fluentbit/fluent-bit.conf.tmpl b/controllers/static/nodepool/fluentbit/fluent-bit.conf.tmpl index 5ceb723..52d3742 100644 --- a/controllers/static/nodepool/fluentbit/fluent-bit.conf.tmpl +++ b/controllers/static/nodepool/fluentbit/fluent-bit.conf.tmpl @@ -1,8 +1,13 @@ [SERVICE] - http_server On - http_port 2020 - log_level {{ .LoggingParams.LogLevel }} - parsers_file parsers.conf + http_server On + http_port 2020 + log_level {{ .LoggingParams.LogLevel }} + parsers_file parsers.conf + storage.path /buffer-storage + storage.sync normal + # A chunk is ~2MB, so this value is 128Mi / 4, where 128Mi is the default container profile's mem request + # TODO compute this automatically from code + storage.max_chunks_up 32 [INPUT] name tail tag diskimage-builder @@ -11,6 +16,7 @@ refresh_interval 5 read_from_head True db /watch/dib_fluentbit.db + storage.type filesystem [FILTER] name modify match * @@ -67,9 +73,10 @@ {{ end }} {{- if .LoggingParams.ForwardInputConfig.InUse }} [OUTPUT] - name forward - match * - tag {{ .LoggingParams.Tag }} - host {{ .LoggingParams.ForwardInputConfig.Host }} - port {{ .LoggingParams.ForwardInputConfig.Port }} + name forward + match * + tag {{ .LoggingParams.Tag }} + host {{ .LoggingParams.ForwardInputConfig.Host }} + port {{ .LoggingParams.ForwardInputConfig.Port }} + storage.total_limit_size 100M {{ end }} diff --git a/controllers/static/zookeeper/fluent-bit.conf.tmpl b/controllers/static/zookeeper/fluent-bit.conf.tmpl index 6ef292c..bcb1f45 100644 --- a/controllers/static/zookeeper/fluent-bit.conf.tmpl +++ b/controllers/static/zookeeper/fluent-bit.conf.tmpl @@ -1,7 +1,12 @@ [SERVICE] - http_server On - http_port 
2020 - log_level {{ .LoggingParams.LogLevel }} + http_server On + http_port 2020 + log_level {{ .LoggingParams.LogLevel }} + storage.path /buffer-storage + storage.sync normal + # A chunk is ~2MB, so this value is 128Mi / 4, where 128Mi is the default container profile's mem request + # TODO compute this automatically from code + storage.max_chunks_up 32 [INPUT] name tail tag zookeeper @@ -10,6 +15,7 @@ refresh_interval 5 read_from_head True db /watch/zk_fluentbit.db + storage.type filesystem [FILTER] name modify match * @@ -41,9 +47,10 @@ {{ end }} {{- if .LoggingParams.ForwardInputConfig.InUse }} [OUTPUT] - name forward - match * - tag {{ .LoggingParams.Tag }} - host {{ .LoggingParams.ForwardInputConfig.Host }} - port {{ .LoggingParams.ForwardInputConfig.Port }} + name forward + match * + tag {{ .LoggingParams.Tag }} + host {{ .LoggingParams.ForwardInputConfig.Host }} + port {{ .LoggingParams.ForwardInputConfig.Port }} + storage.total_limit_size 100M {{ end }} diff --git a/controllers/zookeeper.go b/controllers/zookeeper.go index 8e649a3..6839edd 100644 --- a/controllers/zookeeper.go +++ b/controllers/zookeeper.go @@ -34,7 +34,7 @@ const zkSSLPort = 2281 const ZookeeperIdent = "zookeeper" const zkPIMountPath = "/config-scripts" -func createZKLogForwarderSidecar(r *SFController, annotations map[string]string) (apiv1.Volume, apiv1.Container) { +func createZKLogForwarderSidecar(r *SFController, annotations map[string]string) ([]apiv1.Volume, apiv1.Container) { fbForwarderConfig := make(map[string]string) var loggingParams = logging.CreateForwarderConfigTemplateParams("zookeeper", r.cr.Spec.FluentBitLogForwarding) @@ -64,10 +64,10 @@ func createZKLogForwarderSidecar(r *SFController, annotations map[string]string) if r.cr.Spec.FluentBitLogForwarding.Debug != nil { fluentbitDebug = *r.cr.Spec.FluentBitLogForwarding.Debug } - sidecar := logging.CreateFluentBitSideCarContainer("zookeeper", []logging.FluentBitLabel{}, volumeMounts, fluentbitDebug) + sidecar, 
storageEmptyDir := logging.CreateFluentBitSideCarContainer("zookeeper", []logging.FluentBitLabel{}, volumeMounts, fluentbitDebug) annotations["zk-fluent-bit.conf"] = utils.Checksum([]byte(fbForwarderConfig["fluent-bit.conf"])) annotations["zk-fluent-bit-image"] = sidecar.Image - return volume, sidecar + return []apiv1.Volume{volume, storageEmptyDir}, sidecar } func (r *SFController) DeployZookeeper() bool { @@ -97,7 +97,7 @@ func (r *SFController) DeployZookeeper() bool { annotations := map[string]string{ "configuration": utils.Checksum([]byte(configChecksumable)), "image": base.ZookeeperImage(), - "serial": "5", + "serial": "6", } volumeMountsStatsExporter := []apiv1.VolumeMount{ @@ -169,9 +169,9 @@ func (r *SFController) DeployZookeeper() bool { annotations["limits"] = base.UpdateContainerLimit(r.cr.Spec.Zookeeper.Limits, &zk.Spec.Template.Spec.Containers[0]) if r.cr.Spec.FluentBitLogForwarding != nil { - fbVolume, fbSidecar := createZKLogForwarderSidecar(r, annotations) + fbVolumes, fbSidecar := createZKLogForwarderSidecar(r, annotations) zk.Spec.Template.Spec.Containers = append(zk.Spec.Template.Spec.Containers, fbSidecar) - zk.Spec.Template.Spec.Volumes = append(zk.Spec.Template.Spec.Volumes, fbVolume) + zk.Spec.Template.Spec.Volumes = append(zk.Spec.Template.Spec.Volumes, fbVolumes...) } statsExporter := sfmonitoring.MkNodeExporterSideCarContainer(ZookeeperIdent, volumeMountsStatsExporter)