Skip to content

Commit

Permalink
fluent bit: configure sidecars for buffering, backpressure
Browse files Browse the repository at this point in the history
Set up a dedicated emptyDir volume for buffering on the filesystem (buffered
data is dropped once it exceeds 100MB).
Set the maximum number of in-memory chunks to a conservative value (designed
to take at most 50% of the container's memory request).

This should prevent pods from being OOMKilled when something goes
wrong with log forwarding.

Change-Id: If6abbea0c03b70f9213455572bcc1d416173e2c5
  • Loading branch information
mhuin committed Oct 8, 2024
1 parent e031e35 commit 4e3d35f
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 49 deletions.
6 changes: 3 additions & 3 deletions controllers/libs/base/static/images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ images:
version: v0.27.1
- name: fluentbit
container: cr.fluentbit.io/fluent/fluent-bit
version: "2.1.10"
version: "3.1.9"
- name: fluentbit-debug
container: cr.fluentbit.io/fluent/fluent-bit-debug
version: "2.1.10"
container: cr.fluentbit.io/fluent/fluent-bit
version: "3.1.9-debug"
13 changes: 10 additions & 3 deletions controllers/libs/logging/logging.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ func SetupLogForwarding(serviceName string, forwarderSpec *v1.FluentBitForwarder
}
}

func CreateFluentBitSideCarContainer(serviceName string, extraLabels []FluentBitLabel, volumeMounts []apiv1.VolumeMount, debug bool) apiv1.Container {
func CreateFluentBitSideCarContainer(serviceName string, extraLabels []FluentBitLabel, volumeMounts []apiv1.VolumeMount, debug bool) (apiv1.Container, apiv1.Volume) {
var img = base.FluentBitImage(debug)
container := base.MkContainer("fluentbit", img)
container.Env = CreateForwarderEnvVars(serviceName, extraLabels)
Expand All @@ -127,7 +127,14 @@ func CreateFluentBitSideCarContainer(serviceName string, extraLabels []FluentBit
ContainerPort: 2020,
},
}
// Note that the empty dir will be lost at restart. The idea is really to
// only provide buffering to prevent OOM killing of the pod.
storageEmptyDir := base.MkEmptyDirVolume(serviceName + "-fb-buf")
storageVolumeMount := apiv1.VolumeMount{
Name: serviceName + "-fb-buf",
MountPath: "/buffer-storage/",
}
container.Ports = ports
container.VolumeMounts = volumeMounts
return container
container.VolumeMounts = append(volumeMounts, storageVolumeMount)
return container, storageEmptyDir
}
12 changes: 6 additions & 6 deletions controllers/mariadb.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ type ZuulDBOpts struct {
Params map[string]string
}

func createLogForwarderSidecar(r *SFController, annotations map[string]string) (apiv1.Volume, apiv1.Container) {
func createLogForwarderSidecar(r *SFController, annotations map[string]string) ([]apiv1.Volume, apiv1.Container) {

fbForwarderConfig := make(map[string]string)
var loggingParams = logging.CreateForwarderConfigTemplateParams("mariadb", r.cr.Spec.FluentBitLogForwarding)
Expand Down Expand Up @@ -74,10 +74,10 @@ func createLogForwarderSidecar(r *SFController, annotations map[string]string) (
if r.cr.Spec.FluentBitLogForwarding.Debug != nil {
fluentbitDebug = *r.cr.Spec.FluentBitLogForwarding.Debug
}
sidecar := logging.CreateFluentBitSideCarContainer(MariaDBIdent, []logging.FluentBitLabel{}, volumeMounts, fluentbitDebug)
sidecar, storageEmptyDir := logging.CreateFluentBitSideCarContainer(MariaDBIdent, []logging.FluentBitLabel{}, volumeMounts, fluentbitDebug)
annotations["mariadb-fluent-bit.conf"] = utils.Checksum([]byte(fbForwarderConfig["fluent-bit.conf"]))
annotations["mariadb-fluent-bit-image"] = sidecar.Image
return volume, sidecar
return []apiv1.Volume{volume, storageEmptyDir}, sidecar
}

func (r *SFController) CreateDBInitContainer(username string, password string, dbname string) apiv1.Container {
Expand Down Expand Up @@ -267,14 +267,14 @@ GRANT ALL ON *.* TO root@'%%' WITH GRANT OPTION;`,
}

annotations := map[string]string{
"serial": "5",
"serial": "6",
"image": base.MariaDBImage(),
"limits": limitstr,
}
if r.cr.Spec.FluentBitLogForwarding != nil {
fbVolume, fbSidecar := createLogForwarderSidecar(r, annotations)
fbVolumes, fbSidecar := createLogForwarderSidecar(r, annotations)
sts.Spec.Template.Spec.Containers = append(sts.Spec.Template.Spec.Containers, fbSidecar)
sts.Spec.Template.Spec.Volumes = append(sts.Spec.Template.Spec.Volumes, fbVolume)
sts.Spec.Template.Spec.Volumes = append(sts.Spec.Template.Spec.Volumes, fbVolumes...)
}

statsExporter := sfmonitoring.MkNodeExporterSideCarContainer(MariaDBIdent, volumeMountsStatsExporter)
Expand Down
12 changes: 6 additions & 6 deletions controllers/nodepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ var nodepoolFluentBitLabels = []logging.FluentBitLabel{
},
}

func createImageBuildLogForwarderSidecar(r *SFController, annotations map[string]string) (apiv1.Volume, apiv1.Container) {
func createImageBuildLogForwarderSidecar(r *SFController, annotations map[string]string) ([]apiv1.Volume, apiv1.Container) {
fbForwarderConfig := make(map[string]string)
var loggingParams = logging.CreateForwarderConfigTemplateParams("diskimage-builder", r.cr.Spec.FluentBitLogForwarding)

Expand Down Expand Up @@ -125,11 +125,11 @@ func createImageBuildLogForwarderSidecar(r *SFController, annotations map[string
fluentbitDebug = *r.cr.Spec.FluentBitLogForwarding.Debug
}
builderFluentBitLabels := append(nodepoolFluentBitLabels, logging.FluentBitLabel{Key: "CONTAINER", Value: BuilderIdent})
sidecar := logging.CreateFluentBitSideCarContainer("diskimage-builder", builderFluentBitLabels, volumeMounts, fluentbitDebug)
sidecar, storageEmptyDir := logging.CreateFluentBitSideCarContainer("diskimage-builder", builderFluentBitLabels, volumeMounts, fluentbitDebug)
annotations["dib-fluent-bit.conf"] = utils.Checksum([]byte(fbForwarderConfig["fluent-bit.conf"]))
annotations["dib-fluent-bit-parser"] = utils.Checksum([]byte(fbForwarderConfig["parsers.conf"]))
annotations["dib-fluent-bit-image"] = sidecar.Image
return volume, sidecar
return []apiv1.Volume{volume, storageEmptyDir}, sidecar

}

Expand Down Expand Up @@ -524,7 +524,7 @@ func (r *SFController) DeployNodepoolBuilder(statsdExporterVolume apiv1.Volume,
"statsd_mapping": utils.Checksum([]byte(nodepoolStatsdMappingConfig)),
"image": base.NodepoolBuilderImage(),
"nodepool-providers-secrets": getSecretsVersion(providersSecrets, providerSecretsExists),
"serial": "16",
"serial": "17",
"corporate-ca-certs-version": getCMVersion(corporateCM, corporateCMExists),
}

Expand Down Expand Up @@ -583,9 +583,9 @@ func (r *SFController) DeployNodepoolBuilder(statsdExporterVolume apiv1.Volume,
extraLoggingEnvVars := logging.SetupLogForwarding("nodepool-builder", r.cr.Spec.FluentBitLogForwarding, nodepoolFluentBitLabels, annotations)
nb.Spec.Template.Spec.Containers[0].Env = append(nb.Spec.Template.Spec.Containers[0].Env, extraLoggingEnvVars...)
if r.cr.Spec.FluentBitLogForwarding != nil {
fbVolume, fbSidecar := createImageBuildLogForwarderSidecar(r, annotations)
fbVolumes, fbSidecar := createImageBuildLogForwarderSidecar(r, annotations)
nb.Spec.Template.Spec.Containers = append(nb.Spec.Template.Spec.Containers, fbSidecar)
nb.Spec.Template.Spec.Volumes = append(nb.Spec.Template.Spec.Volumes, fbVolume)
nb.Spec.Template.Spec.Volumes = append(nb.Spec.Template.Spec.Volumes, fbVolumes...)
}

nb.Spec.Template.ObjectMeta.Annotations = annotations
Expand Down
23 changes: 15 additions & 8 deletions controllers/static/mariadb/fluentbit/fluent-bit.conf.tmpl
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
[SERVICE]
http_server On
http_port 2020
log_level {{ .LoggingParams.LogLevel }}
http_server On
http_port 2020
log_level {{ .LoggingParams.LogLevel }}
storage.path /buffer-storage
storage.sync normal
# A chunk is ~2MB, so 32 chunks (128 / 4) take at most ~64MB, i.e. 50% of 128Mi, the default container profile's memory request
# TODO compute this automatically from code
storage.max_chunks_up 32
[INPUT]
name tail
tag mariadb.error
Expand All @@ -10,6 +15,7 @@
refresh_interval 5
read_from_head True
db /watch/mariadb_fluentbit.db
storage.type filesystem
[FILTER]
name modify
match *
Expand Down Expand Up @@ -45,9 +51,10 @@
{{ end }}
{{- if .LoggingParams.ForwardInputConfig.InUse }}
[OUTPUT]
name forward
match *
tag {{ .LoggingParams.Tag }}
host {{ .LoggingParams.ForwardInputConfig.Host }}
port {{ .LoggingParams.ForwardInputConfig.Port }}
name forward
match *
tag {{ .LoggingParams.Tag }}
host {{ .LoggingParams.ForwardInputConfig.Host }}
port {{ .LoggingParams.ForwardInputConfig.Port }}
storage.total_limit_size 100M
{{ end }}
25 changes: 16 additions & 9 deletions controllers/static/nodepool/fluentbit/fluent-bit.conf.tmpl
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
[SERVICE]
http_server On
http_port 2020
log_level {{ .LoggingParams.LogLevel }}
parsers_file parsers.conf
http_server On
http_port 2020
log_level {{ .LoggingParams.LogLevel }}
parsers_file parsers.conf
storage.path /buffer-storage
storage.sync normal
# A chunk is ~2MB, so 32 chunks (128 / 4) take at most ~64MB, i.e. 50% of 128Mi, the default container profile's memory request
# TODO compute this automatically from code
storage.max_chunks_up 32
[INPUT]
name tail
tag diskimage-builder
Expand All @@ -11,6 +16,7 @@
refresh_interval 5
read_from_head True
db /watch/dib_fluentbit.db
storage.type filesystem
[FILTER]
name modify
match *
Expand Down Expand Up @@ -67,9 +73,10 @@
{{ end }}
{{- if .LoggingParams.ForwardInputConfig.InUse }}
[OUTPUT]
name forward
match *
tag {{ .LoggingParams.Tag }}
host {{ .LoggingParams.ForwardInputConfig.Host }}
port {{ .LoggingParams.ForwardInputConfig.Port }}
name forward
match *
tag {{ .LoggingParams.Tag }}
host {{ .LoggingParams.ForwardInputConfig.Host }}
port {{ .LoggingParams.ForwardInputConfig.Port }}
storage.total_limit_size 100M
{{ end }}
23 changes: 15 additions & 8 deletions controllers/static/zookeeper/fluent-bit.conf.tmpl
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
[SERVICE]
http_server On
http_port 2020
log_level {{ .LoggingParams.LogLevel }}
http_server On
http_port 2020
log_level {{ .LoggingParams.LogLevel }}
storage.path /buffer-storage
storage.sync normal
# A chunk is ~2MB, so 32 chunks (128 / 4) take at most ~64MB, i.e. 50% of 128Mi, the default container profile's memory request
# TODO compute this automatically from code
storage.max_chunks_up 32
[INPUT]
name tail
tag zookeeper
Expand All @@ -10,6 +15,7 @@
refresh_interval 5
read_from_head True
db /watch/zk_fluentbit.db
storage.type filesystem
[FILTER]
name modify
match *
Expand Down Expand Up @@ -41,9 +47,10 @@
{{ end }}
{{- if .LoggingParams.ForwardInputConfig.InUse }}
[OUTPUT]
name forward
match *
tag {{ .LoggingParams.Tag }}
host {{ .LoggingParams.ForwardInputConfig.Host }}
port {{ .LoggingParams.ForwardInputConfig.Port }}
name forward
match *
tag {{ .LoggingParams.Tag }}
host {{ .LoggingParams.ForwardInputConfig.Host }}
port {{ .LoggingParams.ForwardInputConfig.Port }}
storage.total_limit_size 100M
{{ end }}
12 changes: 6 additions & 6 deletions controllers/zookeeper.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ const zkSSLPort = 2281
const ZookeeperIdent = "zookeeper"
const zkPIMountPath = "/config-scripts"

func createZKLogForwarderSidecar(r *SFController, annotations map[string]string) (apiv1.Volume, apiv1.Container) {
func createZKLogForwarderSidecar(r *SFController, annotations map[string]string) ([]apiv1.Volume, apiv1.Container) {

fbForwarderConfig := make(map[string]string)
var loggingParams = logging.CreateForwarderConfigTemplateParams("zookeeper", r.cr.Spec.FluentBitLogForwarding)
Expand Down Expand Up @@ -64,10 +64,10 @@ func createZKLogForwarderSidecar(r *SFController, annotations map[string]string)
if r.cr.Spec.FluentBitLogForwarding.Debug != nil {
fluentbitDebug = *r.cr.Spec.FluentBitLogForwarding.Debug
}
sidecar := logging.CreateFluentBitSideCarContainer("zookeeper", []logging.FluentBitLabel{}, volumeMounts, fluentbitDebug)
sidecar, storageEmptyDir := logging.CreateFluentBitSideCarContainer("zookeeper", []logging.FluentBitLabel{}, volumeMounts, fluentbitDebug)
annotations["zk-fluent-bit.conf"] = utils.Checksum([]byte(fbForwarderConfig["fluent-bit.conf"]))
annotations["zk-fluent-bit-image"] = sidecar.Image
return volume, sidecar
return []apiv1.Volume{volume, storageEmptyDir}, sidecar
}

func (r *SFController) DeployZookeeper() bool {
Expand Down Expand Up @@ -97,7 +97,7 @@ func (r *SFController) DeployZookeeper() bool {
annotations := map[string]string{
"configuration": utils.Checksum([]byte(configChecksumable)),
"image": base.ZookeeperImage(),
"serial": "5",
"serial": "6",
}

volumeMountsStatsExporter := []apiv1.VolumeMount{
Expand Down Expand Up @@ -169,9 +169,9 @@ func (r *SFController) DeployZookeeper() bool {
annotations["limits"] = base.UpdateContainerLimit(r.cr.Spec.Zookeeper.Limits, &zk.Spec.Template.Spec.Containers[0])

if r.cr.Spec.FluentBitLogForwarding != nil {
fbVolume, fbSidecar := createZKLogForwarderSidecar(r, annotations)
fbVolumes, fbSidecar := createZKLogForwarderSidecar(r, annotations)
zk.Spec.Template.Spec.Containers = append(zk.Spec.Template.Spec.Containers, fbSidecar)
zk.Spec.Template.Spec.Volumes = append(zk.Spec.Template.Spec.Volumes, fbVolume)
zk.Spec.Template.Spec.Volumes = append(zk.Spec.Template.Spec.Volumes, fbVolumes...)
}

statsExporter := sfmonitoring.MkNodeExporterSideCarContainer(ZookeeperIdent, volumeMountsStatsExporter)
Expand Down

0 comments on commit 4e3d35f

Please sign in to comment.