From db283da0882cdffc3d6ba425d0c31943890d88a7 Mon Sep 17 00:00:00 2001 From: Kaviraj Kanagaraj Date: Tue, 25 Jan 2022 10:31:22 +0100 Subject: [PATCH] Add `MaxSurge` and `MaxUnavailable` strategy to all Loki k8 workloads. (#5227) * Add `MaxSurge` and `MaxUnavailable` strategy to all Loki k8 workloads. This fixes a couple of issues. 1. By default these configs are 25% in k8, meaning that during a rollout 25% of pods are allowed to shut down immediately. 2. Due to (1), during the graceful shutdown process, 25% of all the pods access consul to `unregister()` from the shared key-value store. (2) makes the CAS rate of the underlying KV store high (leading to lots of retries and failures), sometimes failing to unregister and leaving the ring "unhealthy". Also, this PR makes these configs consistent across all k8 workloads. More details: https://github.com/grafana/dskit/issues/117 * Remove it from statefulset workloads Signed-off-by: Kaviraj --- production/ksonnet/loki/distributor.libsonnet | 4 +++- production/ksonnet/loki/gateway.libsonnet | 4 +++- production/ksonnet/loki/querier.libsonnet | 4 +++- production/ksonnet/loki/query-frontend.libsonnet | 4 +++- production/ksonnet/loki/query-scheduler.libsonnet | 4 +++- production/ksonnet/loki/ruler.libsonnet | 4 +++- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/production/ksonnet/loki/distributor.libsonnet b/production/ksonnet/loki/distributor.libsonnet index 7fd299c737c58..9633381ed9311 100644 --- a/production/ksonnet/loki/distributor.libsonnet +++ b/production/ksonnet/loki/distributor.libsonnet @@ -29,7 +29,9 @@ local k = import 'ksonnet-util/kausal.libsonnet'; $._config.overrides_configmap_mount_name, $._config.overrides_configmap_mount_path, ) + - k.util.antiAffinity, + k.util.antiAffinity + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(5) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1), distributor_service: k.util.serviceFor($.distributor_deployment), diff --git a/production/ksonnet/loki/gateway.libsonnet 
b/production/ksonnet/loki/gateway.libsonnet index 36ffc51f1f055..e99628e2fe7f8 100644 --- a/production/ksonnet/loki/gateway.libsonnet +++ b/production/ksonnet/loki/gateway.libsonnet @@ -98,7 +98,9 @@ local k = import 'ksonnet-util/kausal.libsonnet'; }) + k.util.configVolumeMount('gateway-config', '/etc/nginx') + k.util.secretVolumeMount('gateway-secret', '/etc/nginx/secrets', defaultMode=420) + - k.util.antiAffinity, + k.util.antiAffinity + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(5) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1), gateway_service: k.util.serviceFor($.gateway_deployment), diff --git a/production/ksonnet/loki/querier.libsonnet b/production/ksonnet/loki/querier.libsonnet index 1bb5c0f66d904..848c6c06ef827 100644 --- a/production/ksonnet/loki/querier.libsonnet +++ b/production/ksonnet/loki/querier.libsonnet @@ -34,7 +34,9 @@ local k = import 'ksonnet-util/kausal.libsonnet'; $._config.overrides_configmap_mount_name, $._config.overrides_configmap_mount_path, ) + - k.util.antiAffinity + k.util.antiAffinity + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(5) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1) else {}, // PVC for queriers when running as statefulsets diff --git a/production/ksonnet/loki/query-frontend.libsonnet b/production/ksonnet/loki/query-frontend.libsonnet index d495ff2ab5929..404de05bce234 100644 --- a/production/ksonnet/loki/query-frontend.libsonnet +++ b/production/ksonnet/loki/query-frontend.libsonnet @@ -35,7 +35,9 @@ local k = import 'ksonnet-util/kausal.libsonnet'; $._config.overrides_configmap_mount_name, $._config.overrides_configmap_mount_path, ) + - k.util.antiAffinity, + k.util.antiAffinity + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(5) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1), local service = k.core.v1.service, diff --git a/production/ksonnet/loki/query-scheduler.libsonnet 
b/production/ksonnet/loki/query-scheduler.libsonnet index 7b15ee4d583ad..4f71e2f7de527 100644 --- a/production/ksonnet/loki/query-scheduler.libsonnet +++ b/production/ksonnet/loki/query-scheduler.libsonnet @@ -51,7 +51,9 @@ local k = import 'ksonnet-util/kausal.libsonnet'; $._config.overrides_configmap_mount_name, $._config.overrides_configmap_mount_path, ) + - k.util.antiAffinity + k.util.antiAffinity + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(5) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1) else {}, local service = k.core.v1.service, diff --git a/production/ksonnet/loki/ruler.libsonnet b/production/ksonnet/loki/ruler.libsonnet index ebaf6d7fbe79e..85dce3f9e22db 100644 --- a/production/ksonnet/loki/ruler.libsonnet +++ b/production/ksonnet/loki/ruler.libsonnet @@ -44,7 +44,9 @@ local k = import 'ksonnet-util/kausal.libsonnet'; $._config.overrides_configmap_mount_name, $._config.overrides_configmap_mount_path, ) + - k.util.antiAffinity + k.util.antiAffinity + + deployment.mixin.spec.strategy.rollingUpdate.withMaxSurge(5) + + deployment.mixin.spec.strategy.rollingUpdate.withMaxUnavailable(1) else {}, ruler_service: if !$._config.ruler_enabled