From 5c153c9cac47462f35a2c1301cc94314357b4be9 Mon Sep 17 00:00:00 2001 From: Krzysztof Nazarewski <3494992+nazarewk@users.noreply.github.com> Date: Tue, 13 Sep 2022 15:04:16 +0200 Subject: [PATCH] handle stale BeforeHookCreation resources fixes https://github.com/argoproj/gitops-engine/issues/446 Signed-off-by: Krzysztof Nazarewski <3494992+nazarewk@users.noreply.github.com> --- pkg/sync/sync_context.go | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pkg/sync/sync_context.go b/pkg/sync/sync_context.go index abd4379f7..416e1294c 100644 --- a/pkg/sync/sync_context.go +++ b/pkg/sync/sync_context.go @@ -30,7 +30,7 @@ import ( "github.com/argoproj/gitops-engine/pkg/diff" "github.com/argoproj/gitops-engine/pkg/health" "github.com/argoproj/gitops-engine/pkg/sync/common" - "github.com/argoproj/gitops-engine/pkg/sync/hook" + hookutil "github.com/argoproj/gitops-engine/pkg/sync/hook" resourceutil "github.com/argoproj/gitops-engine/pkg/sync/resource" "github.com/argoproj/gitops-engine/pkg/utils/kube" kubeutil "github.com/argoproj/gitops-engine/pkg/utils/kube" @@ -290,6 +290,21 @@ const ( // getOperationPhase returns a hook status from an _live_ unstructured object func (sc *syncContext) getOperationPhase(hook *unstructured.Unstructured) (common.OperationPhase, string, error) { + // start by detecting resources that: + // 1. have BeforeHookCreation deletion policies + // 2. were already deleted from the cluster + // 3. 
DELETE watch event from kubernetes was not processed yet, + // therefore old version is still present in cache prematurely ending the sync wave, + // this can happen under high load of controller and/or k8s control plane + // fixes https://github.com/argoproj/gitops-engine/issues/446 + // related to artificial sync wave delays in ArgoCD: + // https://github.com/argoproj/argo-cd/blob/9fac0f6ae6e52d6f4978a1eaaf51fbffb9c0958a/controller/sync.go#L465-L485 + for _, policy := range hookutil.DeletePolicies(hook) { + if policy == common.HookDeletePolicyBeforeHookCreation && sc.startedAt.After(hook.GetCreationTimestamp().Time) { + return common.OperationRunning, fmt.Sprintf("%s pending recreation", hook.GetName()), nil + } + } + phase := common.OperationSucceeded message := fmt.Sprintf("%s created", hook.GetName()) @@ -627,8 +642,8 @@ func (sc *syncContext) getSyncTasks() (_ syncTasks, successful bool) { obj := obj(resource.Target, resource.Live) // this creates garbage tasks - if hook.IsHook(obj) { - sc.log.WithValues("group", obj.GroupVersionKind().Group, "kind", obj.GetKind(), "namespace", obj.GetNamespace(), "name", obj.GetName()).V(1).Info("Skipping hook") + if hookutil.IsHook(obj) { + sc.log.WithValues("group", obj.GroupVersionKind().Group, "kind", obj.GetKind(), "namespace", obj.GetNamespace(), "name", obj.GetName()).V(1).Info("Skipping hook") continue }