From 4d5d84c4824110c7e30ac5fc18eaf66fa4a875cb Mon Sep 17 00:00:00 2001 From: CerealBoy Date: Mon, 19 Aug 2024 13:42:25 +1000 Subject: [PATCH 1/2] Only Cancel() the k8s job when Await returned an error --- internal/job/executor.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/internal/job/executor.go b/internal/job/executor.go index b531448519..ba48243465 100644 --- a/internal/job/executor.go +++ b/internal/job/executor.go @@ -1268,13 +1268,17 @@ func (e *Executor) kubernetesSetup(ctx context.Context, k8sAgentSocket *kubernet go func() { // If the k8s client is interrupted because the "server" agent is // stopped or unreachable, we should stop running the job. - // If the k8s client is interrupted because our own ctx was cancelled, - // then the job is already stopping, so there's no point logging an - // error. - if err := k8sAgentSocket.Await(ctx, kubernetes.RunStateInterrupt); err != nil && !errors.Is(err, context.Canceled) { - e.shell.Errorf("Error waiting for client interrupt: %v", err) + err := k8sAgentSocket.Await(ctx, kubernetes.RunStateInterrupt) + if err != nil { + // If the k8s client is interrupted because our own ctx was cancelled, + // then the job is already stopping, so there's no point logging an + // error. + if !errors.Is(err, context.Canceled) { + e.shell.Errorf("Error waiting for client interrupt: %v", err) + } + // If there's an error from Await, we should Cancel the job. + e.Cancel() } - e.Cancel() }() return nil } From 2642551ae6415fe23e0eee92022881e0512bb907 Mon Sep 17 00:00:00 2001 From: CerealBoy Date: Mon, 19 Aug 2024 14:35:33 +1000 Subject: [PATCH 2/2] Switch around the error handling on Await to better proceed with job completion --- internal/job/executor.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/internal/job/executor.go b/internal/job/executor.go index ba48243465..c15b94ae80 100644 --- a/internal/job/executor.go +++ b/internal/job/executor.go @@ -1269,16 +1269,16 @@ func (e *Executor) kubernetesSetup(ctx context.Context, k8sAgentSocket *kubernet // If the k8s client is interrupted because the "server" agent is // stopped or unreachable, we should stop running the job. err := k8sAgentSocket.Await(ctx, kubernetes.RunStateInterrupt) + // If the k8s client is interrupted because our own ctx was cancelled, + // then the job is already stopping, so there's no point logging an + // error. + if errors.Is(err, context.Canceled) { + return + } if err != nil { - // If the k8s client is interrupted because our own ctx was cancelled, - // then the job is already stopping, so there's no point logging an - // error. - if !errors.Is(err, context.Canceled) { - e.shell.Errorf("Error waiting for client interrupt: %v", err) - } - // If there's an error from Await, we should Cancel the job. - e.Cancel() + e.shell.Errorf("Error waiting for client interrupt: %v", err) } + e.Cancel() }() return nil }