skovtunenko · xakep666 · Nov 8, 2022 · Nov 8, 2022 · BMalaichik · Dec 19, 2022
diff --git a/hook.go b/hook.go
@@ -27,6 +27,7 @@ type Hook struct {
 
 	order    Order                     // order is Hook order.
 	name     string                    // name is an optional component name for pretty-printing in logs.
+	preStop  time.Duration             // time to wait _before_ triggering shutdown hook.
 	timeout  time.Duration             // timeout is max hookFunc execution timeout.
 	hookFunc func(ctx context.Context) // hookFunc is a user-defined termination hook function.
 }
@@ -40,6 +41,15 @@ func (h *Hook) WithName(name string) *Hook {
 	return h
 }
 
+// WithPreStopSleep sets an optional period to wait between receiving the signal and calling the shutdown hook.
+// The period does not depend on the hook timeout; a zero or negative value means no waiting.
+// This is needed for correct graceful termination of network services in Kubernetes.
+// See https://blog.palark.com/graceful-shutdown-in-kubernetes-is-not-always-trivial/ for detailed information.
+func (h *Hook) WithPreStopSleep(t time.Duration) *Hook {
+	h.preStop = t
+	return h
+}
+
 // Register registers termination [Hook] that should finish execution in less than given timeout.
 //
 // Timeout duration must be greater than zero; if not, timeout of 1 min will be used.

diff --git a/terminator.go b/terminator.go
@@ -55,6 +55,7 @@ func withSignals(ctx context.Context, chSignals chan os.Signal, sig ...os.Signal
 
 	// function invoke cancel once a signal arrived OR parent context is done:
 	go func() {
+		defer signal.Stop(chSignals)
 		defer cancel()
 
 		select {
@@ -140,6 +141,10 @@ func (t *Terminator) waitShutdown(appCtx context.Context) {
 			go func(f Hook) {
 				defer runWg.Done()
 
+				if f.preStop > 0 {
+					time.Sleep(f.preStop)
+				}
+
 				ctx, cancel := context.WithTimeout(context.Background(), f.timeout)
 				defer cancel()
 

diff --git a/terminator_test.go b/terminator_test.go
@@ -3,9 +3,11 @@ package graterm
 import (
 	"context"
 	"errors"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"log"
 	"os"
+	"os/exec"
 	"os/signal"
 	"runtime"
 	"sync"
@@ -277,6 +279,39 @@ func TestTerminator_Wait(t *testing.T) {
 	}
 }
 
+// TestTerminator_restores_signals re-executes the test binary as a child whose only hook never returns,
+// then checks that the child ends up killed by SIGINT after two SIGINTs are sent to it.
+func TestTerminator_restores_signals(t *testing.T) {
+	if os.Getenv("HANG_ON_STOP") == "1" { // child mode: register a hook that blocks forever
+		terminator, ctx := NewWithSignals(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+		require.NotNil(t, ctx)
+		terminator.WithOrder(0).Register(5*time.Second, func(ctx context.Context) {
+			select {}
+		})
+
+		_ = terminator.Wait(ctx, time.Minute)
+		return
+	}
+
+	// Anchor the pattern so that only this exact test runs in the child process.
+	cmd := exec.Command(os.Args[0], "-test.run=^TestTerminator_restores_signals$")
+	// Start from the parent's environment: a non-nil Env replaces it entirely.
+	cmd.Env = append(os.Environ(), "HANG_ON_STOP=1")
+	require.NoError(t, cmd.Start())
+
+	time.Sleep(100 * time.Millisecond)
+	require.NoError(t, cmd.Process.Signal(syscall.SIGINT)) // first SIGINT
+	time.Sleep(100 * time.Millisecond)
+	require.NoError(t, cmd.Process.Signal(syscall.SIGINT)) // second SIGINT: expected to kill the child
+
+	var exitErr *exec.ExitError
+	require.ErrorAs(t, cmd.Wait(), &exitErr)
+	ws, ok := exitErr.Sys().(syscall.WaitStatus)
+	require.Truef(t, ok, "exit error is not syscall.WaitStatus, but %T", exitErr.Sys())
+	assert.True(t, ws.Signaled(), "process stopped not by signal")
+	assert.Equal(t, syscall.SIGINT, ws.Signal(), "process stopped not by SIGINT")
+}
+
 func Test_withSignals(t *testing.T) {
 	t.Run("termination_on_SIGHUP", func(t *testing.T) {
 		rootCtx, cancel := context.WithCancel(context.Background())