🌟 enhancement(VSecM Sentinel): refactored forever loops (#946)

v0lkan · web-flow · commit 3ec61885b992 · 2024-04-26T13:04:58.000-07:00
updated infinite loops with early crashes instead

Signed-off-by: Volkan Özçelik &lt;ovolkan@vmware.com&gt;
diff --git a/app/sentinel/background/initialization/connectivity.go b/app/sentinel/background/initialization/connectivity.go
@@ -13,94 +13,85 @@ package initialization
 import (
 	"context"
 	"github.com/pkg/errors"
+	"github.com/spiffe/go-spiffe/v2/workloadapi"
 	"time"
 
 	"github.com/vmware-tanzu/secrets-manager/app/sentinel/internal/safe"
 	"github.com/vmware-tanzu/secrets-manager/core/backoff"
-	"github.com/vmware-tanzu/secrets-manager/core/env"
 	log "github.com/vmware-tanzu/secrets-manager/core/log/std"
 	"github.com/vmware-tanzu/secrets-manager/core/spiffe"
 )
 
 func ensureApiConnectivity(ctx context.Context, cid *string) {
-	terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()
-
 	log.TraceLn(cid, "Before checking api connectivity")
 
-	for {
-		s := backoffStrategy()
-
-		err := backoff.Retry("RunInitCommands:CheckConnectivity", func() error {
-			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: checking connectivity to safe")
+	s := backoffStrategy()
+	err := backoff.Retry("RunInitCommands:CheckConnectivity", func() error {
+		log.TraceLn(cid, "RunInitCommands:CheckConnectivity: checking connectivity to safe")
 
-			src, acquired := spiffe.AcquireSourceForSentinel(ctx)
-			if !acquired {
-				log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to acquire source.")
-				if terminateAsap {
-					panic("RunInitCommands:CheckConnectivity: failed to acquire source")
-				}
+		src, acquired := spiffe.AcquireSourceForSentinel(ctx)
+		if !acquired {
+			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to acquire source.")
 
-				return errors.New("RunInitCommands:CheckConnectivity: failed to acquire source")
-			}
+			return errors.New("RunInitCommands:CheckConnectivity: failed to acquire source")
+		}
 
-			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: acquired source successfully")
+		log.TraceLn(cid, "RunInitCommands:CheckConnectivity: acquired source successfully")
 
-			if err := safe.Check(ctx, src); err != nil {
-				log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to verify connection to safe:", err.Error())
-				if terminateAsap {
-					panic("RunInitCommands:CheckConnectivity: failed to verify connection to safe")
-				}
+		if err := safe.Check(ctx, src); err != nil {
+			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: failed to verify connection to safe:", err.Error())
 
-				return errors.Wrap(err, "RunInitCommands:CheckConnectivity: cannot establish connection to safe 001")
-			}
+			return errors.Wrap(err, "RunInitCommands:CheckConnectivity: cannot establish connection to safe 001")
+		}
 
-			log.TraceLn(cid, "RunInitCommands:CheckConnectivity: success")
-			return nil
-		}, s)
+		log.TraceLn(cid, "RunInitCommands:CheckConnectivity: success")
+		return nil
+	}, s)
 
-		if err == nil {
-			log.TraceLn(cid, "exiting backoffs")
-			break
-		}
+	if err == nil {
+		log.TraceLn(cid, "exiting backoffs")
+		return
 	}
+
+	// I shouldn't be here.
+	panic("RunInitCommands:CheckConnectivity: failed to verify connection to safe")
 }
 
-func ensureSourceAcquisition(ctx context.Context, cid *string) {
+func ensureSourceAcquisition(ctx context.Context, cid *string) *workloadapi.X509Source {
 	// If `true`, instead of retrying with a backoff, kill the pod, and let the
 	// deployment controller restart it to initiate a new retry.
-	terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()
 
-	waitInterval := env.InitCommandRunnerWaitIntervalForSentinel()
-	time.Sleep(waitInterval)
+	log.TraceLn(cid, "RunInitCommands: acquiring source 001")
 
-	for {
-		log.TraceLn(cid, "RunInitCommands: acquiring source 001")
+	s := backoff.Strategy{
+		MaxRetries:  20,
+		Delay:       1000,
+		Exponential: true,
+		MaxDuration: 30 * time.Second,
+	}
 
-		s := backoff.Strategy{
-			MaxRetries:  20,
-			Delay:       1000,
-			Exponential: true,
-			MaxDuration: 30 * time.Second,
-		}
+	var src *workloadapi.X509Source
 
-		err := backoff.Retry("RunInitCommands:AcquireSource", func() error {
-			log.TraceLn(cid, "RunInitCommands:AcquireSource: acquireSourceForSentinel: 000")
-			_, acquired := spiffe.AcquireSourceForSentinel(ctx)
-			if !acquired {
-				log.TraceLn(cid, "RunInitCommands:AcquireSource: failed to acquire source.")
-				if terminateAsap {
-					panic("RunInitCommands:AcquireSource: failed to acquire source")
-				}
+	err := backoff.Retry("RunInitCommands:AcquireSource", func() error {
+		log.TraceLn(cid, "RunInitCommands:AcquireSource: acquireSourceForSentinel: 000")
 
-				return errors.New("RunInitCommands:AcquireSource: failed to acquire source 000")
-			}
+		acq, acquired := spiffe.AcquireSourceForSentinel(ctx)
+		src = acq
 
-			return nil
-		}, s)
+		if !acquired {
+			log.TraceLn(cid, "RunInitCommands:AcquireSource: failed to acquire source.")
 
-		if err == nil {
-			log.TraceLn(cid, "RunInitCommands:AcquireSource: got source. breaking.")
-			break
+			return errors.New("RunInitCommands:AcquireSource: failed to acquire source 000")
 		}
+
+		return nil
+	}, s)
+
+	if err == nil {
+		log.TraceLn(cid, "RunInitCommands:AcquireSource: got source. breaking.")
+		return src
 	}
+
+	// I shouldn't be here.
+	panic("RunInitCommands:AcquireSource: failed to acquire source")
 }
diff --git a/app/sentinel/background/initialization/io.go b/app/sentinel/background/initialization/io.go
@@ -45,15 +45,9 @@ func parseCommandsFile(ctx context.Context, cid *string, scanner *bufio.Scanner)
 	log.TraceLn(cid, "Before parsing commands 002")
 
 	sc := entity.SentinelCommand{}
-	terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()
 
 	if scanner == nil {
-		if terminateAsap {
-			log.ErrorLn(cid, "RunInitCommands: error scanning commands file")
-			panic("RunInitCommands: error scanning commands file")
-		}
-
-		return
+		panic("RunInitCommands: error scanning commands file")
 	}
 
 	log.TraceLn(cid, "beginning scan")
@@ -97,10 +91,8 @@ dance:
 						"RunInitCommands:ProcessCommandBlock:error:",
 						err.Error(),
 					)
-					if terminateAsap {
-						panic("RunInitCommands:ProcessCommandBlock failed")
-					}
 				}
+
 				return err
 			}, s)
 
@@ -110,9 +102,10 @@ dance:
 					"RunInitCommands: error processing command block: ",
 					err.Error(),
 				)
-				if terminateAsap {
-					panic("RunInitCommands: error processing command block")
-				}
+
+				// If command failed, then the initialization is not totally successful.
+				// Thus, it is best to crash the container to restart the initialization.
+				panic("RunInitCommands:ProcessCommandBlock failed")
 			}
 
 			log.TraceLn(cid, "scanner: after delimiter")
@@ -181,8 +174,9 @@ dance:
 			"RunInitCommands: Error reading initialization file: ",
 			err.Error(),
 		)
-		if terminateAsap {
-			panic("RunInitCommands: Error reading initialization file")
-		}
+
+		// If command failed, then the initialization is not totally successful.
+		// Thus, it is best to crash the container to restart the initialization.
+		panic("RunInitCommands: Error in scanning the file")
 	}
 }
diff --git a/app/sentinel/background/initialization/keystone.go b/app/sentinel/background/initialization/keystone.go
@@ -15,13 +15,10 @@ import (
 
 	"github.com/vmware-tanzu/secrets-manager/core/backoff"
 	entity "github.com/vmware-tanzu/secrets-manager/core/entity/data/v1"
-	"github.com/vmware-tanzu/secrets-manager/core/env"
 	log "github.com/vmware-tanzu/secrets-manager/core/log/std"
 )
 
 func markKeystone(ctx context.Context, cid *string) bool {
-	terminateAsap := env.TerminateSentinelOnInitCommandConnectivityFailure()
-
 	s := backoffStrategy()
 	err := backoff.Retry("RunInitCommands:MarkKeystone", func() error {
 		log.TraceLn(cid, "RunInitCommands:MarkKeystone: retrying with exponential backoff")
@@ -33,22 +30,13 @@ func markKeystone(ctx context.Context, cid *string) bool {
 			Secret:      "keystone-init",
 		})
 
-		if err != nil {
-			if terminateAsap {
-				panic("RunInitCommands: error setting keystone secret")
-			}
-		}
-
 		return err
 	}, s)
 
-	if err != nil {
-		log.ErrorLn(cid, "RunInitCommands: error setting keystone secret: ", err.Error())
-		if terminateAsap {
-			panic("RunInitCommands: error setting keystone secret")
-		}
-		return false
+	if err == nil {
+		return true
 	}
 
-	return true
+	log.ErrorLn(cid, "RunInitCommands: error setting keystone secret: ", err.Error())
+	panic("RunInitCommands: error setting keystone secret")
 }
diff --git a/app/sentinel/background/initialization/run.go b/app/sentinel/background/initialization/run.go
@@ -12,7 +12,6 @@ package initialization
 
 import (
 	"context"
-	"github.com/spiffe/go-spiffe/v2/workloadapi"
 	"os"
 	"time"
 
@@ -47,9 +46,19 @@ import (
 // If the file cannot be opened, the function logs an informational message and
 // returns early. Errors encountered while reading the file or closing it are
 // logged as errors.
-func RunInitCommands(ctx context.Context, source *workloadapi.X509Source) {
+func RunInitCommands(ctx context.Context) {
 	cid := ctx.Value("correlationId").(*string)
 
+	waitInterval := env.InitCommandRunnerWaitBeforeExecIntervalForSentinel()
+	time.Sleep(waitInterval)
+
+	// Ensure that we can acquire a source before proceeding.
+	source := ensureSourceAcquisition(ctx, cid)
+
+	// Now, we are sure that we can acquire a source.
+	// Try to do a VSecM Safe API request with the source.
+	ensureApiConnectivity(ctx, cid)
+
 	// No need to proceed if initialization has been completed already.
 	if initCommandsExecutedAlready(ctx, source) {
 		log.TraceLn(cid, "RunInitCommands: executed already. exiting")
@@ -58,12 +67,6 @@ func RunInitCommands(ctx context.Context, source *workloadapi.X509Source) {
 
 	log.TraceLn(cid, "RunInitCommands: starting the init flow")
 
-	// Ensure that we can acquire a source before proceeding.
-	ensureSourceAcquisition(ctx, cid)
-	// Now, we are sure that we can acquire a source.
-	// Try to do a VSecM Safe API request with the source.
-	ensureApiConnectivity(ctx, cid)
-
 	// Now we know that we can establish a connection to VSecM Safe
 	// and execute API requests. So, we can safely run init commands.
 
@@ -100,13 +103,14 @@ func RunInitCommands(ctx context.Context, source *workloadapi.X509Source) {
 	if !success {
 		log.TraceLn(cid, "RunInitCommands: failed to mark keystone. exiting")
 
-		// If we cannot set the keystone secret, we should not proceed.
+		// If we cannot set the keystone secret, better to retry everything.
+		panic("RunInitCommands: failed to set keystone secret")
 		return
 	}
 
 	// Wait before notifying Keystone. This way, if there are things that
 	// take time to reconcile, they have a chance to do so.
-	waitInterval := env.InitCommandRunnerWaitIntervalBeforeInitComplete()
+	waitInterval = env.InitCommandRunnerWaitIntervalBeforeInitComplete()
 	time.Sleep(waitInterval)
 
 	// Everything is set up.
diff --git a/app/sentinel/background/initialization/validation.go b/app/sentinel/background/initialization/validation.go
@@ -23,24 +23,24 @@ func initCommandsExecutedAlready(ctx context.Context, src *workloadapi.X509Sourc
 
 	log.TraceLn(cid, "check:initCommandsExecutedAlready")
 
-	s := backoffStrategy()
 	initialized := false
-	for {
-		err := backoff.Retry("RunInitCommands:CheckConnectivity", func() error {
-			i, err := safe.CheckInitialization(ctx, src)
-			if err != nil {
-				return err
-			}
-			initialized = i
-			return nil
-		}, s)
 
+	s := backoffStrategy()
+	err := backoff.Retry("RunInitCommands:CheckConnectivity", func() error {
+		i, err := safe.CheckInitialization(ctx, src)
 		if err != nil {
-			log.ErrorLn(cid, "check:backoff:error", err.Error())
-			continue
+			return err
 		}
 
-		log.TraceLn(cid, "check:return initialized:", initialized)
+		initialized = i
+
+		return nil
+	}, s)
+
+	if err == nil {
 		return initialized
 	}
+
+	// I shouldn't be here.
+	panic("RunInitCommands:initCommandsExecutedAlready: failed to check command initialization")
 }
diff --git a/app/sentinel/background/main.go b/app/sentinel/background/main.go
diff --git a/core/env/sentinel.go b/core/env/sentinel.go