Add options to configure probes

In kubernetes probes are used to determine when to restart a container, to know when a container is ready to start accepting traffic, or to know when when a container application has started. There are 3 types of probes: liveness, readiness and startup probes. Among there liveness and readiness are the most common and also the ones required by the users in the Go survey. This PR adds support to specify the liveness and readiness probes in the weaver kube deployer. The knobs exposed to the user are not that many: 1) Knobs that define things like: - how often to probe (optional) - how many seconds to wait for an answer before declaring the probe failing (optional) - how many consecutive successful probes to declare the container as healthy (optional) - how many consecutive failed probes to declare the container as unhealthy (optional) 2) Define what the probe should do - should you use HTTP for probing? If so, you can specify the port and a path to check for probing (port is mandatory, path is optional) - should you use TCP for probing? If so, you can specify the port - should you use a custom list of commands? We took the minimum number of knobs that might make sense for the user to configure probes. I think it should cover most of the usecases and scenarios someone might run into.
ServiceWeaver · Oct 19, 2023 · bafb29c · bafb29c
1 parent 0082ceb
commit bafb29c
Show file tree

Hide file tree

Showing 2 changed files with 169 additions and 31 deletions.
diff --git a/cmd/weaver-kube/deploy.go b/cmd/weaver-kube/deploy.go
@@ -178,6 +178,11 @@ func deploy(ctx context.Context, args []string) error {
 		config.ServiceAccount = "default"
 	}
 
+	// Validate the probe part of the config.
+	if err := checkProbeConfig(config.Probe); err != nil {
+		return fmt.Errorf("invalid probe spec: %w", err)
+	}
+
 	binListeners, err := bin.ReadListeners(app.Binary)
 	if err != nil {
 		return fmt.Errorf("cannot read listeners from binary %s: %w", app.Binary, err)
@@ -250,3 +255,39 @@ func checkVersionCompatibility(appBinary string) error {
 	}
 	return nil
 }
+
+// checkProbeConfig validates the configuration options for the probes.
+func checkProbeConfig(probes map[string]impl.ProbeOptions) error {
+	for pn, pb := range probes {
+		if pn != "liveness" && pn != "readiness" {
+			return fmt.Errorf("unknown probe type %s; possible values are liveness and readiness", pn)
+		}
+
+		// Check that exactly one of the probe handlers is set.
+		phSet := 0
+		if pb.Http != nil {
+			phSet++
+		}
+		if pb.Tcp != nil {
+			phSet++
+		}
+		if pb.Exec != nil {
+			phSet++
+		}
+		if phSet != 1 {
+			return fmt.Errorf("exactly one probe handler should be specified; %d provided", phSet)
+		}
+
+		// Validate the handlers.
+		if pb.Http != nil && (pb.Http.Port < 1 || pb.Http.Port > 65535) {
+			return fmt.Errorf("kube.probe.%s.http: invalid port %d", pn, pb.Http.Port)
+		}
+		if pb.Tcp != nil && (pb.Tcp.Port < 1 || pb.Tcp.Port > 65535) {
+			return fmt.Errorf("kube.probe.%s.tcp: invalid port %d", pn, pb.Tcp.Port)
+		}
+		if pb.Exec != nil && len(pb.Exec.Cmd) == 0 {
+			return fmt.Errorf("kube.probe.%s.cmd: no commands specified", pn)
+		}
+	}
+	return nil
+}
diff --git a/internal/impl/kube.go b/internal/impl/kube.go
@@ -72,35 +72,6 @@ type replicaSet struct {
 	traceServiceURL string                        // trace exporter URL
 }
 
-// ListenerOptions stores configuration options for a listener.
-type ListenerOptions struct {
-	// If specified, the listener service will have the name set to this value.
-	// Otherwise, we will generate a unique name for each app version.
-	ServiceName string `toml:"service_name"`
-
-	// Is the listener public, i.e., should it receive ingress traffic
-	// from the public internet. If false, the listener is configured only
-	// for cluster-internal access.
-	Public bool
-
-	// If specified, the port inside the container on which the listener
-	// is reachable. If zero or not specified, the first available port
-	// is used.
-	Port int32
-}
-
-// resourceRequirements stores the resource requirements configuration for running pods.
-type resourceRequirements struct {
-	// Describes the minimum amount of CPU required to run the pod.
-	RequestsCPU string `toml:"requests_cpu"`
-	// Describes the minimum amount of memory required to run the pod.
-	RequestsMem string `toml:"requests_mem"`
-	// Describes the maximum amount of CPU allowed to run the pod.
-	LimitsCPU string `toml:"limits_cpu"`
-	// Describes the maximum amount of memory allowed to run the pod.
-	LimitsMem string `toml:"limits_mem"`
-}
-
 // KubeConfig stores the configuration information for one execution of a
 // Service Weaver application deployed using the Kube deployer.
 type KubeConfig struct {
@@ -190,6 +161,91 @@ type KubeConfig struct {
 	//
 	// [1] https://pkg.go.dev/k8s.io/apimachinery/pkg/api/resource#example-MustParse.
 	Resources resourceRequirements
+
+	// Probes to check the readiness/liveness of the pods. Note that
+	Probe map[string]ProbeOptions
+}
+
+// ListenerOptions stores configuration options for a listener.
+type ListenerOptions struct {
+	// If specified, the listener service will have the name set to this value.
+	// Otherwise, we will generate a unique name for each app version.
+	ServiceName string `toml:"service_name"`
+
+	// Is the listener public, i.e., should it receive ingress traffic
+	// from the public internet. If false, the listener is configured only
+	// for cluster-internal access.
+	Public bool
+
+	// If specified, the port inside the container on which the listener
+	// is reachable. If zero or not specified, the first available port
+	// is used.
+	Port int32
+}
+
+// resourceRequirements stores the resource requirements configuration for running pods.
+type resourceRequirements struct {
+	// Describes the minimum amount of CPU required to run the pod.
+	RequestsCPU string `toml:"requests_cpu"`
+	// Describes the minimum amount of memory required to run the pod.
+	RequestsMem string `toml:"requests_mem"`
+	// Describes the maximum amount of CPU allowed to run the pod.
+	LimitsCPU string `toml:"limits_cpu"`
+	// Describes the maximum amount of memory allowed to run the pod.
+	LimitsMem string `toml:"limits_mem"`
+}
+
+// ProbeOptions stores the probes [1] configuration for the pods. These options
+// mirror the Kubernetes probe options available in [2].
+//
+// [1] https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/
+// [2] https://github.com/kubernetes/api/blob/v0.28.3/core/v1/types.go#L2277
+//
+// TODO(rgrandl): There are a few more knobs available in the kubernetes probe
+// definition. We can enable more knobs if really needed.
+type ProbeOptions struct {
+	// How often to perform the probe.
+	PeriodSecs int32 `toml:"period_secs"`
+	// Number of seconds after which the probe times out.
+	TimeoutSecs int32 `toml:"timeout_secs"`
+	// Minimum consecutive successes for the probe to be considered successful after having failed.
+	SuccessThreshold int32 `toml:"success_threshold"`
+	// Minimum consecutive failures for the probe to be considered failed after having succeeded.
+	FailureThreshold int32 `toml:"failure_threshold"`
+
+	// Probe behavior. Note that only of the following should be set by the user.
+
+	// The probe action is taken by executing commands.
+	Exec *execAction
+	// The probe action is taken by executing HTTP GET requests.
+	Http *httpAction
+	// The probe action is taken by executing TCP requests.
+	Tcp *tcpAction
+}
+
+// execAction describes the probe action when using a list of commands. It mirrors
+// Kubernetes ExecAction [1].
+//
+// [1] https://github.com/kubernetes/api/blob/v0.28.3/core/v1/types.go#L2265
+type execAction struct {
+	Cmd []string // List of commands to execute inside the container.
+}
+
+// httpAction describes the probe action when using HTTP. It mirrors Kubernetes
+// HTTPGetAction [1].
+//
+// [1] https://github.com/kubernetes/api/blob/v0.28.3/core/v1/types.go#L2208
+type httpAction struct {
+	Path string // Path to access on the HTTP server.
+	Port int32  // Port number to access on the container.
+}
+
+// tcpAction describes the probe action when using TCP. It mirrors Kubernetes
+// TCPSocketAction [1].
+//
+// [1] https://github.com/kubernetes/api/blob/v0.28.3/core/v1/types.go#L2241
+type tcpAction struct {
+	Port int32 // Port number to access on the container.
 }
 
 // shortenComponent shortens the given component name to be of the format
@@ -435,7 +491,7 @@ func (r *replicaSet) buildContainer(cfg *KubeConfig) (corev1.Container, error) {
 		return corev1.Container{}, err
 	}
 
-	return corev1.Container{
+	c := corev1.Container{
 		Name:            appContainerName,
 		Image:           r.image,
 		ImagePullPolicy: corev1.PullIfNotPresent,
@@ -450,7 +506,48 @@ func (r *replicaSet) buildContainer(cfg *KubeConfig) (corev1.Container, error) {
 		// for debugging.
 		TTY:   true,
 		Stdin: true,
-	}, nil
+	}
+
+	// Add probes if any.
+	for pn, pb := range cfg.Probe {
+		probe := &corev1.Probe{}
+		if pb.TimeoutSecs > 0 {
+			probe.TimeoutSeconds = pb.TimeoutSecs
+		}
+		if pb.PeriodSecs > 0 {
+			probe.PeriodSeconds = pb.PeriodSecs
+		}
+		if pb.SuccessThreshold > 0 {
+			probe.SuccessThreshold = pb.SuccessThreshold
+		}
+		if pb.FailureThreshold > 0 {
+			probe.FailureThreshold = pb.FailureThreshold
+		}
+		if pb.Tcp != nil {
+			probe.TCPSocket = &corev1.TCPSocketAction{Port: intstr.IntOrString{IntVal: pb.Tcp.Port}}
+		}
+		if pb.Http != nil {
+			probe.HTTPGet = &corev1.HTTPGetAction{Port: intstr.IntOrString{IntVal: pb.Http.Port}}
+			if pb.Http.Path != "" {
+				// If no path specified, the HTTPGetAction will do health checks on "/".
+				probe.HTTPGet.Path = pb.Http.Path
+			}
+		}
+		if pb.Exec != nil {
+			// Command is optional for an ExecAction. However, it's confusing why that's
+			// the case, especially that this is the only parameter to configure for an
+			// ExecAction.
+			probe.Exec = &corev1.ExecAction{Command: pb.Exec.Cmd}
+		}
+
+		if pn == "liveness" {
+			c.LivenessProbe = probe
+		}
+		if pn == "readiness" {
+			c.ReadinessProbe = probe
+		}
+	}
+	return c, nil
 }
 
 // GenerateYAMLs generates Kubernetes YAML configurations for a given