Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions operator/api/config/v1alpha1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,19 @@ func SetDefaults_ServerConfiguration(serverConfig *ServerConfiguration) {
serverConfig.Webhooks.ServerCertDir = defaultWebhookServerTLSServerCertDir
}

if serverConfig.Webhooks.CertManagement == nil {
serverConfig.Webhooks.CertManagement = &WebhookCertManagement{}
}
if serverConfig.Webhooks.CertManagement.AutoProvision == nil {
serverConfig.Webhooks.CertManagement.AutoProvision = ptr.To(true)
}
if serverConfig.Webhooks.CertManagement.SecretName == "" {
serverConfig.Webhooks.CertManagement.SecretName = "grove-webhook-server-cert"
}
if serverConfig.Webhooks.CertManagement.CertManagerEnabled == nil {
serverConfig.Webhooks.CertManagement.CertManagerEnabled = ptr.To(false)
}

if serverConfig.HealthProbes == nil {
serverConfig.HealthProbes = &Server{}
}
Expand Down
21 changes: 21 additions & 0 deletions operator/api/config/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,27 @@ type WebhookServer struct {
Server `json:",inline"`
// ServerCertDir is the directory containing the server certificate and key.
ServerCertDir string `json:"serverCertDir"`
// CertManagement defines the certificate management configuration.
// +optional
CertManagement *WebhookCertManagement `json:"certManagement,omitempty"`
}

// WebhookCertManagement defines how webhook certificates are managed.
type WebhookCertManagement struct {
// AutoProvision enables automatic certificate generation and rotation.
// If set to false, you must provide your own certificates via Secret.
// Default: true
// +optional
AutoProvision *bool `json:"autoProvision,omitempty"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... maybe just default to auto provision in the code if SecretName isn't set, then the user doesn't have to remember to disable and we don't need to track this variable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. It seems we can check whether the user has set a secretName or a certFilesPath to mount, and use that to decide whether to auto-provision.

// SecretName is the name of the secret containing the webhook server certificate.
// Default: grove-webhook-server-cert
// +optional
SecretName string `json:"secretName,omitempty"`
// CertManagerEnabled indicates whether to annotate webhook configurations for cert-manager CA injection.
// This requires cert-manager to be installed in the cluster.
// Default: false
// +optional
CertManagerEnabled *bool `json:"certManagerEnabled,omitempty"`
}

// Server contains information for HTTP(S) server configuration.
Expand Down
5 changes: 5 additions & 0 deletions operator/charts/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ config.yaml: |
server:
webhooks:
port: {{ .Values.config.server.webhooks.port }}
serverCertDir: {{ .Values.config.server.webhooks.certDir }}
certManagement:
autoProvision: {{ .Values.config.server.webhooks.certManagement.autoProvision }}
secretName: {{ .Values.config.server.webhooks.certManagement.secretName }}
certManagerEnabled: {{ .Values.config.server.webhooks.certManagement.certManagerEnabled }}
healthProbes:
port: {{ .Values.config.server.healthProbes.port }}
metrics:
Expand Down
4 changes: 4 additions & 0 deletions operator/charts/templates/authorizer-webhook-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ kind: ValidatingWebhookConfiguration
metadata:
name: authorizer-webhook
namespace: {{ .Release.Namespace }}
{{- if .Values.config.server.webhooks.certManagement.certManagerEnabled }}
annotations:
cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ .Values.config.server.webhooks.certManagement.secretName }}
{{- end }}
labels:
{{- include "operator.authorizer.webhook.labels" . | nindent 4 }}
webhooks:
Expand Down
8 changes: 6 additions & 2 deletions operator/charts/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ spec:
mountPath: /var/run/secrets/kubernetes.io/serviceaccount
readOnly: true
- name: grove-webhook-server-cert
mountPath: /etc/grove-operator/webhook-certs
mountPath: {{ .Values.webhookServerCert.mountPath }}
readOnly: true
env:
- name: GROVE_OPERATOR_SERVICE_ACCOUNT_NAME
Expand Down Expand Up @@ -91,6 +91,10 @@ spec:
configMap:
name: {{ include "operator.config.name" . }}
- name: grove-webhook-server-cert
{{- if .Values.webhookServerCert.volume }}
{{- toYaml .Values.webhookServerCert.volume | nindent 10 }}
{{- else }}
secret:
secretName: grove-webhook-server-cert
secretName: {{ .Values.config.server.webhooks.certManagement.secretName }}
defaultMode: 420
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ kind: MutatingWebhookConfiguration
metadata:
name: podcliqueset-defaulting-webhook
namespace: {{ .Release.Namespace }}
{{- if .Values.config.server.webhooks.certManagement.certManagerEnabled }}
annotations:
cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ .Values.config.server.webhooks.certManagement.secretName }}
{{- end }}
labels:
{{- include "operator.pcs.defaulting.webhook.labels" . | nindent 4 }}
webhooks:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ kind: ValidatingWebhookConfiguration
metadata:
name: podcliqueset-validating-webhook
namespace: {{ .Release.Namespace }}
{{- if .Values.config.server.webhooks.certManagement.certManagerEnabled }}
annotations:
cert-manager.io/inject-ca-from: {{ .Release.Namespace }}/{{ .Values.config.server.webhooks.certManagement.secretName }}
{{- end }}
labels:
{{- include "operator.pcs.validating.webhook.labels" . | nindent 4 }}
webhooks:
Expand Down
4 changes: 3 additions & 1 deletion operator/charts/templates/webhook-server-cert-secret.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
{{- if .Values.config.server.webhooks.certManagement.autoProvision }}
apiVersion: v1
kind: Secret
metadata:
name: grove-webhook-server-cert
name: {{ .Values.config.server.webhooks.certManagement.secretName }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "operator.server.secret.labels" . | nindent 4 }}
type: kubernetes.io/tls
data:
tls.crt: ""
tls.key: ""
{{- end }}
47 changes: 47 additions & 0 deletions operator/charts/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,19 @@ config:
server:
webhooks:
port: 9443
# certDir is the directory path where certificate files are located.
# The operator will look for tls.crt and tls.key in this directory.
# Default: /etc/grove-operator/webhook-certs
certDir: /etc/grove-operator/webhook-certs
certManagement:
# autoProvision enables automatic certificate generation and rotation.
# Set to false to use external certificates (e.g., cert-manager).
autoProvision: true
# secretName is the name of the secret containing the webhook certificate.
secretName: grove-webhook-server-cert
# certManagerEnabled adds cert-manager.io/inject-ca-from annotation to webhook configs.
# Requires cert-manager to be installed in the cluster.
certManagerEnabled: false
healthProbes:
enable: false
port: 9444
Expand Down Expand Up @@ -134,6 +147,40 @@ webhookServerSecret:
app.kubernetes.io/name: grove-webhook-server-secret
app.kubernetes.io/part-of: grove

# Webhook server certificate volume configuration
webhookServerCert:
# mountPath is where the certificate volume will be mounted in the container.
# This should typically match config.server.webhooks.certDir
mountPath: /etc/grove-operator/webhook-certs

# volume defines the volume source for certificates.
# If not specified, defaults to using the Secret defined in config.server.webhooks.certManagement.secretName
#
# Example 1 - Using hostPath (for local certificate files):
# volume:
# hostPath:
# path: /host/path/to/certs
# type: Directory
#
# Example 2 - Using CSI driver (Vault):
# volume:
# csi:
# driver: secrets-store.csi.k8s.io
# readOnly: true
# volumeAttributes:
# secretProviderClass: "grove-webhook-certs"
#
# Example 3 - Using ConfigMap (for testing only):
# volume:
# configMap:
# name: grove-webhook-certs
#
# Example 4 - Using emptyDir (for testing):
# volume:
# emptyDir: {}
#
volume: null

priorityClass:
enabled: false
name: grove-operator-priority-critical
Expand Down
12 changes: 10 additions & 2 deletions operator/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,16 @@ func main() {
}

webhookCertsReadyCh := make(chan struct{})
if err = cert.ManageWebhookCerts(mgr, operatorCfg.Server.Webhooks.ServerCertDir, operatorCfg.Authorizer.Enabled, webhookCertsReadyCh); err != nil {
logger.Error(err, "failed to setup cert rotation")
certMgmt := operatorCfg.Server.Webhooks.CertManagement
if err = cert.ManageWebhookCerts(
mgr,
operatorCfg.Server.Webhooks.ServerCertDir,
certMgmt.SecretName,
operatorCfg.Authorizer.Enabled,
*certMgmt.AutoProvision,
webhookCertsReadyCh,
); err != nil {
logger.Error(err, "failed to setup certificate management")
os.Exit(1)
}

Expand Down
69 changes: 66 additions & 3 deletions operator/internal/controller/cert/cert.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"
"os"
"path/filepath"
"strings"
"time"

"github.com/ai-dynamo/grove/operator/internal/constants"
authorizationwebhook "github.com/ai-dynamo/grove/operator/internal/webhook/admission/pcs/authorization"
Expand All @@ -39,16 +40,30 @@ const (
)

// ManageWebhookCerts registers the cert-controller with the manager which will be used to manage
// webhook certificates.
func ManageWebhookCerts(mgr ctrl.Manager, certDir string, authorizerEnabled bool, certsReadyCh chan struct{}) error {
// webhook certificates. If autoProvision is false, it skips automatic certificate management
// and only ensures that certificates are mounted from the provided Secret.
func ManageWebhookCerts(mgr ctrl.Manager, certDir string, secretName string, authorizerEnabled bool, autoProvision bool, certsReadyCh chan struct{}) error {
namespace, err := getOperatorNamespace()
if err != nil {
return err
}

// If autoProvision is disabled, just verify certs exist and notify readiness
if !autoProvision {
logger := ctrl.Log.WithName("cert-external")
logger.Info("Automatic certificate provisioning is disabled, using external certificates",
"secretName", secretName, "certDir", certDir)

// Start a goroutine to wait for externally managed certificates
go waitForExternalCerts(logger, certDir, certsReadyCh)
return nil
}

// Auto-provision mode: use cert-controller
rotator := &cert.CertRotator{
SecretKey: types.NamespacedName{
Namespace: namespace,
Name: "grove-webhook-server-cert",
Name: secretName,
},
CertDir: certDir,
CAName: certificateAuthorityName,
Expand Down Expand Up @@ -109,3 +124,51 @@ func getOperatorNamespace() (string, error) {
}
return namespace, nil
}

// waitForExternalCerts polls certDir until both tls.crt and tls.key are present and then
// closes certsReadyCh to signal that the certificates are ready. It is used when
// autoProvision is disabled.
//
// The directory is checked up to maxRetries times with retryInterval between attempts
// (roughly one minute in total). If the files never appear, an error is logged and the
// function returns without closing certsReadyCh.
func waitForExternalCerts(logger logr.Logger, certDir string, certsReadyCh chan struct{}) {
	const (
		maxRetries    = 30
		retryInterval = 2 * time.Second
		certFileName  = "tls.crt"
		keyFileName   = "tls.key"
	)

	// filepath.Join cleans the result: a trailing slash on certDir does not produce a
	// doubled separator, and an empty certDir yields a relative path rather than one
	// rooted at "/".
	certPath := filepath.Join(certDir, certFileName)
	keyPath := filepath.Join(certDir, keyFileName)

	for i := 0; i < maxRetries; i++ {
		// Both the certificate and the key must be present before signalling readiness.
		certExists := fileExists(certPath)
		keyExists := fileExists(keyPath)

		if certExists && keyExists {
			logger.Info("External certificates found and ready",
				"certPath", certPath, "keyPath", keyPath)
			close(certsReadyCh)
			return
		}

		// Skip the log line and the sleep after the final attempt so the timeout is
		// reported immediately.
		if i < maxRetries-1 {
			logger.Info("Waiting for external certificates to be mounted",
				"attempt", i+1, "maxRetries", maxRetries,
				"certExists", certExists, "keyExists", keyExists)
			time.Sleep(retryInterval)
		}
	}

	logger.Error(fmt.Errorf("timeout waiting for external certificates"),
		"Failed to find certificates after maximum retries",
		"certPath", certPath, "keyPath", keyPath)
	// certsReadyCh is deliberately left open: receivers keep waiting instead of
	// proceeding without certificates.
}

// fileExists reports whether path can be stat'ed and refers to something other than a
// directory. Any os.Stat error, including "not found", is treated as the file being absent.
func fileExists(path string) bool {
	if fi, statErr := os.Stat(path); statErr == nil {
		return !fi.IsDir()
	}
	return false
}
Loading