diff --git a/operator/Makefile b/operator/Makefile index 0c2ebbb7..6870f774 100644 --- a/operator/Makefile +++ b/operator/Makefile @@ -69,7 +69,7 @@ format: $(GOIMPORTS_REVISER) # Run all unit tests .PHONY: test-unit test-unit: - @go test ./... + @go test $(shell go list ./... | grep -v './test/integration') # Run all unit tests with code coverage .PHONY: test-cover @@ -82,6 +82,21 @@ cover-html: test-cover @go tool cover -html=coverage.out -o coverage.html @echo "Coverage report generated at coverage.html" + +# Run all tests (unit + integration) +.PHONY: test-all +test-all: test-unit test-integration-controllers test-integration-webhooks +# Run controller-specific integration tests +.PHONY: test-integration-controllers +test-integration-controllers: $(SETUP_ENVTEST) + @KUBEBUILDER_ASSETS="$(shell $(SETUP_ENVTEST) use --print path)" go test ./test/integration/controllers/... -v + +# Run webhook integration tests +.PHONY: test-integration-webhooks +test-integration-webhooks: $(SETUP_ENVTEST) + @KUBEBUILDER_ASSETS="$(shell $(SETUP_ENVTEST) use --print path)" go test ./test/integration/webhooks/... -v + + # Make targets for local development and testing # ------------------------------------------------------------- # Starts a local k8s cluster using kind. 
diff --git a/operator/api/core/v1alpha1/crds/embed.go b/operator/api/core/v1alpha1/crds/embed.go index 51ef8afc..99bdaf33 100644 --- a/operator/api/core/v1alpha1/crds/embed.go +++ b/operator/api/core/v1alpha1/crds/embed.go @@ -23,6 +23,8 @@ var ( podCliqueCRD string //go:embed grove.io_podgangsets.yaml podGangSetCRD string + //go:embed grove.io_podcliquescalinggroups.yaml + podCliqueScalingGroupCRD string ) // PodCliqueCRD returns the PodClique CRD @@ -34,3 +36,8 @@ func PodCliqueCRD() string { func PodGangSetCRD() string { return podGangSetCRD } + +// PodCliqueScalingGroupCRD returns the PodCliqueScalingGroup CRD +func PodCliqueScalingGroupCRD() string { + return podCliqueScalingGroupCRD +} diff --git a/operator/go.mod b/operator/go.mod index a00cd76e..30b68220 100644 --- a/operator/go.mod +++ b/operator/go.mod @@ -13,14 +13,16 @@ require ( github.com/stretchr/testify v1.10.0 go.uber.org/zap v1.27.0 k8s.io/api v0.33.4 + k8s.io/apiextensions-apiserver v0.33.4 k8s.io/apimachinery v0.33.4 k8s.io/client-go v0.33.4 - k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 + k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 sigs.k8s.io/controller-runtime v0.21.0 ) require ( github.com/beorn7/perks v1.0.1 // indirect + github.com/blang/semver/v4 v4.0.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.13.0 // indirect @@ -39,7 +41,7 @@ require ( github.com/json-iterator/go v1.1.12 // indirect github.com/mailru/easyjson v0.9.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/onsi/ginkgo/v2 v2.23.3 // indirect github.com/pkg/errors v0.9.1 // indirect @@ -61,16 +63,15 @@ require ( golang.org/x/text v0.28.0 
// indirect golang.org/x/time v0.12.0 // indirect gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect - google.golang.org/protobuf v1.36.7 // indirect + google.golang.org/protobuf v1.36.8 // indirect gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiextensions-apiserver v0.33.4 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect sigs.k8s.io/randfill v1.0.0 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect sigs.k8s.io/yaml v1.6.0 // indirect ) diff --git a/operator/go.sum b/operator/go.sum index e2a61d69..62f2a8a7 100644 --- a/operator/go.sum +++ b/operator/go.sum @@ -65,8 +65,9 @@ github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUt github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= -github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/onsi/ginkgo/v2 v2.23.3 
h1:edHxnszytJ4lD9D5Jjc4tiDkPBZ3siDeJJkUZJJVkp0= @@ -161,8 +162,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.5.0 h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0= gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= -google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= -google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc= +google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -186,8 +187,8 @@ k8s.io/kube-aggregator v0.33.1 h1:PigQUqAvd6Y4hBjQAqhKz3lEJC2VHLL4bSOEuS06a40= k8s.io/kube-aggregator v0.33.1/go.mod h1:16/wlU5Lj7hNJSv7JSu5FLvxyrgiJVLCHzfVoECAsuI= k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4= k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= -k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97RvvF3a8J3fP/Lg= +k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8= 
sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM= sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= @@ -195,8 +196,8 @@ sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= -sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI= -sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc= +sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/operator/internal/client/scheme.go b/operator/internal/client/scheme.go index 825f2f42..4bd86472 100644 --- a/operator/internal/client/scheme.go +++ b/operator/internal/client/scheme.go @@ -21,6 +21,7 @@ import ( podgangsetv1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1" podgangv1alpha1 "github.com/NVIDIA/grove/scheduler/api/core/v1alpha1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" k8sscheme "k8s.io/client-go/kubernetes/scheme" @@ -35,6 +36,7 @@ func init() { podgangsetv1alpha1.AddToScheme, podgangv1alpha1.AddToScheme, k8sscheme.AddToScheme, + apiextensionsv1.AddToScheme, ) utilruntime.Must(localSchemeBuilder.AddToScheme(Scheme)) } diff --git 
a/operator/internal/component/podgangset/podclique/podclique_test.go b/operator/internal/component/podgangset/podclique/podclique_test.go index ca6231a4..08056445 100644 --- a/operator/internal/component/podgangset/podclique/podclique_test.go +++ b/operator/internal/component/podgangset/podclique/podclique_test.go @@ -100,7 +100,7 @@ func TestGetExistingResourceNames(t *testing.T) { WithReplicas(tc.pgsReplicas). WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder)) for _, pclqTemplateName := range tc.podCliqueTemplateNames { - pgsBuilder.WithPodCliqueParameters(pclqTemplateName, 1, nil) + pgsBuilder.WithPodCliqueParameters(pclqTemplateName, 1, 1, nil) } pgs := pgsBuilder.Build() // Create existing objects diff --git a/operator/test/integration/controllers/podgangset/reconciler_test.go b/operator/test/integration/controllers/podgangset/reconciler_test.go new file mode 100644 index 00000000..cd56c9fa --- /dev/null +++ b/operator/test/integration/controllers/podgangset/reconciler_test.go @@ -0,0 +1,114 @@ +// /* +// Copyright 2025 The Grove Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// */ + +package podgangset_test + +import ( + "testing" + "time" + + grovecorev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1" + "github.com/NVIDIA/grove/operator/test/integration/framework" + "github.com/NVIDIA/grove/operator/test/utils" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func TestPodGangSetCreatesChildResources(t *testing.T) { + // Setup test environment with PGS controller only + env, err := framework.NewEnvBuilder(t). + WithController(framework.ControllerPodGangSet). + WithNamespace("test-ns"). + Build() + require.NoError(t, err) + // Start the environment + err = env.Start() + require.NoError(t, err) + defer env.Shutdown() + + // Create a simple PGS with 3 cliques; clique-3 is placed in a scaling group + pgs := utils.NewPodGangSetBuilder("test-pgs", "test-ns"). + WithMinimal(). + WithReplicas(1). + WithPodCliqueParameters("clique-1", 2, 2, nil). + WithPodCliqueParameters("clique-2", 1, 1, nil). + WithPodCliqueParameters("clique-3", 1, 1, nil). 
+ WithPodCliqueScalingGroupConfig(grovecorev1alpha1.PodCliqueScalingGroupConfig{ + Name: "new", + CliqueNames: []string{"clique-3"}, + Replicas: ptr.To[int32](1), + MinAvailable: ptr.To[int32](1), + ScaleConfig: nil, + }).Build() + + // Submit PGS to cluster + err = env.Client.Create(env.Ctx, pgs) + require.NoError(t, err) + + // Debug: Check if PGS is actually in the cluster and monitor status changes + time.Sleep(2 * time.Second) + fetchedPGS := &grovecorev1alpha1.PodGangSet{} + err = env.Client.Get(env.Ctx, client.ObjectKey{Name: "test-pgs", Namespace: "test-ns"}, fetchedPGS) + require.NoError(t, err, "Should be able to fetch PGS from cluster") + + // Wait for PCSG creation using Eventually with better polling + assert.Eventually(t, func() bool { + pcsgList := &grovecorev1alpha1.PodCliqueScalingGroupList{} + err = env.Client.List(env.Ctx, pcsgList, client.InNamespace("test-ns")) + if err != nil { + t.Logf("Error listing PCSGs: %v", err) + return false + } + t.Logf("Found %d PCSGs", len(pcsgList.Items)) + return len(pcsgList.Items) == 1 + }, 15*time.Second, 500*time.Millisecond, "PCSG should be created") + + // Wait for PCLQ creation using Eventually with better polling + assert.Eventually(t, func() bool { + pclqList := &grovecorev1alpha1.PodCliqueList{} + err := env.Client.List(env.Ctx, pclqList, client.InNamespace("test-ns")) + if err != nil { + t.Logf("Error listing PCLQs: %v", err) + return false + } + t.Logf("Found %d PCLQs", len(pclqList.Items)) + return len(pclqList.Items) == 3 + }, 20*time.Second, 500*time.Millisecond, "All non-scaling-group PCLQs should be created") + + // Verify final state + pcsgList := &grovecorev1alpha1.PodCliqueScalingGroupList{} + err = env.Client.List(env.Ctx, pcsgList, client.InNamespace("test-ns")) + require.NoError(t, err) + require.Len(t, pcsgList.Items, 1) + + pclqList := &grovecorev1alpha1.PodCliqueList{} + err = env.Client.List(env.Ctx, pclqList, client.InNamespace("test-ns")) + require.NoError(t, err) + require.Len(t, 
pclqList.Items, 3) + + // Verify ownership and basic properties + pcsg := pcsgList.Items[0] + assert.Equal(t, "test-pgs", pcsg.Labels["app.kubernetes.io/part-of"]) + assert.Equal(t, string(pgs.UID), string(pcsg.GetOwnerReferences()[0].UID)) + + for _, pclq := range pclqList.Items { + assert.Equal(t, "test-pgs", pclq.Labels["app.kubernetes.io/part-of"]) + assert.Equal(t, string(pgs.UID), string(pclq.GetOwnerReferences()[0].UID)) + } +} diff --git a/operator/test/integration/framework/builder.go b/operator/test/integration/framework/builder.go new file mode 100644 index 00000000..eddffc99 --- /dev/null +++ b/operator/test/integration/framework/builder.go @@ -0,0 +1,196 @@ +// /* +// Copyright 2025 The Grove Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// */ + +package framework + +import ( + "context" + "fmt" + "testing" + + groveclient "github.com/NVIDIA/grove/operator/internal/client" + + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" +) + +// EnvBuilder builds a test environment with a fluent API +type EnvBuilder struct { + t *testing.T + + // Core components + env *envtest.Environment + ctx context.Context + cancel context.CancelFunc + + // Configuration + crds []*apiextensionsv1.CustomResourceDefinition + scheme *runtime.Scheme + webhookBuilder *WebhookConfigurationBuilder + // Controllers + controllers map[ControllerType]bool + webhooks map[WebhookType]bool + webhookOptions envtest.WebhookInstallOptions + // Namespaces + namespaces map[string]*corev1.Namespace + + // Pre-created objects + objects []client.Object +} + +// NewEnvBuilder creates a new environment builder with sensible defaults +func NewEnvBuilder(t *testing.T) *EnvBuilder { + return &EnvBuilder{ + t: t, + scheme: groveclient.Scheme, // Use Grove's production scheme + controllers: make(map[ControllerType]bool), + webhooks: make(map[WebhookType]bool), + namespaces: make(map[string]*corev1.Namespace), + webhookBuilder: NewWebhookConfigurationBuilder(), + } +} + +// WithCRDs adds custom CRDs to the test environment +func (b *EnvBuilder) WithCRDs(crds ...*apiextensionsv1.CustomResourceDefinition) *EnvBuilder { + b.crds = append(b.crds, crds...) 
+ return b +} + +// WithController enables a specific controller +func (b *EnvBuilder) WithController(controllerType ControllerType) *EnvBuilder { + b.controllers[controllerType] = true + return b +} + +// WithValidationWebhook enables validation webhooks +func (b *EnvBuilder) WithValidationWebhook() *EnvBuilder { + return b.withWebhook(WebhookValidation) +} + +// WithMutationWebhook enables mutation webhooks +func (b *EnvBuilder) WithMutationWebhook() *EnvBuilder { + return b.withWebhook(WebhookMutation) +} + +// withWebhook is a private helper that handles webhook configuration +func (b *EnvBuilder) withWebhook(webhookType WebhookType) *EnvBuilder { + switch webhookType { + case WebhookValidation: + validatingConfig := b.webhookBuilder.BuildValidatingConfig() + b.webhookOptions.ValidatingWebhooks = append(b.webhookOptions.ValidatingWebhooks, validatingConfig) + case WebhookMutation: + mutatingConfig := b.webhookBuilder.BuildMutatingConfig() + b.webhookOptions.MutatingWebhooks = append(b.webhookOptions.MutatingWebhooks, mutatingConfig) + } + + b.webhooks[webhookType] = true + return b +} + +// WithNamespace creates a namespace +func (b *EnvBuilder) WithNamespace(name string) *EnvBuilder { + return b.createNamespace(name, nil) +} + +// WithLabeledNamespace creates a namespace with labels +func (b *EnvBuilder) WithLabeledNamespace(name string, labels map[string]string) *EnvBuilder { + return b.createNamespace(name, labels) +} + +// createNamespace is a private helper that creates a namespace with optional labels +func (b *EnvBuilder) createNamespace(name string, labels map[string]string) *EnvBuilder { + b.namespaces[name] = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: labels, + }, + } + return b +} + +// WithObjects adds multiple objects to be created +func (b *EnvBuilder) WithObjects(objs ...client.Object) *EnvBuilder { + b.objects = append(b.objects, objs...) 
+ return b +} + +// Build creates the test environment but does not start it +func (b *EnvBuilder) Build() (*TestEnv, error) { + b.ctx, b.cancel = context.WithCancel(context.Background()) + + if err := b.prepareCRDs(); err != nil { + return nil, err + } + + b.configureEnvironment() + + return b.createTestEnv(), nil +} + +// prepareCRDs loads and configures all required CRDs +func (b *EnvBuilder) prepareCRDs() error { + // Define CRD loaders with their descriptions + crdLoaders := []struct { + name string + loader func() ([]*apiextensionsv1.CustomResourceDefinition, error) + }{ + {"operator", getOperatorCRDs}, + {"scheduler", getSchedulerCRDs}, + } + + // Load all CRDs using unified error handling + for _, crdLoader := range crdLoaders { + crds, err := crdLoader.loader() + if err != nil { + return fmt.Errorf("failed to load %s CRDs: %w", crdLoader.name, err) + } + b.crds = append(b.crds, crds...) + } + + return nil +} + +// configureEnvironment sets up the envtest environment +func (b *EnvBuilder) configureEnvironment() { + b.env = &envtest.Environment{ + CRDs: b.crds, + Scheme: b.scheme, + WebhookInstallOptions: b.webhookOptions, + } +} + +// createTestEnv creates the TestEnv instance +func (b *EnvBuilder) createTestEnv() *TestEnv { + return &TestEnv{ + T: b.t, + Client: nil, // Will be created in Start() + Manager: nil, // Will be created in Start() + Ctx: b.ctx, + Objects: b.objects, + + // Internal fields for lifecycle management + env: b.env, + cancel: b.cancel, + + namespaceConfigs: b.namespaces, + controllers: b.controllers, + webhooks: b.webhooks, + } +} diff --git a/operator/test/integration/framework/components.go b/operator/test/integration/framework/components.go new file mode 100644 index 00000000..3572da09 --- /dev/null +++ b/operator/test/integration/framework/components.go @@ -0,0 +1,106 @@ +// /* +// Copyright 2025 The Grove Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// */ + +package framework + +import ( + "context" + "fmt" + "testing" + + configv1alpha1 "github.com/NVIDIA/grove/operator/api/config/v1alpha1" + "github.com/NVIDIA/grove/operator/internal/controller/podclique" + "github.com/NVIDIA/grove/operator/internal/controller/podcliquescalinggroup" + "github.com/NVIDIA/grove/operator/internal/controller/podgangset" + "github.com/NVIDIA/grove/operator/internal/webhook/admission/pgs/defaulting" + "github.com/NVIDIA/grove/operator/internal/webhook/admission/pgs/validation" + + corev1 "k8s.io/api/core/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +// RegisterPodGangSetController registers the PodGangSet controller with the manager +func RegisterPodGangSetController(mgr manager.Manager, t *testing.T) error { + config := configv1alpha1.PodGangSetControllerConfiguration{ + ConcurrentSyncs: ptr.To(1), + } + t.Logf("Creating PodGangSet reconciler with concurrency=%d", *config.ConcurrentSyncs) + reconciler := podgangset.NewReconciler(mgr, config) + if err := reconciler.RegisterWithManager(mgr); err != nil { + return fmt.Errorf("failed to register PodGangSet controller: %w", err) + } + return nil +} + +// RegisterPodCliqueController registers the PodClique controller with the manager +func RegisterPodCliqueController(mgr manager.Manager, t *testing.T) error { + config := 
configv1alpha1.PodCliqueControllerConfiguration{ + ConcurrentSyncs: ptr.To(1), + } + t.Logf("Creating PodClique reconciler with concurrency=%d", *config.ConcurrentSyncs) + reconciler := podclique.NewReconciler(mgr, config) + if err := reconciler.RegisterWithManager(mgr); err != nil { + return fmt.Errorf("failed to register PodClique controller: %w", err) + } + return nil +} + +// RegisterScalingGroupController registers the PodCliqueScalingGroup controller with the manager +func RegisterScalingGroupController(mgr manager.Manager, t *testing.T) error { + config := configv1alpha1.PodCliqueScalingGroupControllerConfiguration{ + ConcurrentSyncs: ptr.To(1), + } + t.Logf("Creating PodCliqueScalingGroup reconciler with concurrency=%d", *config.ConcurrentSyncs) + reconciler := podcliquescalinggroup.NewReconciler(mgr, config) + if err := reconciler.RegisterWithManager(mgr); err != nil { + return fmt.Errorf("failed to register PodCliqueScalingGroup controller: %w", err) + } + return nil +} + +// RegisterValidationWebhook registers the validation webhook with the manager +func RegisterValidationWebhook(mgr manager.Manager, t *testing.T) error { + t.Logf("Creating validation webhook handler") + validatingWebhook := validation.NewHandler(mgr) + if err := validatingWebhook.RegisterWithManager(mgr); err != nil { + return fmt.Errorf("failed to register validation webhook: %w", err) + } + return nil +} + +// RegisterMutationWebhook registers the mutation webhook with the manager +func RegisterMutationWebhook(mgr manager.Manager, t *testing.T) error { + t.Logf("Creating mutation webhook handler") + defaultingWebhook := defaulting.NewHandler(mgr) + if err := defaultingWebhook.RegisterWithManager(mgr); err != nil { + return fmt.Errorf("failed to register mutation webhook: %w", err) + } + return nil +} + +// CreateNamespaces creates the provided namespaces using the given client +func CreateNamespaces(ctx context.Context, client client.Client, namespaces map[string]*corev1.Namespace, t 
*testing.T) error { + for _, ns := range namespaces { + t.Logf("Creating namespace: name=%s, labels=%v", ns.Name, ns.Labels) + if err := client.Create(ctx, ns); err != nil { + return fmt.Errorf("failed to create namespace %s: %w", ns.Name, err) + } + t.Logf("Namespace %s created successfully", ns.Name) + } + return nil +} diff --git a/operator/test/integration/framework/constants.go b/operator/test/integration/framework/constants.go new file mode 100644 index 00000000..032e9df8 --- /dev/null +++ b/operator/test/integration/framework/constants.go @@ -0,0 +1,38 @@ +// /* +// Copyright 2025 The Grove Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// */ + +package framework + +const ( + // DefaultWebhookTimeout is the default timeout for webhook requests + DefaultWebhookTimeout = 10 + // PGSMutatingWebhookName is the name for PodGangSet mutating webhook + PGSMutatingWebhookName = "pgs.mutating.webhooks.grove.io" + // PGSValidatingWebhookName is the name for PodGangSet validating webhook + PGSValidatingWebhookName = "pgs.validating.webhooks.grove.io" + + // TestWebhookServiceName is the service name used for webhook testing + TestWebhookServiceName = "grove-operator-test-webhook" + // MutationWebhookPath is the path for mutation webhooks + MutationWebhookPath = "/webhooks/default-podgangset" + // ValidationWebhookPath is the path for validation webhooks + ValidationWebhookPath = "/webhooks/validate-podgangset" + + // MutatingWebhookConfigName is the name for mutating webhook configuration + MutatingWebhookConfigName = "pgs-mutating-webhook-test" + // ValidatingWebhookConfigName is the name for validating webhook configuration + ValidatingWebhookConfigName = "pgs-validating-webhook-test" +) diff --git a/operator/test/integration/framework/crds.go b/operator/test/integration/framework/crds.go new file mode 100644 index 00000000..6272dfa5 --- /dev/null +++ b/operator/test/integration/framework/crds.go @@ -0,0 +1,76 @@ +// /* +// Copyright 2025 The Grove Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// */ + +package framework + +import ( + "fmt" + "strings" + + grovecrds "github.com/NVIDIA/grove/operator/api/core/v1alpha1/crds" + + schedulercrds "github.com/NVIDIA/grove/scheduler/api/core/v1alpha1/crds" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "k8s.io/apimachinery/pkg/util/yaml" +) + +func getOperatorCRDs() ([]*apiextensionsv1.CustomResourceDefinition, error) { + crdContents := []string{ + grovecrds.PodGangSetCRD(), + grovecrds.PodCliqueCRD(), + grovecrds.PodCliqueScalingGroupCRD(), + } + + var crds []*apiextensionsv1.CustomResourceDefinition + for _, crdContent := range crdContents { + crd, err := parseCRDFromYAML(crdContent) + if err != nil { + return nil, fmt.Errorf("failed to parse CRD: %w", err) + } + crds = append(crds, crd) + } + + return crds, nil +} + +func getSchedulerCRDs() ([]*apiextensionsv1.CustomResourceDefinition, error) { + crdContents := []string{ + schedulercrds.PodGangCRD(), + } + + var crds []*apiextensionsv1.CustomResourceDefinition + for _, crdContent := range crdContents { + crd, err := parseCRDFromYAML(crdContent) + if err != nil { + return nil, fmt.Errorf("failed to parse scheduler CRD: %w", err) + } + crds = append(crds, crd) + } + + return crds, nil +} + +// parseCRDFromYAML parses a CRD from YAML content +func parseCRDFromYAML(yamlContent string) (*apiextensionsv1.CustomResourceDefinition, error) { + crd := &apiextensionsv1.CustomResourceDefinition{} + + decoder := yaml.NewYAMLOrJSONDecoder(strings.NewReader(yamlContent), 4096) + if err := decoder.Decode(crd); err != nil { + return nil, fmt.Errorf("failed to decode YAML: %w", err) + } + + return crd, nil +} diff --git a/operator/test/integration/framework/environment.go b/operator/test/integration/framework/environment.go new file mode 100644 index 00000000..6e78a4e3 --- /dev/null +++ b/operator/test/integration/framework/environment.go @@ -0,0 +1,230 @@ +// /* +// Copyright 2025 The Grove Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// */ + +package framework + +import ( + "context" + "errors" + "fmt" + "testing" + "time" + + configv1alpha1 "github.com/NVIDIA/grove/operator/api/config/v1alpha1" + grovelogger "github.com/NVIDIA/grove/operator/internal/logger" + + corev1 "k8s.io/api/core/v1" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/webhook" +) + +// EnvironmentSetup handles low-level environment initialization +type EnvironmentSetup struct { + env *envtest.Environment + client client.Client + mgr manager.Manager + ctx context.Context + t *testing.T +} + +// NewEnvironmentSetup creates a new environment setup helper +func NewEnvironmentSetup(ctx context.Context, env *envtest.Environment, t *testing.T) *EnvironmentSetup { + return &EnvironmentSetup{ + env: env, + ctx: ctx, + t: t, + } +} + +// InitializeLogger sets up the controller-runtime logger +func (es *EnvironmentSetup) InitializeLogger() { + ctrl.SetLogger(grovelogger.MustNewLogger(true, configv1alpha1.DebugLevel, configv1alpha1.LogFormatJSON)) +} + +// StartControlPlane starts the envtest control plane +func (es *EnvironmentSetup) StartControlPlane() error { + es.t.Logf("Starting envtest control plane with %d CRDs", len(es.env.CRDs)) + start := time.Now() + + cfg, err := es.env.Start() + if err != nil { + 
return fmt.Errorf("failed to start envtest: %w", err) + } + es.env.Config = cfg + + duration := time.Since(start) + es.t.Logf("Envtest control plane started successfully (took %v)", duration) + return nil +} + +// SetupClient creates and configures the Kubernetes client +func (es *EnvironmentSetup) SetupClient() (client.Client, error) { + kubeClient, err := client.New(es.env.Config, client.Options{Scheme: es.env.Scheme}) + if err != nil { + return nil, fmt.Errorf("failed to create client: %w", err) + } + es.client = kubeClient + return kubeClient, nil +} + +// CreateRequiredNamespaces creates the configured namespaces +func (es *EnvironmentSetup) CreateRequiredNamespaces(namespaces map[string]*corev1.Namespace) error { + if len(namespaces) == 0 { + return nil + } + es.t.Logf("Creating %d required namespaces", len(namespaces)) + if err := CreateNamespaces(es.ctx, es.client, namespaces, es.t); err != nil { + return err + } + es.t.Logf("All required namespaces created successfully") + return nil +} + +// CreateManager creates the controller manager with webhook support if needed +func (es *EnvironmentSetup) CreateManager(webhooks map[WebhookType]bool, webhookOptions envtest.WebhookInstallOptions) (manager.Manager, error) { + es.t.Logf("Creating controller manager with %d webhooks", len(webhooks)) + + // Add webhook server configuration if webhooks are enabled + managerOpts := ctrl.Options{ + Scheme: es.env.Scheme, + LeaderElection: false, // Disable leader election in tests + HealthProbeBindAddress: "0", + } + if len(webhooks) > 0 { + es.t.Logf("Configuring webhook server: port=%d, host=%s", + webhookOptions.LocalServingPort, webhookOptions.LocalServingHost) + webhookServer := webhook.NewServer(webhook.Options{ + Port: webhookOptions.LocalServingPort, + Host: webhookOptions.LocalServingHost, + CertDir: webhookOptions.LocalServingCertDir, + }) + managerOpts.WebhookServer = webhookServer + } + + mgr, err := ctrl.NewManager(es.env.Config, managerOpts) + if err != nil { 
+ return nil, err + } + es.mgr = mgr + es.t.Logf("Controller manager created successfully") + return mgr, nil +} + +// RegisterControllers registers controllers with the manager +func (es *EnvironmentSetup) RegisterControllers(mgr manager.Manager, controllers map[ControllerType]bool) error { + if len(controllers) == 0 { + return nil + } + + controllerRegisters := map[ControllerType]func() error{ + ControllerPodGangSet: func() error { return RegisterPodGangSetController(mgr, es.t) }, + ControllerPodClique: func() error { return RegisterPodCliqueController(mgr, es.t) }, + ControllerScalingGroup: func() error { return RegisterScalingGroupController(mgr, es.t) }, + } + + es.t.Logf("Registering controllers: %v", getControllerNames(controllers)) + + for controllerType := range controllers { + registerFunc, exists := controllerRegisters[controllerType] + if !exists { + return fmt.Errorf("unknown controller type: %s", controllerType) + } + if err := registerFunc(); err != nil { + return err + } + es.t.Logf("Controller %s registered successfully", controllerType) + } + + es.t.Logf("All controllers registered successfully") + return nil +} + +// RegisterWebhooks registers webhooks with the manager +func (es *EnvironmentSetup) RegisterWebhooks(mgr manager.Manager, webhooks map[WebhookType]bool) error { + if len(webhooks) == 0 { + return nil + } + + webhookRegisters := map[WebhookType]func() error{ + WebhookValidation: func() error { return RegisterValidationWebhook(mgr, es.t) }, + WebhookMutation: func() error { return RegisterMutationWebhook(mgr, es.t) }, + } + + es.t.Logf("Registering webhooks: %v", getWebhookNames(webhooks)) + + for webhookType := range webhooks { + registerFunc, exists := webhookRegisters[webhookType] + if !exists { + return fmt.Errorf("unknown webhook type: %s", webhookType) + } + if err := registerFunc(); err != nil { + return err + } + es.t.Logf("Webhook %s registered successfully", webhookType) + } + + es.t.Logf("All webhooks registered successfully") 
+ return nil +} + +// RegisterComponents registers controllers and webhooks with the manager +func (es *EnvironmentSetup) RegisterComponents(mgr manager.Manager, controllers map[ControllerType]bool, webhooks map[WebhookType]bool) error { + if err := es.RegisterControllers(mgr, controllers); err != nil { + return err + } + if err := es.RegisterWebhooks(mgr, webhooks); err != nil { + return err + } + es.t.Logf("Component registration completed") + return nil +} + +// StartManager starts the manager and waits for cache sync +func (es *EnvironmentSetup) StartManager(mgr manager.Manager) error { + es.t.Logf("Starting controller manager in background") + go func() { + _ = mgr.Start(es.ctx) + }() + + es.t.Logf("Waiting for controller cache to sync") + if !mgr.GetCache().WaitForCacheSync(es.ctx) { + return errors.New("timed out waiting for cache to sync") + } + + es.t.Logf("Controller cache synced successfully") + return nil +} + +// getControllerNames returns a slice of controller names for logging +func getControllerNames(controllers map[ControllerType]bool) []string { + var names []string + for controllerType := range controllers { + names = append(names, string(controllerType)) + } + return names +} + +// getWebhookNames returns a slice of webhook names for logging +func getWebhookNames(webhooks map[WebhookType]bool) []string { + var names []string + for webhookType := range webhooks { + names = append(names, string(webhookType)) + } + return names +} diff --git a/operator/test/integration/framework/testenv.go b/operator/test/integration/framework/testenv.go new file mode 100644 index 00000000..49f95d86 --- /dev/null +++ b/operator/test/integration/framework/testenv.go @@ -0,0 +1,157 @@ +// /* +// Copyright 2025 The Grove Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package framework
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/envtest"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+)
+
+// TestEnv is a built test environment; Start brings it up and Shutdown tears it down
+type TestEnv struct {
+	T       *testing.T
+	Client  client.Client
+	Manager manager.Manager
+	Ctx     context.Context
+	Objects []client.Object
+
+	// Internal fields for lifecycle management
+	env              *envtest.Environment
+	cancel           context.CancelFunc
+	started          bool
+	namespaceConfigs map[string]*corev1.Namespace
+	controllers      map[ControllerType]bool
+	webhooks         map[WebhookType]bool
+}
+
+// Start starts the test environment and all its components. If a step fails once
+// the control plane is up, whatever was started is torn down before returning.
+func (te *TestEnv) Start() (err error) {
+	if te.started {
+		return nil // Already started
+	}
+
+	overallStart := time.Now()
+	te.T.Logf("=== Starting Test Environment ===")
+	te.T.Logf("Environment configuration: controllers=%v, webhooks=%v, namespaces=%d",
+		te.getControllerNames(), te.getWebhookNames(), len(te.namespaceConfigs))
+
+	envSetup := NewEnvironmentSetup(te.Ctx, te.env, te.T)
+	envSetup.InitializeLogger()
+	if err = envSetup.StartControlPlane(); err != nil {
+		return err
+	}
+	// Shutdown is a no-op until started is set, so undo partial setup here on failure.
+	defer func() {
+		if err != nil {
+			te.teardown()
+		}
+	}()
+
+	kubeClient, err := envSetup.SetupClient()
+	if err != nil {
+		return err
+	}
+	te.Client = kubeClient
+	if err = envSetup.CreateRequiredNamespaces(te.namespaceConfigs); err != nil {
+		return err
+	}
+	if err = te.setupAndStartManager(envSetup); err != nil {
+		return err
+	}
+
+	te.started = true
+	te.T.Logf("=== Test Environment Ready (total setup: %v) ===", time.Since(overallStart))
+	return nil
+}
+
+// Shutdown stops the test environment and cleans up resources
+func (te *TestEnv) Shutdown() {
+	if !te.started {
+		return // already stopped or never started
+	}
+	te.T.Logf("=== Shutting Down Test Environment ===")
+	te.teardown()
+	te.started = false
+	te.T.Logf("Test environment shutdown complete")
+}
+
+// teardown calls cancel (if set) and stops envtest (if set), logging a Stop failure.
+func (te *TestEnv) teardown() {
+	if te.cancel != nil {
+		te.cancel()
+	}
+	if te.env != nil {
+		te.T.Logf("Stopping envtest environment")
+		if err := te.env.Stop(); err != nil {
+			te.T.Logf("Failed to stop envtest environment: %v", err)
+		}
+	}
+}
+
+// getControllerNames returns the keys of te.controllers as strings for logging
+func (te *TestEnv) getControllerNames() []string {
+	var names []string
+	for controllerType := range te.controllers {
+		names = append(names, string(controllerType))
+	}
+	return names
+}
+
+// getWebhookNames returns the keys of te.webhooks as strings for logging
+func (te *TestEnv) getWebhookNames() []string {
+	var names []string
+	for webhookType := range te.webhooks {
+		names = append(names, string(webhookType))
+	}
+	return names
+}
+
+// setupAndStartManager creates the manager, registers components on it, starts it, and adopts its client
+func (te *TestEnv) setupAndStartManager(envSetup *EnvironmentSetup) error {
+	te.T.Logf("Creating controller manager with %d controllers and %d webhooks",
+		len(te.controllers), len(te.webhooks))
+
+	mgr, err := envSetup.CreateManager(te.webhooks, te.env.WebhookInstallOptions)
+	if err != nil {
+		return err
+	}
+	te.T.Logf("Controller manager created successfully")
+	te.Manager = mgr
+
+	te.T.Logf("Registering components: controllers=%v, webhooks=%v",
+		te.getControllerNames(), te.getWebhookNames())
+	if err = envSetup.RegisterComponents(te.Manager, te.controllers, te.webhooks); err != nil {
+		return err
+	}
+	te.T.Logf("All components registered successfully")
+
+	te.T.Logf("Starting controller manager and waiting for cache sync")
+	if err := envSetup.StartManager(te.Manager); err != nil {
+		return err
+	}
+	te.T.Logf("Manager started successfully, cache synced")
+	// From here on te.Client is the manager's client, replacing the one Start assigned earlier
+	te.Client = te.Manager.GetClient()
+	return nil
+}
diff --git a/operator/test/integration/framework/types.go b/operator/test/integration/framework/types.go
new file mode 100644
index 00000000..af11a7f7
--- /dev/null
+++ b/operator/test/integration/framework/types.go
@@ -0,0 +1,49 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package framework
+
+// ControllerType names a controller that the test framework can register with the manager
+type ControllerType string
+
+const (
+	// ControllerPodGangSet selects the PodGangSet controller
+	ControllerPodGangSet ControllerType = "podgangset"
+	// ControllerPodClique selects the PodClique controller
+	ControllerPodClique ControllerType = "podclique"
+	// ControllerScalingGroup selects the PodCliqueScalingGroup controller
+	ControllerScalingGroup ControllerType = "scalinggroup"
+)
+
+// String returns the underlying string value of the ControllerType
+func (c ControllerType) String() string {
+	return string(c)
+}
+
+// WebhookType names a webhook that the test framework can register with the manager
+type WebhookType string
+
+const (
+	// WebhookValidation selects the validation webhook
+	WebhookValidation WebhookType = "validation"
+	// WebhookMutation selects the mutation webhook
+	WebhookMutation WebhookType = "mutation"
+)
+
+// String returns the underlying string value of the WebhookType
+func (w WebhookType) String() string {
+	return string(w)
+}
diff --git a/operator/test/integration/framework/webhooks.go b/operator/test/integration/framework/webhooks.go
new file mode 100644
index 00000000..8204e6b5
--- /dev/null
+++ b/operator/test/integration/framework/webhooks.go
@@ -0,0 +1,96 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package framework
+
+import (
+	admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/utils/ptr"
+)
+
+// WebhookConfigurationBuilder produces the webhook configuration objects used by the tests
+type WebhookConfigurationBuilder struct{}
+
+// NewWebhookConfigurationBuilder returns an empty, ready-to-use builder
+func NewWebhookConfigurationBuilder() *WebhookConfigurationBuilder {
+	return &WebhookConfigurationBuilder{}
+}
+
+// BuildMutatingConfig returns a configuration named MutatingWebhookConfigName holding one
+// webhook that points at TestWebhookServiceName under MutationWebhookPath, with failure
+// policy Fail, match policy Exact, no side effects and v1 admission reviews.
+func (w *WebhookConfigurationBuilder) BuildMutatingConfig() *admissionregistrationv1.MutatingWebhookConfiguration {
+	return &admissionregistrationv1.MutatingWebhookConfiguration{
+		ObjectMeta: metav1.ObjectMeta{Name: MutatingWebhookConfigName},
+		Webhooks: []admissionregistrationv1.MutatingWebhook{{
+			Name: PGSMutatingWebhookName,
+			ClientConfig: admissionregistrationv1.WebhookClientConfig{
+				Service: &admissionregistrationv1.ServiceReference{
+					Name: TestWebhookServiceName,
+					Path: ptr.To(MutationWebhookPath),
+				},
+			},
+			Rules:                   defaultWebhookRules(),
+			FailurePolicy:           ptr.To(admissionregistrationv1.Fail),
+			MatchPolicy:             ptr.To(admissionregistrationv1.Exact),
+			SideEffects:             ptr.To(admissionregistrationv1.SideEffectClassNone),
+			TimeoutSeconds:          ptr.To[int32](DefaultWebhookTimeout),
+			AdmissionReviewVersions: []string{"v1"},
+		}},
+	}
+}
+
+// BuildValidatingConfig returns a configuration named ValidatingWebhookConfigName holding one
+// webhook that points at TestWebhookServiceName under ValidationWebhookPath, with failure
+// policy Fail, no side effects and v1 admission reviews; MatchPolicy is left unset.
+func (w *WebhookConfigurationBuilder) BuildValidatingConfig() *admissionregistrationv1.ValidatingWebhookConfiguration {
+	return &admissionregistrationv1.ValidatingWebhookConfiguration{
+		ObjectMeta: metav1.ObjectMeta{Name: ValidatingWebhookConfigName},
+		Webhooks: []admissionregistrationv1.ValidatingWebhook{{
+			Name: PGSValidatingWebhookName,
+			ClientConfig: admissionregistrationv1.WebhookClientConfig{
+				Service: &admissionregistrationv1.ServiceReference{
+					Name: TestWebhookServiceName,
+					Path: ptr.To(ValidationWebhookPath),
+				},
+			},
+			Rules:                   defaultWebhookRules(),
+			FailurePolicy:           ptr.To(admissionregistrationv1.Fail),
+			SideEffects:             ptr.To(admissionregistrationv1.SideEffectClassNone),
+			TimeoutSeconds:          ptr.To[int32](DefaultWebhookTimeout),
+			AdmissionReviewVersions: []string{"v1"},
+		}},
+	}
+}
+
+// defaultWebhookRules returns the single rule shared by both configurations:
+// CREATE and UPDATE of grove.io/v1alpha1 podgangsets.
+func defaultWebhookRules() []admissionregistrationv1.RuleWithOperations {
+	return []admissionregistrationv1.RuleWithOperations{
+		{
+			Operations: []admissionregistrationv1.OperationType{
+				admissionregistrationv1.Create,
+				admissionregistrationv1.Update,
+			},
+			Rule: admissionregistrationv1.Rule{
+				APIGroups:   []string{"grove.io"},
+				APIVersions: []string{"v1alpha1"},
+				Resources:   []string{"podgangsets"},
+			},
+		},
+	}
+}
diff --git a/operator/test/integration/webhooks/defaulting_test.go b/operator/test/integration/webhooks/defaulting_test.go
new file mode 100644
index 00000000..cb6dc7e6
--- /dev/null
+++ b/operator/test/integration/webhooks/defaulting_test.go
@@ -0,0 +1,61 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// */
+
+package webhooks_test
+
+import (
+	"testing"
+
+	"github.com/NVIDIA/grove/operator/test/integration/framework"
+	"github.com/NVIDIA/grove/operator/test/utils"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/utils/ptr"
+)
+
+func TestPodGangSetDefaulting(t *testing.T) {
+	// Build an environment with only the mutation webhook and the "test-ns" namespace
+	env, err := framework.NewEnvBuilder(t).
+		WithMutationWebhook().
+		WithNamespace("test-ns").
+		Build()
+	require.NoError(t, err)
+	// Start the environment; Shutdown is deferred only once Start has succeeded
+	err = env.Start()
+	require.NoError(t, err)
+	defer env.Shutdown()
+
+	// Only the manager is checked here; webhook registration is not inspected directly
+	require.NotNil(t, env.Manager, "Manager should be created when webhooks are enabled")
+
+	// Build the object with WithMinimal only. NOTE(review): the restart policy and grace period
+	// asserted below are presumably filled in by the mutation webhook — confirm.
+	pgs := utils.NewPodGangSetBuilder("test-pgs", "test-ns").
+		WithMinimal().
+		Build()
+
+	// Submit PGS to cluster
+	err = env.Client.Create(env.Ctx, pgs)
+	require.NoError(t, err)
+
+	// Check the first clique's pod spec on the same pgs object that was passed to Create
+	assert.Equal(t, corev1.RestartPolicyAlways, pgs.Spec.Template.Cliques[0].Spec.PodSpec.RestartPolicy)
+	assert.Equal(t, ptr.To[int64](30), pgs.Spec.Template.Cliques[0].Spec.PodSpec.TerminationGracePeriodSeconds)
+
+	t.Logf("Webhook integration test completed - webhooks are registered and manager is working")
+}
diff --git a/operator/test/integration/webhooks/validation_test.go b/operator/test/integration/webhooks/validation_test.go
new file mode 100644
index 00000000..8ca07b1f
--- /dev/null
+++ b/operator/test/integration/webhooks/validation_test.go
@@ -0,0 +1,79 @@
+// /*
+// Copyright 2025 The Grove Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// */ + +package webhooks_test + +import ( + "testing" + "time" + + "github.com/NVIDIA/grove/operator/test/integration/framework" + "github.com/NVIDIA/grove/operator/test/utils" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPodGangSetValidationWebhook(t *testing.T) { + // Setup test environment with webhooks enabled + env, err := framework.NewEnvBuilder(t). + WithMutationWebhook(). + WithValidationWebhook(). + WithNamespace("test-ns"). + Build() + require.NoError(t, err) + // Start the environment + err = env.Start() + require.NoError(t, err) + defer env.Shutdown() + + // Read the CA certificate from the webhook server + + t.Logf("Created ValidatingWebhookConfiguration for testing") + + // Test 1: Valid PodGangSet should succeed + t.Run("ValidPodGangSet", func(t *testing.T) { + validPGS := utils.NewPodGangSetBuilder("valid-pgs", "test-ns"). + WithMinimal(). + Build() + + // Valid PodGangSet should be accepted + err := env.Client.Create(env.Ctx, validPGS) + require.NoError(t, err, "Valid PodGangSet should be accepted by webhook") + t.Logf("Valid PodGangSet was accepted as expected") + }) + + // Test 2: Invalid PodGangSet should be rejected by validation webhook + t.Run("InvalidPodGangSet", func(t *testing.T) { + // Name is too long (>45 characters) - should be rejected by validation webhook + invalidPGS := utils.NewPodGangSetBuilder( + "this-podgangset-nddddddddddddddfdfdfdfdfdfdfdfdfdfdfddddddddddddddame-is-way-too-long-and-should-be-rejected-by-validation", + "test-ns"). + WithMinimal(). 
+ Build() + + time.Sleep(10 * time.Second) // Ensure some delay + // Invalid PodGangSet should be rejected + err = env.Client.Create(env.Ctx, invalidPGS) + require.Error(t, err, "Invalid PodGangSet should be rejected by validation webhook") + + // Verify it's a webhook validation error (admission webhook errors contain validation errors) + assert.Contains(t, err.Error(), "admission webhook", "Error should be from admission webhook") + assert.Contains(t, err.Error(), "denied the request", "Error should indicate webhook denial") + + t.Logf("Invalid PodGangSet was correctly rejected: %v", err) + }) +} diff --git a/operator/test/utils/pgs.go b/operator/test/utils/pgs.go index b86013e5..90ab4697 100644 --- a/operator/test/utils/pgs.go +++ b/operator/test/utils/pgs.go @@ -17,11 +17,14 @@ package utils import ( + "time" + grovecorev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1" "github.com/google/uuid" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" ) // PodGangSetBuilder is a builder for PodGangSet objects. @@ -49,9 +52,10 @@ func (b *PodGangSetBuilder) WithReplicas(replicas int32) *PodGangSetBuilder { } // WithPodCliqueParameters is a convenience function that creates a PodCliqueTemplateSpec given the parameters and adds it to the PodGangSet. -func (b *PodGangSetBuilder) WithPodCliqueParameters(name string, replicas int32, startsAfter []string) *PodGangSetBuilder { +func (b *PodGangSetBuilder) WithPodCliqueParameters(name string, replicas, minAvailable int32, startsAfter []string) *PodGangSetBuilder { pclqTemplateSpec := NewPodCliqueTemplateSpecBuilder(name). WithReplicas(replicas). + WithMinAvailable(minAvailable). WithStartsAfter(startsAfter). Build() return b.WithPodCliqueTemplateSpec(pclqTemplateSpec) @@ -70,6 +74,28 @@ func (b *PodGangSetBuilder) WithPodCliqueScalingGroupConfig(config grovecorev1al return b } +// WithMinimal creates a minimal valid PodGangSet that passes webhook validation. 
+// It sets terminationDelay to 30s, adds "default-clique" (1 replica, minAvailable 1) when no clique exists, and fills unset minAvailable.
+func (b *PodGangSetBuilder) WithMinimal() *PodGangSetBuilder {
+	b.WithTerminationDelay(30 * time.Second)
+	if len(b.pgs.Spec.Template.Cliques) == 0 {
+		b.WithPodCliqueParameters("default-clique", 1, 1, nil)
+	}
+	// Wherever minAvailable is still nil, default it to that clique's replica count
+	for _, clique := range b.pgs.Spec.Template.Cliques { // elements are pointers, so the write below changes the stored clique
+		if clique.Spec.MinAvailable == nil {
+			clique.Spec.MinAvailable = ptr.To(clique.Spec.Replicas)
+		}
+	}
+	return b
+}
+
+// WithTerminationDelay sets the template's terminationDelay to delay and returns the builder for chaining.
+func (b *PodGangSetBuilder) WithTerminationDelay(delay time.Duration) *PodGangSetBuilder {
+	b.pgs.Spec.Template.TerminationDelay = &metav1.Duration{Duration: delay}
+	return b
+}
+
 // Build creates a PodGangSet object.
 func (b *PodGangSetBuilder) Build() *grovecorev1alpha1.PodGangSet {
 	return b.pgs
@@ -84,6 +110,9 @@ func createEmptyPodGangSet(name, namespace string) *grovecorev1alpha1.PodGangSet
 		},
 		Spec: grovecorev1alpha1.PodGangSetSpec{
 			Replicas: 1,
+			Template: grovecorev1alpha1.PodGangSetTemplateSpec{
+				Cliques: []*grovecorev1alpha1.PodCliqueTemplateSpec{},
+			},
 		},
 	}
 }