Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
ca99dda
wip
Ronkahn21 Aug 17, 2025
3c1a3ee
wip
Ronkahn21 Aug 17, 2025
2a7edf3
wip
Ronkahn21 Aug 19, 2025
ba4930b
wip
Ronkahn21 Aug 19, 2025
5d227cb
wip
Ronkahn21 Aug 20, 2025
b2a1fdf
wip
Ronkahn21 Aug 20, 2025
b815f02
refactor: enhance environment setup and webhook configuration
Ronkahn21 Aug 20, 2025
44f0e18
refactor: integrate webhook configuration into environment builder
Ronkahn21 Aug 20, 2025
1cdcc89
refactor: integrate webhook configuration into environment builder
Ronkahn21 Aug 20, 2025
94fbefb
refactor: enhance environment setup with detailed logging and testing…
Ronkahn21 Aug 20, 2025
aac1f66
refactor: streamline integration test setup and improve logging
Ronkahn21 Aug 20, 2025
39a01c5
refactor: simplify logging functions by removing receiver and enhanci…
Ronkahn21 Aug 20, 2025
3482d51
refactor: remove unnecessary blank lines in builder and environment f…
Ronkahn21 Aug 20, 2025
c2ebc31
refactor: remove pointer dereferencing for terminationDelay and minAv…
Ronkahn21 Aug 20, 2025
891edfd
refactor: add minAvailable parameter to WithPodCliqueParameters for b…
Ronkahn21 Aug 20, 2025
a08d870
chore: update dependencies to latest versions for improved stability …
Ronkahn21 Aug 20, 2025
7c63298
wip
Ronkahn21 Aug 17, 2025
74aefb0
wip
Ronkahn21 Aug 17, 2025
53fdd14
wip
Ronkahn21 Aug 19, 2025
53ef2fd
wip
Ronkahn21 Aug 19, 2025
338862b
wip
Ronkahn21 Aug 20, 2025
b6f0e14
wip
Ronkahn21 Aug 20, 2025
1759afe
refactor: enhance environment setup and webhook configuration
Ronkahn21 Aug 20, 2025
08fdc66
refactor: integrate webhook configuration into environment builder
Ronkahn21 Aug 20, 2025
2f2dd2e
refactor: integrate webhook configuration into environment builder
Ronkahn21 Aug 20, 2025
ca06275
refactor: enhance environment setup with detailed logging and testing…
Ronkahn21 Aug 20, 2025
11951b4
refactor: streamline integration test setup and improve logging
Ronkahn21 Aug 20, 2025
8d4f079
refactor: simplify logging functions by removing receiver and enhanci…
Ronkahn21 Aug 20, 2025
6d1b9f5
refactor: remove unnecessary blank lines in builder and environment f…
Ronkahn21 Aug 20, 2025
3c9d4f1
refactor: remove pointer dereferencing for terminationDelay and minAv…
Ronkahn21 Aug 20, 2025
845c3e2
refactor: add minAvailable parameter to WithPodCliqueParameters for b…
Ronkahn21 Aug 20, 2025
315274c
Merge remote-tracking branch 'origin/integration-test-infra' into int…
Ronkahn21 Aug 20, 2025
ec05312
chore: update dependencies for cert-controller and controller-runtime
Ronkahn21 Aug 20, 2025
542a01b
chore: update gnostic-models and kube-openapi dependencies to specifi…
Ronkahn21 Aug 24, 2025
b32aa6f
chore: exclude integration tests from unit test execution in Makefile
Ronkahn21 Aug 24, 2025
afd1d9d
chore: add controller and webhook registration functions to component…
Ronkahn21 Aug 24, 2025
4d0ddfa
chore: add registration functions for controllers and webhooks in com…
Ronkahn21 Aug 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ format: $(GOIMPORTS_REVISER)
# Run all unit tests
.PHONY: test-unit
test-unit:
@go test ./...
@go test $(shell go list ./... | grep -v './test/integration')

# Run all unit tests with code coverage
.PHONY: test-cover
Expand All @@ -82,6 +82,21 @@ cover-html: test-cover
@go tool cover -html=coverage.out -o coverage.html
@echo "Coverage report generated at coverage.html"


# Run all tests (unit + integration)
.PHONY: test-all
test-all: test-unit test-integration-controllers test-integration-webhooks
# Run controller-specific integration tests
.PHONY: test-integration-controllers
test-integration-controllers: $(SETUP_ENVTEST)
@KUBEBUILDER_ASSETS="$(shell $(SETUP_ENVTEST) use --print path)" go test ./test/integration/controllers/... -v

# Run webhook integration tests
.PHONY: test-integration-webhooks
test-integration-webhooks: $(SETUP_ENVTEST)
@KUBEBUILDER_ASSETS="$(shell $(SETUP_ENVTEST) use --print path)" go test ./test/integration/webhooks/... -v


# Make targets for local development and testing
# -------------------------------------------------------------
# Starts a local k8s cluster using kind.
Expand Down
7 changes: 7 additions & 0 deletions operator/api/core/v1alpha1/crds/embed.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ var (
podCliqueCRD string
//go:embed grove.io_podgangsets.yaml
podGangSetCRD string
//go:embed grove.io_podcliquescalinggroups.yaml
podCliqueScalingGroupCRD string
)

// PodCliqueCRD returns the PodClique CRD
Expand All @@ -34,3 +36,8 @@ func PodCliqueCRD() string {
func PodGangSetCRD() string {
	// Hand back the manifest embedded from grove.io_podgangsets.yaml.
	manifest := podGangSetCRD
	return manifest
}

// PodCliqueScalingGroupCRD returns the PodCliqueScalingGroup CRD manifest
// embedded from grove.io_podcliquescalinggroups.yaml.
func PodCliqueScalingGroupCRD() string {
	manifest := podCliqueScalingGroupCRD
	return manifest
}
11 changes: 6 additions & 5 deletions operator/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,16 @@ require (
github.com/stretchr/testify v1.10.0
go.uber.org/zap v1.27.0
k8s.io/api v0.33.4
k8s.io/apiextensions-apiserver v0.33.4
k8s.io/apimachinery v0.33.4
k8s.io/client-go v0.33.4
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979
sigs.k8s.io/controller-runtime v0.21.0
)

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.13.0 // indirect
Expand All @@ -39,7 +41,7 @@ require (
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/onsi/ginkgo/v2 v2.23.3 // indirect
github.com/pkg/errors v0.9.1 // indirect
Expand All @@ -61,16 +63,15 @@ require (
golang.org/x/text v0.28.0 // indirect
golang.org/x/time v0.12.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect
google.golang.org/protobuf v1.36.7 // indirect
google.golang.org/protobuf v1.36.8 // indirect
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.33.4 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20250814151709-d7b6acb124c3 // indirect
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
sigs.k8s.io/yaml v1.6.0 // indirect
)

Expand Down
15 changes: 8 additions & 7 deletions operator/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,9 @@ github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUt
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8=
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/onsi/ginkgo/v2 v2.23.3 h1:edHxnszytJ4lD9D5Jjc4tiDkPBZ3siDeJJkUZJJVkp0=
Expand Down Expand Up @@ -161,8 +162,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gomodules.xyz/jsonpatch/v2 v2.5.0 h1:JELs8RLM12qJGXU4u/TO3V25KW8GreMKl9pdkk14RM0=
gomodules.xyz/jsonpatch/v2 v2.5.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY=
google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
Expand All @@ -186,17 +187,17 @@ k8s.io/kube-aggregator v0.33.1 h1:PigQUqAvd6Y4hBjQAqhKz3lEJC2VHLL4bSOEuS06a40=
k8s.io/kube-aggregator v0.33.1/go.mod h1:16/wlU5Lj7hNJSv7JSu5FLvxyrgiJVLCHzfVoECAsuI=
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4=
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97RvvF3a8J3fP/Lg=
k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8=
sigs.k8s.io/controller-runtime v0.21.0/go.mod h1:OSg14+F65eWqIu4DceX7k/+QRAbTTvxeQSNSOQpukWM=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg=
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI=
sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 h1:IUA9nvMmnKWcj5jl84xn+T5MnlZKThmUW1TdblaLVAc=
sigs.k8s.io/structured-merge-diff/v4 v4.6.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
2 changes: 2 additions & 0 deletions operator/internal/client/scheme.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
podgangsetv1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1"

podgangv1alpha1 "github.com/NVIDIA/grove/scheduler/api/core/v1alpha1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
k8sscheme "k8s.io/client-go/kubernetes/scheme"
Expand All @@ -35,6 +36,7 @@ func init() {
podgangsetv1alpha1.AddToScheme,
podgangv1alpha1.AddToScheme,
k8sscheme.AddToScheme,
apiextensionsv1.AddToScheme,
)
utilruntime.Must(localSchemeBuilder.AddToScheme(Scheme))
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func TestGetExistingResourceNames(t *testing.T) {
WithReplicas(tc.pgsReplicas).
WithCliqueStartupType(ptr.To(grovecorev1alpha1.CliqueStartupTypeAnyOrder))
for _, pclqTemplateName := range tc.podCliqueTemplateNames {
pgsBuilder.WithPodCliqueParameters(pclqTemplateName, 1, nil)
pgsBuilder.WithPodCliqueParameters(pclqTemplateName, 1, 1, nil)
}
pgs := pgsBuilder.Build()
// Create existing objects
Expand Down
114 changes: 114 additions & 0 deletions operator/test/integration/controllers/podgangset/reconciler_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// /*
// Copyright 2025 The Grove Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// */

package podgangset_test

import (
"testing"
"time"

grovecorev1alpha1 "github.com/NVIDIA/grove/operator/api/core/v1alpha1"
"github.com/NVIDIA/grove/operator/test/integration/framework"
"github.com/NVIDIA/grove/operator/test/utils"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// TestPodGangSetCreatesChildResources starts a test environment with only the
// PodGangSet controller registered, creates a PodGangSet with three cliques
// (one of which is referenced by a scaling group config), and checks that
// exactly one PodCliqueScalingGroup and three PodCliques show up in the
// namespace. Each child must carry the "app.kubernetes.io/part-of" label set to
// the PodGangSet name and have the PodGangSet as its first owner reference.
func TestPodGangSetCreatesChildResources(t *testing.T) {
	const (
		pgsName   = "test-pgs"
		namespace = "test-ns"
	)

	// Setup test environment with PGS controller only
	env, err := framework.NewEnvBuilder(t).
		WithController(framework.ControllerPodGangSet).
		WithNamespace(namespace).
		Build()
	require.NoError(t, err)
	// Start the environment
	err = env.Start()
	require.NoError(t, err)
	defer env.Shutdown()

	// Create a simple PGS with 3 cliques; clique-3 belongs to a scaling group.
	pgs := utils.NewPodGangSetBuilder(pgsName, namespace).
		WithMinimal().
		WithReplicas(1).
		WithPodCliqueParameters("clique-1", 2, 2, nil).
		WithPodCliqueParameters("clique-2", 1, 1, nil).
		WithPodCliqueParameters("clique-3", 1, 1, nil).
		WithPodCliqueScalingGroupConfig(grovecorev1alpha1.PodCliqueScalingGroupConfig{
			Name:         "new",
			CliqueNames:  []string{"clique-3"},
			Replicas:     ptr.To[int32](1),
			MinAvailable: ptr.To[int32](1),
			ScaleConfig:  nil,
		}).Build()

	// Submit PGS to cluster
	err = env.Client.Create(env.Ctx, pgs)
	require.NoError(t, err)

	// Poll until the PGS can be read back rather than sleeping for a fixed
	// duration: this returns as soon as the object is visible and tolerates a
	// slow environment. Errors are kept local to the closure because the
	// condition runs in its own goroutine.
	require.Eventually(t, func() bool {
		fetchedPGS := &grovecorev1alpha1.PodGangSet{}
		key := client.ObjectKey{Name: pgsName, Namespace: namespace}
		if getErr := env.Client.Get(env.Ctx, key, fetchedPGS); getErr != nil {
			t.Logf("Error fetching PGS: %v", getErr)
			return false
		}
		return true
	}, 10*time.Second, 200*time.Millisecond, "Should be able to fetch PGS from cluster")

	// Wait for PCSG creation. require (not assert) stops the test at the real
	// cause instead of continuing into follow-up failures.
	require.Eventually(t, func() bool {
		pcsgs := &grovecorev1alpha1.PodCliqueScalingGroupList{}
		if listErr := env.Client.List(env.Ctx, pcsgs, client.InNamespace(namespace)); listErr != nil {
			t.Logf("Error listing PCSGs: %v", listErr)
			return false
		}
		t.Logf("Found %d PCSGs", len(pcsgs.Items))
		return len(pcsgs.Items) == 1
	}, 15*time.Second, 500*time.Millisecond, "PCSG should be created")

	// Wait for PCLQ creation.
	require.Eventually(t, func() bool {
		pclqs := &grovecorev1alpha1.PodCliqueList{}
		if listErr := env.Client.List(env.Ctx, pclqs, client.InNamespace(namespace)); listErr != nil {
			t.Logf("Error listing PCLQs: %v", listErr)
			return false
		}
		t.Logf("Found %d PCLQs", len(pclqs.Items))
		return len(pclqs.Items) == 3
	}, 20*time.Second, 500*time.Millisecond, "All non-scaling-group PCLQs should be created")

	// Verify final state. The lists are fetched again on the test goroutine so
	// that nothing is shared with the polling closures above.
	pcsgList := &grovecorev1alpha1.PodCliqueScalingGroupList{}
	err = env.Client.List(env.Ctx, pcsgList, client.InNamespace(namespace))
	require.NoError(t, err)
	require.Len(t, pcsgList.Items, 1)

	pclqList := &grovecorev1alpha1.PodCliqueList{}
	err = env.Client.List(env.Ctx, pclqList, client.InNamespace(namespace))
	require.NoError(t, err)
	require.Len(t, pclqList.Items, 3)

	// Verify ownership and basic properties. Owner references are checked for
	// presence first so a missing reference is reported as a test failure
	// rather than an index-out-of-range panic.
	pcsg := pcsgList.Items[0]
	assert.Equal(t, pgsName, pcsg.Labels["app.kubernetes.io/part-of"])
	require.NotEmpty(t, pcsg.GetOwnerReferences(), "PCSG %q should have an owner reference", pcsg.Name)
	assert.Equal(t, string(pgs.UID), string(pcsg.GetOwnerReferences()[0].UID))

	for i := range pclqList.Items {
		pclq := &pclqList.Items[i]
		assert.Equal(t, pgsName, pclq.Labels["app.kubernetes.io/part-of"])
		require.NotEmpty(t, pclq.GetOwnerReferences(), "PCLQ %q should have an owner reference", pclq.Name)
		assert.Equal(t, string(pgs.UID), string(pclq.GetOwnerReferences()[0].UID))
	}
}
Loading
Loading