Skip to content

Commit 8512527

Browse files
committed
add v2 autoscaler validation
Signed-off-by: alimaazamat <[email protected]>
1 parent 46082cc commit 8512527

File tree

2 files changed

+91
-13
lines changed

2 files changed

+91
-13
lines changed

ray-operator/controllers/ray/utils/validation.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ func ValidateRayClusterSpec(spec *rayv1.RayClusterSpec, annotations map[string]s
103103
if err := validateRayGroupLabels(workerGroup.GroupName, workerGroup.RayStartParams, workerGroup.Labels); err != nil {
104104
return err
105105
}
106-
if err := validateWorkerGroupIdleTimeout(workerGroup); err != nil {
106+
if err := validateWorkerGroupIdleTimeout(workerGroup, spec); err != nil {
107107
return err
108108
}
109109
}
@@ -579,11 +579,16 @@ func validateLegacyDeletionPolicies(rayJob *rayv1.RayJob) error {
579579
}
580580

581581
// validateWorkerGroupIdleTimeout validates the idleTimeoutSeconds field in a worker group spec
582-
func validateWorkerGroupIdleTimeout(workerGroup rayv1.WorkerGroupSpec) error {
582+
func validateWorkerGroupIdleTimeout(workerGroup rayv1.WorkerGroupSpec, spec *rayv1.RayClusterSpec) error {
583583
idleTimeoutSeconds := workerGroup.IdleTimeoutSeconds
584584
if idleTimeoutSeconds != nil && *idleTimeoutSeconds < 0 {
585585
return fmt.Errorf("idleTimeoutSeconds must be non-negative, got %d", *idleTimeoutSeconds)
586586
}
587587

588+
// idleTimeoutSeconds only allowed on autoscaler v2
589+
if idleTimeoutSeconds != nil && !IsAutoscalingV2Enabled(spec) {
590+
return fmt.Errorf("worker group %s has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2", workerGroup.GroupName)
591+
}
592+
588593
return nil
589594
}

ray-operator/controllers/ray/utils/validation_test.go

Lines changed: 84 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1892,21 +1892,94 @@ func TestValidateWorkerGroupIdleTimeout(t *testing.T) {
18921892
},
18931893
expectedErr: "",
18941894
},
1895+
"should reject idleTimeoutSeconds when autoscaler version is not v2": {
1896+
spec: rayv1.RayClusterSpec{
1897+
EnableInTreeAutoscaling: ptr.To(true),
1898+
AutoscalerOptions: &rayv1.AutoscalerOptions{
1899+
Version: ptr.To(rayv1.AutoscalerVersionV1),
1900+
},
1901+
HeadGroupSpec: rayv1.HeadGroupSpec{
1902+
Template: podTemplateSpec(nil, nil),
1903+
},
1904+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1905+
{
1906+
GroupName: "worker-group-1",
1907+
Template: podTemplateSpec(nil, nil),
1908+
IdleTimeoutSeconds: ptr.To(int32(60)),
1909+
MinReplicas: ptr.To(int32(0)),
1910+
MaxReplicas: ptr.To(int32(10)),
1911+
},
1912+
},
1913+
},
1914+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2",
1915+
},
1916+
"should reject idleTimeoutSeconds when autoscaler version is not set": {
1917+
spec: rayv1.RayClusterSpec{
1918+
EnableInTreeAutoscaling: ptr.To(true),
1919+
HeadGroupSpec: rayv1.HeadGroupSpec{
1920+
Template: podTemplateSpec(nil, nil),
1921+
},
1922+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1923+
{
1924+
GroupName: "worker-group-1",
1925+
Template: podTemplateSpec(nil, nil),
1926+
IdleTimeoutSeconds: ptr.To(int32(60)),
1927+
MinReplicas: ptr.To(int32(0)),
1928+
MaxReplicas: ptr.To(int32(10)),
1929+
},
1930+
},
1931+
},
1932+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2",
1933+
},
1934+
"should reject idleTimeoutSeconds when AutoscalerOptions is nil": {
1935+
spec: rayv1.RayClusterSpec{
1936+
EnableInTreeAutoscaling: ptr.To(true),
1937+
AutoscalerOptions: nil,
1938+
HeadGroupSpec: rayv1.HeadGroupSpec{
1939+
Template: podTemplateSpec(nil, nil),
1940+
},
1941+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1942+
{
1943+
GroupName: "worker-group-1",
1944+
Template: podTemplateSpec(nil, nil),
1945+
IdleTimeoutSeconds: ptr.To(int32(60)),
1946+
MinReplicas: ptr.To(int32(0)),
1947+
MaxReplicas: ptr.To(int32(10)),
1948+
},
1949+
},
1950+
},
1951+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2",
1952+
},
1953+
"should accept worker group without idleTimeoutSeconds and without autoscaler v2": {
1954+
spec: rayv1.RayClusterSpec{
1955+
EnableInTreeAutoscaling: ptr.To(true),
1956+
AutoscalerOptions: &rayv1.AutoscalerOptions{
1957+
Version: ptr.To(rayv1.AutoscalerVersionV1),
1958+
},
1959+
HeadGroupSpec: rayv1.HeadGroupSpec{
1960+
Template: podTemplateSpec(nil, nil),
1961+
},
1962+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
1963+
{
1964+
GroupName: "worker-group-1",
1965+
Template: podTemplateSpec(nil, nil),
1966+
MinReplicas: ptr.To(int32(0)),
1967+
MaxReplicas: ptr.To(int32(10)),
1968+
},
1969+
},
1970+
},
1971+
expectedErr: "",
1972+
},
18951973
}
18961974

1897-
for testName, tc := range tests {
1898-
t.Run(testName, func(t *testing.T) {
1975+
for name, tc := range tests {
1976+
t.Run(name, func(t *testing.T) {
18991977
err := ValidateRayClusterSpec(&tc.spec, nil)
1900-
if tc.expectedErr == "" {
1901-
if err != nil {
1902-
t.Errorf("expected no error, but got: %v", err)
1903-
}
1978+
if tc.expectedErr != "" {
1979+
require.Error(t, err)
1980+
require.EqualError(t, err, tc.expectedErr)
19041981
} else {
1905-
if err == nil {
1906-
t.Errorf("expected error: %s, but got no error", tc.expectedErr)
1907-
} else if err.Error() != tc.expectedErr {
1908-
t.Errorf("expected error: %s, but got: %v", tc.expectedErr, err)
1909-
}
1982+
require.NoError(t, err)
19101983
}
19111984
})
19121985
}

0 commit comments

Comments
 (0)