From 72e94b92ac4f8393e259abe36840ec9e7eaea663 Mon Sep 17 00:00:00 2001 From: Kensei Nakada Date: Thu, 28 Mar 2024 10:37:39 +0900 Subject: [PATCH] don't scale down when the replica number is close to preferredMaxReplicas --- pkg/recommender/recommender.go | 17 ++- pkg/recommender/recommender_test.go | 180 +++++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 4 deletions(-) diff --git a/pkg/recommender/recommender.go b/pkg/recommender/recommender.go index f4fa5708..990ee0a7 100644 --- a/pkg/recommender/recommender.go +++ b/pkg/recommender/recommender.go @@ -103,6 +103,7 @@ func New( func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3.Tortoise, hpa *v2.HorizontalPodAutoscaler, replicaNum int32, now time.Time) (*v1beta3.Tortoise, error) { scaledUpBasedOnPreferredMaxReplicas := false + closeToPreferredMaxReplicas := false if hasHorizontal(tortoise) { // Handle TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas condition first. if replicaNum >= s.preferredMaxReplicas && @@ -129,6 +130,9 @@ func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3 // Change TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas to False. tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionFalse, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is not bigger than the preferred max replica number", now) } + if int32(float64(s.preferredMaxReplicas)*0.8) < replicaNum { + closeToPreferredMaxReplicas = true + } } logger := log.FromContext(ctx) @@ -188,7 +192,7 @@ func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3 var newSize int64 var reason string var err error - newSize, reason, err = s.calculateBestNewSize(ctx, tortoise, p, r.ContainerName, recom, k, hpa, replicaNum, req, minAllocatedResourcesMap[r.ContainerName], scaledUpBasedOnPreferredMaxReplicas, now) + newSize, reason, err = s.calculateBestNewSize(ctx, tortoise, p, r.ContainerName, recom, k, hpa, replicaNum, req, minAllocatedResourcesMap[r.ContainerName], scaledUpBasedOnPreferredMaxReplicas, closeToPreferredMaxReplicas) if err != nil { return tortoise, err } @@ -238,8 +242,7 @@ func (s *Service) calculateBestNewSize( replicaNum int32, resourceRequest resource.Quantity, minAllocatedResources corev1.ResourceList, - scaledUpBasedOnPreferredMaxReplicas bool, - now time.Time, + scaledUpBasedOnPreferredMaxReplicas, closeToPreferredMaxReplicas bool, ) (int64, string, error) { if p == v1beta3.AutoscalingTypeOff { // Just keep the current resource request. @@ -291,6 +294,14 @@ func (s *Service) calculateBestNewSize( return jastifiedNewSize, msg, nil } + if closeToPreferredMaxReplicas { + // The current replica number is close or more than preferredMaxReplicas. + // So, we just keep the current resource request + // until the replica number goes lower + // because scaling down the resource request might increase the replica number further more. + return resourceRequest.MilliValue(), "the current number of replicas is more than the preferred max replica number in this cluster, so nothing to do", nil + } + if replicaNum <= s.minimumMinReplicas { // The current replica number is less than or equal to the minimumMinReplicas. // The replica number is too small and hits the minReplicas. diff --git a/pkg/recommender/recommender_test.go b/pkg/recommender/recommender_test.go index 851ae30c..273531b1 100644 --- a/pkg/recommender/recommender_test.go +++ b/pkg/recommender/recommender_test.go @@ -3046,7 +3046,7 @@ func TestService_UpdateVPARecommendation(t *testing.T) { { name: "all horizontal: reduced resources based on VPA recommendation when unbalanced container size in multiple containers Pod", fields: fields{ - preferredMaxReplicas: 6, + preferredMaxReplicas: 10, maxCPU: "10000m", maxMemory: "100Gi", }, @@ -3220,6 +3220,184 @@ func TestService_UpdateVPARecommendation(t *testing.T) { }).Build(), wantErr: false, }, + { + name: "all horizontal: no scale down happens if replica num is close to preferredMaxReplicas, even when unbalanced container size in multiple containers Pod", + fields: fields{ + preferredMaxReplicas: 10, + maxCPU: "10000m", + maxMemory: "100Gi", + }, + args: args{ + tortoise: utils.NewTortoiseBuilder().AddAutoscalingPolicy(v1beta3.ContainerAutoscalingPolicy{ + ContainerName: "test-container", + Policy: map[corev1.ResourceName]v1beta3.AutoscalingType{ + corev1.ResourceCPU: v1beta3.AutoscalingTypeHorizontal, + corev1.ResourceMemory: v1beta3.AutoscalingTypeHorizontal, + }, + }).AddAutoscalingPolicy(v1beta3.ContainerAutoscalingPolicy{ + ContainerName: "test-container2", + Policy: map[corev1.ResourceName]v1beta3.AutoscalingType{ + corev1.ResourceCPU: v1beta3.AutoscalingTypeHorizontal, + corev1.ResourceMemory: v1beta3.AutoscalingTypeHorizontal, + }, + }).AddResourcePolicy(v1beta3.ContainerResourcePolicy{ + ContainerName: "test-container2", + MinAllocatedResources: createResourceList("100m", "100Mi"), + }).AddResourcePolicy(v1beta3.ContainerResourcePolicy{ + ContainerName: "test-container", + MinAllocatedResources: createResourceList("100m", "100Mi"), + }).AddContainerRecommendationFromVPA( + v1beta3.ContainerRecommendationFromVPA{ + ContainerName: "test-container", + MaxRecommendation: map[corev1.ResourceName]v1beta3.ResourceQuantity{ + corev1.ResourceCPU: { + Quantity: resource.MustParse("80m"), // smaller than expectation (800m+) + }, + corev1.ResourceMemory: { + Quantity: resource.MustParse("9Gi"), + }, + }, + }, + ).AddContainerRecommendationFromVPA( + v1beta3.ContainerRecommendationFromVPA{ + ContainerName: "test-container2", + MaxRecommendation: map[corev1.ResourceName]v1beta3.ResourceQuantity{ + corev1.ResourceCPU: { + Quantity: resource.MustParse("800m"), + }, + corev1.ResourceMemory: { + Quantity: resource.MustParse("0.7Gi"), // smaller than expectation (7Gi+) + }, + }, + }, + ).AddContainerResourceRequests(v1beta3.ContainerResourceRequests{ + ContainerName: "test-container", + Resource: createResourceList("1000m", "10Gi"), + }).AddContainerResourceRequests(v1beta3.ContainerResourceRequests{ + ContainerName: "test-container2", + Resource: createResourceList("1000m", "10Gi"), + }).Build(), + replicaNum: 9, + hpa: &v2.HorizontalPodAutoscaler{ + Spec: v2.HorizontalPodAutoscalerSpec{ + MinReplicas: ptr.To[int32](1), + Metrics: []v2.MetricSpec{ + { + Type: v2.ContainerResourceMetricSourceType, + ContainerResource: &v2.ContainerResourceMetricSource{ + Name: corev1.ResourceCPU, + Target: v2.MetricTarget{ + AverageUtilization: ptr.To[int32](80), + }, + Container: "test-container", + }, + }, + { + Type: v2.ContainerResourceMetricSourceType, + ContainerResource: &v2.ContainerResourceMetricSource{ + Name: corev1.ResourceMemory, + Target: v2.MetricTarget{ + AverageUtilization: ptr.To[int32](80), + }, + Container: "test-container", + }, + }, + { + Type: v2.ContainerResourceMetricSourceType, + ContainerResource: &v2.ContainerResourceMetricSource{ + Name: corev1.ResourceCPU, + Target: v2.MetricTarget{ + AverageUtilization: ptr.To[int32](70), + }, + Container: "test-container2", + }, + }, + { + Type: v2.ContainerResourceMetricSourceType, + ContainerResource: &v2.ContainerResourceMetricSource{ + Name: corev1.ResourceMemory, + Target: v2.MetricTarget{ + AverageUtilization: ptr.To[int32](70), + }, + Container: "test-container2", + }, + }, + }, + }, + }, + }, + want: utils.NewTortoiseBuilder().AddAutoscalingPolicy(v1beta3.ContainerAutoscalingPolicy{ + ContainerName: "test-container", + Policy: map[corev1.ResourceName]v1beta3.AutoscalingType{ + corev1.ResourceCPU: v1beta3.AutoscalingTypeHorizontal, + corev1.ResourceMemory: v1beta3.AutoscalingTypeHorizontal, + }, + }).AddAutoscalingPolicy(v1beta3.ContainerAutoscalingPolicy{ + ContainerName: "test-container2", + Policy: map[corev1.ResourceName]v1beta3.AutoscalingType{ + corev1.ResourceCPU: v1beta3.AutoscalingTypeHorizontal, + corev1.ResourceMemory: v1beta3.AutoscalingTypeHorizontal, + }, + }).AddResourcePolicy(v1beta3.ContainerResourcePolicy{ + ContainerName: "test-container2", + MinAllocatedResources: createResourceList("100m", "100Mi"), + }).AddResourcePolicy(v1beta3.ContainerResourcePolicy{ + ContainerName: "test-container", + MinAllocatedResources: createResourceList("100m", "100Mi"), + }).AddContainerRecommendationFromVPA( + v1beta3.ContainerRecommendationFromVPA{ + ContainerName: "test-container", + MaxRecommendation: map[corev1.ResourceName]v1beta3.ResourceQuantity{ + corev1.ResourceCPU: { + Quantity: resource.MustParse("80m"), + }, + corev1.ResourceMemory: { + Quantity: resource.MustParse("9Gi"), + }, + }, + }, + ).AddContainerRecommendationFromVPA( + v1beta3.ContainerRecommendationFromVPA{ + ContainerName: "test-container2", + MaxRecommendation: map[corev1.ResourceName]v1beta3.ResourceQuantity{ + corev1.ResourceCPU: { + Quantity: resource.MustParse("800m"), + }, + corev1.ResourceMemory: { + Quantity: resource.MustParse("0.7Gi"), + }, + }, + }, + ).AddContainerResourceRequests(v1beta3.ContainerResourceRequests{ + ContainerName: "test-container", + Resource: createResourceList("1000m", "10Gi"), + }).AddContainerResourceRequests(v1beta3.ContainerResourceRequests{ + ContainerName: "test-container2", + Resource: createResourceList("1000m", "10Gi"), + }).SetRecommendations(v1beta3.Recommendations{ + Vertical: v1beta3.VerticalRecommendations{ + ContainerResourceRecommendation: []v1beta3.RecommendedContainerResources{ + // no scale down. + { + ContainerName: "test-container", + RecommendedResource: createResourceList("1000m", "10Gi"), + }, + { + ContainerName: "test-container2", + RecommendedResource: createResourceList("1000m", "10Gi"), + }, + }, + }, + }).AddTortoiseConditions(v1beta3.TortoiseCondition{ + Type: v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, + Status: corev1.ConditionFalse, + Reason: "ScaledUpBasedOnPreferredMaxReplicas", + Message: "the current number of replicas is not bigger than the preferred max replica number", + LastTransitionTime: metav1.NewTime(now), + LastUpdateTime: metav1.NewTime(now), + }).Build(), + wantErr: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) {