Skip to content

Commit 7e5934d

Browse files
authored
ignore high replica number for the first 30 min (#382)
* ignore VerticalScalingBasedOnPreferredMaxReplicas when a replica number is smaller than minreplicas * ignore high replica number for the first 30 min
1 parent fc471f1 commit 7e5934d

File tree

3 files changed

+186
-23
lines changed

3 files changed

+186
-23
lines changed

pkg/recommender/recommender.go

Lines changed: 54 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,35 @@ func New(
102102
}
103103

104104
func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3.Tortoise, hpa *v2.HorizontalPodAutoscaler, replicaNum int32, now time.Time) (*v1beta3.Tortoise, error) {
105+
scaledUpBasedOnPreferredMaxReplicas := false
106+
if hasHorizontal(tortoise) {
107+
// Handle TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas condition first.
108+
if replicaNum >= s.preferredMaxReplicas &&
109+
// If the current replica number is equal to the maximumMaxReplica,
110+
// increasing the resource request would not change the situation that the replica number is higher than preferredMaxReplicas.
111+
*hpa.Spec.MinReplicas < replicaNum &&
112+
features.Contains(s.featureFlags, features.VerticalScalingBasedOnPreferredMaxReplicas) &&
113+
allowVerticalScalingBasedOnPreferredMaxReplicas(tortoise, now) {
114+
115+
c := utils.GetTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas)
116+
if c == nil || // no condition yet
117+
c.Status == v1.ConditionFalse {
118+
// It's the first time to notice that the current replica number is bigger than the preferred max replica number.
119+
// First 30min, we don't use VerticalScalingBasedOnPreferredMaxReplicas because this replica increase might be very temporal.
120+
// So, here we just change the condition to True, but doesn't trigger scaledUpBasedOnPreferredMaxReplicas.
121+
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionTrue, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is bigger than the preferred max replica number", now)
122+
} else {
123+
// We keep increasing the size until we hit the maxResourceSize.
124+
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionTrue, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is bigger than the preferred max replica number", now)
125+
scaledUpBasedOnPreferredMaxReplicas = true
126+
}
127+
}
128+
if replicaNum < s.preferredMaxReplicas {
129+
// Change TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas to False.
130+
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionFalse, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is not bigger than the preferred max replica number", now)
131+
}
132+
}
133+
105134
logger := log.FromContext(ctx)
106135
requestMap := map[string]map[corev1.ResourceName]resource.Quantity{}
107136
for _, r := range tortoise.Status.Conditions.ContainerResourceRequests {
@@ -159,7 +188,7 @@ func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3
159188
var newSize int64
160189
var reason string
161190
var err error
162-
newSize, reason, tortoise, err = s.calculateBestNewSize(ctx, tortoise, p, r.ContainerName, recom, k, hpa, replicaNum, req, minAllocatedResourcesMap[r.ContainerName], now)
191+
newSize, reason, err = s.calculateBestNewSize(ctx, tortoise, p, r.ContainerName, recom, k, hpa, replicaNum, req, minAllocatedResourcesMap[r.ContainerName], scaledUpBasedOnPreferredMaxReplicas, now)
163192
if err != nil {
164193
return tortoise, err
165194
}
@@ -185,7 +214,7 @@ func (s *Service) updateVPARecommendation(ctx context.Context, tortoise *v1beta3
185214
func allowVerticalScalingBasedOnPreferredMaxReplicas(tortoise *v1beta3.Tortoise, now time.Time) bool {
186215
for _, c := range tortoise.Status.Conditions.TortoiseConditions {
187216
if c.Type == v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas && c.Status == v1.ConditionTrue {
188-
if c.LastTransitionTime.Add(30*time.Minute).After(now) && !c.LastTransitionTime.Time.Equal(now) {
217+
if c.LastTransitionTime.Add(30 * time.Minute).After(now) {
189218
// If the last transition time is within 30 minutes,
190219
// we don't allow the vertical scaling based on the preferred max replicas.
191220
return false
@@ -209,11 +238,12 @@ func (s *Service) calculateBestNewSize(
209238
replicaNum int32,
210239
resourceRequest resource.Quantity,
211240
minAllocatedResources corev1.ResourceList,
241+
scaledUpBasedOnPreferredMaxReplicas bool,
212242
now time.Time,
213-
) (int64, string, *v1beta3.Tortoise, error) {
243+
) (int64, string, error) {
214244
if p == v1beta3.AutoscalingTypeOff {
215245
// Just keep the current resource request.
216-
return resourceRequest.MilliValue(), "", tortoise, nil
246+
return resourceRequest.MilliValue(), "The autoscaling policy for this resource is Off", nil
217247
}
218248

219249
if p == v1beta3.AutoscalingTypeVertical {
@@ -222,7 +252,7 @@ func (s *Service) calculateBestNewSize(
222252
// We always follow the recommendation from VPA.
223253
newSize := float64(recommendedResourceRequest.MilliValue()) * (1 + s.bufferRatioOnVerticalResource)
224254
jastified := s.justifyNewSize(resourceRequest.MilliValue(), int64(newSize), k, minAllocatedResources, containerName)
225-
return jastified, fmt.Sprintf("change %v request (%v) (%v → %v) based on VPA suggestion", k, containerName, resourceRequest.MilliValue(), jastified), tortoise, nil
255+
return jastified, fmt.Sprintf("change %v request (%v) (%v → %v) based on VPA suggestion", k, containerName, resourceRequest.MilliValue(), jastified), nil
226256
}
227257

228258
// p == v1beta3.AutoscalingTypeHorizontal
@@ -231,22 +261,12 @@ func (s *Service) calculateBestNewSize(
231261
// make the container size bigger (just multiple by 1.3) so that the replica number will be descreased.
232262
//
233263
// Here also covers the scenario where the current replica num hits MaximumMaxReplicas.
234-
if replicaNum >= s.preferredMaxReplicas &&
235-
// If the current replica number is equal to the maximumMaxReplica, increasing the resource request would not change the situation that the replica number is higher than preferredMaxReplicas.
236-
*hpa.Spec.MinReplicas != replicaNum &&
237-
features.Contains(s.featureFlags, features.VerticalScalingBasedOnPreferredMaxReplicas) &&
238-
allowVerticalScalingBasedOnPreferredMaxReplicas(tortoise, now) {
264+
if scaledUpBasedOnPreferredMaxReplicas {
239265
// We keep increasing the size until we hit the maxResourceSize.
240266
newSize := int64(float64(resourceRequest.MilliValue()) * 1.3)
241267
jastifiedNewSize := s.justifyNewSize(resourceRequest.MilliValue(), newSize, k, minAllocatedResources, containerName)
242-
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionTrue, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is bigger than the preferred max replica number", now)
243-
msg := fmt.Sprintf("the current number of replicas is bigger than the preferred max replica number in this cluster (%v), so make %v request (%s) bigger (%v → %v)", s.preferredMaxReplicas, k, containerName, resourceRequest.MilliValue(), jastifiedNewSize)
244-
return jastifiedNewSize, msg, tortoise, nil
245-
}
246-
247-
if replicaNum < s.preferredMaxReplicas {
248-
// Change TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas to False.
249-
tortoise = utils.ChangeTortoiseCondition(tortoise, v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas, v1.ConditionFalse, "ScaledUpBasedOnPreferredMaxReplicas", "the current number of replicas is not bigger than the preferred max replica number", now)
268+
msg := fmt.Sprintf("the current number of replicas (%v) is bigger than the preferred max replica number in this cluster (%v), so make %v request (%s) bigger (%v → %v)", replicaNum, s.preferredMaxReplicas, k, containerName, resourceRequest.MilliValue(), jastifiedNewSize)
269+
return jastifiedNewSize, msg, nil
250270
}
251271

252272
if replicaNum <= s.minimumMinReplicas {
@@ -263,7 +283,7 @@ func (s *Service) calculateBestNewSize(
263283
}
264284
jastified := s.justifyNewSize(resourceRequest.MilliValue(), newSize, k, minAllocatedResources, containerName)
265285

266-
return jastified, fmt.Sprintf("the current number of replicas is equal or smaller than the minimum min replica number in this cluster (%v), so make %v request (%v) smaller (%v → %v) based on VPA suggestion", s.minimumMinReplicas, k, containerName, resourceRequest.MilliValue(), jastified), tortoise, nil
286+
return jastified, fmt.Sprintf("the current number of replicas is equal or smaller than the minimum min replica number in this cluster (%v), so make %v request (%v) smaller (%v → %v) based on VPA suggestion", s.minimumMinReplicas, k, containerName, resourceRequest.MilliValue(), jastified), nil
267287
}
268288

269289
// The replica number is OK based on minimumMinReplicas and preferredMaxReplicas.
@@ -273,12 +293,12 @@ func (s *Service) calculateBestNewSize(
273293
// Also, if the current replica number is equal to the minReplicas,
274294
// we don't change the resource request based on the current resource utilization
275295
// because even if the resource utilization is low, it's due to the minReplicas.
276-
return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, containerName), "nothing to do", tortoise, nil
296+
return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, containerName), "nothing to do", nil
277297
}
278298

279299
targetUtilizationValue, err := hpaservice.GetHPATargetValue(ctx, hpa, containerName, k)
280300
if err != nil {
281-
return 0, "", tortoise, fmt.Errorf("get the target value from HPA: %w", err)
301+
return 0, "", fmt.Errorf("get the target value from HPA: %w", err)
282302
}
283303

284304
upperUtilization := (float64(recommendedResourceRequest.MilliValue()) / float64(resourceRequest.MilliValue())) * 100
@@ -293,12 +313,23 @@ func (s *Service) calculateBestNewSize(
293313
// so that the upper usage will be the target usage.
294314
newSize := int64(float64(recommendedResourceRequest.MilliValue()) * 100.0 / float64(targetUtilizationValue))
295315
jastified := s.justifyNewSize(resourceRequest.MilliValue(), newSize, k, minAllocatedResources, containerName)
296-
return jastified, fmt.Sprintf("the current resource usage (%v, %v%%) is too small and it's due to unbalanced container size, so make %v request (%v) smaller (%v → %v) based on VPA's recommendation and HPA target utilization %v%%", recommendedResourceRequest.MilliValue(), int(upperUtilization), k, containerName, resourceRequest.MilliValue(), jastified, targetUtilizationValue), tortoise, nil
316+
return jastified, fmt.Sprintf("the current resource usage (%v, %v%%) is too small and it's due to unbalanced container size, so make %v request (%v) smaller (%v → %v) based on VPA's recommendation and HPA target utilization %v%%", recommendedResourceRequest.MilliValue(), int(upperUtilization), k, containerName, resourceRequest.MilliValue(), jastified, targetUtilizationValue), nil
297317
}
298318

299319
// Just keep the current resource request.
300320
// Only do justification.
301-
return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, containerName), "nothing to do", tortoise, nil
321+
return s.justifyNewSize(resourceRequest.MilliValue(), resourceRequest.MilliValue(), k, minAllocatedResources, containerName), "nothing to do", nil
322+
}
323+
324+
func hasHorizontal(tortoise *v1beta3.Tortoise) bool {
325+
for _, r := range tortoise.Status.AutoscalingPolicy {
326+
for _, p := range r.Policy {
327+
if p == v1beta3.AutoscalingTypeHorizontal {
328+
return true
329+
}
330+
}
331+
}
332+
return false
302333
}
303334

304335
func hasMultipleHorizontal(t *v1beta3.Tortoise) bool {

pkg/recommender/recommender_test.go

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,6 +1613,14 @@ func TestService_UpdateVPARecommendation(t *testing.T) {
16131613
}).AddResourcePolicy(v1beta3.ContainerResourcePolicy{
16141614
ContainerName: "test-container",
16151615
MinAllocatedResources: createResourceList("100m", "100Mi"),
1616+
}).AddTortoiseConditions(v1beta3.TortoiseCondition{
1617+
Type: v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas,
1618+
Status: corev1.ConditionTrue,
1619+
Reason: "ScaledUpBasedOnPreferredMaxReplicas",
1620+
Message: "the current number of replicas is bigger than the preferred max replica number",
1621+
// not recently updated.
1622+
LastTransitionTime: metav1.NewTime(now.Add(-31 * time.Minute)),
1623+
LastUpdateTime: metav1.NewTime(now.Add(-31 * time.Minute)),
16161624
}).AddContainerRecommendationFromVPA(
16171625
v1beta3.ContainerRecommendationFromVPA{
16181626
ContainerName: "test-container",
@@ -1674,6 +1682,112 @@ func TestService_UpdateVPARecommendation(t *testing.T) {
16741682
}).Build(),
16751683
wantErr: false,
16761684
},
1685+
{
1686+
name: "all horizontal: the temporal replica count being above preferredMaxReplicas doesn't trigger the resource increase of VerticalScalingBasedOnPreferredMaxReplicas",
1687+
fields: fields{
1688+
preferredMaxReplicas: 3,
1689+
maxCPU: "1000m",
1690+
maxMemory: "1Gi",
1691+
features: []features.FeatureFlag{features.VerticalScalingBasedOnPreferredMaxReplicas},
1692+
},
1693+
args: args{
1694+
hpa: &v2.HorizontalPodAutoscaler{
1695+
Spec: v2.HorizontalPodAutoscalerSpec{
1696+
MinReplicas: ptr.To[int32](1),
1697+
Metrics: []v2.MetricSpec{
1698+
{
1699+
Type: v2.ContainerResourceMetricSourceType,
1700+
ContainerResource: &v2.ContainerResourceMetricSource{
1701+
Name: corev1.ResourceCPU,
1702+
Target: v2.MetricTarget{
1703+
AverageUtilization: ptr.To[int32](80),
1704+
},
1705+
Container: "test-container",
1706+
},
1707+
},
1708+
{
1709+
Type: v2.ContainerResourceMetricSourceType,
1710+
ContainerResource: &v2.ContainerResourceMetricSource{
1711+
Name: corev1.ResourceMemory,
1712+
Target: v2.MetricTarget{
1713+
AverageUtilization: ptr.To[int32](80),
1714+
},
1715+
Container: "test-container",
1716+
},
1717+
},
1718+
},
1719+
},
1720+
},
1721+
tortoise: utils.NewTortoiseBuilder().AddAutoscalingPolicy(v1beta3.ContainerAutoscalingPolicy{
1722+
ContainerName: "test-container",
1723+
Policy: map[corev1.ResourceName]v1beta3.AutoscalingType{
1724+
corev1.ResourceCPU: v1beta3.AutoscalingTypeHorizontal,
1725+
corev1.ResourceMemory: v1beta3.AutoscalingTypeHorizontal,
1726+
},
1727+
}).AddResourcePolicy(v1beta3.ContainerResourcePolicy{
1728+
ContainerName: "test-container",
1729+
MinAllocatedResources: createResourceList("100m", "100Mi"),
1730+
}).AddContainerRecommendationFromVPA(
1731+
v1beta3.ContainerRecommendationFromVPA{
1732+
ContainerName: "test-container",
1733+
Recommendation: map[corev1.ResourceName]v1beta3.ResourceQuantity{
1734+
corev1.ResourceCPU: {
1735+
Quantity: resource.MustParse("500m"),
1736+
},
1737+
corev1.ResourceMemory: {
1738+
Quantity: resource.MustParse("500Mi"),
1739+
},
1740+
},
1741+
},
1742+
).AddContainerResourceRequests(v1beta3.ContainerResourceRequests{
1743+
ContainerName: "test-container",
1744+
Resource: createResourceList("500m", "500Mi"),
1745+
}).Build(),
1746+
replicaNum: 4,
1747+
},
1748+
want: utils.NewTortoiseBuilder().AddAutoscalingPolicy(v1beta3.ContainerAutoscalingPolicy{
1749+
ContainerName: "test-container",
1750+
Policy: map[corev1.ResourceName]v1beta3.AutoscalingType{
1751+
corev1.ResourceCPU: v1beta3.AutoscalingTypeHorizontal,
1752+
corev1.ResourceMemory: v1beta3.AutoscalingTypeHorizontal,
1753+
},
1754+
}).AddResourcePolicy(v1beta3.ContainerResourcePolicy{
1755+
ContainerName: "test-container",
1756+
MinAllocatedResources: createResourceList("100m", "100Mi"),
1757+
}).AddContainerRecommendationFromVPA(
1758+
v1beta3.ContainerRecommendationFromVPA{
1759+
ContainerName: "test-container",
1760+
Recommendation: map[corev1.ResourceName]v1beta3.ResourceQuantity{
1761+
corev1.ResourceCPU: {
1762+
Quantity: resource.MustParse("500m"),
1763+
},
1764+
corev1.ResourceMemory: {
1765+
Quantity: resource.MustParse("500Mi"),
1766+
},
1767+
},
1768+
},
1769+
).AddTortoiseConditions(v1beta3.TortoiseCondition{
1770+
Type: v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas,
1771+
Status: corev1.ConditionTrue,
1772+
Reason: "ScaledUpBasedOnPreferredMaxReplicas",
1773+
Message: "the current number of replicas is bigger than the preferred max replica number",
1774+
LastTransitionTime: metav1.NewTime(now),
1775+
LastUpdateTime: metav1.NewTime(now),
1776+
}).AddContainerResourceRequests(v1beta3.ContainerResourceRequests{
1777+
ContainerName: "test-container",
1778+
Resource: createResourceList("500m", "500Mi"),
1779+
}).SetRecommendations(v1beta3.Recommendations{
1780+
Vertical: v1beta3.VerticalRecommendations{
1781+
ContainerResourceRecommendation: []v1beta3.RecommendedContainerResources{
1782+
{
1783+
ContainerName: "test-container",
1784+
RecommendedResource: createResourceList("500m", "500Mi"), // current
1785+
},
1786+
},
1787+
},
1788+
}).Build(),
1789+
wantErr: false,
1790+
},
16771791
{
16781792
name: "all horizontal: replica count above preferredMaxReplicas, but we recently increase the resource: don't increase the resources",
16791793
fields: fields{
@@ -2011,6 +2125,14 @@ func TestService_UpdateVPARecommendation(t *testing.T) {
20112125
}).AddResourcePolicy(v1beta3.ContainerResourcePolicy{
20122126
ContainerName: "test-container",
20132127
MinAllocatedResources: createResourceList("100m", "100Mi"),
2128+
}).AddTortoiseConditions(v1beta3.TortoiseCondition{
2129+
Type: v1beta3.TortoiseConditionTypeScaledUpBasedOnPreferredMaxReplicas,
2130+
Status: corev1.ConditionTrue,
2131+
Reason: "ScaledUpBasedOnPreferredMaxReplicas",
2132+
Message: "the current number of replicas is bigger than the preferred max replica number",
2133+
// not recently updated.
2134+
LastTransitionTime: metav1.NewTime(now.Add(-31 * time.Minute)),
2135+
LastUpdateTime: metav1.NewTime(now.Add(-31 * time.Minute)),
20142136
}).AddContainerRecommendationFromVPA(
20152137
v1beta3.ContainerRecommendationFromVPA{
20162138
ContainerName: "test-container",

pkg/utils/tortoise.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ func ChangeTortoiseCondition(t *v1beta3.Tortoise, conditionType v1beta3.Tortoise
3535
return t
3636
}
3737

38+
func GetTortoiseCondition(t *v1beta3.Tortoise, conditionType v1beta3.TortoiseConditionType) *v1beta3.TortoiseCondition {
39+
for i := range t.Status.Conditions.TortoiseConditions {
40+
if t.Status.Conditions.TortoiseConditions[i].Type == conditionType {
41+
return &t.Status.Conditions.TortoiseConditions[i]
42+
}
43+
}
44+
45+
return nil
46+
}
47+
3848
func ChangeTortoiseContainerResourcePhase(tortoise *v1beta3.Tortoise, containerName string, rn corev1.ResourceName, now time.Time, phase v1beta3.ContainerResourcePhase) *v1beta3.Tortoise {
3949
found := false
4050
for i, p := range tortoise.Status.ContainerResourcePhases {

0 commit comments

Comments
 (0)