Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(scaler): scale out by group and wait until pods are scheduled #4907

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ require (
github.com/Masterminds/semver v1.4.2
github.com/agiledragon/gomonkey/v2 v2.7.0
github.com/aws/aws-sdk-go v1.44.72
github.com/aws/aws-sdk-go-v2 v1.16.11
github.com/aws/aws-sdk-go-v2/config v1.17.1
github.com/aws/aws-sdk-go-v2/service/ec2 v1.53.0
github.com/aws/smithy-go v1.12.1
github.com/aws/aws-sdk-go-v2 v1.17.4
github.com/aws/aws-sdk-go-v2/config v1.18.13
github.com/aws/aws-sdk-go-v2/service/ec2 v1.86.0
github.com/aws/smithy-go v1.13.5
github.com/docker/docker v1.4.2-0.20200309214505-aa6a9891b09c
github.com/dustin/go-humanize v1.0.0
github.com/emicklei/go-restful v2.16.0+incompatible
Expand Down Expand Up @@ -76,6 +76,8 @@ require (
sigs.k8s.io/controller-runtime v0.7.2
)

require github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.2 // indirect

require (
cloud.google.com/go v0.51.0 // indirect
github.com/Azure/azure-pipeline-go v0.2.1 // indirect
Expand All @@ -94,14 +96,14 @@ require (
github.com/NYTimes/gziphandler v1.1.1 // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.12.14 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.12 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.18 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.12 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.19 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.12 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.11.17 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.16.13 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.13.13 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.22 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.28 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.22 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.29 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.22 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.12.2 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.18.3 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver v3.5.0+incompatible // indirect
github.com/cespare/xxhash/v2 v2.1.1 // indirect
Expand Down
50 changes: 26 additions & 24 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -111,30 +111,32 @@ github.com/aws/aws-sdk-go v1.19.45/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpi
github.com/aws/aws-sdk-go v1.28.2/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.44.72 h1:i7J5XT7pjBjtl1OrdIhiQHzsG89wkZCcM1HhyK++3DI=
github.com/aws/aws-sdk-go v1.44.72/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
github.com/aws/aws-sdk-go-v2 v1.16.11 h1:xM1ZPSvty3xVmdxiGr7ay/wlqv+MWhH0rMlyLdbC0YQ=
github.com/aws/aws-sdk-go-v2 v1.16.11/go.mod h1:WTACcleLz6VZTp7fak4EO5b9Q4foxbn+8PIz3PmyKlo=
github.com/aws/aws-sdk-go-v2/config v1.17.1 h1:BWxTjokU/69BZ4DnLrZco6OvBDii6ToEdfBL/y5I1nA=
github.com/aws/aws-sdk-go-v2/config v1.17.1/go.mod h1:uOxDHjBemNTF2Zos+fgG0NNfE86wn1OAHDTGxjMEYi0=
github.com/aws/aws-sdk-go-v2/credentials v1.12.14 h1:AtVG/amkjbDBfnPr/tuW2IG18HGNznP6L12Dx0rLz+Q=
github.com/aws/aws-sdk-go-v2/credentials v1.12.14/go.mod h1:opAndTyq+YN7IpVG57z2CeNuXSQMqTYxGGlYH0m0RMY=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.12 h1:wgJBHO58Pc1V1QAnzdVM3JK3WbE/6eUF0JxCZ+/izz0=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.12/go.mod h1:aZ4vZnyUuxedC7eD4JyEHpGnCz+O2sHQEx3VvAwklSE=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.18 h1:OmiwoVyLKEqqD5GvB683dbSqxiOfvx4U2lDZhG2Esc4=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.18/go.mod h1:348MLhzV1GSlZSMusdwQpXKbhD7X2gbI/TxwAPKkYZQ=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.12 h1:5mvQDtNWtI6H56+E4LUnLWEmATMB7oEh+Z9RurtIuC0=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.12/go.mod h1:ckaCVTEdGAxO6KwTGzgskxR1xM+iJW4lxMyDFVda2Fc=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.19 h1:g5qq9sgtEzt2szMaDqQO6fqKe026T6dHTFJp5NsPzkQ=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.19/go.mod h1:cVHo8KTuHjShb9V8/VjH3S/8+xPu16qx8fdGwmotJhE=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.53.0 h1:ykXBf42stQW1SyoLCuhd405kyTq440+3TIOkqhsETMs=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.53.0/go.mod h1:YbPg6ou7dlvFTJMmbV3zhec+A22S1Ow+ZB6k6xUs9oY=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.12 h1:7iPTTX4SAI2U2VOogD7/gmHlsgnYSgoNHt7MSQXtG2M=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.12/go.mod h1:1TODGhheLWjpQWSuhYuAUWYTCKwEjx2iblIFKDHjeTc=
github.com/aws/aws-sdk-go-v2/service/sso v1.11.17 h1:pXxu9u2z1UqSbjO9YA8kmFJBhFc1EVTDaf7A+S+Ivq8=
github.com/aws/aws-sdk-go-v2/service/sso v1.11.17/go.mod h1:mS5xqLZc/6kc06IpXn5vRxdLaED+jEuaSRv5BxtnsiY=
github.com/aws/aws-sdk-go-v2/service/sts v1.16.13 h1:dl8T0PJlN92rvEGOEUiD0+YPYdPEaCZK0TqHukvSfII=
github.com/aws/aws-sdk-go-v2/service/sts v1.16.13/go.mod h1:Ru3QVMLygVs/07UQ3YDur1AQZZp2tUNje8wfloFttC0=
github.com/aws/smithy-go v1.12.1 h1:yQRC55aXN/y1W10HgwHle01DRuV9Dpf31iGkotjt3Ag=
github.com/aws/smithy-go v1.12.1/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/aws/aws-sdk-go-v2 v1.17.4 h1:wyC6p9Yfq6V2y98wfDsj6OnNQa4w2BLGCLIxzNhwOGY=
github.com/aws/aws-sdk-go-v2 v1.17.4/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
github.com/aws/aws-sdk-go-v2/config v1.18.13 h1:v0xlYqbO6/EVlM8tUn2QEOA7btQxcgidEq2JRDBPTho=
github.com/aws/aws-sdk-go-v2/config v1.18.13/go.mod h1:r39wGSZB7wPDW1i54JyQXUpc5KsWjh5z/3S5D9eCqDg=
github.com/aws/aws-sdk-go-v2/credentials v1.13.13 h1:zw1KAc1kl00NYd3ofVmFrb09qnYlSQMeh+fmlQRAihI=
github.com/aws/aws-sdk-go-v2/credentials v1.13.13/go.mod h1:DW9nbIIF9MrIja0cBQrUpeWYQMSlNmP8fevLUyF9W38=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.22 h1:3aMfcTmoXtTZnaT86QlVaYh+BRMbvrrmZwIQ5jWqCZQ=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.22/go.mod h1:YGSIJyQ6D6FjKMQh16hVFSIUD54L4F7zTGePqYMYYJU=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.28 h1:r+XwaCLpIvCKjBIYy/HVZujQS9tsz5ohHG3ZIe0wKoE=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.28/go.mod h1:3lwChorpIM/BhImY/hy+Z6jekmN92cXGPI1QJasVPYY=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.22 h1:7AwGYXDdqRQYsluvKFmWoqpcOQJ4bH634SkYf3FNj/A=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.22/go.mod h1:EqK7gVrIGAHyZItrD1D8B0ilgwMD1GiWAmbU4u/JHNk=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.29 h1:J4xhFd6zHhdF9jPP0FQJ6WknzBboGMBNjKOv4iTuw4A=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.29/go.mod h1:TwuqRBGzxjQJIwH16/fOZodwXt2Zxa9/cwJC5ke4j7s=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.86.0 h1:4dt0Mg5veHbMLcA2JAR9LDxvqXjtG0ZLQdxZG/2wHy4=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.86.0/go.mod h1:jK4MhMMe6HIe4qnjGaQqQQECcsxRZ0q86oCq06T8IEE=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.22 h1:LjFQf8hFuMO22HkV5VWGLBvmCLBCLPivUAmpdpnp4Vs=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.22/go.mod h1:xt0Au8yPIwYXf/GYPy/vl4K3CgwhfQMYbrH7DlUUIws=
github.com/aws/aws-sdk-go-v2/service/sso v1.12.2 h1:EN102fWY7hI5u/2FPheTrwwMHkSXfl49RYkeEnJsrCU=
github.com/aws/aws-sdk-go-v2/service/sso v1.12.2/go.mod h1:IgV8l3sj22nQDd5qcAGY0WenwCzCphqdbFOpfktZPrI=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.2 h1:f1lmlce7r13CX1BPyPqt9oh/H+uqOWc9367lDoGGwNQ=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.2/go.mod h1:O1YSOg3aekZibh2SngvCRRG+cRHKKlYgxf/JBF/Kr/k=
github.com/aws/aws-sdk-go-v2/service/sts v1.18.3 h1:s49mSnsBZEXjfGBkRfmK+nPqzT7Lt3+t2SmAKNyHblw=
github.com/aws/aws-sdk-go-v2/service/sts v1.18.3/go.mod h1:b+psTJn33Q4qGoDaM7ZiOVVG8uVjGI6HaZ8WBHdgDgU=
github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8=
github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand Down
7 changes: 7 additions & 0 deletions pkg/manager/member/tiflash_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,13 @@ func (m *tiflashMemberManager) syncStatefulSet(tc *v1alpha1.TidbCluster) error {
if err != nil {
return err
}
if newSet.Spec.Replicas == nil {
newSet.Spec.Replicas = pointer.Int32Ptr(1)
}

if *newSet.Spec.Replicas >= 3 {
newSet.Spec.Replicas = pointer.Int32Ptr(3)
}
err = m.deps.StatefulSetControl.CreateStatefulSet(tc, newSet)
if err != nil {
return err
Expand Down
53 changes: 41 additions & 12 deletions pkg/manager/member/tiflash_scaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,16 @@ import (
"strconv"
"time"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
apps "k8s.io/api/apps/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
errorutils "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"

"github.com/pingcap/tidb-operator/pkg/apis/label"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
)

type tiflashScaler struct {
Expand Down Expand Up @@ -56,31 +57,54 @@ func (s *tiflashScaler) ScaleOut(meta metav1.Object, oldSet *apps.StatefulSet, n
return nil
}

ls, err := label.New().Instance(tc.Name).Component(label.TiFlashLabelVal).Selector()
if err != nil {
return fmt.Errorf("invalid label selector: %w", err)
}

pods, err := s.deps.PodLister.Pods(tc.Namespace).List(ls)
if err != nil {
return fmt.Errorf("can't list pods with label selector %s: %w", ls, err)
}

if len(pods) < int(*oldSet.Spec.Replicas) {
return fmt.Errorf("wait until tiflash pods are created, expected in sts %v, current pods num %v", *oldSet.Spec.Replicas, len(pods))
}

unscheduledPods := sets.NewString()
for _, pod := range pods {
if pod.Spec.NodeName == "" {
unscheduledPods.Insert(pod.Name)
}
}

if unscheduledPods.Len() > 0 {
return fmt.Errorf("wait until tiflash pods are scheduled, unscheduled pods: %v", unscheduledPods)
}

scaleOutParallelism := tc.Spec.TiFlash.GetScaleOutParallelism()
_, ordinals, replicas, deleteSlots := scaleMulti(oldSet, newSet, scaleOutParallelism)
klog.Infof("scaling out tiflash statefulset %s/%s, ordinal: %v (replicas: %d, scale out parallelism: %d, delete slots: %v)",
oldSet.Namespace, oldSet.Name, ordinals, replicas, scaleOutParallelism, deleteSlots.List())

var (
errs []error
finishedOrdinals = sets.NewInt32()
updateReplicasAndDeleteSlots bool
errs []error
finishedOrdinals = sets.NewInt32()
)
for _, ordinal := range ordinals {
err := s.scaleOutOne(tc, ordinal)
if err != nil {
errs = append(errs, err)
} else {
finishedOrdinals.Insert(ordinal)
updateReplicasAndDeleteSlots = true
}
}
if updateReplicasAndDeleteSlots {
setReplicasAndDeleteSlotsByFinished(scalingOutFlag, newSet, oldSet, ordinals, finishedOrdinals)
} else {
resetReplicas(newSet, oldSet)
if err := errorutils.NewAggregate(errs); err != nil {
return err
}
return errorutils.NewAggregate(errs)
setReplicasAndDeleteSlotsByFinished(scalingOutFlag, newSet, oldSet, ordinals, finishedOrdinals)

return nil
}

func (s *tiflashScaler) scaleOutOne(tc *v1alpha1.TidbCluster, ordinal int32) error {
Expand Down Expand Up @@ -120,6 +144,11 @@ func (s *tiflashScaler) ScaleIn(meta metav1.Object, oldSet *apps.StatefulSet, ne
}
}

// If len(errs) != 0, resetReplicas is not useful.
// But only when len(ordinals) == 0, resetReplicas and len(errs) == 0 will both be true.
// However, len(ordinals) != 0 only when the first return value of scaleMulti is not zero (scaling != 0).
// And only when scaling != 0 this function will be called.
// TODO: resetReplicas is not needed
if updateReplicasAndDeleteSlots {
setReplicasAndDeleteSlotsByFinished(scalingInFlag, newSet, oldSet, ordinals, finishedOrdinals)
} else {
Expand Down
Loading