Skip to content

Commit

Permalink
Cluster watcher - retry options [v0.26.x] (#421)
Browse files Browse the repository at this point in the history
* backport

* comments

* make max jitter configurable

* rename to attempts
  • Loading branch information
jenshu authored Mar 9, 2023
1 parent 32358d3 commit 4013ce5
Show file tree
Hide file tree
Showing 9 changed files with 144 additions and 28 deletions.
11 changes: 11 additions & 0 deletions changelog/v0.26.2/retry-options.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
changelog:
- type: NEW_FEATURE
description: >
NewClusterWatcher now takes in an additional `RetryOptions` argument, which specifies how retries should be done if we fail to create or start the remote cluster manager.
issueLink: https://github.com/solo-io/gloo/issues/7814
resolvesIssue: false
- type: DEPENDENCY_BUMP
dependencyOwner: avast
dependencyRepo: retry-go
dependencyTag: v4.3.3
description: Update to latest retry-go version, which supports infinite retry.
2 changes: 1 addition & 1 deletion ci/oss_compliance/osa_provided.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Name|Version|License
[google.golang.org/protobuf](https://google.golang.org/protobuf)|v1.27.1|BSD 3-clause "New" or "Revised" License
[gopkg.in/inf.v0](https://gopkg.in/inf.v0)|v0.9.1|BSD 3-clause "New" or "Revised" License
[gopkg.in/yaml.v2](https://gopkg.in/yaml.v2)|v2.4.0|Apache License 2.0
[gopkg.in/yaml.v3](https://gopkg.in/yaml.v3)|v3.0.0-20210107192922-496545a6307b|MIT License
[gopkg.in/yaml.v3](https://gopkg.in/yaml.v3)|v3.0.1|MIT License
[k8s.io/api](https://k8s.io/api)|v0.21.4|Apache License 2.0
[k8s.io/apimachinery](https://k8s.io/apimachinery)|v0.21.4|Apache License 2.0
[k8s.io/client-go](https://k8s.io/client-go)|v0.21.4|Apache License 2.0
Expand Down
4 changes: 2 additions & 2 deletions codegen/render/kube_multicluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ var _ = WithRemoteClusterContextDescribe("Multicluster", func() {

Describe("clientset", func() {
It("works", func() {
cw := watch.NewClusterWatcher(ctx, manager.Options{Namespace: ns}, nil)
cw := watch.NewClusterWatcher(ctx, manager.Options{Namespace: ns}, watch.RetryOptions{}, nil)
err := cw.Run(masterManager)
Expect(err).NotTo(HaveOccurred())
mcClientset := multicluster.NewClient(cw)
Expand Down Expand Up @@ -187,7 +187,7 @@ var _ = WithRemoteClusterContextDescribe("Multicluster", func() {
}

BeforeEach(func() {
cw = watch.NewClusterWatcher(ctx, manager.Options{Namespace: ns}, nil)
cw = watch.NewClusterWatcher(ctx, manager.Options{Namespace: ns}, watch.RetryOptions{}, nil)
})

It("works when a loop is registered before the watcher is started", func() {
Expand Down
5 changes: 3 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require (
github.com/BurntSushi/toml v0.3.1
github.com/Masterminds/semver v1.4.2
github.com/Masterminds/sprig/v3 v3.1.0
github.com/avast/retry-go v2.2.0+incompatible
github.com/avast/retry-go/v4 v4.3.3
github.com/aws/aws-sdk-go v1.30.15
github.com/envoyproxy/protoc-gen-validate v0.6.1
github.com/gertd/go-pluralize v0.1.1
Expand Down Expand Up @@ -70,6 +70,7 @@ require (
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/aokoli/goutils v1.0.1 // indirect
github.com/avast/retry-go v2.2.0+incompatible // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.1 // indirect
github.com/cockroachdb/apd/v2 v2.0.1 // indirect
Expand Down Expand Up @@ -141,7 +142,7 @@ require (
google.golang.org/grpc v1.35.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/component-base v0.21.4 // indirect
k8s.io/gengo v0.0.0-20201214224949-b6c5ce23f027 // indirect
k8s.io/kube-openapi v0.0.0-20210305001622-591a79e4bda7 // indirect
Expand Down
12 changes: 10 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:l
github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496/go.mod h1:oGkLhpf+kjZl6xBf758TQhh5XrAeiJv/7FRz/2spLIg=
github.com/avast/retry-go v2.2.0+incompatible h1:m+w7mVLWa/oKqX2xYqiEKQQkeGH8DDEXB/XnjS54Wyw=
github.com/avast/retry-go v2.2.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/avast/retry-go/v4 v4.3.3 h1:G56Bp6mU0b5HE1SkaoVjscZjlQb0oy4mezwY/cGH19w=
github.com/avast/retry-go/v4 v4.3.3/go.mod h1:rg6XFaiuFYII0Xu3RDbZQkxCofFwruZKW8oEF1jpWiU=
github.com/aws/aws-sdk-go v1.15.11/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0=
github.com/aws/aws-sdk-go v1.26.7/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.30.15 h1:Sd8QDVzzE8Sl+xNccmdj0HwMrFowv6uVUx9tGsCE1ZE=
Expand Down Expand Up @@ -862,15 +864,20 @@ github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v0.0.0-20170130113145-4d4bfba8f1d1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.1/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/syndtr/gocapability v0.0.0-20170704070218-db04d3cc01c8/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
Expand Down Expand Up @@ -1429,8 +1436,9 @@ gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200121175148-a6ecf24a6d71/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8=
Expand Down
1 change: 1 addition & 0 deletions pkg/bootstrap/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ func StartMulti(
Namespace: "", // TODO (ilackarms): support configuring specific watch namespaces on remote clusters
Scheme: mgr.GetScheme(),
},
watch.RetryOptions{},
nil,
)

Expand Down
2 changes: 1 addition & 1 deletion pkg/multicluster/register/registrant.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"net/url"
"time"

"github.com/avast/retry-go"
"github.com/avast/retry-go/v4"
"github.com/hashicorp/go-multierror"
"github.com/rotisserie/eris"
"github.com/solo-io/skv2/pkg/api/multicluster.solo.io/v1alpha1"
Expand Down
2 changes: 1 addition & 1 deletion pkg/multicluster/register/registrant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
k8s_core_v1_providers "github.com/solo-io/skv2/pkg/multicluster/internal/k8s/core/v1/providers"
rbac_v1_providers "github.com/solo-io/skv2/pkg/multicluster/internal/k8s/rbac.authorization.k8s.io/v1/providers"

"github.com/avast/retry-go"
"github.com/avast/retry-go/v4"
"github.com/golang/mock/gomock"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
Expand Down
133 changes: 114 additions & 19 deletions pkg/multicluster/watch/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"sync"
"time"

"github.com/avast/retry-go"
"github.com/avast/retry-go/v4"
"github.com/rotisserie/eris"
"github.com/solo-io/go-utils/contextutils"
"github.com/solo-io/skv2/pkg/multicluster"
Expand All @@ -19,28 +19,70 @@ import (
"sigs.k8s.io/controller-runtime/pkg/manager"
)

// RetryOptions specify how to retry when a manager fails to be created or started.
type RetryOptions struct {
// DelayType is the type of retry delay (fixed or exponential backoff).
// Default is exponential backoff.
DelayType RetryDelayType

// Delay is the initial retry delay.
// Default is 1 second.
Delay *time.Duration

// MaxDelay is the maximum delay between retries.
// Default is 0 (no max).
MaxDelay *time.Duration

// MaxJitter is the maximum random jitter between retries.
// If set to 0, the delay intervals will contain no randomness.
// Default is the same value as delay (1 second).
MaxJitter *time.Duration

// Attempts is the maximum number of attempts.
// Default is 0 (retry forever).
Attempts *uint
}

// RetryDelayType is the type of delay to be used for manager creation retries.
type RetryDelayType int

const (
// RetryDelayType_Backoff means retry with exponential backoff (with random jitter).
RetryDelayType_Backoff RetryDelayType = iota

// RetryDelayType_Fixed means retry at a fixed interval (with random jitter).
RetryDelayType_Fixed
)

type clusterWatcher struct {
ctx context.Context
handlers *handlerList
managers *managerSet
options manager.Options
managerOptions manager.Options
watchNamespaces []string
retryOptions RetryOptions
}

var _ multicluster.Interface = &clusterWatcher{}

// NewClusterWatcher returns a *clusterWatcher, which watches for changes to kubeconfig secrets
// (which contain kubeconfigs for remote clusters).
// When ctx is cancelled, all cluster managers started by the clusterWatcher are stopped.
// Provided manager.Options are applied to all managers started by the clusterWatcher.
// If watchNamespaces is not empty, only secrets in the given namespaces will be watched. If empty, secrets in
// - When ctx is cancelled, all cluster managers started by the clusterWatcher are stopped.
// - Provided manager.Options are applied to all managers started by the clusterWatcher.
// - RetryOptions specify how to retry manager creation if it fails. Any fields not explicitly provided
// in the retry options will take on the default values.
// - If watchNamespaces is not empty, only secrets in the given namespaces will be watched. If empty, secrets in
// all namespaces will be watched.
func NewClusterWatcher(ctx context.Context, options manager.Options, watchNamespaces []string) *clusterWatcher {
func NewClusterWatcher(ctx context.Context,
managerOptions manager.Options,
retryOptions RetryOptions,
watchNamespaces []string) *clusterWatcher {
return &clusterWatcher{
ctx: ctx,
handlers: newHandlerList(),
managers: newManagerSet(),
options: options,
managerOptions: managerOptions,
retryOptions: retryOptions,
watchNamespaces: watchNamespaces,
}
}
Expand Down Expand Up @@ -92,15 +134,10 @@ func (s *clusterWatcher) ListClusters() []string {
}

func (c *clusterWatcher) startManager(clusterName string, restCfg *rest.Config) {
retryOptions := c.retryOptionsWithDefaults()
go func() { // this must be async because mgr.Start(ctx) is blocking
retryOptions := []retry.Option{
retry.Delay(time.Second),
retry.Attempts(12),
retry.DelayType(retry.BackOffDelay),
}

retry.Do(func() error {
mgr, err := manager.New(restCfg, c.optionsWithDefaults())
mgr, err := manager.New(restCfg, c.managerOptionsWithDefaults())
if err != nil {
contextutils.LoggerFrom(c.ctx).Errorf("Manager creation failed for cluster %v: %v", clusterName, err)
return err
Expand Down Expand Up @@ -132,11 +169,69 @@ func (c *clusterWatcher) removeCluster(clusterName string) {
c.handlers.RemoveCluster(clusterName)
}

func (c *clusterWatcher) optionsWithDefaults() manager.Options {
options := c.options
options.HealthProbeBindAddress = "0"
options.MetricsBindAddress = "0"
return options
func (c *clusterWatcher) managerOptionsWithDefaults() manager.Options {
managerOptions := c.managerOptions
managerOptions.HealthProbeBindAddress = "0"
managerOptions.MetricsBindAddress = "0"
return managerOptions
}

func (c *clusterWatcher) retryOptionsWithDefaults() []retry.Option {
opt := c.retryOptions

// convert the delay type (defaulting to backoff)
var delayType retry.DelayTypeFunc
if opt.DelayType == RetryDelayType_Fixed {
delayType = retry.FixedDelay
} else {
delayType = retry.BackOffDelay
}

// set the duration values to their specified defaults if not set
var delay time.Duration
if opt.Delay != nil {
delay = *opt.Delay
} else {
delay = time.Second
}

var maxDelay time.Duration
if opt.MaxDelay != nil {
maxDelay = *opt.MaxDelay
} else {
maxDelay = 0
}

var maxJitter time.Duration
if opt.MaxJitter != nil {
maxJitter = *opt.MaxJitter
} else {
maxJitter = delay
}

var attempts uint
if opt.Attempts != nil {
attempts = *opt.Attempts
} else {
attempts = 0
}

// construct the retry options with the above values
retryOptions := []retry.Option{
retry.Delay(delay),
retry.MaxDelay(maxDelay),
retry.Attempts(attempts),
}
if maxJitter > 0 {
// add a random delay with max jitter to the specified delay type
retryOptions = append(retryOptions,
retry.DelayType(retry.CombineDelay(delayType, retry.RandomDelay)),
retry.MaxJitter(maxJitter))
} else {
// if maxJitter was explicitly set to 0, don't add randomness or jitter
retryOptions = append(retryOptions, retry.DelayType(delayType))
}
return retryOptions
}

type asyncManager struct {
Expand Down

0 comments on commit 4013ce5

Please sign in to comment.