Skip to content

Commit 3ad8829

Browse files
authored
Merge pull request #633 from cybozu-go/retry-drain
Retry eviction of pod
2 parents e639180 + 6e9fd2a commit 3ad8829

File tree

13 files changed

+101
-21
lines changed

13 files changed

+101
-21
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
88
.glide/
9+
vendor/
910

1011
# Editors
1112
*~

cluster.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,8 @@ type Reboot struct {
277277
CommandTimeoutSeconds *int `json:"command_timeout_seconds,omitempty"`
278278
CommandRetries *int `json:"command_retries"`
279279
CommandInterval *int `json:"command_interval"`
280+
EvictRetries *int `json:"evict_retries"`
281+
EvictInterval *int `json:"evict_interval"`
280282
ProtectedNamespaces *metav1.LabelSelector `json:"protected_namespaces,omitempty"`
281283
}
282284

@@ -495,6 +497,12 @@ func validateReboot(reboot Reboot) error {
495497
if reboot.CommandInterval != nil && *reboot.CommandInterval < 0 {
496498
return errors.New("command_interval must not be negative")
497499
}
500+
if reboot.EvictRetries != nil && *reboot.EvictRetries < 0 {
501+
return errors.New("evict_retries must not be negative")
502+
}
503+
if reboot.EvictInterval != nil && *reboot.EvictInterval < 0 {
504+
return errors.New("evict_interval must not be negative")
505+
}
498506
if reboot.MaxConcurrentReboots != nil && *reboot.MaxConcurrentReboots <= 0 {
499507
return errors.New("max_concurrent_reboots must be positive")
500508
}

cluster_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,18 @@ func testClusterYAML(t *testing.T) {
110110
if *c.Reboot.CommandInterval != 30 {
111111
t.Error(`*c.Reboot.CommandInterval != 30`)
112112
}
113+
if c.Reboot.EvictRetries == nil {
114+
t.Fatal(`c.Reboot.EvictRetries == nil`)
115+
}
116+
if *c.Reboot.EvictRetries != 10 {
117+
t.Error(`*c.Reboot.EvictRetries != 10`)
118+
}
119+
if c.Reboot.EvictInterval == nil {
120+
t.Fatal(`c.Reboot.EvictInterval == nil`)
121+
}
122+
if *c.Reboot.EvictInterval != 3 {
123+
t.Error(`*c.Reboot.EvictInterval != 3`)
124+
}
113125
if c.Reboot.ProtectedNamespaces == nil {
114126
t.Fatal(`c.Reboot.ProtectedNamespaces == nil`)
115127
}

docs/cluster.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,15 @@ Reboot
6868
------
6969

7070
| Name | Required | Type | Description |
71-
| -------------------------- | -------- | -------------------------------- | ----------------------------------------------------------------------- |
71+
|----------------------------| -------- | -------------------------------- |-------------------------------------------------------------------------|
7272
| `reboot_command` | true | array | A command to reboot. List of strings. |
7373
| `boot_check_command` | true | array | A command to check nodes booted. List of strings. |
7474
| `eviction_timeout_seconds` | false | *int | Deadline for eviction. Must be positive. Default: 600 (10 minutes). |
7575
| `command_timeout_seconds` | false | *int | Deadline for rebooting. Zero means infinity. Default: wait indefinitely |
7676
| `command_retries` | false | *int | Number of reboot retries, not including initial attempt. Default: 0 |
7777
| `command_interval` | false | *int | Interval of time between reboot retries in seconds. Default: 0 |
78+
| `evict_retries` | false | *int | Number of eviction retries, not including initial attempt. Default: 0 |
79+
| `evict_interval` | false | *int | Interval of time between eviction retries in seconds. Default: 0 |
7880
| `max_concurrent_reboots` | false | *int | Maximum number of nodes to be rebooted concurrently. Default: 1 |
7981
| `protected_namespaces` | false | [`LabelSelector`][LabelSelector] | A label selector to protect namespaces. |
8082

@@ -89,6 +91,7 @@ If the node is not booted yet, this command should output `false` to stdout and
8991
If `command_timeout_seconds` is specified, the check command should return within `command_timeout_seconds` seconds, or it is considered failed.
9092

9193
CKE tries to delete Pods in the `protected_namespaces` gracefully with the Kubernetes eviction API.
94+
If the eviction API has failed, CKE retries it for `evict_retries` times with `evict_interval`-second interval.
9295
If any of the Pods cannot be deleted, it aborts the operation.
9396

9497
The Pods in the non-protected namespaces are also tried to be deleted gracefully with the Kubernetes eviction API, but they would be simply deleted if eviction is denied.

mtest/run_test.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,11 @@ func connectEtcd() (*clientv3.Client, error) {
341341
}
342342

343343
func getClusterStatus(cluster *cke.Cluster) (*cke.ClusterStatus, []cke.ResourceDefinition, error) {
344-
controller := server.NewController(nil, 0, time.Hour, time.Second*2, nil, 10)
344+
controller := server.NewController(nil, nil, &server.Config{
345+
Interval: 0,
346+
CertsGCInterval: time.Hour,
347+
MaxConcurrentUpdates: 10,
348+
})
345349

346350
etcd, err := connectEtcd()
347351
if err != nil {

op/reboot.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ type rebootDrainStartCommand struct {
4242
entries []*cke.RebootQueueEntry
4343
protectedNamespaces *metav1.LabelSelector
4444
apiserver *cke.Node
45+
evictAttempts int
46+
evictInterval time.Duration
4547

4648
notifyFailedNode func(string)
4749
}
@@ -77,11 +79,22 @@ func (o *rebootDrainStartOp) NextCommand() cke.Commander {
7779
}
7880
o.finished = true
7981

82+
attempts := 1
83+
if o.config.EvictRetries != nil {
84+
attempts = *o.config.EvictRetries + 1
85+
}
86+
interval := 0 * time.Second
87+
if o.config.EvictInterval != nil {
88+
interval = time.Second * time.Duration(*o.config.EvictInterval)
89+
}
90+
8091
return rebootDrainStartCommand{
8192
entries: o.entries,
8293
protectedNamespaces: o.config.ProtectedNamespaces,
8394
apiserver: o.apiserver,
8495
notifyFailedNode: o.notifyFailedNode,
96+
evictAttempts: attempts,
97+
evictInterval: interval,
8598
}
8699
}
87100

@@ -156,7 +169,7 @@ func (c rebootDrainStartCommand) Run(ctx context.Context, inf cke.Infrastructure
156169

157170
// next, evict pods on each node
158171
for _, entry := range evictNodes {
159-
err := evictOrDeleteNodePod(ctx, cs, entry.Node, protected)
172+
err := evictOrDeleteNodePod(ctx, cs, entry.Node, protected, c.evictAttempts, c.evictInterval)
160173
if err != nil {
161174
c.notifyFailedNode(entry.Node)
162175
err = drainBackOff(ctx, inf, entry, err)

op/reboot_decide.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,21 @@ func checkJobPodNotExist(ctx context.Context, cs *kubernetes.Clientset, node str
7474
// evictOrDeleteNodePod evicts or delete Pods on the specified Node.
7575
// It first tries eviction. If the eviction failed and the Pod's namespace is not protected, it deletes the Pod.
7676
// If a running Job Pod exists, this function returns an error.
77-
func evictOrDeleteNodePod(ctx context.Context, cs *kubernetes.Clientset, node string, protected map[string]bool) error {
77+
func evictOrDeleteNodePod(ctx context.Context, cs *kubernetes.Clientset, node string, protected map[string]bool, attempts int, interval time.Duration) error {
7878
return enumeratePods(ctx, cs, node, func(pod *corev1.Pod) error {
79+
evictCount := 0
80+
EVICT:
81+
log.Info("start evicting pod", map[string]interface{}{
82+
"namespace": pod.Namespace,
83+
"name": pod.Name,
84+
})
7985
err := cs.CoreV1().Pods(pod.Namespace).EvictV1(ctx, &policyv1.Eviction{
8086
ObjectMeta: metav1.ObjectMeta{Name: pod.Name, Namespace: pod.Namespace},
8187
})
88+
evictCount++
8289
switch {
8390
case err == nil:
84-
log.Info("start evicting pod", map[string]interface{}{
91+
log.Info("evicted pod", map[string]interface{}{
8592
"namespace": pod.Namespace,
8693
"name": pod.Name,
8794
})
@@ -105,6 +112,18 @@ func evictOrDeleteNodePod(ctx context.Context, cs *kubernetes.Clientset, node st
105112
"name": pod.Name,
106113
})
107114
default:
115+
if evictCount < attempts {
116+
select {
117+
case <-ctx.Done():
118+
return ctx.Err()
119+
case <-time.After(interval):
120+
}
121+
log.Info("retry eviction of pod", map[string]interface{}{
122+
"namespace": pod.Namespace,
123+
"name": pod.Name,
124+
})
125+
goto EVICT
126+
}
108127
return fmt.Errorf("failed to evict pod %s/%s due to PDB: %w", pod.Namespace, pod.Name, err)
109128
}
110129
return nil

pkg/cke/main.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,11 @@ func main() {
116116
}
117117

118118
// Controller
119-
controller := server.NewController(session, interval, gcInterval, timeout, addon, maxConcurrentUpdates)
119+
controller := server.NewController(session, addon, &server.Config{
120+
Interval: interval,
121+
CertsGCInterval: gcInterval,
122+
MaxConcurrentUpdates: maxConcurrentUpdates,
123+
})
120124
well.Go(controller.Run)
121125

122126
// API server

server/config.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package server
2+
3+
import "time"
4+
5+
// Config is the configuration for cke-server.
6+
type Config struct {
7+
// Interval is the interval of the main loop.
8+
Interval time.Duration
9+
// CertsGCInterval is the interval of the certificate garbage collection.
10+
CertsGCInterval time.Duration
11+
// MaxConcurrentUpdates is the maximum number of concurrent updates.
12+
MaxConcurrentUpdates int
13+
}

server/control.go

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,14 @@ var (
2323

2424
// Controller manage operations
2525
type Controller struct {
26-
session *concurrency.Session
27-
interval time.Duration
28-
certsGCInterval time.Duration
29-
timeout time.Duration
30-
addon Integrator
31-
maxConcurrentUpdates int
26+
session *concurrency.Session
27+
addon Integrator
28+
config *Config
3229
}
3330

3431
// NewController construct controller instance
35-
func NewController(s *concurrency.Session, interval, gcInterval, timeout time.Duration, addon Integrator, maxConcurrentUpdates int) Controller {
36-
return Controller{s, interval, gcInterval, timeout, addon, maxConcurrentUpdates}
32+
func NewController(s *concurrency.Session, addon Integrator, config *Config) Controller {
33+
return Controller{s, addon, config}
3734
}
3835

3936
// Run execute procedures with leader elections
@@ -149,7 +146,7 @@ func (c Controller) runLoop(ctx context.Context, leaderKey string) error {
149146
case <-ctx.Done():
150147
return ctx.Err()
151148
}
152-
ticker := time.NewTicker(c.interval)
149+
ticker := time.NewTicker(c.config.Interval)
153150
defer ticker.Stop()
154151
for {
155152
select {
@@ -170,7 +167,7 @@ func (c Controller) runLoop(ctx context.Context, leaderKey string) error {
170167
return ctx.Err()
171168
default:
172169
}
173-
ticker := time.NewTicker(c.certsGCInterval)
170+
ticker := time.NewTicker(c.config.CertsGCInterval)
174171
defer ticker.Stop()
175172
for {
176173
select {
@@ -347,7 +344,7 @@ func (c Controller) runOnce(ctx context.Context, leaderKey string, tick <-chan t
347344
DrainCompleted: drainCompleted,
348345
DrainTimedout: drainTimedout,
349346
RebootDequeued: rebootDequeued,
350-
}, c.maxConcurrentUpdates)
347+
}, c.config)
351348

352349
st := &cke.ServerStatus{
353350
Phase: phase,

0 commit comments

Comments
 (0)