Skip to content

Commit b411d37

Browse files
committed
fix: RunnerDeployment should clean up old RunnerReplicaSets ASAP
Since the initial implementation of RunnerDeployment and until this change, any update to a runner deployment has been leaving old runner replicasets until the next resync interval. This fixes that, by continusouly retrying the reconcilation 10 seconds later to see if there are any old runner replicasets that can be removed. In addition to that, the cleanup of old runner replicasets has been improved to be deferred until all the runners of the newest replica set to be available. This gives you hopefully zero or at less downtime updates of runner deployments. Fixes #24
1 parent a19cd37 commit b411d37

File tree

1 file changed

+45
-12
lines changed

1 file changed

+45
-12
lines changed

controllers/runnerdeployment_controller.go

Lines changed: 45 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ import (
2020
"context"
2121
"fmt"
2222
"hash/fnv"
23+
"k8s.io/apimachinery/pkg/types"
2324
"sort"
25+
"time"
2426

2527
"github.com/davecgh/go-spew/spew"
2628
"github.com/go-logr/logr"
@@ -58,7 +60,7 @@ type RunnerDeploymentReconciler struct {
5860

5961
func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) {
6062
ctx := context.Background()
61-
log := r.Log.WithValues("runnerreplicaset", req.NamespacedName)
63+
log := r.Log.WithValues("runnerdeployment", req.NamespacedName)
6264

6365
var rd v1alpha1.RunnerDeployment
6466
if err := r.Get(ctx, req.NamespacedName, &rd); err != nil {
@@ -130,38 +132,69 @@ func (r *RunnerDeploymentReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
130132
return ctrl.Result{}, err
131133
}
132134

133-
return ctrl.Result{}, nil
135+
// We requeue in order to clean up old runner replica sets later.
136+
// Otherwise, they aren't cleaned up until the next re-sync interval.
137+
return ctrl.Result{RequeueAfter: 5 * time.Second}, nil
134138
}
135139

140+
const defaultReplicas = 1
141+
142+
currentDesiredReplicas := getIntOrDefault(newestSet.Spec.Replicas, defaultReplicas)
143+
newDesiredReplicas := getIntOrDefault(desiredRS.Spec.Replicas, defaultReplicas)
144+
136145
// Please add more conditions that we can in-place update the newest runnerreplicaset without disruption
137-
if newestSet.Spec.Replicas != desiredRS.Spec.Replicas {
138-
newestSet.Spec.Replicas = desiredRS.Spec.Replicas
146+
if currentDesiredReplicas != newDesiredReplicas {
147+
newestSet.Spec.Replicas = &newDesiredReplicas
139148

140149
if err := r.Client.Update(ctx, newestSet); err != nil {
141150
log.Error(err, "Failed to update runnerreplicaset resource")
142151

143152
return ctrl.Result{}, err
144153
}
145154

146-
return ctrl.Result{}, nil
155+
return ctrl.Result{}, err
147156
}
148157

149-
for i := range oldSets {
150-
rs := oldSets[i]
158+
// Do we old runner replica sets that should eventually deleted?
159+
if len(oldSets) > 0 {
160+
readyReplicas := newestSet.Status.ReadyReplicas
151161

152-
if err := r.Client.Delete(ctx, &rs); err != nil {
153-
log.Error(err, "Failed to delete runner resource")
162+
if readyReplicas < currentDesiredReplicas {
163+
log.WithValues("runnerreplicaset", types.NamespacedName{
164+
Namespace: newestSet.Namespace,
165+
Name: newestSet.Name,
166+
}).
167+
Info("Waiting until the newest runner replica set to be 100% available")
154168

155-
return ctrl.Result{}, err
169+
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
156170
}
157171

158-
r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetDeleted", fmt.Sprintf("Deleted runnerreplicaset '%s'", rs.Name))
159-
log.Info("Deleted runnerreplicaset", "runnerdeployment", rd.ObjectMeta.Name, "runnerreplicaset", rs.Name)
172+
for i := range oldSets {
173+
rs := oldSets[i]
174+
175+
if err := r.Client.Delete(ctx, &rs); err != nil {
176+
log.Error(err, "Failed to delete runner resource")
177+
178+
return ctrl.Result{}, err
179+
}
180+
181+
r.Recorder.Event(&rd, corev1.EventTypeNormal, "RunnerReplicaSetDeleted", fmt.Sprintf("Deleted runnerreplicaset '%s'", rs.Name))
182+
183+
log.Info("Deleted runnerreplicaset", "runnerdeployment", rd.ObjectMeta.Name, "runnerreplicaset", rs.Name)
184+
}
160185
}
161186

162187
return ctrl.Result{}, nil
163188
}
164189

190+
func getIntOrDefault(p *int, d int) int {
191+
if p == nil {
192+
return d
193+
}
194+
195+
return *p
196+
}
197+
165198
func getTemplateHash(rs *v1alpha1.RunnerReplicaSet) (string, bool) {
166199
hash, ok := rs.Labels[LabelKeyRunnerTemplateHash]
167200

0 commit comments

Comments
 (0)