Merge pull request #11 from weaveworks/extra-ready-logic
feat: add logic for making sure cluster is extra ready
foot authored May 9, 2022
2 parents fb9caac + 7d9497f commit 1b179fa
Showing 9 changed files with 437 additions and 46 deletions.
18 changes: 18 additions & 0 deletions api/v1alpha1/gitopscluster_types.go
@@ -17,11 +17,15 @@ limitations under the License.
package v1alpha1

import (
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/fluxcd/pkg/apis/meta"
)

const defaultWaitDuration = time.Second * 60

// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

@@ -33,6 +37,11 @@ type GitopsClusterSpec struct {
// CAPIClusterRef specifies the CAPI Cluster.
// +optional
CAPIClusterRef *meta.LocalObjectReference `json:"capiClusterRef,omitempty"`
// When checking for readiness, this is the time to wait before
// checking again.
//+kubebuilder:default:="60s"
//+optional
ClusterReadinessBackoff *metav1.Duration `json:"clusterReadinessBackoff,omitempty"`
}

// GitopsClusterStatus defines the observed state of GitopsCluster
@@ -67,6 +76,15 @@ type GitopsCluster struct {
Status GitopsClusterStatus `json:"status,omitempty"`
}

// ClusterReadinessRequeue returns the configured ClusterReadinessBackoff or a default
// value if not configured.
func (c GitopsCluster) ClusterReadinessRequeue() time.Duration {
if v := c.Spec.ClusterReadinessBackoff; v != nil {
return v.Duration
}
return defaultWaitDuration
}

// +kubebuilder:object:root=true

// GitopsClusterList contains a list of GitopsCluster
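For context (not part of this diff): ClusterReadinessRequeue is the kind of value a reconciler would feed into controller-runtime's RequeueAfter. A minimal sketch of that pattern, with an invented helper name:

package controllers

import (
	ctrl "sigs.k8s.io/controller-runtime"

	gitopsv1alpha1 "github.com/weaveworks/cluster-controller/api/v1alpha1"
)

// resultFor is a hypothetical helper: when the cluster is not yet ready,
// ask controller-runtime to reconcile again after the configured backoff
// (60s unless spec.clusterReadinessBackoff overrides it).
func resultFor(cluster *gitopsv1alpha1.GitopsCluster, ready bool) ctrl.Result {
	if !ready {
		return ctrl.Result{RequeueAfter: cluster.ClusterReadinessRequeue()}
	}
	return ctrl.Result{}
}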
35 changes: 35 additions & 0 deletions api/v1alpha1/gitopscluster_types_test.go
@@ -0,0 +1,35 @@
/*
Copyright 2022.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package v1alpha1

import (
"testing"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func TestClusterReadinessRequeue(t *testing.T) {
c := GitopsCluster{}

if v := c.ClusterReadinessRequeue(); v != defaultWaitDuration {
t.Fatalf("ClusterReadinessRequeue() got %v, want %v", v, defaultWaitDuration)
}

want := time.Second * 20
c.Spec.ClusterReadinessBackoff = &metav1.Duration{Duration: want}
if v := c.ClusterReadinessRequeue(); v != want {
t.Fatalf("ClusterReadinessRequeue() got %v, want %v", v, want)
}
}
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default.

4 changes: 4 additions & 0 deletions config/crd/bases/gitops.weave.works_gitopsclusters.yaml
@@ -54,6 +54,10 @@ spec:
required:
- name
type: object
clusterReadinessBackoff:
description: When checking for readiness, this is the time to wait
before checking again.
type: string
secretRef:
description: SecretRef specifies the Secret containing the kubeconfig
for a cluster.
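For illustration only, a GitopsCluster manifest using the new field might look like the following (the names are placeholders, not taken from this repository):

apiVersion: gitops.weave.works/v1alpha1
kind: GitopsCluster
metadata:
  name: dev-cluster
spec:
  capiClusterRef:
    name: dev-cluster
  clusterReadinessBackoff: 30s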
70 changes: 70 additions & 0 deletions controllers/cluster.go
@@ -0,0 +1,70 @@
package controllers

import (
"context"
"fmt"

"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
)

const (
deprecatedControlPlaneLabel = "node-role.kubernetes.io/master"
controlPlaneLabel = "node-role.kubernetes.io/control-plane"
)

// IsControlPlaneReady takes a client connected to a cluster and reports whether or
// not the control-plane for the cluster is "ready".
func IsControlPlaneReady(ctx context.Context, cl client.Client) (bool, error) {
logger := log.FromContext(ctx)
readiness := []bool{}
readyNodes, err := listReadyNodesWithLabel(ctx, logger, cl, controlPlaneLabel)
if err != nil {
return false, err
}
readiness = append(readiness, readyNodes...)

// No nodes carry the new label; fall back to the deprecated
// "master" label used by clusters created before its rename.
if len(readyNodes) == 0 {
readyNodes, err := listReadyNodesWithLabel(ctx, logger, cl, deprecatedControlPlaneLabel)
if err != nil {
return false, err
}
readiness = append(readiness, readyNodes...)
}

isReady := func(bools []bool) bool {
for _, v := range bools {
if !v {
return false
}
}
return true
}
logger.Info("readiness", "len", len(readiness), "is-ready", isReady(readiness))

// If we have no statuses, then we really don't know if we're ready or not.
return (len(readiness) > 0 && isReady(readiness)), nil
}

func listReadyNodesWithLabel(ctx context.Context, logger logr.Logger, cl client.Client, label string) ([]bool, error) {
nodes := &corev1.NodeList{}
// https://github.com/kubernetes/enhancements/blob/master/keps/sig-cluster-lifecycle/kubeadm/2067-rename-master-label-taint/README.md#design-details
err := cl.List(ctx, nodes, client.HasLabels([]string{label}))
if err != nil {
return nil, fmt.Errorf("failed to query cluster node list: %w", err)
}
logger.Info("listed nodes with control plane label", "label", label, "count", len(nodes.Items))

readiness := []bool{}
for _, node := range nodes.Items {
for _, c := range node.Status.Conditions {
switch c.Type {
case corev1.NodeReady:
readiness = append(readiness, c.Status == corev1.ConditionTrue)
}
}
}
return readiness, nil
}
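A sketch (not part of this commit) of how IsControlPlaneReady could be driven end to end. It assumes you already have the raw kubeconfig bytes for the target cluster, for example read from the Secret named by secretRef:

package controllers

import (
	"context"

	"k8s.io/client-go/tools/clientcmd"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// controlPlaneReadyFromKubeconfig (hypothetical) builds a client for the
// target cluster from raw kubeconfig bytes and probes its control-plane nodes.
func controlPlaneReadyFromKubeconfig(ctx context.Context, kubeconfig []byte) (bool, error) {
	restCfg, err := clientcmd.RESTConfigFromKubeConfig(kubeconfig)
	if err != nil {
		return false, err
	}
	cl, err := client.New(restCfg, client.Options{})
	if err != nil {
		return false, err
	}
	return IsControlPlaneReady(ctx, cl)
}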
96 changes: 96 additions & 0 deletions controllers/cluster_test.go
@@ -0,0 +1,96 @@
package controllers_test

import (
"context"
"testing"

"github.com/weaveworks/cluster-controller/controllers"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

func TestIsControlPlaneReady(t *testing.T) {
controlPlaneLabels := map[string]string{
"node-role.kubernetes.io/master": "",
"node-role.kubernetes.io/control-plane": "",
"beta.kubernetes.io/arch": "amd64",
"beta.kubernetes.io/os": "linux",
"kubernetes.io/arch": "amd64",
"kubernetes.io/hostname": "kind-control-plane",
"kubernetes.io/os": "linux",
}

nodeTests := []struct {
name string
labels map[string]string
conditions []corev1.NodeCondition
wantReady bool
}{
{
name: "control plane not ready",
labels: controlPlaneLabels,
conditions: makeConditions(
corev1.NodeCondition{Type: corev1.NodeReady, Status: corev1.ConditionFalse, LastHeartbeatTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "KubeletNotReady", Message: "container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: cni plugin not initialized"},
),
},
{
name: "control plane ready",
labels: controlPlaneLabels,
conditions: makeConditions(
corev1.NodeCondition{Type: "NetworkUnavailable", Status: "False", LastHeartbeatTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "CalicoIsUp", Message: "Calico is running on this node"},
corev1.NodeCondition{Type: "Ready", Status: "True", LastHeartbeatTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "KubeletReady", Message: "kubelet is posting ready status"},
),
wantReady: true,
},
{
name: "no control plane",
labels: map[string]string{},
conditions: makeConditions(
corev1.NodeCondition{Type: corev1.NodeReady, Status: corev1.ConditionFalse, LastHeartbeatTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "KubeletNotReady", Message: "container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: cni plugin not initialized"},
),
},
}
for _, tt := range nodeTests {
t.Run(tt.name, func(t *testing.T) {
cl := makeClient(makeNode(tt.labels, tt.conditions...))

ready, err := controllers.IsControlPlaneReady(context.TODO(), cl)
if err != nil {
t.Fatal(err)
}

if ready != tt.wantReady {
t.Fatalf("IsControlPlaneReady() got %v, want %v", ready, tt.wantReady)
}
})
}
}

func makeNode(labels map[string]string, conds ...corev1.NodeCondition) *corev1.Node {
return &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "test-control-plane",
Labels: labels,
},
Spec: corev1.NodeSpec{},
Status: corev1.NodeStatus{
Conditions: conds,
},
}
}

func makeClient(objs ...runtime.Object) client.Client {
return fake.NewClientBuilder().WithRuntimeObjects(objs...).Build()
}

func makeConditions(conds ...corev1.NodeCondition) []corev1.NodeCondition {
base := []corev1.NodeCondition{
corev1.NodeCondition{Type: corev1.NodeMemoryPressure, Status: corev1.ConditionFalse, LastHeartbeatTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "KubeletHasSufficientMemory", Message: "kubelet has sufficient memory available"},
corev1.NodeCondition{Type: corev1.NodeDiskPressure, Status: corev1.ConditionFalse, LastHeartbeatTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "KubeletHasNoDiskPressure", Message: "kubelet has no disk pressure"},
corev1.NodeCondition{Type: corev1.NodePIDPressure, Status: corev1.ConditionFalse, LastHeartbeatTime: metav1.Now(), LastTransitionTime: metav1.Now(), Reason: "KubeletHasSufficientPID", Message: "kubelet has sufficient PID available"},
}
return append(conds, base...)
}
(Diffs for the remaining 3 changed files are not rendered here.)
