Skip to content

Commit 29c307c

Browse files
authored
Add E2E failover test. (#268)
Add E2E failover test; modify some function names.

Signed-off-by: mabotao <[email protected]>
1 parent fe2efa6 commit 29c307c

File tree

1 file changed

+224
-0
lines changed

1 file changed

+224
-0
lines changed

test/e2e/failover_test.go

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
package e2e
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
"github.com/onsi/ginkgo"
8+
"github.com/onsi/gomega"
9+
appsv1 "k8s.io/api/apps/v1"
10+
"k8s.io/apimachinery/pkg/api/errors"
11+
"k8s.io/apimachinery/pkg/api/meta"
12+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
"k8s.io/apimachinery/pkg/util/rand"
14+
"k8s.io/apimachinery/pkg/util/wait"
15+
"k8s.io/klog/v2"
16+
"sigs.k8s.io/controller-runtime/pkg/client"
17+
18+
clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
19+
workv1alpha1 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha1"
20+
"github.com/karmada-io/karmada/pkg/util"
21+
"github.com/karmada-io/karmada/pkg/util/names"
22+
"github.com/karmada-io/karmada/test/helper"
23+
)
24+
25+
// failover testing is used to test the rescheduling situation when some initially scheduled clusters fail
26+
var _ = ginkgo.Describe("failover testing", func() {
27+
ginkgo.Context("Deployment propagation testing", func() {
28+
var disabledClusters []*clusterv1alpha1.Cluster
29+
policyNamespace := testNamespace
30+
policyName := deploymentNamePrefix + rand.String(RandomStrLength)
31+
deploymentNamespace := testNamespace
32+
deploymentName := policyName
33+
deployment := helper.NewDeployment(deploymentNamespace, deploymentName)
34+
maxGroups := 1
35+
minGroups := 1
36+
numOfFailedClusters := 1
37+
38+
// targetClusterNames is a slice of cluster names in resource binding
39+
var targetClusterNames []string
40+
41+
// set MaxGroups=MinGroups=1, label is location=CHN.
42+
policy := helper.NewPolicyWithGroupsDeployment(policyNamespace, policyName, deployment, maxGroups, minGroups, clusterLabels)
43+
44+
ginkgo.BeforeEach(func() {
45+
ginkgo.By(fmt.Sprintf("creating policy(%s/%s)", policyNamespace, policyName), func() {
46+
_, err := karmadaClient.PolicyV1alpha1().PropagationPolicies(policyNamespace).Create(context.TODO(), policy, metav1.CreateOptions{})
47+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
48+
})
49+
50+
})
51+
52+
ginkgo.AfterEach(func() {
53+
ginkgo.By(fmt.Sprintf("removing policy(%s/%s)", policyNamespace, policyName), func() {
54+
err := karmadaClient.PolicyV1alpha1().PropagationPolicies(policyNamespace).Delete(context.TODO(), policyName, metav1.DeleteOptions{})
55+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
56+
})
57+
})
58+
59+
ginkgo.It("deployment failover testing", func() {
60+
ginkgo.By(fmt.Sprintf("creating deployment(%s/%s)", deploymentNamespace, deploymentName), func() {
61+
fmt.Printf("MaxGroups= %v, MinGroups= %v\n", maxGroups, minGroups)
62+
_, err := kubeClient.AppsV1().Deployments(testNamespace).Create(context.TODO(), deployment, metav1.CreateOptions{})
63+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
64+
65+
fmt.Printf("View the results of the initial scheduling")
66+
targetClusterNames, _ = getTargetClusterNames(deployment)
67+
for _, clusterName := range targetClusterNames {
68+
fmt.Printf("%s is the target cluster\n", clusterName)
69+
}
70+
})
71+
72+
ginkgo.By("set one cluster condition status to false", func() {
73+
temp := numOfFailedClusters
74+
for _, targetClusterName := range targetClusterNames {
75+
if temp > 0 {
76+
err := disableCluster(controlPlaneClient, targetClusterName)
77+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
78+
79+
fmt.Printf("cluster %s is false\n", targetClusterName)
80+
currentCluster, _ := util.GetCluster(controlPlaneClient, targetClusterName)
81+
82+
// wait for the current cluster status changing to false
83+
_ = wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
84+
if !meta.IsStatusConditionPresentAndEqual(currentCluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady, metav1.ConditionFalse) {
85+
fmt.Printf("current cluster %s is false\n", targetClusterName)
86+
disabledClusters = append(disabledClusters, currentCluster)
87+
return true, nil
88+
}
89+
return false, nil
90+
})
91+
temp--
92+
}
93+
}
94+
})
95+
96+
ginkgo.By("check whether deployment of failed cluster is rescheduled to other available cluster", func() {
97+
totalNum := 0
98+
99+
// Since labels are added to all clusters, clusters are used here instead of written as clusters which have label.
100+
targetClusterNames, err := getTargetClusterNames(deployment)
101+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
102+
103+
for _, targetClusterName := range targetClusterNames {
104+
clusterClient := getClusterClient(targetClusterName)
105+
gomega.Expect(clusterClient).ShouldNot(gomega.BeNil())
106+
107+
klog.Infof("Check whether deployment(%s/%s) is present on cluster(%s)", deploymentNamespace, deploymentName, targetClusterName)
108+
err := wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
109+
_, err = clusterClient.AppsV1().Deployments(deploymentNamespace).Get(context.TODO(), deploymentName, metav1.GetOptions{})
110+
if err != nil {
111+
if errors.IsNotFound(err) {
112+
return false, nil
113+
}
114+
return false, err
115+
}
116+
fmt.Printf("Deployment(%s/%s) is present on cluster(%s).\n", deploymentNamespace, deploymentName, targetClusterName)
117+
return true, nil
118+
})
119+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
120+
totalNum++
121+
gomega.Expect(totalNum == minGroups).Should(gomega.BeTrue())
122+
}
123+
fmt.Printf("reschedule in %d target cluster\n", totalNum)
124+
})
125+
126+
ginkgo.By("recover not ready cluster", func() {
127+
for _, disabledCluster := range disabledClusters {
128+
fmt.Printf("cluster %s is waiting for recovering\n", disabledCluster.Name)
129+
originalAPIEndpoint := getClusterAPIEndpoint(disabledCluster.Name)
130+
131+
err := recoverCluster(controlPlaneClient, disabledCluster.Name, originalAPIEndpoint)
132+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
133+
}
134+
})
135+
136+
ginkgo.By(fmt.Sprintf("removing deployment(%s/%s)", deploymentNamespace, deploymentName), func() {
137+
err := kubeClient.AppsV1().Deployments(testNamespace).Delete(context.TODO(), deploymentName, metav1.DeleteOptions{})
138+
gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
139+
})
140+
})
141+
})
142+
})
143+
144+
// disableCluster makes the named cluster appear unhealthy by pointing its
// APIEndpoint at an unreachable address, so the cluster's Ready condition
// eventually turns false. It retries get/update until the write sticks or
// the poll times out.
// (The original comment referred to "invalidateCluster", which does not
// match the function name.)
func disableCluster(c client.Client, clusterName string) error {
	err := wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
		clusterObj := &clusterv1alpha1.Cluster{}
		if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil {
			// NOTE(review): a Get rarely returns Conflict; this branch looks
			// copied from the Update path below — confirm it is intentional.
			if errors.IsConflict(err) {
				return false, nil
			}
			return false, err
		}
		// set the APIEndpoint of matched cluster to a wrong value
		unavailableAPIEndpoint := "https://172.19.1.3:6443"
		clusterObj.Spec.APIEndpoint = unavailableAPIEndpoint
		if err := c.Update(context.TODO(), clusterObj); err != nil {
			// On a conflicting concurrent write, retry on the next poll tick.
			if errors.IsConflict(err) {
				return false, nil
			}
			return false, err
		}
		return true, nil
	})
	return err
}
167+
168+
// recoverCluster will recover API endpoint of the disable cluster
169+
func recoverCluster(c client.Client, clusterName string, originalAPIEndpoint string) error {
170+
err := wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
171+
clusterObj := &clusterv1alpha1.Cluster{}
172+
if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil {
173+
return false, err
174+
}
175+
clusterObj.Spec.APIEndpoint = originalAPIEndpoint
176+
if err := c.Update(context.TODO(), clusterObj); err != nil {
177+
if errors.IsConflict(err) {
178+
return false, nil
179+
}
180+
return false, err
181+
}
182+
fmt.Printf("recovered API endpoint is %s\n", clusterObj.Spec.APIEndpoint)
183+
return true, nil
184+
})
185+
return err
186+
}
187+
188+
// get the target cluster names from binding information
189+
func getTargetClusterNames(deployment *appsv1.Deployment) (targetClusterNames []string, err error) {
190+
bindingName := names.GenerateBindingName(deployment.Kind, deployment.Name)
191+
fmt.Printf("deploy kind is %s, name is %s\n", deployment.Kind, deployment.Name)
192+
binding := &workv1alpha1.ResourceBinding{}
193+
194+
fmt.Printf("collect the target clusters in resource binding\n")
195+
err = wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
196+
err = controlPlaneClient.Get(context.TODO(), client.ObjectKey{Namespace: deployment.Namespace, Name: bindingName}, binding)
197+
if err != nil {
198+
if errors.IsNotFound(err) {
199+
return false, nil
200+
}
201+
return false, err
202+
}
203+
return true, nil
204+
})
205+
if err != nil {
206+
return nil, err
207+
}
208+
for _, cluster := range binding.Spec.Clusters {
209+
targetClusterNames = append(targetClusterNames, cluster.Name)
210+
}
211+
fmt.Printf("target clusters in resource binding are %s\n", targetClusterNames)
212+
return targetClusterNames, nil
213+
}
214+
215+
// get the API endpoint of a specific cluster
216+
func getClusterAPIEndpoint(clusterName string) (apiEndpoint string) {
217+
for _, cluster := range clusters {
218+
if cluster.Name == clusterName {
219+
apiEndpoint = cluster.Spec.APIEndpoint
220+
fmt.Printf("original API endpoint of the cluster %s is %s\n", clusterName, apiEndpoint)
221+
}
222+
}
223+
return apiEndpoint
224+
}

0 commit comments

Comments
 (0)