package e2e

import (
	"context"
	"fmt"

	"github.com/onsi/ginkgo"
	"github.com/onsi/gomega"
	appsv1 "k8s.io/api/apps/v1"
	"k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/rand"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/klog/v2"
	"sigs.k8s.io/controller-runtime/pkg/client"

	clusterv1alpha1 "github.com/karmada-io/karmada/pkg/apis/cluster/v1alpha1"
	workv1alpha1 "github.com/karmada-io/karmada/pkg/apis/work/v1alpha1"
	"github.com/karmada-io/karmada/pkg/util"
	"github.com/karmada-io/karmada/pkg/util/names"
	"github.com/karmada-io/karmada/test/helper"
)

// failover testing verifies rescheduling when some of the initially scheduled clusters become unavailable.
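// The test flow is: create a PropagationPolicy and a Deployment, break the API
// endpoint of one scheduled cluster, wait for the deployment to be rescheduled
// to another available cluster, then restore the endpoint and clean up.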
var _ = ginkgo.Describe("failover testing", func() {
	ginkgo.Context("Deployment propagation testing", func() {
		var disabledClusters []*clusterv1alpha1.Cluster
		policyNamespace := testNamespace
		policyName := deploymentNamePrefix + rand.String(RandomStrLength)
		deploymentNamespace := testNamespace
		deploymentName := policyName
		deployment := helper.NewDeployment(deploymentNamespace, deploymentName)
		maxGroups := 1
		minGroups := 1
		numOfFailedClusters := 1
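
		// With MaxGroups == MinGroups == 1, the spread constraint asks the scheduler
		// for exactly one target cluster, so a successful failover should move the
		// whole deployment onto a different single cluster.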

		// targetClusterNames is a slice of cluster names in the resource binding
		var targetClusterNames []string

		// set MaxGroups=MinGroups=1, label is location=CHN.
		policy := helper.NewPolicyWithGroupsDeployment(policyNamespace, policyName, deployment, maxGroups, minGroups, clusterLabels)
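
		// A rough sketch of the PropagationPolicy the helper is expected to build
		// (an assumption about helper.NewPolicyWithGroupsDeployment, not its verified output):
		//
		//   spec:
		//     resourceSelectors:
		//       - apiVersion: apps/v1
		//         kind: Deployment
		//         name: <deploymentName>
		//     placement:
		//       clusterAffinity:
		//         labelSelector:
		//           matchLabels: <clusterLabels>
		//       spreadConstraints:
		//         - spreadByField: cluster
		//           maxGroups: 1
		//           minGroups: 1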

		ginkgo.BeforeEach(func() {
			ginkgo.By(fmt.Sprintf("creating policy(%s/%s)", policyNamespace, policyName), func() {
				_, err := karmadaClient.PolicyV1alpha1().PropagationPolicies(policyNamespace).Create(context.TODO(), policy, metav1.CreateOptions{})
				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
			})
		})

		ginkgo.AfterEach(func() {
			ginkgo.By(fmt.Sprintf("removing policy(%s/%s)", policyNamespace, policyName), func() {
				err := karmadaClient.PolicyV1alpha1().PropagationPolicies(policyNamespace).Delete(context.TODO(), policyName, metav1.DeleteOptions{})
				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
			})
		})

		ginkgo.It("deployment failover testing", func() {
			ginkgo.By(fmt.Sprintf("creating deployment(%s/%s)", deploymentNamespace, deploymentName), func() {
				fmt.Printf("MaxGroups=%v, MinGroups=%v\n", maxGroups, minGroups)
				_, err := kubeClient.AppsV1().Deployments(testNamespace).Create(context.TODO(), deployment, metav1.CreateOptions{})
				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())

				fmt.Printf("View the results of the initial scheduling\n")
				targetClusterNames, err = getTargetClusterNames(deployment)
				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
				for _, clusterName := range targetClusterNames {
					fmt.Printf("%s is the target cluster\n", clusterName)
				}
			})

			ginkgo.By("set one cluster condition status to false", func() {
				temp := numOfFailedClusters
				for _, targetClusterName := range targetClusterNames {
					if temp > 0 {
						err := disableCluster(controlPlaneClient, targetClusterName)
						gomega.Expect(err).ShouldNot(gomega.HaveOccurred())

						fmt.Printf("cluster %s has been disabled\n", targetClusterName)

						// wait for the Ready condition of the disabled cluster to change to false,
						// re-fetching the cluster on every poll so we see the latest status
						err = wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
							currentCluster, err := util.GetCluster(controlPlaneClient, targetClusterName)
							if err != nil {
								return false, err
							}
							if meta.IsStatusConditionPresentAndEqual(currentCluster.Status.Conditions, clusterv1alpha1.ClusterConditionReady, metav1.ConditionFalse) {
								fmt.Printf("current cluster %s is false\n", targetClusterName)
								disabledClusters = append(disabledClusters, currentCluster)
								return true, nil
							}
							return false, nil
						})
						gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
						temp--
					}
				}
			})

			ginkgo.By("check whether the deployment on the failed cluster is rescheduled to another available cluster", func() {
				totalNum := 0

				// Since the label is applied to all member clusters, the target clusters
				// are taken from the resource binding rather than filtered by label.
				targetClusterNames, err := getTargetClusterNames(deployment)
				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())

				for _, targetClusterName := range targetClusterNames {
					clusterClient := getClusterClient(targetClusterName)
					gomega.Expect(clusterClient).ShouldNot(gomega.BeNil())

					klog.Infof("Check whether deployment(%s/%s) is present on cluster(%s)", deploymentNamespace, deploymentName, targetClusterName)
					err := wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
						_, err = clusterClient.AppsV1().Deployments(deploymentNamespace).Get(context.TODO(), deploymentName, metav1.GetOptions{})
						if err != nil {
							if errors.IsNotFound(err) {
								return false, nil
							}
							return false, err
						}
						fmt.Printf("Deployment(%s/%s) is present on cluster(%s).\n", deploymentNamespace, deploymentName, targetClusterName)
						return true, nil
					})
					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
					totalNum++
				}
				gomega.Expect(totalNum).Should(gomega.Equal(minGroups))
				fmt.Printf("deployment was rescheduled to %d target cluster(s)\n", totalNum)
			})

			ginkgo.By("recover the not-ready clusters", func() {
				for _, disabledCluster := range disabledClusters {
					fmt.Printf("cluster %s is waiting for recovery\n", disabledCluster.Name)
					originalAPIEndpoint := getClusterAPIEndpoint(disabledCluster.Name)

					err := recoverCluster(controlPlaneClient, disabledCluster.Name, originalAPIEndpoint)
					gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
				}
			})

			ginkgo.By(fmt.Sprintf("removing deployment(%s/%s)", deploymentNamespace, deploymentName), func() {
				err := kubeClient.AppsV1().Deployments(testNamespace).Delete(context.TODO(), deploymentName, metav1.DeleteOptions{})
				gomega.Expect(err).ShouldNot(gomega.HaveOccurred())
			})
		})
	})
})

// disableCluster sets the API endpoint of the given cluster to an unreachable address.
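// Once the endpoint is unreachable, the cluster status controller is expected to
// mark the cluster's Ready condition as False, which is what triggers rescheduling.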
func disableCluster(c client.Client, clusterName string) error {
	err := wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
		clusterObj := &clusterv1alpha1.Cluster{}
		if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil {
			if errors.IsConflict(err) {
				return false, nil
			}
			return false, err
		}
		// set the APIEndpoint of the matched cluster to a wrong value
		unavailableAPIEndpoint := "https://172.19.1.3:6443"
		clusterObj.Spec.APIEndpoint = unavailableAPIEndpoint
		if err := c.Update(context.TODO(), clusterObj); err != nil {
			if errors.IsConflict(err) {
				return false, nil
			}
			return false, err
		}
		return true, nil
	})
	return err
}

// recoverCluster restores the API endpoint of the disabled cluster.
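// Conflicts on update are treated as retryable, since other controllers may be
// writing the Cluster object concurrently.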
func recoverCluster(c client.Client, clusterName string, originalAPIEndpoint string) error {
	err := wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
		clusterObj := &clusterv1alpha1.Cluster{}
		if err := c.Get(context.TODO(), client.ObjectKey{Name: clusterName}, clusterObj); err != nil {
			return false, err
		}
		clusterObj.Spec.APIEndpoint = originalAPIEndpoint
		if err := c.Update(context.TODO(), clusterObj); err != nil {
			if errors.IsConflict(err) {
				return false, nil
			}
			return false, err
		}
		fmt.Printf("recovered API endpoint is %s\n", clusterObj.Spec.APIEndpoint)
		return true, nil
	})
	return err
}

// getTargetClusterNames gets the target cluster names from the resource binding of the given deployment.
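// Note: the binding name is assumed here to be derived from the lower-cased kind
// and the resource name (roughly "deployment-<name>"); see names.GenerateBindingName.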
func getTargetClusterNames(deployment *appsv1.Deployment) (targetClusterNames []string, err error) {
	bindingName := names.GenerateBindingName(deployment.Kind, deployment.Name)
	fmt.Printf("deployment kind is %s, name is %s\n", deployment.Kind, deployment.Name)
	binding := &workv1alpha1.ResourceBinding{}

	fmt.Printf("collect the target clusters in the resource binding\n")
	err = wait.Poll(pollInterval, pollTimeout, func() (done bool, err error) {
		err = controlPlaneClient.Get(context.TODO(), client.ObjectKey{Namespace: deployment.Namespace, Name: bindingName}, binding)
		if err != nil {
			if errors.IsNotFound(err) {
				return false, nil
			}
			return false, err
		}
		return true, nil
	})
	if err != nil {
		return nil, err
	}
	for _, cluster := range binding.Spec.Clusters {
		targetClusterNames = append(targetClusterNames, cluster.Name)
	}
	fmt.Printf("target clusters in the resource binding are %s\n", targetClusterNames)
	return targetClusterNames, nil
}

// getClusterAPIEndpoint returns the original API endpoint of the given cluster.
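// Note: `clusters` is assumed to be a package-level slice of member clusters
// captured during suite setup, i.e. before any API endpoint was modified.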
func getClusterAPIEndpoint(clusterName string) (apiEndpoint string) {
	for _, cluster := range clusters {
		if cluster.Name == clusterName {
			apiEndpoint = cluster.Spec.APIEndpoint
			fmt.Printf("original API endpoint of the cluster %s is %s\n", clusterName, apiEndpoint)
		}
	}
	return apiEndpoint
}