From 2bb1f50d4915e0a6ed8125e522484263e6c75672 Mon Sep 17 00:00:00 2001
From: Elib <73884315+ebattat@users.noreply.github.com>
Date: Sat, 31 Aug 2024 11:15:45 +0300
Subject: [PATCH] Add a wait for ODF health check (#871)

---
 benchmark_runner/common/oc/oc.py             | 73 ++++++++++++-------
 benchmark_runner/common/oc/oc_exceptions.py  |  7 ++
 .../PerfCI_Operators_Deployment/Jenkinsfile  |  4 --
 3 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/benchmark_runner/common/oc/oc.py b/benchmark_runner/common/oc/oc.py
index 48cca1274..425bffd4b 100644
--- a/benchmark_runner/common/oc/oc.py
+++ b/benchmark_runner/common/oc/oc.py
@@ -9,7 +9,8 @@
 from benchmark_runner.common.oc.oc_exceptions import (PodNotCreateTimeout, PodNotInitializedTimeout, PodNotReadyTimeout, \
     PodNotCompletedTimeout, PodTerminateTimeout, PodNameNotExist, LoginFailed, VMNotCreateTimeout, VMDeleteTimeout, \
     YAMLNotExist, VMNameNotExist, VMNotInitializedTimeout, VMNotReadyTimeout, VMStateTimeout, VMNotCompletedTimeout, \
-    ExecFailed, PodFailed, DVStatusTimeout, CSVNotCreateTimeout, UpgradeNotStartTimeout, OperatorInstallationTimeout, OperatorUpgradeTimeout)
+    ExecFailed, PodFailed, DVStatusTimeout, CSVNotCreateTimeout, UpgradeNotStartTimeout, OperatorInstallationTimeout, \
+    OperatorUpgradeTimeout, ODFHealthCheckTimeout)
 from benchmark_runner.common.ssh.ssh import SSH
 from benchmark_runner.main.environment_variables import environment_variables
 
@@ -347,17 +348,58 @@ def wait_for_dv_status(self,
             current_wait_time += OC.SLEEP_TIME
         raise DVStatusTimeout(status=status)
 
+    @typechecked
+    @logger_time_stamp
+    def wait_for_patch(self, pod_name: str, label: str, label_uuid: bool, namespace: str, timeout: int = SHORT_TIMEOUT):
+        """
+        This method waits for a patch to take effect: first for the pod to be created, then for it to become ready
+        @param pod_name:
+        @param label:
+        @param label_uuid:
+        @param namespace:
+        @param timeout:
+        @return:
+        """
+        self.wait_for_pod_create(pod_name=pod_name, namespace=namespace)
+        if self.wait_for_ready(label=label, label_uuid=label_uuid, namespace=namespace):
+            return True
+        else:
+            raise PodNotReadyTimeout(label)
+
+
+    def wait_for_odf_healthcheck(self, pod_name: str, namespace: str,
+                                 timeout: int = SHORT_TIMEOUT):
+        """
+        This method polls 'ceph health' inside the given pod until it reports HEALTH_OK or the timeout expires
+        @param pod_name:
+        @param namespace:
+        @param timeout:
+        @return:
+        """
+        current_wait_time = 0
+        health_check = f"{self.__cli} -n {namespace} rsh {self._get_pod_name(pod_name=pod_name, namespace=namespace)} ceph health"
+        while (timeout <= 0 or current_wait_time <= timeout) and 'HEALTH_OK' != self.run(health_check).strip():
+            # sleep for OC.SLEEP_TIME seconds
+            time.sleep(OC.SLEEP_TIME)
+            current_wait_time += OC.SLEEP_TIME
+        if 'HEALTH_OK' == self.run(health_check).strip():
+            return True
+        else:
+            raise ODFHealthCheckTimeout()
+
+    @typechecked
+    @logger_time_stamp
     def verify_odf_installation(self, namespace: str = 'openshift-storage'):
         """
         This method verifies ODF installation
         :return: True ODF passed, False failed
         """
+        # apply patch to enable the Ceph tools pod
         self.run(
             f"{self.__cli} patch storagecluster ocs-storagecluster -n {namespace} --type json --patch '[{{ \"op\": \"replace\", \"path\": \"/spec/enableCephTools\", \"value\": true }}]'")
         self.wait_for_patch(pod_name='rook-ceph-tools', label='app=rook-ceph-tools', label_uuid=False, namespace=namespace)
-        health_check = self.run(
-            f"{self.__cli} -n {namespace} rsh {self._get_pod_name(pod_name='rook-ceph-tools', namespace=namespace)} ceph health")
-        return 'HEALTH_OK' == health_check.strip()
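+        # ODF is considered healthy once 'ceph health' in the tools pod reports HEALTH_OK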
+        return self.wait_for_odf_healthcheck(pod_name='rook-ceph-tools', namespace=namespace)
 
     def get_odf_disk_count(self):
         """
@@ -826,29 +868,6 @@ def wait_for_csv(self, operator: str, csv_num: int = 1,
             current_wait_time += OC.SLEEP_TIME
         raise CSVNotCreateTimeout(operator, namespace)
 
-    @typechecked
-    @logger_time_stamp
-    def wait_for_patch(self, pod_name: str, label: str, label_uuid: bool, namespace: str, timeout: int = SHORT_TIMEOUT):
-        """
-        This method waits for patch, needs to wait that pod is created and then wait for ready
-        @param pod_name:
-        @param label:
-        @param label_uuid:
-        @param namespace:
-        @param timeout:
-        @return:
-        """
-        current_wait_time = 0
-        # Wait until the path is applied for the first time
-        time.sleep(60)
-        while timeout <= 0 or current_wait_time <= timeout:
-            if self._get_pod_name(pod_name=pod_name, namespace=namespace) and self.wait_for_ready(label=label, label_uuid=label_uuid, namespace=namespace):
-                return True
-            # sleep for x seconds
-            time.sleep(OC.SLEEP_TIME)
-            current_wait_time += OC.SLEEP_TIME
-        raise PodNotReadyTimeout(label)
-
     @typechecked
     @logger_time_stamp
     def wait_for_ready(self, label: str, run_type: str = 'pod', workload: str = '', status: str = 'ready', label_uuid: bool = True,
diff --git a/benchmark_runner/common/oc/oc_exceptions.py b/benchmark_runner/common/oc/oc_exceptions.py
index b41288133..69a775641 100644
--- a/benchmark_runner/common/oc/oc_exceptions.py
+++ b/benchmark_runner/common/oc/oc_exceptions.py
@@ -157,3 +157,10 @@ class OperatorUpgradeTimeout(OCError):
     def __init__(self, operator, version, namespace):
         self.message = f"{operator} operator upgrade to: {version} in namespace: {namespace} didn't complete"
         super(OperatorUpgradeTimeout, self).__init__(self.message)
+
+
+class ODFHealthCheckTimeout(OCError):
+    """This exception is raised when the ODF health check times out"""
+    def __init__(self):
+        self.message = "ODF health check timeout"
+        super(ODFHealthCheckTimeout, self).__init__(self.message)
diff --git a/jenkins/PerfCI_Operators_Deployment/Jenkinsfile b/jenkins/PerfCI_Operators_Deployment/Jenkinsfile
index e616c20ba..f0f4d82fb 100644
--- a/jenkins/PerfCI_Operators_Deployment/Jenkinsfile
+++ b/jenkins/PerfCI_Operators_Deployment/Jenkinsfile
@@ -156,10 +156,6 @@ END
                 emailext body: """\
 Jenkins job: ${env.BUILD_URL}\nSee the console output for more details: ${env.BUILD_URL}consoleFull\n\n
 """, subject: msg, to: "${CONTACT_EMAIL1}"
-
-                // Trigger PerfCI-Openshift-Deployment pipeline on failure
-                echo 'Triggering PerfCI-Openshift-Deployment pipeline due to failure'
-                build job: 'PerfCI-Openshift-Deployment', wait: false
             }
         }
         success {
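
---

For readers outside this repo, a minimal standalone sketch of the polling pattern wait_for_odf_healthcheck implements: re-run "ceph health" inside the rook-ceph-tools pod until it prints HEALTH_OK, treating timeout <= 0 as "wait indefinitely". The names run_cmd, wait_for_ceph_health_ok, SLEEP_TIME, and the 600-second default are hypothetical stand-ins for the repo's OC.run() wrapper and OC.SLEEP_TIME constant, not code from this patch.

    import subprocess
    import time

    SLEEP_TIME = 3  # assumed polling interval; stands in for OC.SLEEP_TIME

    def run_cmd(cmd: str) -> str:
        # hypothetical stand-in for OC.run(): execute a shell command, return its stdout
        return subprocess.run(cmd, shell=True, capture_output=True, text=True).stdout

    def wait_for_ceph_health_ok(namespace: str, pod: str, timeout: int = 600) -> bool:
        # poll 'ceph health' inside the tools pod until it reports HEALTH_OK;
        # timeout <= 0 means wait indefinitely, mirroring the patch's semantics
        cmd = f"oc -n {namespace} rsh {pod} ceph health"
        waited = 0
        while timeout <= 0 or waited <= timeout:
            if run_cmd(cmd).strip() == 'HEALTH_OK':
                return True
            time.sleep(SLEEP_TIME)
            waited += SLEEP_TIME
        raise TimeoutError("ODF health check timed out")

    # example (pod name is a placeholder):
    # wait_for_ceph_health_ok('openshift-storage', 'rook-ceph-tools-xxxx')

Checking health at the top of the loop means a cluster that is already healthy returns immediately instead of sleeping first, which is also why the patch could drop the old wait_for_patch's fixed time.sleep(60).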