From 2a1b987c07b6e500581ba29f80c08b38ed7f4df3 Mon Sep 17 00:00:00 2001
From: Eli Battat
Date: Wed, 13 Nov 2024 17:06:05 +0200
Subject: [PATCH] Fix ODF HealthCheck

Poll `ceph health` inside the wait loop and return as soon as it reports
HEALTH_OK; otherwise raise ODFHealthCheckTimeout with a message naming the
pod, namespace and timeout. ODFHealthCheckTimeout now accepts an optional
message that defaults to the previous fixed text, so existing no-argument
callers keep working. wait_for_node_ready now uses an inclusive timeout
bound, matching wait_for_odf_healthcheck.

---
 benchmark_runner/common/oc/oc.py            | 30 ++++++++++++---------
 benchmark_runner/common/oc/oc_exceptions.py |  4 +--
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/benchmark_runner/common/oc/oc.py b/benchmark_runner/common/oc/oc.py
index 22090ae0..f524ba91 100644
--- a/benchmark_runner/common/oc/oc.py
+++ b/benchmark_runner/common/oc/oc.py
@@ -379,22 +379,28 @@ def wait_for_patch(self, pod_name: str, label: str, label_uuid: bool, namespace:
     def wait_for_odf_healthcheck(self, pod_name: str, namespace: str,
                                  timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
         """
-        This method waits for patch, needs to wait that pod is created and then wait for ready
-        @param pod_name:
-        @param namespace:
-        @param timeout:
-        @return:
+        This method waits for the ODF health check by ensuring the pod is created and reaches the 'HEALTH_OK' status.
+
+        @param pod_name: Name of the pod to check health.
+        @param namespace: Namespace where the pod is located.
+        @param timeout: Timeout in seconds for waiting. If set to 0 or negative, wait indefinitely.
+        @return: True if health check passes within the timeout.
+        @raise ODFHealthCheckTimeout: If 'HEALTH_OK' is not reported before the timeout expires.
         """
         current_wait_time = 0
         health_check = f"{self.__cli} -n {namespace} rsh {self._get_pod_name(pod_name=pod_name, namespace=namespace)} ceph health"
-        while timeout <= 0 or current_wait_time <= timeout and 'HEALTH_OK' != self.run(health_check).strip():
-            # sleep for x seconds
+
+        while timeout <= 0 or current_wait_time <= timeout:
+            if 'HEALTH_OK' == self.run(health_check).strip():
+                return True
+
+            # Sleep for a defined interval and update the wait time
             time.sleep(OC.SLEEP_TIME)
             current_wait_time += OC.SLEEP_TIME
-        if 'HEALTH_OK' == self.run(health_check).strip():
-            return True
-        else:
-            raise ODFHealthCheckTimeout()
+
+        # Raise exception if 'HEALTH_OK' was not reported before the timeout expired
+        raise ODFHealthCheckTimeout(
+            message=f"Health check failed for pod '{pod_name}' in namespace '{namespace}' after {timeout} seconds.")
 
     @typechecked
     @logger_time_stamp
@@ -471,7 +477,7 @@ def wait_for_node_ready(self, node: str = None, wait_time: int = None, timeout:
         wait_time = wait_time or OC.SHORT_TIMEOUT
         nodes_status = None
         current_wait_time = 0
-        while timeout <= 0 or current_wait_time < timeout:
+        while timeout <= 0 or current_wait_time <= timeout:
             nodes_status = self.check_node_status(node=node)
             if nodes_status is True:
                 return True
diff --git a/benchmark_runner/common/oc/oc_exceptions.py b/benchmark_runner/common/oc/oc_exceptions.py
index 964f02ca..14bcad29 100644
--- a/benchmark_runner/common/oc/oc_exceptions.py
+++ b/benchmark_runner/common/oc/oc_exceptions.py
@@ -161,8 +161,8 @@ def __init__(self, operator, version, namespace):
 
 class ODFHealthCheckTimeout(OCError):
     """This exception return odf healthcheck timeout error"""
-    def __init__(self):
-        self.message = f"ODF health check timeout"
+    def __init__(self, message: str = "ODF health check timeout"):
+        self.message = message
         super(ODFHealthCheckTimeout, self).__init__(self.message)
 
 