Skip to content

Commit

Permalink
Add a wait for ODF health check (#871)
Browse files Browse the repository at this point in the history
  • Loading branch information
ebattat authored Aug 31, 2024
1 parent cae318e commit 2bb1f50
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 31 deletions.
72 changes: 45 additions & 27 deletions benchmark_runner/common/oc/oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from benchmark_runner.common.oc.oc_exceptions import (PodNotCreateTimeout, PodNotInitializedTimeout, PodNotReadyTimeout, \
PodNotCompletedTimeout, PodTerminateTimeout, PodNameNotExist, LoginFailed, VMNotCreateTimeout, VMDeleteTimeout, \
YAMLNotExist, VMNameNotExist, VMNotInitializedTimeout, VMNotReadyTimeout, VMStateTimeout, VMNotCompletedTimeout, \
ExecFailed, PodFailed, DVStatusTimeout, CSVNotCreateTimeout, UpgradeNotStartTimeout, OperatorInstallationTimeout, OperatorUpgradeTimeout)
ExecFailed, PodFailed, DVStatusTimeout, CSVNotCreateTimeout, UpgradeNotStartTimeout, OperatorInstallationTimeout, \
OperatorUpgradeTimeout, ODFHealthCheckTimeout)
from benchmark_runner.common.ssh.ssh import SSH
from benchmark_runner.main.environment_variables import environment_variables

Expand Down Expand Up @@ -347,17 +348,57 @@ def wait_for_dv_status(self,
current_wait_time += OC.SLEEP_TIME
raise DVStatusTimeout(status=status)

@typechecked
@logger_time_stamp
def wait_for_patch(self, pod_name: str, label: str, label_uuid: bool, namespace: str, timeout: int = SHORT_TIMEOUT):
    """
    Wait for a patch to take effect: first wait for the pod to be created,
    then wait until it reports ready.
    @param pod_name: name of the pod the patch creates
    @param label: label selector forwarded to wait_for_ready
    @param label_uuid: forwarded to wait_for_ready
    @param namespace: namespace of the pod
    @param timeout: currently unused; kept for interface compatibility
    @return: True when the pod is ready
    @raise PodNotReadyTimeout: when the pod never becomes ready
    """
    self.wait_for_pod_create(pod_name=pod_name, namespace=namespace)
    ready = self.wait_for_ready(label=label, label_uuid=label_uuid, namespace=namespace)
    if not ready:
        raise PodNotReadyTimeout(label)
    return True


@typechecked
@logger_time_stamp
def wait_for_odf_healthcheck(self, pod_name: str, namespace: str,
                             timeout: int = SHORT_TIMEOUT):
    """
    Wait until 'ceph health' run inside the tools pod reports HEALTH_OK.
    @param pod_name: prefix of the ceph tools pod used to run 'ceph health'
    @param namespace: namespace of the tools pod
    @param timeout: maximum seconds to wait; a value <= 0 waits without limit
    @return: True once 'ceph health' returns HEALTH_OK
    @raise ODFHealthCheckTimeout: when the timeout expires before HEALTH_OK
    """
    current_wait_time = 0
    health_check = f"{self.__cli} -n {namespace} rsh {self._get_pod_name(pod_name=pod_name, namespace=namespace)} ceph health"
    # Keep polling while still inside the timeout window (or forever when
    # timeout <= 0) AND the cluster is not yet healthy. The parentheses are
    # essential: without them 'or' short-circuits the health check, so a
    # non-positive timeout would loop forever even after HEALTH_OK.
    while (timeout <= 0 or current_wait_time <= timeout) and 'HEALTH_OK' != self.run(health_check).strip():
        # sleep for x seconds
        time.sleep(OC.SLEEP_TIME)
        current_wait_time += OC.SLEEP_TIME
    # Re-check once after leaving the loop to distinguish success from timeout
    if 'HEALTH_OK' == self.run(health_check).strip():
        return True
    raise ODFHealthCheckTimeout()

@typechecked
@logger_time_stamp
def verify_odf_installation(self, namespace: str = 'openshift-storage'):
    """
    This method verifies ODF installation: it enables the ceph tools pod via a
    storagecluster patch, waits for that pod to become ready, then waits for
    'ceph health' to report HEALTH_OK.
    @param namespace: ODF namespace (default 'openshift-storage')
    :return: True ODF passed, False failed
    """
    # apply patch that enables the rook-ceph-tools debug pod
    self.run(
        f"{self.__cli} patch storagecluster ocs-storagecluster -n {namespace} --type json --patch '[{{ \"op\": \"replace\", \"path\": \"/spec/enableCephTools\", \"value\": true }}]'")
    self.wait_for_patch(pod_name='rook-ceph-tools', label='app=rook-ceph-tools', label_uuid=False, namespace=namespace)
    # Single exit point: the unreachable duplicate 'ceph health' return left
    # over from the previous implementation has been removed.
    return self.wait_for_odf_healthcheck(pod_name='rook-ceph-tools', namespace=namespace)

def get_odf_disk_count(self):
"""
Expand Down Expand Up @@ -826,29 +867,6 @@ def wait_for_csv(self, operator: str, csv_num: int = 1,

raise CSVNotCreateTimeout(operator, namespace)

@typechecked
@logger_time_stamp
def wait_for_patch(self, pod_name: str, label: str, label_uuid: bool, namespace: str, timeout: int = SHORT_TIMEOUT):
    """
    Wait for a patch to take effect: poll until the patched pod exists and
    reports ready, or the timeout expires.
    @param pod_name: name of the pod the patch creates
    @param label: label selector forwarded to wait_for_ready
    @param label_uuid: forwarded to wait_for_ready
    @param namespace: namespace of the pod
    @param timeout: maximum seconds to poll; a value <= 0 polls without limit
    @return: True when the pod exists and is ready
    @raise PodNotReadyTimeout: when the pod never becomes ready in time
    """
    elapsed = 0
    # Wait until the patch is applied for the first time
    time.sleep(60)
    while timeout <= 0 or elapsed <= timeout:
        pod_found = self._get_pod_name(pod_name=pod_name, namespace=namespace)
        if pod_found and self.wait_for_ready(label=label, label_uuid=label_uuid, namespace=namespace):
            return True
        # sleep for x seconds before the next poll
        time.sleep(OC.SLEEP_TIME)
        elapsed += OC.SLEEP_TIME
    raise PodNotReadyTimeout(label)

@typechecked
@logger_time_stamp
def wait_for_ready(self, label: str, run_type: str = 'pod', workload: str = '', status: str = 'ready', label_uuid: bool = True,
Expand Down
7 changes: 7 additions & 0 deletions benchmark_runner/common/oc/oc_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,10 @@ class OperatorUpgradeTimeout(OCError):
def __init__(self, operator, version, namespace):
    # Compose a human-readable message identifying which operator upgrade,
    # to which version, in which namespace, failed to complete.
    self.message = f"{operator} operator upgrade to: {version} in namespace: {namespace} didn't complete"
    super(OperatorUpgradeTimeout, self).__init__(self.message)


class ODFHealthCheckTimeout(OCError):
    """Raised when the ODF 'ceph health' check does not reach HEALTH_OK in time."""
    def __init__(self):
        # Plain string literal: the original used an f-string with no
        # placeholders (no interpolation ever happens here).
        self.message = "ODF health check timeout"
        super(ODFHealthCheckTimeout, self).__init__(self.message)
4 changes: 0 additions & 4 deletions jenkins/PerfCI_Operators_Deployment/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,6 @@ END
emailext body: """\
Jenkins job: ${env.BUILD_URL}\nSee the console output for more details: ${env.BUILD_URL}consoleFull\n\n
""", subject: msg, to: "${CONTACT_EMAIL1}"

// Trigger PerfCI-Openshift-Deployment pipeline on failure
echo 'Triggering PerfCI-Openshift-Deployment pipeline due to failure'
build job: 'PerfCI-Openshift-Deployment', wait: false
}
}
success {
Expand Down

0 comments on commit 2bb1f50

Please sign in to comment.