Skip to content

Commit

Permalink
Verify Node Ready (#908)
Browse files Browse the repository at this point in the history
  • Loading branch information
ebattat authored Oct 13, 2024
1 parent 31b912c commit b78bb73
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 3 deletions.
30 changes: 28 additions & 2 deletions benchmark_runner/common/oc/oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
PodNotCompletedTimeout, PodTerminateTimeout, PodNameNotExist, LoginFailed, VMNotCreateTimeout, VMDeleteTimeout, \
YAMLNotExist, VMNameNotExist, VMNotInitializedTimeout, VMNotReadyTimeout, VMStateTimeout, VMNotCompletedTimeout, \
ExecFailed, PodFailed, DVStatusTimeout, CSVNotCreateTimeout, UpgradeNotStartTimeout, OperatorInstallationTimeout, \
OperatorUpgradeTimeout, ODFHealthCheckTimeout)
OperatorUpgradeTimeout, ODFHealthCheckTimeout, NodeNotReady)
from benchmark_runner.common.ssh.ssh import SSH
from benchmark_runner.main.environment_variables import environment_variables

Expand Down Expand Up @@ -375,7 +375,6 @@ def wait_for_patch(self, pod_name: str, label: str, label_uuid: bool, namespace:
else:
raise PodNotReadyTimeout(label)


def wait_for_odf_healthcheck(self, pod_name: str, namespace: str,
timeout: int = SHORT_TIMEOUT):
"""
Expand Down Expand Up @@ -458,6 +457,33 @@ def get_worker_nodes(self):
"""
return self.run(fr""" {self.__cli} get nodes -l node-role.kubernetes.io/worker= -o jsonpath="{{range .items[*]}}{{.metadata.name}}{{'\n'}}{{end}}" """)

@staticmethod
@typechecked
def check_node_status(nodes_list: list):
"""
This method check node status
@param nodes_list:
@return: True when all nodes in ready status
"""
# Check if any node is not in 'Ready' status
for node in nodes_list:
node_name, node_status = node.split()
if node_status != 'Ready':
raise NodeNotReady(node_name, node_status)

# If no nodes are found in a non-ready state
return True

def verify_nodes_ready(self):
"""
This method verifies that all nodes are in 'Ready' status.
If any node is not ready, it raises an error with the node name and its status.
@return: True is all in 'Ready' status
"""
# Get the node name and status for all nodes
nodes_list = self.run(f"{self.__cli} get nodes --no-headers | awk '{{print $1, $2}}'").splitlines()
return self.check_node_status(nodes_list)

def delete_available_released_pv(self):
"""
This method deletes available or released pv because that avoid launching new pv
Expand Down
7 changes: 7 additions & 0 deletions benchmark_runner/common/oc/oc_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,10 @@ class ODFHealthCheckTimeout(OCError):
def __init__(self):
self.message = f"ODF health check timeout"
super(ODFHealthCheckTimeout, self).__init__(self.message)


class NodeNotReady(OCError):
"""This exception returns node not ready timeout error"""
def __init__(self, node_name, node_status):
self.message = f"Node {node_name} is not ready. Current status: {node_status}"
super(NodeNotReady, self).__init__(self.message)
2 changes: 2 additions & 0 deletions benchmark_runner/main/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def upgrade_ocp_bare_metal(step: str):
elif step == 'verify_bare_metal_upgrade_complete':
if bare_metal_operations.is_cluster_upgraded(oc, cnv_version=cnv_version, odf_version=odf_version, lso_version=lso_version):
bare_metal_operations.verify_cluster_is_up(oc)
oc.verify_nodes_ready()
else:
error_message = f'OCP {upgrade_ocp_version} upgrade failed'
logger.error(error_message)
Expand All @@ -191,6 +192,7 @@ def install_resources():
logger.info(f'Start Bare-Metal OpenShift resources installation')
oc = bare_metal_operations.oc_login()
bare_metal_operations.verify_cluster_is_up(oc)
oc.verify_nodes_ready()
bare_metal_operations.install_ocp_resources(resources=resources)
bare_metal_operations.disconnect_from_provisioner()
logger.info(f'End Bare-Metal OpenShift resources installation')
Expand Down
10 changes: 10 additions & 0 deletions tests/integration/benchmark_runner/common/oc/test_oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,13 @@ def test_collect_prometheus():
time.sleep(10)
tarball = snapshot.retrieve_snapshot(post_wait_time=1)
assert tarfile.is_tarfile(tarball)


def test_verify_nodes_ready():
"""
This method test nodes are ready
@return:
"""
oc = OC(kubeadmin_password=test_environment_variable['kubeadmin_password'])
oc.login()
assert oc.verify_nodes_ready()
16 changes: 15 additions & 1 deletion tests/unittest/benchmark_runner/common/oc/test_oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import mock
import pytest
from benchmark_runner.common.oc.oc import OC
from benchmark_runner.common.oc.oc_exceptions import YAMLNotExist, LoginFailed
from benchmark_runner.common.oc.oc_exceptions import YAMLNotExist, LoginFailed, NodeNotReady


def test_bad_login():
Expand Down Expand Up @@ -47,3 +47,17 @@ def test_short_uuid():
with mock.patch.object(OC, 'get_long_uuid', new=dummy_long_uuid):
oc = OC()
assert oc._OC__get_short_uuid(workload='stressng_pod') == 'bb2be20e'


def test_check_node_status_ready():
oc = OC()
result = oc.check_node_status(nodes_list=['node-0 Ready', 'node-1 Ready', 'node-2 Ready'])
assert result


def test_check_node_status_not_ready():
oc = OC()
with pytest.raises(NodeNotReady) as exc_info:
oc.check_node_status(nodes_list=['node-0 Ready', 'node-1 NotReady', 'node-2 Ready'])
# Check that the exception message is as expected
assert str(exc_info.value) == "Node node-1 is not ready. Current status: NotReady"

0 comments on commit b78bb73

Please sign in to comment.