Skip to content

Commit

Permalink
Wait until the nodes are ready
Browse files Browse the repository at this point in the history
  • Loading branch information
ebattat committed Nov 4, 2024
1 parent c35a7ab commit 379f5bd
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 27 deletions.
51 changes: 36 additions & 15 deletions benchmark_runner/common/oc/oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -457,32 +457,53 @@ def get_worker_nodes(self):
"""
return self.run(fr""" {self.__cli} get nodes -l node-role.kubernetes.io/worker= -o jsonpath="{{range .items[*]}}{{.metadata.name}}{{'\n'}}{{end}}" """)

@typechecked
def wait_for_nodes_ready(self, wait_time: int = None, timeout: int = int(environment_variables.environment_variables_dict['timeout'])):
    """
    This method waits until all nodes are in 'Ready' status.
    @param wait_time: seconds to sleep between status checks; None or 0 falls back to OC.SHORT_TIMEOUT
    @param timeout: maximum wait time in seconds; zero or a negative value means no timeout (default set in environment variables)
    @return: True if all nodes are in 'Ready' status within the timeout period
    @raises: NodeNotReady if one or more nodes are not ready within the timeout
    """
    # 'or' (rather than an 'is None' test) also replaces 0, so the loop below always advances
    wait_time = wait_time or OC.SHORT_TIMEOUT
    nodes_status = None
    current_wait_time = 0
    while timeout <= 0 or current_wait_time < timeout:
        nodes_status = self.check_all_nodes_status()
        if nodes_status is True:
            return True
        logger.info(f"Waiting for '{nodes_status}' to reach 'Ready' status")
        time.sleep(wait_time)
        current_wait_time += wait_time
    # Timed out: log the full node listing for diagnostics, using the configured CLI like the other methods
    nodes_listing = self.run(f"{self.__cli} get nodes")
    logger.info(f"oc get nodes:\n{nodes_listing}")
    raise NodeNotReady(nodes_status=nodes_status)

@typechecked
def check_all_nodes_status(self):
    """
    This method checks the status of all nodes returned by get_node_status().
    @return: True if all nodes are in 'Ready' status; otherwise a dict mapping each not-ready node name to its status
    """
    not_ready_nodes = {}
    # Check if any node is not in 'Ready' status
    for node in self.get_node_status():
        fields = node.split()
        if not fields:
            # Blank line in the CLI output: nothing to check
            continue
        # Tolerate a missing status column instead of failing on tuple unpacking;
        # an empty status is reported as not ready
        node_name, node_status = fields[0], ' '.join(fields[1:])
        if node_status != 'Ready':
            not_ready_nodes[node_name] = node_status
    return not_ready_nodes if not_ready_nodes else True

def get_node_status(self) -> list:
    """
    This method returns the name and status of every node
    @return: list of '<node_name> <node_status>' strings, one per node (e.g. 'node-0 Ready')
    """
    # Get the node name and status for all nodes
    nodes_output = self.run(f"{self.__cli} get nodes --no-headers | awk '{{print $1, $2}}'")
    # Drop blank/whitespace-only lines so callers can safely split each entry into name and status
    return [line for line in nodes_output.splitlines() if line.strip()]

def delete_available_released_pv(self):
"""
Expand Down
6 changes: 3 additions & 3 deletions benchmark_runner/common/oc/oc_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def __init__(self):


class NodeNotReady(OCError):
    """Raised when one or more nodes are not in 'Ready' status; carries the reported nodes status in the message"""
    def __init__(self, nodes_status: dict):
        self.message = f"Node not ready: {nodes_status}"
        super().__init__(self.message)
4 changes: 2 additions & 2 deletions benchmark_runner/main/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def upgrade_ocp_bare_metal(step: str):
elif step == 'verify_bare_metal_upgrade_complete':
if bare_metal_operations.is_cluster_upgraded(oc, cnv_version=cnv_version, odf_version=odf_version, lso_version=lso_version):
bare_metal_operations.verify_cluster_is_up(oc)
oc.verify_nodes_ready()
oc.wait_for_nodes_ready()
else:
error_message = f'OCP {upgrade_ocp_version} upgrade failed'
logger.error(error_message)
Expand All @@ -200,7 +200,7 @@ def install_resources():
logger.info(f'Start Bare-Metal OpenShift resources installation')
oc = bare_metal_operations.oc_login()
bare_metal_operations.verify_cluster_is_up(oc)
oc.verify_nodes_ready()
oc.wait_for_nodes_ready()
bare_metal_operations.install_ocp_resources(resources=resources)
bare_metal_operations.disconnect_from_provisioner()
logger.info(f'End Bare-Metal OpenShift resources installation')
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/benchmark_runner/common/oc/test_oc.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,11 @@ def test_collect_prometheus():
assert tarfile.is_tarfile(tarball)


def test_wait_for_nodes_ready():
    """
    This test logs in to the cluster and waits until all nodes are ready
    @return:
    """
    oc_client = OC(kubeadmin_password=test_environment_variable['kubeadmin_password'])
    oc_client.login()
    nodes_ready = oc_client.wait_for_nodes_ready()
    assert nodes_ready
12 changes: 8 additions & 4 deletions tests/unittest/benchmark_runner/common/oc/test_oc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@

import mock
import pytest
from unittest.mock import patch

from benchmark_runner.common.oc.oc import OC
from benchmark_runner.common.oc.oc_exceptions import YAMLNotExist, LoginFailed, NodeNotReady

Expand Down Expand Up @@ -51,13 +53,15 @@ def test_short_uuid():

def test_check_node_status_ready():
    """
    This test verifies that wait_for_nodes_ready succeeds when every node reports 'Ready'
    """
    oc = OC()
    ready_nodes = ['node-0 Ready', 'node-1 Ready', 'node-2 Ready']
    # Replace the CLI-backed node query with a fixed, all-ready listing
    with patch.object(OC, 'get_node_status', return_value=ready_nodes):
        assert oc.wait_for_nodes_ready()


def test_check_node_status_not_ready():
    """
    This test verifies that wait_for_nodes_ready raises NodeNotReady, listing every
    not-ready node, when the nodes do not become ready within the timeout
    """
    oc = OC()
    nodes = ['node-0 NotReady', 'node-1 NotReady', 'node-2 Ready']
    # time.sleep is patched so the polling loop does not really wait between checks
    with pytest.raises(NodeNotReady) as exc_info:
        with patch.object(OC, 'get_node_status', return_value=nodes), patch('time.sleep'):
            oc.wait_for_nodes_ready(wait_time=3, timeout=10)
    # Check that the exception message matches the format produced by NodeNotReady
    assert str(exc_info.value) == "Node not ready: {'node-0': 'NotReady', 'node-1': 'NotReady'}"

0 comments on commit 379f5bd

Please sign in to comment.