Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions crmsh/qdevice.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,6 @@ def check_qnetd_addr(qnetd_addr):
if utils.InterfacesInfo.ip_in_local(qnetd_ip):
raise ValueError("host for qnetd must be a remote one")

if not utils.check_port_open(qnetd_ip, 22):
raise ValueError("ssh service on \"{}\" not available".format(qnetd_addr))

@staticmethod
def check_qdevice_port(qdevice_port):
if not utils.valid_port(qdevice_port):
Expand Down
26 changes: 14 additions & 12 deletions crmsh/ui_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,23 +167,24 @@ def do_start(self, context, *args):
'''
Starts the cluster stack on all nodes or specific node(s)
'''
node_list = parse_option_for_nodes(context, *args)
try:
node_list = parse_option_for_nodes(context, *args)
except utils.NoSSHError as msg:
logger.error('%s', msg)
logger.info("Please try 'crm cluster start' on each node")
return False

service_check_list = ["pacemaker.service"]
start_qdevice = False
if corosync.is_qdevice_configured():
start_qdevice = True
service_check_list.append("corosync-qdevice.service")

service_manager = ServiceManager()
try:
for node in node_list[:]:
if all([service_manager.service_is_active(srv, remote_addr=node) for srv in service_check_list]):
logger.info("The cluster stack already started on {}".format(node))
node_list.remove(node)
except utils.NoSSHError as msg:
logger.error('%s', msg)
logger.info("Please try 'crm cluster start' on each node")
return False
for node in node_list[:]:
if all([service_manager.service_is_active(srv, remote_addr=node) for srv in service_check_list]):
logger.info("The cluster stack already started on {}".format(node))
node_list.remove(node)
if not node_list:
return

Expand Down Expand Up @@ -248,13 +249,14 @@ def do_stop(self, context, *args):
'''
Stops the cluster stack on all nodes or specific node(s)
'''
node_list = parse_option_for_nodes(context, *args)
try:
node_list = [n for n in node_list if self._node_ready_to_stop_cluster_service(n)]
node_list = parse_option_for_nodes(context, *args)
except utils.NoSSHError as msg:
logger.error('%s', msg)
logger.info("Please try 'crm cluster stop' on each node")
return False

node_list = [n for n in node_list if self._node_ready_to_stop_cluster_service(n)]
if not node_list:
return
logger.debug(f"stop node list: {node_list}")
Expand Down
35 changes: 24 additions & 11 deletions crmsh/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2427,25 +2427,23 @@ def package_is_installed(pkg, remote_addr=None):
return rc == 0


def node_reachable_check(node, ping_count=1, port=22, timeout=3):
def node_reachable_check(node):
"""
Check if node is reachable by using ping and socket to ssh port
Check if node is reachable by checking SSH port is open
"""
rc, _, _ = ShellUtils().get_stdout_stderr(f"ping -n -c {ping_count} -W {timeout} {node}")
if rc == 0:
return True
# ping failed, try to connect to ssh port by socket
if check_port_open(node, port, timeout):
if node == this_node() or check_port_open(node, 22):
return True
# both ping and socket failed
raise ValueError(f"host \"{node}\" is unreachable")
if config.core.no_ssh:
raise NoSSHError(constants.NO_SSH_ERROR_MSG)
else:
raise ValueError(f"host \"{node}\" is unreachable via SSH")


def get_reachable_node_list(node_list:list[str]) -> list[str]:
reachable_node_list = []
for node in node_list:
try:
if node == this_node() or node_reachable_check(node):
if node_reachable_check(node):
reachable_node_list.append(node)
except ValueError as e:
logger.warning(str(e))
Expand Down Expand Up @@ -2473,6 +2471,12 @@ def __init__(self, msg: str, dead_nodes=None):
self.dead_nodes = dead_nodes or []


class UnreachableNodeError(ValueError):
    """
    Error carrying the list of nodes that could not be reached.

    Subclasses ValueError, so existing ``except ValueError`` handlers
    still catch it.

    :param msg: human-readable error message
    :param unreachable_nodes: names of the unreachable nodes; None or an
        empty value is stored as an empty list
    """
    def __init__(self, msg: str, unreachable_nodes=None):
        super().__init__(msg)
        # Take a copy so that later changes to the caller's list do not
        # silently alter the payload of an already-raised error.
        self.unreachable_nodes = list(unreachable_nodes) if unreachable_nodes else []


def check_all_nodes_reachable(action_to_do: str, peer_node: str = None):
"""
Check if all cluster nodes are reachable
Expand All @@ -2494,8 +2498,17 @@ def check_all_nodes_reachable(action_to_do: str, peer_node: str = None):
"""
raise DeadNodeError(msg, dead_nodes)

unreachable_nodes = []
for node in online_nodes:
node_reachable_check(node)
try:
node_reachable_check(node)
except ValueError:
unreachable_nodes.append(node)
if unreachable_nodes:
msg = f"""There are unreachable nodes: {', '.join(unreachable_nodes)}.
Please check the network connectivity before {action_to_do}.
"""
raise UnreachableNodeError(msg, unreachable_nodes)


def re_split_string(reg, string):
Expand Down
2 changes: 0 additions & 2 deletions test/features/cluster_blocking_ssh.feature
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ Feature: cluster testing with ssh blocked
And Run "firewall-cmd --zone=public --add-rich-rule='rule port port=22 protocol=tcp drop' --permanent && firewall-cmd --reload" on "hanode2"
And Try "ssh -o ConnectTimeout=5 hanode2" on "hanode1"
Then Except "ssh: connect to host hanode2 port 22: Connection timed out" in stderr
When Run "timeout 5s crm report || echo "timeout"" on "hanode1"
Then Expected "timeout" in stdout
When Write multi lines to file "/etc/crm/crm.conf" on "hanode1"
"""
[core]
Expand Down
2 changes: 1 addition & 1 deletion test/features/qdevice_validate.feature
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Feature: corosync qdevice/qnetd options validate
Scenario: Service ssh on qnetd node not available
When Run "systemctl stop sshd.service" on "node-without-ssh"
When Try "crm cluster init --qnetd-hostname=node-without-ssh"
Then Except "ERROR: cluster.init: ssh service on "node-without-ssh" not available"
Then Except "ERROR: cluster.init: host "node-without-ssh" is unreachable via SSH"

@clean
Scenario: Option "--qdevice-port" set wrong port
Expand Down