From 557d5f673f837740b66a393671bd2de9aefd8bd8 Mon Sep 17 00:00:00 2001
From: Kefu Chai
Date: Fri, 29 Nov 2024 11:05:01 +0800
Subject: [PATCH] test: topology_custom: restructure comment as ordered list

During the investigation of issue #21724, the docstring for
`test_recover_stuck_raft_recovery` proved difficult to follow.
Restructure the docstring into an ordered list to:

1. Improve readability
2. Clearly outline the test steps
3. Make the test's logic and flow more immediately comprehensible

Signed-off-by: Kefu Chai
---
 .../test_raft_recovery_stuck.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/test/topology_custom/test_raft_recovery_stuck.py b/test/topology_custom/test_raft_recovery_stuck.py
index 62c344fca584..9f0c0c3afd2c 100644
--- a/test/topology_custom/test_raft_recovery_stuck.py
+++ b/test/topology_custom/test_raft_recovery_stuck.py
@@ -21,13 +21,20 @@
 @log_run_time
 async def test_recover_stuck_raft_recovery(request, manager: ManagerClient):
     """
-    After creating a cluster, we enter RECOVERY state on every server. Then, we delete the Raft data
-    and the upgrade state on all servers. We restart them and the upgrade procedure starts. One of the
-    servers fails, the rest enter 'synchronize' state. We assume the failed server cannot be recovered.
-    We cannot just remove it at this point; it's already part of group 0, `remove_from_group0` will wait
-    until upgrade procedure finishes - but the procedure is stuck. To proceed we enter RECOVERY state on
-    the other servers, remove the failed one, and clear existing Raft data. After leaving RECOVERY the
-    remaining nodes will restart the procedure, establish a new group 0 and finish upgrade.
+    1. Create a cluster.
+    2. Enter RECOVERY state on every server.
+    3. Delete the Raft data and the upgrade state on all servers.
+    4. Restart them; the upgrade procedure starts.
+    5. Start the first node with a group 0 upgrade error injected into it, so it fails.
+    6. Start the rest of the nodes in the cluster; they enter 'synchronize' state.
+       We assume the failed server cannot be recovered. We cannot just remove it at this
+       point; it's already part of group 0, and `remove_from_group0` would wait until the
+       upgrade procedure finishes - but the procedure is stuck. To proceed we:
+    7. Enter RECOVERY state on the other servers,
+    8. Remove the failed node, and
+    9. Clear the existing Raft data.
+    10. After leaving RECOVERY, the remaining nodes restart the upgrade procedure, establish
+        a new group 0, and finish the upgrade.
     """
     cfg = {'enable_user_defined_functions': False,
            'force_gossip_topology_changes': True}
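
Note for reviewers (not part of the patch): as a rough illustration of how the recovery
steps in items 7-10 of the new docstring could translate into code, here is a minimal
asyncio sketch. The `Cluster` protocol and every method on it (`enter_recovery_state`,
`remove_node`, `clear_raft_data`, `restart`) are hypothetical names introduced only for
this note; they are not the ManagerClient API that the real test uses.

# Hypothetical sketch only: the Cluster protocol below is invented for this
# note and is NOT ScyllaDB's real ManagerClient API.
from typing import Protocol


class Cluster(Protocol):
    """Minimal interface assumed for the recovery steps in the docstring."""

    async def enter_recovery_state(self, server_id: int) -> None: ...
    async def remove_node(self, server_id: int) -> None: ...
    async def clear_raft_data(self, server_id: int) -> None: ...
    async def restart(self, server_id: int) -> None: ...


async def recover_from_stuck_upgrade(cluster: Cluster,
                                     live: list[int],
                                     failed: int) -> None:
    """Walk docstring steps 7-10 against the assumed interface above."""
    # Step 7: enter RECOVERY on the servers that are still reachable.
    for server in live:
        await cluster.enter_recovery_state(server)
    # Step 8: in RECOVERY, removing the failed node no longer has to wait
    # for the stuck upgrade procedure.
    await cluster.remove_node(failed)
    # Step 9: clear the old Raft data so a fresh group 0 can be formed.
    for server in live:
        await cluster.clear_raft_data(server)
    # Step 10: leaving RECOVERY (modeled here as a restart) lets the
    # remaining nodes rerun the upgrade and establish a new group 0.
    for server in live:
        await cluster.restart(server)

A real test would drive these steps through ManagerClient and CQL against the system
tables; the sketch deliberately abstracts that away to mirror the docstring's ordering.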