import asyncio
import uuid

import pytest
from temporalio.client import Client, WorkflowUpdateFailedError
from temporalio.exceptions import ApplicationError
from temporalio.testing import WorkflowEnvironment
from temporalio.worker import Worker

from updates_and_signals.safe_message_handlers.activities import (
    assign_nodes_to_job,
    find_bad_nodes,
    unassign_nodes_for_job,
)
from updates_and_signals.safe_message_handlers.workflow import (
    ClusterManagerAssignNodesToJobInput,
    ClusterManagerDeleteJobInput,
    ClusterManagerInput,
    ClusterManagerWorkflow,
)

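# Exercises the cluster manager workflow end to end: start the cluster, assign
# nodes to jobs via concurrent updates, delete the jobs, then shut down.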
async def test_safe_message_handlers(client: Client, env: WorkflowEnvironment):
    if env.supports_time_skipping:
        pytest.skip(
            "Java test server: https://github.com/temporalio/sdk-java/issues/1903"
        )
    task_queue = f"tq-{uuid.uuid4()}"
    async with Worker(
        client,
        task_queue=task_queue,
        workflows=[ClusterManagerWorkflow],
        activities=[assign_nodes_to_job, unassign_nodes_for_job, find_bad_nodes],
    ):
        cluster_manager_handle = await client.start_workflow(
            ClusterManagerWorkflow.run,
            ClusterManagerInput(),
            id=f"ClusterManagerWorkflow-{uuid.uuid4()}",
            task_queue=task_queue,
        )
        await cluster_manager_handle.signal(ClusterManagerWorkflow.start_cluster)

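        # Calling execute_update without awaiting returns a coroutine, so the
        # six updates below run concurrently once gathered.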
        allocation_updates = []
        for i in range(6):
            allocation_updates.append(
                cluster_manager_handle.execute_update(
                    ClusterManagerWorkflow.assign_nodes_to_job,
                    ClusterManagerAssignNodesToJobInput(
                        total_num_nodes=2, job_name=f"task-{i}"
                    ),
                )
            )
        results = await asyncio.gather(*allocation_updates)
        for result in results:
            assert len(result.nodes_assigned) == 2

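        # Give the workflow's background processing (such as its periodic
        # health check) a moment to run before deleting the jobs.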
        await asyncio.sleep(1)

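        # Delete the same six jobs, again issuing the updates concurrently.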
        deletion_updates = []
        for i in range(6):
            deletion_updates.append(
                cluster_manager_handle.execute_update(
                    ClusterManagerWorkflow.delete_job,
                    ClusterManagerDeleteJobInput(job_name=f"task-{i}"),
                )
            )
        await asyncio.gather(*deletion_updates)

        await cluster_manager_handle.signal(ClusterManagerWorkflow.shutdown_cluster)

        result = await cluster_manager_handle.result()
        assert result.num_currently_assigned_nodes == 0

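# Verifies that repeating an assign-nodes update with the same job name is
# idempotent and does not allocate additional nodes.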
async def test_update_idempotency(client: Client, env: WorkflowEnvironment):
    if env.supports_time_skipping:
        pytest.skip(
            "Java test server: https://github.com/temporalio/sdk-java/issues/1903"
        )
    task_queue = f"tq-{uuid.uuid4()}"
    async with Worker(
        client,
        task_queue=task_queue,
        workflows=[ClusterManagerWorkflow],
        activities=[assign_nodes_to_job, unassign_nodes_for_job, find_bad_nodes],
    ):
        cluster_manager_handle = await client.start_workflow(
            ClusterManagerWorkflow.run,
            ClusterManagerInput(),
            id=f"ClusterManagerWorkflow-{uuid.uuid4()}",
            task_queue=task_queue,
        )

        await cluster_manager_handle.signal(ClusterManagerWorkflow.start_cluster)

        result_1 = await cluster_manager_handle.execute_update(
            ClusterManagerWorkflow.assign_nodes_to_job,
            ClusterManagerAssignNodesToJobInput(
                total_num_nodes=5, job_name="jobby-job"
            ),
        )
        # Call the update a second time with the same input; the handler
        # should treat the repeated request as idempotent.
        result_2 = await cluster_manager_handle.execute_update(
            ClusterManagerWorkflow.assign_nodes_to_job,
            ClusterManagerAssignNodesToJobInput(
                total_num_nodes=5, job_name="jobby-job"
            ),
        )
        # The second call should not assign more nodes (it may report fewer
        # if the health check finds bad nodes between the two updates).
        assert result_1.nodes_assigned >= result_2.nodes_assigned

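# Verifies that a failed update surfaces to the caller as a
# WorkflowUpdateFailedError whose cause is the handler's ApplicationError.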
async def test_update_failure(client: Client, env: WorkflowEnvironment):
    if env.supports_time_skipping:
        pytest.skip(
            "Java test server: https://github.com/temporalio/sdk-java/issues/1903"
        )
    task_queue = f"tq-{uuid.uuid4()}"
    async with Worker(
        client,
        task_queue=task_queue,
        workflows=[ClusterManagerWorkflow],
        activities=[assign_nodes_to_job, unassign_nodes_for_job, find_bad_nodes],
    ):
        cluster_manager_handle = await client.start_workflow(
            ClusterManagerWorkflow.run,
            ClusterManagerInput(),
            id=f"ClusterManagerWorkflow-{uuid.uuid4()}",
            task_queue=task_queue,
        )

        await cluster_manager_handle.signal(ClusterManagerWorkflow.start_cluster)

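        # Claim most of the cluster up front so the next request cannot be
        # satisfied.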
        await cluster_manager_handle.execute_update(
            ClusterManagerWorkflow.assign_nodes_to_job,
            ClusterManagerAssignNodesToJobInput(
                total_num_nodes=24, job_name="big-task"
            ),
        )
        try:
            # Try to assign more nodes than remain available
            await cluster_manager_handle.execute_update(
                ClusterManagerWorkflow.assign_nodes_to_job,
                ClusterManagerAssignNodesToJobInput(
                    total_num_nodes=3, job_name="little-task"
                ),
            )
            pytest.fail("Expected the update to raise WorkflowUpdateFailedError")
        except WorkflowUpdateFailedError as e:
            assert isinstance(e.cause, ApplicationError)
            assert e.cause.message == "Cannot assign 3 nodes; have only 1 available"
        finally:
            await cluster_manager_handle.signal(ClusterManagerWorkflow.shutdown_cluster)
            result = await cluster_manager_handle.result()
            assert result.num_currently_assigned_nodes + result.num_bad_nodes == 24