Skip to content

Commit 95ee098

Browse files
committed
Handle sys.exit properly in python_worker
Weirdly, when sys.exit is called in a Python thread, it does not exit the process, but rather only kills the thread. To detect this situation, we start a dedicated Appose-Janitor thread for monitoring existing tasks for dead threads, and issue failure responses for any that did not already issue a terminal response normally (presumably because they were killed).
1 parent e30fc0b commit 95ee098

File tree

2 files changed

+53
-10
lines changed

2 files changed

+53
-10
lines changed

src/appose/python_worker.py

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import sys
4343
import traceback
4444
from threading import Thread
45+
from time import sleep
4546
from typing import Optional
4647

4748
# NB: Avoid relative imports so that this script can be run standalone.
@@ -53,6 +54,7 @@ class Task:
5354
def __init__(self, uuid: str) -> None:
5455
self.uuid = uuid
5556
self.outputs = {}
57+
self.finished = False
5658
self.cancel_requested = False
5759

5860
def update(
@@ -131,10 +133,10 @@ def execute_script():
131133

132134
self._report_completion()
133135

134-
# TODO: Consider whether to retain a reference to this Thread, and
135-
# expose a "force" option for cancelation that kills it forcibly; see:
136-
# https://www.geeksforgeeks.org/python-different-ways-to-kill-a-thread/
137-
Thread(target=execute_script, name=f"Appose-{self.uuid}").start()
136+
# Create a thread and save a reference to it, in case its script
137+
# ends up killing the thread. This happens e.g. if it calls sys.exit.
138+
self.thread = Thread(target=execute_script, name=f"Appose-{self.uuid}")
139+
self.thread.start()
138140

139141
def _report_launch(self) -> None:
140142
self._respond(ResponseType.LAUNCH, None)
@@ -144,27 +146,55 @@ def _report_completion(self) -> None:
144146
self._respond(ResponseType.COMPLETION, args)
145147

146148
def _respond(self, response_type: ResponseType, args: Optional[Args]) -> None:
149+
already_terminated = False
150+
if response_type.is_terminal():
151+
if self.finished:
152+
# This is not the first terminal response. Let's
153+
# remember, in case an exception is generated below,
154+
# so that we can avoid infinite recursion loops.
155+
already_terminated = True
156+
self.finished = True
157+
147158
response = {"task": self.uuid, "responseType": response_type.value}
148159
if args is not None:
149160
response.update(args)
150161
# NB: Flush is necessary to ensure service receives the data!
151162
try:
152163
print(encode(response), flush=True)
153164
except Exception:
154-
# Encoding can fail due to unsupported types, when the response
155-
# or its elements are not supported by JSON encoding.
165+
if already_terminated:
166+
# An exception triggered a failure response which
167+
# then triggered another exception. Let's stop here
168+
# to avoid the risk of infinite recursion loops.
169+
return
170+
# Encoding can fail due to unsupported types, when the
171+
# response or its elements are not supported by JSON encoding.
156172
# No matter what goes wrong, we want to tell the caller.
157-
if response_type is ResponseType.FAILURE:
158-
# TODO: How to address this hypothetical case
159-
# of a failure message triggering another failure?
160-
raise
161173
self.fail(traceback.format_exc())
162174

163175

164176
def main() -> None:
165177
_set_worker(True)
166178

167179
tasks = {}
180+
running = True
181+
182+
def cleanup_threads():
183+
while running:
184+
sleep(0.05)
185+
dead = {
186+
uuid: task
187+
for uuid, task in tasks.items()
188+
if not task.thread.is_alive()
189+
}
190+
for uuid, task in dead.items():
191+
tasks.pop(uuid)
192+
if not task.finished:
193+
# The task died before reporting a terminal status.
194+
# We report this situation as failure by thread death.
195+
task.fail("thread death")
196+
197+
Thread(target=cleanup_threads, name="Appose-Janitor").start()
168198

169199
while True:
170200
try:
@@ -193,6 +223,8 @@ def main() -> None:
193223
continue
194224
task.cancel_requested = True
195225

226+
running = False
227+
196228

197229
if __name__ == "__main__":
198230
main()

src/appose/service.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,17 @@ class ResponseType(Enum):
256256
FAILURE = "FAILURE"
257257
CRASH = "CRASH"
258258

259+
"""
260+
True iff response type is COMPLETE, CANCELED, FAILED, or CRASHED.
261+
"""
262+
def is_terminal(self):
263+
return self in (
264+
ResponseType.COMPLETION,
265+
ResponseType.CANCELATION,
266+
ResponseType.FAILURE,
267+
ResponseType.CRASH,
268+
)
269+
259270

260271
class TaskEvent:
261272
def __init__(self, task: "Task", response_type: ResponseType) -> None:

0 commit comments

Comments
 (0)