From f8898c1957704f64148175b3345d74d5d51a5f48 Mon Sep 17 00:00:00 2001 From: Tianshun Peng <1677173697@qq.com> Date: Tue, 26 Dec 2023 01:31:15 +0800 Subject: [PATCH 1/3] check pid exists when waiting for child --- src/viztracer/main.py | 11 +++++- src/viztracer/util.py | 72 ++++++++++++++++++++++++++++++++-------- tests/test_regression.py | 44 ++++++++++++++++++++++++ tests/test_util.py | 23 +++++++++++-- 4 files changed, 132 insertions(+), 18 deletions(-) diff --git a/src/viztracer/main.py b/src/viztracer/main.py index b7691108..cb5383eb 100644 --- a/src/viztracer/main.py +++ b/src/viztracer/main.py @@ -590,7 +590,16 @@ def wait_children_finish(self) -> None: try: if any((f.endswith(".viztmp") for f in os.listdir(self.multiprocess_output_dir))): same_line_print("Wait for child processes to finish, Ctrl+C to skip") - while any((f.endswith(".viztmp") for f in os.listdir(self.multiprocess_output_dir))): + while True: + remain_viztmp = [f for f in os.listdir(self.multiprocess_output_dir) if f.endswith(".viztmp")] + if not remain_viztmp: + break + remain_children = list(int(f[7:-12]) for f in remain_viztmp) + for pid in remain_children: + if pid_exists(pid): + break + else: + break time.sleep(0.5) except KeyboardInterrupt: pass diff --git a/src/viztracer/util.py b/src/viztracer/util.py index 286214fe..5e150a62 100644 --- a/src/viztracer/util.py +++ b/src/viztracer/util.py @@ -6,8 +6,14 @@ import os import re import sys +import ctypes from typing import Union +# Windows macros +STILL_ACTIVE = 0x103 +ERROR_ACCESS_DENIED = 0x5 +PROCESS_QUERY_LIMITED_INFORMATION = 0x1000 + def size_fmt(num: Union[int, float], suffix: str = 'B') -> str: for unit in ['', 'Ki', 'Mi', 'Gi']: @@ -101,7 +107,6 @@ def time_str_to_us(t_s: str) -> float: # https://github.com/giampaolo/psutil def pid_exists(pid): """Check whether pid exists in the current process table. - UNIX only. """ if pid < 0: return False @@ -110,19 +115,58 @@ def pid_exists(pid): # in the process group of the calling process. # On certain systems 0 is a valid PID but we have no way # to know that in a portable fashion. + # On Windows, 0 is an idle process buw we don't need to + # check it here raise ValueError('invalid PID 0') - try: - os.kill(pid, 0) - except OSError as err: - if err.errno == errno.ESRCH: - # ESRCH == No such process - return False - elif err.errno == errno.EPERM: - # EPERM clearly means there's a process to deny access to + # UNIX + if sys.platform != "win32": + try: + os.kill(pid, 0) + except OSError as err: + if err.errno == errno.ESRCH: + # ESRCH == No such process + return False + elif err.errno == errno.EPERM: + # EPERM clearly means there's a process to deny access to + return True + else: # pragma: no cover + # According to "man 2 kill" possible error values are + # (EINVAL, EPERM, ESRCH) + raise + else: return True - else: # pragma: no cover - # According to "man 2 kill" possible error values are - # (EINVAL, EPERM, ESRCH) - raise + # Windows else: - return True + kernel32 = ctypes.windll.kernel32 + + process = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid) + if not process: + if kernel32.GetLastError() == ERROR_ACCESS_DENIED: + # Access is denied, which means there's a process. + # Usually it's impossible to run here in viztracer. + return True # pragma: no cover + else: + return False + + exit_code = ctypes.c_ulong() + out = kernel32.GetExitCodeProcess(process, ctypes.byref(exit_code)) + kernel32.CloseHandle(process) + # nonzero return value means the funtion succeeds + if out: + if exit_code.value == STILL_ACTIVE: + # According to documents of GetExitCodeProcess. + # If a thread returns STILL_ACTIVE (259) as an error code, + # then applications that test for that value could interpret + # it to mean that the thread is still running, and continue + # to test for the completion of the thread after the thread + # has terminated, which could put the application into an + # infinite loop. + return True + else: + return False + else: # pragma: no cover + if kernel32.GetLastError() == ERROR_ACCESS_DENIED: + # Access is denied, which means there's a process. + # Usually it's impossible to run here in viztracer. + return True + return False # pragma: no cover diff --git a/tests/test_regression.py b/tests/test_regression.py index 84cf27c6..b91ee95b 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -384,3 +384,47 @@ def test_escape_string(self): expected_output_file="result.json", script=issue285_code, expected_stdout=".*Total Entries:.*") + + +wait_for_child = """ +import time +import multiprocessing + +def target(): + time.sleep(3) + +if __name__ == '__main__': + p = multiprocessing.Process(target=target) + p.start() + multiprocessing.process._children = set() + time.sleep(1) +""" + +wait_for_terminated_child = """ +import time +import os +import signal +import multiprocessing + +def target(): + time.sleep(3) + os.kill(os.getpid(), signal.SIGTERM) + +if __name__ == '__main__': + p = multiprocessing.Process(target=target) + p.start() + multiprocessing.process._children = set() + time.sleep(1) +""" + + +class TestWaitForChild(CmdlineTmpl): + def test_child_process_exits_normally(self): + self.template(["viztracer", "-o", "result.json", "--dump_raw", "cmdline_test.py"], + expected_output_file="result.json", expected_stdout=r"Wait", + script=wait_for_child) + + def test_child_process_exits_abnormally(self): + self.template(["viztracer", "-o", "result.json", "--dump_raw", "cmdline_test.py"], + expected_output_file="result.json", expected_stdout=r"Wait", + script=wait_for_terminated_child) diff --git a/tests/test_util.py b/tests/test_util.py index ba462506..51a5861d 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -3,13 +3,18 @@ import os import sys -import unittest +import time +from multiprocessing import Process import viztracer.util from .base_tmpl import BaseTmpl +def target_child(): + time.sleep(1) + + class TestUtil(BaseTmpl): def test_size_fmt(self): size_fmt = viztracer.util.size_fmt @@ -34,11 +39,23 @@ def test_time_str_to_us(self): self.assertRaises(ValueError, time_str_to_us, "0.0.0") self.assertRaises(ValueError, time_str_to_us, "invalid") - @unittest.skipIf(sys.platform == "win32", "pid_exists only works on Unix") def test_pid_exists(self): pid_exists = viztracer.util.pid_exists self.assertFalse(pid_exists(-1)) - self.assertTrue(pid_exists(1)) + if sys.platform != "win32": + self.assertTrue(pid_exists(1)) self.assertTrue(pid_exists(os.getpid())) with self.assertRaises(ValueError): pid_exists(0) + + # test child + p = Process(target=target_child) + p.start() + self.assertTrue(pid_exists(p.pid)) + p.join() + self.assertFalse(pid_exists(p.pid)) + + # test a process that doesn't exist + # Windows pid starts from 4 + if sys.platform == "win32": + self.assertFalse(pid_exists(2)) From 4342142b9c3d228638f17cb6ad9924d27e75f28b Mon Sep 17 00:00:00 2001 From: Tianshun Peng <1677173697@qq.com> Date: Sat, 6 Jan 2024 18:43:50 +0800 Subject: [PATCH 2/3] update wait subprocess --- src/viztracer/main.py | 15 +++++++++------ src/viztracer/util.py | 36 ++++++++++++++++++------------------ tests/test_regression.py | 10 ++++++++-- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/src/viztracer/main.py b/src/viztracer/main.py index cb5383eb..ab6cf210 100644 --- a/src/viztracer/main.py +++ b/src/viztracer/main.py @@ -16,6 +16,7 @@ import threading import time import types +import re from typing import Any, Dict, List, Optional, Tuple from viztracer.vcompressor import VCompressor @@ -592,12 +593,14 @@ def wait_children_finish(self) -> None: same_line_print("Wait for child processes to finish, Ctrl+C to skip") while True: remain_viztmp = [f for f in os.listdir(self.multiprocess_output_dir) if f.endswith(".viztmp")] - if not remain_viztmp: - break - remain_children = list(int(f[7:-12]) for f in remain_viztmp) - for pid in remain_children: - if pid_exists(pid): - break + for viztmp_file in remain_viztmp: + match = re.search(r'result_(\d+).json.viztmp', viztmp_file) + if match: + pid = int(match.group(1)) + if pid_exists(pid): + break + else: # pragma: no cover + color_print("WARNING", f"Can't parse {viztmp_file}, skip") else: break time.sleep(0.5) diff --git a/src/viztracer/util.py b/src/viztracer/util.py index 5e150a62..a1b5e31d 100644 --- a/src/viztracer/util.py +++ b/src/viztracer/util.py @@ -118,25 +118,8 @@ def pid_exists(pid): # On Windows, 0 is an idle process buw we don't need to # check it here raise ValueError('invalid PID 0') - # UNIX - if sys.platform != "win32": - try: - os.kill(pid, 0) - except OSError as err: - if err.errno == errno.ESRCH: - # ESRCH == No such process - return False - elif err.errno == errno.EPERM: - # EPERM clearly means there's a process to deny access to - return True - else: # pragma: no cover - # According to "man 2 kill" possible error values are - # (EINVAL, EPERM, ESRCH) - raise - else: - return True # Windows - else: + if sys.platform == "win32": kernel32 = ctypes.windll.kernel32 process = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid) @@ -170,3 +153,20 @@ def pid_exists(pid): # Usually it's impossible to run here in viztracer. return True return False # pragma: no cover + # UNIX + else: + try: + os.kill(pid, 0) + except OSError as err: + if err.errno == errno.ESRCH: + # ESRCH == No such process + return False + elif err.errno == errno.EPERM: + # EPERM clearly means there's a process to deny access to + return True + else: # pragma: no cover + # According to "man 2 kill" possible error values are + # (EINVAL, EPERM, ESRCH) + raise + else: + return True diff --git a/tests/test_regression.py b/tests/test_regression.py index b91ee95b..43f292e1 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -396,6 +396,9 @@ def target(): if __name__ == '__main__': p = multiprocessing.Process(target=target) p.start() + # The main process will join the child in multiprocessing.process._children. + # This is a hack to make sure the main process won't join the child process, + # so we can test the wait_children_finish function multiprocessing.process._children = set() time.sleep(1) """ @@ -413,6 +416,9 @@ def target(): if __name__ == '__main__': p = multiprocessing.Process(target=target) p.start() + # The main process will join the child in multiprocessing.process._children. + # This is a hack to make sure the main process won't join the child process, + # so we can test the wait_children_finish function multiprocessing.process._children = set() time.sleep(1) """ @@ -420,11 +426,11 @@ def target(): class TestWaitForChild(CmdlineTmpl): def test_child_process_exits_normally(self): - self.template(["viztracer", "-o", "result.json", "--dump_raw", "cmdline_test.py"], + self.template(["viztracer", "-o", "result.json", "cmdline_test.py"], expected_output_file="result.json", expected_stdout=r"Wait", script=wait_for_child) def test_child_process_exits_abnormally(self): - self.template(["viztracer", "-o", "result.json", "--dump_raw", "cmdline_test.py"], + self.template(["viztracer", "-o", "result.json", "cmdline_test.py"], expected_output_file="result.json", expected_stdout=r"Wait", script=wait_for_terminated_child) From 072b4ee79da0048c2e50143336e164e0afa32fa4 Mon Sep 17 00:00:00 2001 From: Tianshun Peng <1677173697@qq.com> Date: Sun, 7 Jan 2024 15:08:51 +0800 Subject: [PATCH 3/3] update comment --- src/viztracer/main.py | 2 +- src/viztracer/util.py | 4 ++-- tests/test_regression.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/viztracer/main.py b/src/viztracer/main.py index ab6cf210..f2dcbe90 100644 --- a/src/viztracer/main.py +++ b/src/viztracer/main.py @@ -600,7 +600,7 @@ def wait_children_finish(self) -> None: if pid_exists(pid): break else: # pragma: no cover - color_print("WARNING", f"Can't parse {viztmp_file}, skip") + color_print("WARNING", f"Unknown viztmp file {viztmp_file}") else: break time.sleep(0.5) diff --git a/src/viztracer/util.py b/src/viztracer/util.py index a1b5e31d..875d65c7 100644 --- a/src/viztracer/util.py +++ b/src/viztracer/util.py @@ -118,8 +118,8 @@ def pid_exists(pid): # On Windows, 0 is an idle process buw we don't need to # check it here raise ValueError('invalid PID 0') - # Windows if sys.platform == "win32": + # Windows kernel32 = ctypes.windll.kernel32 process = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, 0, pid) @@ -153,8 +153,8 @@ def pid_exists(pid): # Usually it's impossible to run here in viztracer. return True return False # pragma: no cover - # UNIX else: + # UNIX try: os.kill(pid, 0) except OSError as err: diff --git a/tests/test_regression.py b/tests/test_regression.py index 43f292e1..82481fe8 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -398,7 +398,7 @@ def target(): p.start() # The main process will join the child in multiprocessing.process._children. # This is a hack to make sure the main process won't join the child process, - # so we can test the wait_children_finish function + # so we can test the VizUI.wait_children_finish function multiprocessing.process._children = set() time.sleep(1) """ @@ -418,7 +418,7 @@ def target(): p.start() # The main process will join the child in multiprocessing.process._children. # This is a hack to make sure the main process won't join the child process, - # so we can test the wait_children_finish function + # so we can test the VizUI.wait_children_finish function multiprocessing.process._children = set() time.sleep(1) """