forked from collabora/WhisperLive
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
95 lines (76 loc) · 2.46 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import re
import signal
import subprocess
import sys
import time

import GPUtil
import psutil
# Global list holding the PIDs of the child processes started by run_script()
pids = []
def run_script(script_name):
    """Start a Python script as a child process and remember its PID.

    The child is launched with the interpreter that is running this monitor
    (``sys.executable``) instead of whatever ``python`` happens to resolve to
    on PATH, so it sees the same environment and installed packages.

    Args:
        script_name: Path of the script to run; passed to the interpreter as
            its first argument.
    """
    # sys.executable can be empty when the interpreter path is unknown;
    # fall back to the PATH lookup the original code relied on.
    interpreter = sys.executable or "python"
    process = subprocess.Popen([interpreter, script_name])
    pid = process.pid
    print(f"Started {script_name} with PID {pid}")
    # Recorded so kill_processes() can find the child later.
    pids.append(pid)
def kill_processes(timeout=10):
    """Terminate every tracked child process, wait for it to exit, and clear ``pids``.

    Each PID in ``pids`` is asked to terminate. The function then waits up to
    ``timeout`` seconds for the processes to exit and force-kills any that are
    still alive, so that a subsequent restart does not race against the old
    processes.

    Args:
        timeout: Seconds to wait for a graceful exit before force-killing,
            and again afterwards for the killed processes to disappear.
    """
    lookup_errors = (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess)

    signalled = []
    for pid in pids:
        try:
            process = psutil.Process(pid)
            process.terminate()
        except lookup_errors:
            print(f"Failed to terminate process with PID {pid}")
        else:
            print(f"Terminated process with PID {pid}")
            signalled.append(process)

    # terminate() only requests termination; wait so the children are really
    # gone (and reaped) before the caller continues.
    _, still_alive = psutil.wait_procs(signalled, timeout=timeout)
    for process in still_alive:
        try:
            process.kill()
        except lookup_errors:
            print(f"Failed to terminate process with PID {process.pid}")
    if still_alive:
        psutil.wait_procs(still_alive, timeout=timeout)

    pids.clear()
# Signal handler
def signal_handler(signum, frame):
    """Handle SIGINT: stop the tracked child processes, then exit with status 1.

    Args:
        signum: Number of the received signal (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit code 1, after the children are stopped.
    """
    print("Received SIGINT, terminating subprocess...")
    kill_processes()
    print("Exiting...")
    # sys.exit instead of the builtin exit(): the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(1)
# Register the signal handler for SIGINT (Ctrl+C)
signal.signal(signal.SIGINT, signal_handler)
def restart_scripts():
    """Stop any tracked child processes, then start the server followed by the client.

    The client is launched five seconds after the server.
    """
    kill_processes()
    # (script, seconds to pause after launching it), in launch order.
    launch_plan = (("run_server.py", 5), ("run_client.py", 0))
    for script, settle_seconds in launch_plan:
        run_script(script)
        if settle_seconds:
            time.sleep(settle_seconds)
def get_gpu_utilization(smi_output=None, line_marker="184", default_state="P2"):
    """Return the GPU performance state (e.g. ``"P8"``) found in nvidia-smi output.

    Despite its name, this function does not return a utilization percentage.
    It looks at the first line of the output that contains ``line_marker`` and
    returns the first ``P<digits>`` token on that line.

    Args:
        smi_output: Text to parse. When ``None`` (the default) the output of
            the ``nvidia-smi`` command is used.
        line_marker: Substring identifying the line that carries the
            performance state. The default ``"184"`` is presumably a value
            specific to the original machine's nvidia-smi table -- confirm
            before deploying elsewhere.
        default_state: Value returned when no marker line exists or the marker
            line carries no ``P<digits>`` token.

    Returns:
        The matched state string such as ``"P0"`` or ``"P8"``, or
        ``default_state`` when nothing could be parsed.

    Raises:
        OSError: If ``nvidia-smi`` cannot be executed.
        subprocess.CalledProcessError: If ``nvidia-smi`` exits with an error.
    """
    if smi_output is None:
        smi_output = subprocess.check_output(
            ["nvidia-smi"], text=True, encoding="utf-8"
        )

    for line in smi_output.split("\n"):
        if line_marker in line:
            match = re.search(r"P\d+", line)
            return match.group(0) if match else default_state

    # No line carried the marker. Previously this case passed None to
    # re.search and crashed with TypeError; fall back like the no-match case.
    return default_state
p8_count = 0 # Number of P8 readings counted so far
p8_thres = 2 # P8 count at which the scripts are restarted
def monitor_gpu():
    """Start the scripts and restart them whenever the GPU stays in state P8.

    The GPU state is polled every five seconds. Each consecutive ``"P8"``
    reading increments the module-level ``p8_count``; any other reading resets
    it. When the count reaches ``p8_thres`` the scripts are restarted, the
    counter is reset, and polling pauses for 120 seconds while they start up.

    This function loops forever and does not return.
    """
    # p8_count is rebound below, so it must be declared global; without this
    # declaration the first `p8_count += 1` raised UnboundLocalError.
    global p8_count

    restart_scripts()
    time.sleep(120)  # Wait for scripts to start up
    while True:
        gpu_status = get_gpu_utilization()
        if gpu_status == "P8":
            p8_count += 1
            if p8_count >= p8_thres:
                print("*" * 50)
                print("*", "GPU stopped, restarting scripts...")
                print("*" * 50)
                restart_scripts()
                # Start counting afresh; otherwise a single P8 reading after
                # the restart would immediately trigger another one.
                p8_count = 0
                time.sleep(120)  # Wait for scripts to start up
            else:
                print("*" * 30, f"GPU P8 detected, count {p8_count} / {p8_thres}")
        else:
            p8_count = 0
            print("*" * 30, f"GPU status is {gpu_status}")
        time.sleep(5)
if __name__ == "__main__":
    # Entry point: launch the scripts and keep supervising the GPU state.
    # (For a one-off launch without supervision, call restart_scripts() instead.)
    monitor_gpu()