-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathfix_xorg_conf.py
145 lines (118 loc) · 5.71 KB
/
fix_xorg_conf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import os
import sys
import subprocess
def extract_bus_id_hex_decimal(line):
    """Extract the PCI address from one line of ``lspci`` output.

    The first whitespace-separated token of ``line`` is expected to be a PCI
    address written in hexadecimal, either ``BB:DD.F`` (bus, device,
    function) or, when lspci prints PCI domains, ``DDDD:BB:DD.F``.

    Args:
        line: A single line of ``lspci`` output.

    Returns:
        A ``(bus_id_hex, bus_id_decimal)`` tuple. ``bus_id_hex`` is the
        address token exactly as it appears in ``line``. ``bus_id_decimal``
        is the same address with every component converted to decimal, as
        ``bus:device:function``; when a non-zero domain is present it is
        ``bus@domain:device:function``, the form Xorg accepts after ``PCI:``
        in a ``BusID`` option.

    Raises:
        ValueError: If the line does not start with a well-formed PCI address.
    """
    fields = line.split(None, 1)
    if not fields:
        raise ValueError("Cannot extract a PCI bus ID from an empty lspci line")
    bus_id_hex = fields[0]

    # Split "[domain:]bus:device.function" from the right so that an optional
    # leading domain does not shift the positions of the other components.
    location, dot, function_hex = bus_id_hex.rpartition('.')
    parts = location.split(':')
    if not dot or len(parts) not in (2, 3):
        raise ValueError("Unrecognized PCI bus ID: {!r}".format(bus_id_hex))

    domain_hex = parts[0] if len(parts) == 3 else '0'
    bus_hex, device_hex = parts[-2], parts[-1]

    # int(..., 16) raises ValueError by itself for empty or non-hex components.
    domain = int(domain_hex, 16)
    bus = int(bus_hex, 16)
    device = int(device_hex, 16)
    function = int(function_hex, 16)

    if domain:
        bus_id_decimal = "{}@{}:{}:{}".format(bus, domain, device, function)
    else:
        bus_id_decimal = "{}:{}:{}".format(bus, device, function)
    return bus_id_hex, bus_id_decimal
if __name__ == '__main__':
    # Script entry point: detect the GPUs reported by lspci, then rewrite the
    # xorg.conf given on the command line so that every Section "Device"
    # carries the BusID of a detected GPU. The original file is kept next to
    # it with a ".backup" suffix.
    if len(sys.argv) != 2:
        print("Required argument: <path to xorg.conf>")
        sys.exit(1)
    xorg_config = sys.argv[1]

    # Equivalent of: lspci | egrep -h "VGA|3D controller|Display controller"
    lspci_p = subprocess.Popen(['lspci'], stdout=subprocess.PIPE)
    lspci_vga_p = subprocess.Popen(
        ['egrep', '-h', 'VGA|3D controller|Display controller'],
        stdin=lspci_p.stdout,
        stdout=subprocess.PIPE,
        # Text mode: without it communicate() returns bytes on Python 3 and
        # the str-based splitting and "in" checks below raise TypeError.
        universal_newlines=True)
    # Close our copy of the pipe so lspci gets SIGPIPE if egrep exits early.
    lspci_p.stdout.close()
    vga_devices = lspci_vga_p.communicate()[0]
    lspci_p.wait()  # reap the first child of the pipeline

    # Each entry is (lspci line, bus id in hex, bus id in decimal).
    gpus = []
    instance_type = None
    for line in vga_devices.splitlines():
        if not line:
            continue
        if "Cirrus" in line:
            # Cirrus display adapters are skipped, they are not GPUs to configure.
            continue
        if "Advanced Micro Devices" in line:
            bus_id_hex, bus_id_decimal = extract_bus_id_hex_decimal(line)
            gpus.append((line, bus_id_hex, bus_id_decimal))
            if "Advanced Micro Devices, Inc. [AMD/ATI] Device 7362 (rev c3)" in line:  # AMD Radeon Pro V520
                instance_type = "EC2 g4ad"
            else:
                print("Unexpected AMD GPU device: {}".format(line))
        if "NVIDIA Corporation" in line:
            bus_id_hex, bus_id_decimal = extract_bus_id_hex_decimal(line)
            gpus.append((line, bus_id_hex, bus_id_decimal))
            if "GRID K520" in line:
                instance_type = "EC2 g2"
            elif "Tesla M60" in line:
                instance_type = "EC2 g3"
            elif ("Tesla T4" in line) or ("Device 1eb8" in line):
                instance_type = "EC2 g4"
            elif ("Tesla A10G" in line) or ("Device 2237" in line):
                instance_type = "EC2 g5"
            elif "Tesla K80" in line:
                instance_type = "EC2 p2"
            elif ("Tesla V100" in line) or ("Device 1db1" in line):
                instance_type = "EC2 p3"
            elif ("Tesla P100-PCIE" in line) or ("Device 15f8" in line):
                instance_type = "P100 PCIE"
            else:
                print("Unexpected NVIDIA GPU device: {}".format(line))

    if not gpus:
        print("No GPUs detected with 'lspci | egrep -h \"VGA|3D controller|Display controller\"'!")
        sys.exit(1)

    if instance_type is not None:
        print("Instance type: {}".format(instance_type))
    print("{} GPUs detected:".format(len(gpus)))
    print(" {: <10s} {: <10s} {}".format("BusID hex", "BusID dec", "lspci output"))
    for line, bus_id_hex, bus_id_decimal in gpus:
        print(" {: <10s} {: <10s} {}".format(bus_id_hex, bus_id_decimal, line))

    print("Fixing xorg.conf {}...".format(xorg_config))
    xorg_config_backup = xorg_config + ".backup"
    xorg_config_new = xorg_config + ".fixed.tmp"

    with open(xorg_config, 'r') as config:
        lines = config.readlines()

    # 1. Add a BusID line to every section Device (value taken from the output
    #    of lspci | egrep -h "VGA|3D controller|Display controller").
    # For EC2 g3, EC2 g4, EC2 g5, EC2 p3 and P100 PCIE additionally:
    # 2. Delete the whole section ServerLayout (comment it out with #).
    # 3. Delete the whole section Screen (comment it out with #).
    #
    # Steps 2 and 3 fix this error in /var/log/Xorg.0.log on those machines:
    #   (EE) NVIDIA(GPU-0): UseDisplayDevice "None" is not supported with GRID
    #   (EE) NVIDIA(GPU-0): displayless
    #   (EE) NVIDIA(GPU-0): Failed to select a display subsystem.
    section_start = "Section \""
    section_end = "EndSection"
    sections_to_delete = []
    if instance_type in ["EC2 g3", "EC2 g4", "EC2 g5", "EC2 p3", "P100 PCIE"]:
        sections_to_delete = ["ServerLayout", "Screen"]
    sections_deleted = []
    device_index = 0  # number of Device sections that received a BusID so far

    print(" Writing fixed xorg.conf to {}".format(xorg_config_new))
    with open(xorg_config_new, 'w') as updated:
        current_section = None
        for line in lines:
            # Match on the stripped line so that indentation, trailing
            # whitespace, CRLF endings or a final line without a newline do
            # not break section tracking, and so that already commented-out
            # '#Section "..."' lines are not mistaken for a section start.
            stripped = line.strip()
            removed = False

            if current_section is None and stripped.startswith(section_start):
                # Section "Name" -> Name
                current_section = stripped.split('"')[1]
                if current_section in sections_to_delete:
                    print(" Section {} deleted!".format(current_section))
                    sections_deleted.append(current_section)

            if current_section in sections_to_delete:
                removed = True

            if current_section is not None and stripped.startswith("BusID"):
                if current_section == "Device":
                    # Drop every existing BusID entry; a fresh one is written
                    # just before the end of the section.
                    removed = True

            if current_section is not None and stripped == section_end:
                if current_section == "Device":
                    if device_index >= len(gpus):
                        # Fail with a clear message instead of an IndexError.
                        print("More sections \"Device\" found than GPUs detected ({})!".format(len(gpus)))
                        sys.exit(1)
                    _, _, bus_id_decimal = gpus[device_index]
                    print(" BusID {} added!".format(bus_id_decimal))
                    updated.write(" BusID \"PCI:{}\"\n".format(bus_id_decimal))
                    device_index += 1
                current_section = None

            if removed:
                updated.write("#{}".format(line))
            else:
                updated.write("{}".format(line))

    if device_index == 0:
        print("Section \"Device\" was not found!")
        sys.exit(1)
    for section in sections_to_delete:
        if section not in sections_deleted:
            print("Section \"{}\" was not found!".format(section))
            sys.exit(1)

    # Only replace the original once the new file has been fully validated.
    os.rename(xorg_config, xorg_config_backup)
    print(" Backup saved to {}".format(xorg_config_backup))
    os.rename(xorg_config_new, xorg_config)