Skip to content

Commit 5e6c638

Browse files
committed
Add RoCE plugin
Collect RDMA over Converged Ethernet information. The new RoCE plugin can be used to retrieve the following data for RoCE devices: counters, HW counters, ECN configuration, GIDs and their attributes, and CMA configuration (TOS and mode).
1 parent 611e70e commit 5e6c638

File tree

1 file changed

+87
-0
lines changed

1 file changed

+87
-0
lines changed

sos/report/plugins/roce.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
2+
# This file is part of the sos project: https://github.com/sosreport/sos
3+
#
4+
# This copyrighted material is made available to anyone wishing to use,
5+
# modify, copy, or redistribute it subject to the terms and conditions of
6+
# version 2 of the GNU General Public License.
7+
#
8+
# See the LICENSE file in the source distribution for further information.
9+
10+
from sos.report.plugins import Plugin, IndependentPlugin
11+
12+
13+
class RoCE(Plugin, IndependentPlugin):
    """Collect RoCE (RDMA over Converged Ethernet) information.

    For every port with an Ethernet link layer on each device listed under
    /sys/class/infiniband, the plugin copies the port counters, HW
    counters, GIDs and GID attributes, copies the ECN settings (roce_np and
    roce_rp) of the device's network interfaces, and records the output of
    cma_roce_mode and cma_roce_tos for that port.
    """

    short_desc = 'RoCE (RDMA over Converged Ethernet) information'

    plugin_name = 'roce'
    profiles = ('hardware',)
    # rdma installed with iproute2
    packages = ('iproute2', 'infiniband-diags')

    def _is_ethernet_port(self, port_dir):
        """Return True when the port's link_layer file reads "Ethernet".

        :param port_dir: sysfs directory of a single port
        :returns: False when the link layer is anything else (Infiniband,
                  iWARP) or when the file cannot be opened or read
        """
        try:
            # the context manager closes the file even if the read fails
            with open(port_dir + "/link_layer", 'r',
                      encoding='UTF-8') as link_file:
                return link_file.readline() == "Ethernet\n"
        except IOError:
            return False

    def setup(self):
        """Register the sysfs files and commands to collect for RoCE."""
        ib_sys_dir = "/sys/class/infiniband/"
        ibs = self.listdir(ib_sys_dir) if self.path_isdir(ib_sys_dir) else []
        for ib in ibs:

            # cma_roce_mode and cma_roce_tos will not work for bond devices.
            # ibstat is used to check whether the device exists or not; for
            # a bond the command will return an error:
            # stat of IB device <device> failed: No such file or directory
            ibstat_out = self.exec_cmd(f'ibstat -s {ib}')
            skip_cma_roce = ibstat_out['status'] != 0

            ports_dir = ib_sys_dir + ib + "/ports"
            netsys = ib_sys_dir + ib + "/device/net"

            # a device without a ports directory has nothing to collect
            if not self.path_isdir(ports_dir):
                continue

            for port in self.listdir(ports_dir):
                port_dir = ports_dir + "/" + port

                # skip Infiniband and IWARP devices
                if not self._is_ethernet_port(port_dir):
                    continue

                # dump counters, HW counters, gids and gid attributes
                self.add_copy_spec([
                    port_dir + "/counters",
                    port_dir + "/hw_counters",
                    port_dir + "/gids",
                    port_dir + "/gid_attrs",
                ])

                # dump ECN configuration (roce_np and roce_rp) of every
                # network interface that belongs to the device
                if self.path_isdir(netsys):
                    for netdev in self.listdir(netsys):
                        ecn_dir = netsys + "/" + netdev + "/ecn"
                        self.add_copy_spec([
                            ecn_dir + "/roce_np",
                            ecn_dir + "/roce_rp",
                        ])

                if not skip_cma_roce:
                    # cma roce mode and cma roce tos
                    self.add_cmd_output([
                        f"cma_roce_mode -d {ib} -p {port}",
                        f"cma_roce_tos -d {ib} -p {port}",
                    ])

0 commit comments

Comments
 (0)