|
| 1 | +# Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. |
| 2 | +# This file is part of the sos project: https://github.com/sosreport/sos |
| 3 | +# |
| 4 | +# This copyrighted material is made available to anyone wishing to use, |
| 5 | +# modify, copy, or redistribute it subject to the terms and conditions of |
| 6 | +# version 2 of the GNU General Public License. |
| 7 | +# |
| 8 | +# See the LICENSE file in the source distribution for further information. |
| 9 | + |
| 10 | +from sos.report.plugins import Plugin, IndependentPlugin |
| 11 | + |
| 12 | + |
class RoCE(Plugin, IndependentPlugin):
    """Collect RoCE (RDMA over Converged Ethernet) state from sysfs.

    For every device listed under /sys/class/infiniband, each port whose
    ``link_layer`` reads "Ethernet" has its counters, hardware counters,
    GIDs and GID attributes copied, along with the ECN ``roce_np`` and
    ``roce_rp`` settings of the associated network devices. When
    ``ibstat -s <device>`` succeeds, the output of ``cma_roce_mode`` and
    ``cma_roce_tos`` is captured for the port as well.
    """

    short_desc = 'RoCE (RDMA over Converged Ethernet) information'

    plugin_name = 'roce'
    profiles = ('hardware',)
    # rdma installed with iproute2
    packages = ('iproute2', 'infiniband-diags')

    def _is_ethernet_port(self, port_dir):
        """Return True when the ``link_layer`` file of *port_dir* reads
        "Ethernet"; an unreadable or missing file counts as False."""
        link_layer_path = self.path_join(port_dir, "link_layer")
        try:
            with open(link_layer_path, "r", encoding="utf-8") as link_file:
                return link_file.readline().strip() == "Ethernet"
        except OSError:
            return False

    def _collect_ecn(self, net_dir):
        """Copy the roce_np/roce_rp ECN settings of every netdev listed
        in *net_dir*; do nothing when that directory does not exist."""
        if not self.path_isdir(net_dir):
            return
        for netdev in self.listdir(net_dir):
            self.add_copy_spec([
                f"{net_dir}/{netdev}/ecn/roce_np",
                f"{net_dir}/{netdev}/ecn/roce_rp",
            ])

    def setup(self):
        """Register the sysfs paths and commands to collect per port."""
        ib_sys_dir = "/sys/class/infiniband"
        if not self.path_isdir(ib_sys_dir):
            return

        for ib in self.listdir(ib_sys_dir):
            ib_dir = f"{ib_sys_dir}/{ib}"
            ports_dir = f"{ib_dir}/ports"
            # A device without a ports directory has nothing to collect;
            # skip it instead of letting listdir() abort the whole plugin.
            if not self.path_isdir(ports_dir):
                continue

            # cma_roce_mode and cma_roce_tos will not work for bond
            # devices. ibstat is used to check whether the device exists:
            # for a bond the command returns an error such as
            #   stat of IB device <device> failed: No such file or directory
            ibstat_out = self.exec_cmd(f'ibstat -s {ib}')
            skip_cma_roce = ibstat_out['status'] != 0

            for port in self.listdir(ports_dir):
                port_dir = f"{ports_dir}/{port}"

                # skip Infiniband and iWARP ports
                if not self._is_ethernet_port(port_dir):
                    continue

                # counters, HW counters, GIDs and GID attributes
                self.add_copy_spec([
                    f"{port_dir}/counters",
                    f"{port_dir}/hw_counters",
                    f"{port_dir}/gids",
                    f"{port_dir}/gid_attrs",
                ])

                # ECN configuration of the netdevs behind this device
                self._collect_ecn(f"{ib_dir}/device/net")

                if not skip_cma_roce:
                    # RoCE mode and type-of-service as seen by the RDMA CM
                    self.add_cmd_output([
                        f"cma_roce_mode -d {ib} -p {port}",
                        f"cma_roce_tos -d {ib} -p {port}",
                    ])
0 commit comments