Skip to content

Commit 581932c

Browse files
authored
implement block topology for K8s (#43)
Signed-off-by: Dmitry Shmulevich <[email protected]>
1 parent b753bdd commit 581932c

File tree

3 files changed

+183
-25
lines changed

3 files changed

+183
-25
lines changed

pkg/engines/k8s/labeler.go

Lines changed: 69 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,18 @@ import (
2424
"github.com/NVIDIA/topograph/pkg/topology"
2525
)
2626

27+
// Node label keys written by the topology labeler.
// hierarchyLayerAccelerator is set from the ID of the block vertex that
// contains the node; the block, spine and datacenter keys are set from the
// node's ancestor switches in the tree topology.
const (
28+
hierarchyLayerAccelerator = "network.topology.kubernetes.io/accelerator"
29+
hierarchyLayerBlock = "network.topology.kubernetes.io/block"
30+
hierarchyLayerSpine = "network.topology.kubernetes.io/spine"
31+
hierarchyLayerDatacenter = "network.topology.kubernetes.io/datacenter"
32+
)
33+
34+
// switchNetworkHierarchy lists the label keys given to a compute node's
// ancestor switches in the tree topology, ordered from the switch directly
// above the node outward: block, then spine, then datacenter.
var switchNetworkHierarchy = []string{hierarchyLayerBlock, hierarchyLayerSpine, hierarchyLayerDatacenter}
35+
36+
// nodeLabelMap maps a node name to that node's labels (label key -> label value).
37+
type nodeLabelMap map[string]map[string]string
38+
2739
// Labeler attaches labels to a node. AddNodeLabels receives the node name and
// a map of label keys to label values, and returns an error if the labels
// could not be added.
type Labeler interface {
2840
AddNodeLabels(context.Context, string, map[string]string) error
2941
}
@@ -39,48 +51,87 @@ func NewTopologyLabeler() *topologyLabeler {
3951
}
4052

4153
// ApplyNodeLabels gathers the topology labels for every node under v and hands
// them to labeler, one AddNodeLabels call per node.
//
// This span is a rendered diff: code following an "NN-" gutter is the removed
// pre-commit version; the description below covers the added ("NN+") version.
//
// It returns nil without doing anything when v is nil or has no child vertices.
// Labels from the block topology (v.Vertices[topology.TopologyBlock]) and from
// the tree topology (v.Vertices[topology.TopologyTree]) are merged per node
// name into a single nodeLabelMap before any label is applied. The first error
// from label collection or from labeler.AddNodeLabels is returned as-is.
func (l *topologyLabeler) ApplyNodeLabels(ctx context.Context, v *topology.Vertex, labeler Labeler) error {
42-
if v == nil {
54+
if v == nil || len(v.Vertices) == 0 {
4355
return nil
4456
}
45-
levels := []string{}
46-
if len(v.ID) != 0 {
47-
levels = append(levels, v.ID)
57+
58+
nodeMap := make(nodeLabelMap)
59+
if blockRoot, ok := v.Vertices[topology.TopologyBlock]; ok {
60+
if err := l.getBlockNodeLabels(blockRoot, nodeMap); err != nil {
61+
return err
62+
}
4863
}
4964

50-
return l.applyNodeLabels(ctx, v, labeler, levels)
65+
if treeRoot, ok := v.Vertices[topology.TopologyTree]; ok {
66+
layers := []string{}
67+
if len(treeRoot.ID) != 0 {
68+
layers = append(layers, treeRoot.ID)
69+
}
70+
if err := l.getTreeNodeLabels(treeRoot, nodeMap, layers); err != nil {
71+
return err
72+
}
73+
}
74+
75+
for nodeName, labels := range nodeMap {
76+
if err := labeler.AddNodeLabels(ctx, nodeName, labels); err != nil {
77+
return err
78+
}
79+
}
80+
81+
return nil
5182
}
5283

53-
func (l *topologyLabeler) applyNodeLabels(ctx context.Context, v *topology.Vertex, labeler Labeler, levels []string) error {
84+
// getTreeNodeLabels recursively walks the tree rooted at v and records the
// switch-hierarchy labels of every leaf vertex (a vertex with no children,
// treated as a compute node) into nodeMap, keyed by the leaf's Name.
//
// This span is a rendered diff: code following an "NN-" gutter is the removed
// pre-commit version; the description below covers the added ("NN+") version.
//
// layers holds vertex IDs from the current vertex upward: each recursive call
// prepends the child's ID, so layers[0] is v's own ID and layers[1:] are its
// ancestors, nearest first. For a leaf, the ancestor at position i of
// layers[1:] is stored under switchNetworkHierarchy[i] after passing through
// checkLabel; the loop stops at the first empty ancestor ID, and ancestors
// beyond the length of switchNetworkHierarchy get no label. Labels are added
// to the node's existing entry in nodeMap if one is already present.
//
// A leaf reached with empty layers records nothing. An error is returned when
// a leaf's ID differs from layers[0].
// NOTE(review): on the call paths visible here layers[0] always equals v.ID
// whenever layers is non-empty — confirm whether this check can ever fail.
func (l *topologyLabeler) getTreeNodeLabels(v *topology.Vertex, nodeMap nodeLabelMap, layers []string) error {
5485
if len(v.Vertices) == 0 { // compute node
55-
if len(levels) != 0 {
56-
if v.ID != levels[0] {
57-
return fmt.Errorf("instance ID mismatch: expected %s, got %s", v.ID, levels[0])
86+
if len(layers) != 0 {
87+
if v.ID != layers[0] {
88+
return fmt.Errorf("instance ID mismatch: expected %s, got %s", v.ID, layers[0])
5889
}
59-
60-
labels := make(map[string]string)
61-
for i, sw := range levels[1:] {
90+
nodeName := v.Name
91+
labels, ok := nodeMap[nodeName]
92+
if !ok {
93+
labels = make(map[string]string)
94+
nodeMap[nodeName] = labels
95+
}
96+
for i, sw := range layers[1:] {
6297
if len(sw) == 0 {
6398
break
6499
}
65-
labels[fmt.Sprintf("topology.kubernetes.io/network-level-%d", i+1)] = l.checkLabel(sw)
66-
}
67-
68-
if err := labeler.AddNodeLabels(ctx, v.Name, labels); err != nil {
69-
return err
100+
if i < len(switchNetworkHierarchy) {
101+
labels[(switchNetworkHierarchy[i])] = l.checkLabel(sw)
102+
}
70103
}
71104
}
72105
return nil
73106
}
74107

75108
for _, w := range v.Vertices {
76-
if err := l.applyNodeLabels(ctx, w, labeler, append([]string{w.ID}, levels...)); err != nil {
109+
if err := l.getTreeNodeLabels(w, nodeMap, append([]string{w.ID}, layers...)); err != nil {
77110
return err
78111
}
79112
}
80113

81114
return nil
82115
}
83116

117+
// getBlockNodeLabels records the accelerator label of every node in the block
// topology rooted at v. Each child of v is treated as a block and each child
// of a block as a node; the node's entry in nodeMap (created on first use,
// keyed by node.Name) receives hierarchyLayerAccelerator set to the block's ID
// after passing through checkLabel.
//
// It returns an error if a node already carries an accelerator label, i.e. the
// same node name appears under more than one block (or more than once).
// NOTE(review): the error message prints the previously stored value, which
// has been through checkLabel, next to the raw block.ID of the second block.
func (l *topologyLabeler) getBlockNodeLabels(v *topology.Vertex, nodeMap nodeLabelMap) error {
118+
for _, block := range v.Vertices {
119+
for _, node := range block.Vertices {
120+
nodeName := node.Name
121+
labels, ok := nodeMap[nodeName]
122+
if !ok {
123+
labels = make(map[string]string)
124+
nodeMap[nodeName] = labels
125+
}
126+
if val, ok := labels[hierarchyLayerAccelerator]; ok {
127+
return fmt.Errorf("multiple accelerator labels %s, %s for node %s", val, block.ID, nodeName)
128+
}
129+
labels[hierarchyLayerAccelerator] = l.checkLabel(block.ID)
130+
}
131+
}
132+
return nil
133+
}
134+
84135
// checkLabel checks the length of the label value.
85136
// If the value is longer than 63 characters (the Kubernetes limit), it is replaced with a hash.
86137
func (l *topologyLabeler) checkLabel(val string) string {

pkg/engines/k8s/labeler_test.go

Lines changed: 90 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,99 @@ func (l *testLabeler) AddNodeLabels(_ context.Context, nodeName string, labels m
3838
return nil
3939
}
4040

41-
func TestApplyNodeLabels(t *testing.T) {
41+
// TestApplyNodeLabelsWithTree runs ApplyNodeLabels on the tree test set (built
// with the long-label-name option enabled) against a recording testLabeler and
// checks that every node receives the expected block and spine labels.
// The value "xf946c4acef2d5939" is presumably the hashed replacement for the
// over-long switch ID — confirm against checkLabel.
//
// This span is a rendered diff: map entries following an "NN-" gutter are the
// removed pre-commit expectations.
func TestApplyNodeLabelsWithTree(t *testing.T) {
4242
root, _ := translate.GetTreeTestSet(true)
4343
labeler := &testLabeler{data: make(map[string]map[string]string)}
4444
data := map[string]map[string]string{
45-
"Node201": {"topology.kubernetes.io/network-level-1": "S2", "topology.kubernetes.io/network-level-2": "S1"},
46-
"Node202": {"topology.kubernetes.io/network-level-1": "S2", "topology.kubernetes.io/network-level-2": "S1"},
47-
"Node205": {"topology.kubernetes.io/network-level-1": "S2", "topology.kubernetes.io/network-level-2": "S1"},
48-
"Node304": {"topology.kubernetes.io/network-level-1": "xf946c4acef2d5939", "topology.kubernetes.io/network-level-2": "S1"},
49-
"Node305": {"topology.kubernetes.io/network-level-1": "xf946c4acef2d5939", "topology.kubernetes.io/network-level-2": "S1"},
50-
"Node306": {"topology.kubernetes.io/network-level-1": "xf946c4acef2d5939", "topology.kubernetes.io/network-level-2": "S1"},
45+
"Node201": {"network.topology.kubernetes.io/block": "S2", "network.topology.kubernetes.io/spine": "S1"},
46+
"Node202": {"network.topology.kubernetes.io/block": "S2", "network.topology.kubernetes.io/spine": "S1"},
47+
"Node205": {"network.topology.kubernetes.io/block": "S2", "network.topology.kubernetes.io/spine": "S1"},
48+
"Node304": {"network.topology.kubernetes.io/block": "xf946c4acef2d5939", "network.topology.kubernetes.io/spine": "S1"},
49+
"Node305": {"network.topology.kubernetes.io/block": "xf946c4acef2d5939", "network.topology.kubernetes.io/spine": "S1"},
50+
"Node306": {"network.topology.kubernetes.io/block": "xf946c4acef2d5939", "network.topology.kubernetes.io/spine": "S1"},
51+
}
52+
53+
err := NewTopologyLabeler().ApplyNodeLabels(context.TODO(), root, labeler)
54+
require.NoError(t, err)
55+
require.Equal(t, data, labeler.data)
56+
}
57+
58+
func TestApplyNodeLabelsWithBlock(t *testing.T) {
59+
root, _ := translate.GetBlockWithMultiIBTestSet()
60+
labeler := &testLabeler{data: make(map[string]map[string]string)}
61+
data := map[string]map[string]string{
62+
"Node104": {
63+
"network.topology.kubernetes.io/accelerator": "B1",
64+
"network.topology.kubernetes.io/block": "S2",
65+
"network.topology.kubernetes.io/spine": "S1",
66+
"network.topology.kubernetes.io/datacenter": "ibRoot2",
67+
},
68+
"Node105": {
69+
"network.topology.kubernetes.io/accelerator": "B1",
70+
"network.topology.kubernetes.io/block": "S2",
71+
"network.topology.kubernetes.io/spine": "S1",
72+
"network.topology.kubernetes.io/datacenter": "ibRoot2",
73+
},
74+
"Node106": {
75+
"network.topology.kubernetes.io/accelerator": "B1",
76+
"network.topology.kubernetes.io/block": "S2",
77+
"network.topology.kubernetes.io/spine": "S1",
78+
"network.topology.kubernetes.io/datacenter": "ibRoot2",
79+
},
80+
"Node201": {
81+
"network.topology.kubernetes.io/accelerator": "B2",
82+
"network.topology.kubernetes.io/block": "S3",
83+
"network.topology.kubernetes.io/spine": "S1",
84+
"network.topology.kubernetes.io/datacenter": "ibRoot2",
85+
},
86+
"Node202": {
87+
"network.topology.kubernetes.io/accelerator": "B2",
88+
"network.topology.kubernetes.io/block": "S3",
89+
"network.topology.kubernetes.io/spine": "S1",
90+
"network.topology.kubernetes.io/datacenter": "ibRoot2",
91+
},
92+
"Node205": {
93+
"network.topology.kubernetes.io/accelerator": "B2",
94+
"network.topology.kubernetes.io/block": "S3",
95+
"network.topology.kubernetes.io/spine": "S1",
96+
"network.topology.kubernetes.io/datacenter": "ibRoot2",
97+
},
98+
"Node301": {
99+
"network.topology.kubernetes.io/accelerator": "B3",
100+
"network.topology.kubernetes.io/block": "S5",
101+
"network.topology.kubernetes.io/spine": "S4",
102+
"network.topology.kubernetes.io/datacenter": "ibRoot1",
103+
},
104+
"Node302": {
105+
"network.topology.kubernetes.io/accelerator": "B3",
106+
"network.topology.kubernetes.io/block": "S5",
107+
"network.topology.kubernetes.io/spine": "S4",
108+
"network.topology.kubernetes.io/datacenter": "ibRoot1",
109+
},
110+
"Node303": {
111+
"network.topology.kubernetes.io/accelerator": "B3",
112+
"network.topology.kubernetes.io/block": "S5",
113+
"network.topology.kubernetes.io/spine": "S4",
114+
"network.topology.kubernetes.io/datacenter": "ibRoot1",
115+
},
116+
"Node401": {
117+
"network.topology.kubernetes.io/accelerator": "B4",
118+
"network.topology.kubernetes.io/block": "S6",
119+
"network.topology.kubernetes.io/spine": "S4",
120+
"network.topology.kubernetes.io/datacenter": "ibRoot1",
121+
},
122+
"Node402": {
123+
"network.topology.kubernetes.io/accelerator": "B4",
124+
"network.topology.kubernetes.io/block": "S6",
125+
"network.topology.kubernetes.io/spine": "S4",
126+
"network.topology.kubernetes.io/datacenter": "ibRoot1",
127+
},
128+
"Node403": {
129+
"network.topology.kubernetes.io/accelerator": "B4",
130+
"network.topology.kubernetes.io/block": "S6",
131+
"network.topology.kubernetes.io/spine": "S4",
132+
"network.topology.kubernetes.io/datacenter": "ibRoot1",
133+
},
51134
}
52135

53136
err := NewTopologyLabeler().ApplyNodeLabels(context.TODO(), root, labeler)

pkg/translate/output.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,17 @@ func split(input string) (string, string) {
351351
}
352352

353353
func GetTreeTestSet(testForLongLabelName bool) (*topology.Vertex, map[string]string) {
354+
//
355+
// S1
356+
// / \
357+
// S2 S3
358+
// | |
359+
// --- ---
360+
// I14 I21
361+
// I15 I22
362+
// I16 I25
363+
// --- ---
364+
//
354365
var s3name string
355366
if testForLongLabelName {
356367
s3name = "S3very-very-long-id-to-check-label-value-limits-of-63-characters"
@@ -394,6 +405,19 @@ func GetTreeTestSet(testForLongLabelName bool) (*topology.Vertex, map[string]str
394405
}
395406

396407
func GetBlockWithMultiIBTestSet() (*topology.Vertex, map[string]string) {
408+
//
409+
// ibRoot2 ibRoot1
410+
// | |
411+
// S1 S4
412+
// / \ / \
413+
// S2 S3 S5 S6
414+
// | | | |
415+
// --- --- --- ---
416+
// I14\ I21\ I31\ I41\
417+
// I15-B1 I22-B2 I32-B3 I42-B4
418+
// I16/ I25/ I33/ I43/
419+
// --- --- --- ---
420+
//
397421
instance2node := map[string]string{
398422
"I14": "Node104", "I15": "Node105", "I16": "Node106",
399423
"I21": "Node201", "I22": "Node202", "I25": "Node205",

0 commit comments

Comments
 (0)