diff --git a/pkg/server/grpc_client.go b/pkg/server/grpc_client.go index ce6c7c7..e6c439e 100644 --- a/pkg/server/grpc_client.go +++ b/pkg/server/grpc_client.go @@ -25,6 +25,7 @@ import ( "k8s.io/klog/v2" "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/pkg/metrics" pb "github.com/NVIDIA/topograph/pkg/protos" ) @@ -58,10 +59,20 @@ func forwardRequest(ctx context.Context, tr *common.TopologyRequest, url string, klog.V(4).Infof("Response: %s", response.String()) - return toGraph(response, cis), nil + return toGraph(response, cis, getTopologyFormat(tr.Engine.Params)), nil } -func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances) *common.Vertex { +// getTopologyFormat derives topology format from engine parameters: tree (default) or block +func getTopologyFormat(params map[string]string) string { + if len(params) != 0 { + if format, ok := params[common.KeyPlugin]; ok && len(format) != 0 { + return format + } + } + return common.ValTopologyTree +} + +func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances, format string) *common.Vertex { i2n := make(map[string]string) for _, ci := range cis { for instance, node := range ci.Instances { @@ -135,9 +146,10 @@ func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances) *comm } if len(i2n) != 0 { - klog.V(4).Infof("Adding unclaimed nodes %v", i2n) + klog.V(4).Infof("Adding nodes w/o topology: %v", i2n) + metrics.SetMissingTopology("GTS", len(i2n)) sw := &common.Vertex{ - ID: "extra", + ID: common.NoTopology, Vertices: make(map[string]*common.Vertex), } for instanceID, nodeName := range i2n { @@ -146,7 +158,7 @@ func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances) *comm ID: instanceID, } } - forest["extra"] = sw + forest[common.NoTopology] = sw } treeRoot := &common.Vertex{ @@ -156,15 +168,25 @@ func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances) *comm treeRoot.Vertices[name] = node } - blockRoot := &common.Vertex{ - Vertices: make(map[string]*common.Vertex), - } - for name, domain := range blocks { - blockRoot.Vertices[name] = domain - } + metadata := map[string]string{common.KeyPlugin: format} + if format == common.ValTopologyBlock { + blockRoot := &common.Vertex{ + Vertices: make(map[string]*common.Vertex), + } + for name, domain := range blocks { + blockRoot.Vertices[name] = domain + } + + return &common.Vertex{ + Vertices: map[string]*common.Vertex{ + common.ValTopologyBlock: blockRoot, + common.ValTopologyTree: treeRoot, + }, + Metadata: metadata, + } + } else { + treeRoot.Metadata = metadata - root := &common.Vertex{ - Vertices: map[string]*common.Vertex{common.ValTopologyBlock: blockRoot, common.ValTopologyTree: treeRoot}, + return treeRoot } - return root } diff --git a/pkg/server/grpc_client_test.go b/pkg/server/grpc_client_test.go index d07a532..bf0ecee 100644 --- a/pkg/server/grpc_client_test.go +++ b/pkg/server/grpc_client_test.go @@ -142,5 +142,45 @@ func TestToGraph(t *testing.T) { Vertices: map[string]*common.Vertex{common.ValTopologyBlock: blockRoot, common.ValTopologyTree: treeRoot}, } - require.Equal(t, root, toGraph(&pb.TopologyResponse{Instances: instances}, cis)) + require.Equal(t, root, toGraph(&pb.TopologyResponse{Instances: instances}, cis, common.ValTopologyBlock)) +} + +func TestGetTopologyFormat(t *testing.T) { + testCases := []struct { + name string + params map[string]string + format string + }{ + { + name: "Case 1: nil params", + params: nil, + format: common.ValTopologyTree, + }, + { + name: "Case 2: empty params", + params: make(map[string]string), + format: common.ValTopologyTree, + }, + { + name: "Case 3: missing key", + params: map[string]string{"a": "b"}, + format: common.ValTopologyTree, + }, + { + name: "Case 4: block topology", + params: map[string]string{common.KeyPlugin: common.ValTopologyBlock}, + format: common.ValTopologyBlock, + }, + { + name: "Case 5: tree topology", + params: map[string]string{common.KeyPlugin: common.ValTopologyTree}, + format: common.ValTopologyTree, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.format, getTopologyFormat(tc.params)) + }) + } } diff --git a/tests/models/medium-h100.yaml b/tests/models/medium-h100.yaml index 769c52a..4b3523f 100644 --- a/tests/models/medium-h100.yaml +++ b/tests/models/medium-h100.yaml @@ -44,3 +44,6 @@ capacity_blocks: - name: cb14 type: H100 nodes: [n14-1,n14-2] +- name: cb15 + type: CPU + nodes: [cpu-1,cpu-2]