diff --git a/go.mod b/go.mod index c29cd22..2a810aa 100644 --- a/go.mod +++ b/go.mod @@ -112,4 +112,5 @@ replace ( github.com/aws/aws-sdk-go-v2/credentials v1.17.45 => github.com/pkedy/aws-sdk-go-v2/credentials v0.0.0-20241115203348-0198b6c98cd9 github.com/aws/aws-sdk-go-v2/service/autoscaling v1.48.0 => github.com/pkedy/aws-sdk-go-v2/service/autoscaling v0.0.0-20241115203348-0198b6c98cd9 github.com/aws/aws-sdk-go-v2/service/ec2 v1.187.0 => github.com/pkedy/aws-sdk-go-v2/service/ec2 v0.0.0-20241115203348-0198b6c98cd9 + github.com/oracle/oci-go-sdk/v65 v65.78.0 => ../../oracle/oci-go-sdk/v65 ) diff --git a/go.sum b/go.sum index 29cc64a..e66523d 100644 --- a/go.sum +++ b/go.sum @@ -161,8 +161,6 @@ github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= -github.com/oracle/oci-go-sdk/v65 v65.78.0 h1:iM7lFFA7cJkUD4tmrlsAHWgL3HuTuF9mdvTAliMkcFA= -github.com/oracle/oci-go-sdk/v65 v65.78.0/go.mod h1:IBEV9l1qBzUpo7zgGaRUhbB05BVfcDGYRFBCPlTcPp0= github.com/pkedy/aws-sdk-go-v2 v0.0.0-20241115203348-0198b6c98cd9 h1:QhMFD0yJ9nEj4BCX9lREQ7twLM5oEL8y9UwKsRNJamo= github.com/pkedy/aws-sdk-go-v2 v0.0.0-20241115203348-0198b6c98cd9/go.mod h1:2SK5n0a2karNTv5tbP1SjsX0uhttou00v/HpXKM1ZUo= github.com/pkedy/aws-sdk-go-v2/service/ec2 v0.0.0-20241115203348-0198b6c98cd9 h1:wA7yd0OxRH3EWuKaJ7ijRowlWgH2b99nrP+d10+0Sc4= diff --git a/pkg/providers/oci/instance_topology.go b/pkg/providers/oci/instance_topology.go index f47e779..2fbbb6d 100644 --- a/pkg/providers/oci/instance_topology.go +++ b/pkg/providers/oci/instance_topology.go @@ -19,7 +19,6 @@ package oci import ( "context" "fmt" - "net/http" "sort" "time" @@ -29,6 +28,7 @@ import ( "github.com/NVIDIA/topograph/pkg/metrics" "github.com/NVIDIA/topograph/pkg/topology" + 
"github.com/NVIDIA/topograph/pkg/translate" ) type level int @@ -39,109 +39,111 @@ const ( hpcIslandLevel ) -func GenerateInstanceTopology(ctx context.Context, factory ClientFactory, cis []topology.ComputeInstances) ([]*core.ComputeBareMetalHostSummary, error) { - var err error - bareMetalHostSummaries := []*core.ComputeBareMetalHostSummary{} +func GenerateInstanceTopology(ctx context.Context, factory ClientFactory, pageSize *int, cis []topology.ComputeInstances) (hosts []core.ComputeHostSummary, blockMap map[string]string, err error) { + blockMap = make(map[string]string) + for _, ci := range cis { - if bareMetalHostSummaries, err = generateInstanceTopology(ctx, factory, &ci, bareMetalHostSummaries); err != nil { - return nil, err + var client Client + if client, err = factory(ci.Region, pageSize); err != nil { + return + } + if hosts, err = getComputeHostInfo(ctx, client, hosts, blockMap); err != nil { + return } } - return bareMetalHostSummaries, nil + return } -func getComputeCapacityTopologies(ctx context.Context, client Client) (cct []core.ComputeCapacityTopologySummary, err error) { - compartmentId := client.TenancyOCID() - - adRequest := identity.ListAvailabilityDomainsRequest{ - CompartmentId: &compartmentId, - } +func getComputeHostSummary(ctx context.Context, client Client, availabilityDomain *string) ([]core.ComputeHostSummary, error) { + var hosts []core.ComputeHostSummary - timeStart := time.Now() - ads, err := client.ListAvailabilityDomains(ctx, adRequest) - if err != nil { - return cct, fmt.Errorf("unable to get AD: %v", err) + req := core.ListComputeHostsRequest{ + CompartmentId: client.TenancyOCID(), + AvailabilityDomain: availabilityDomain, + Limit: client.Limit(), } - requestLatency.WithLabelValues("ListAvailabilityDomains", ads.HTTPResponse().Status).Observe(time.Since(timeStart).Seconds()) - for _, ad := range ads.Items { - cctRequest := core.ListComputeCapacityTopologiesRequest{ - CompartmentId: &compartmentId, - AvailabilityDomain: ad.Name, 
+	for {
+		timeStart := time.Now()
+		resp, err := client.ListComputeHosts(ctx, req) // check err before touching resp.HTTPResponse(), which may be nil on failure
+		if err != nil {
+			return nil, err
 		}
-		for {
-			timeStart := time.Now()
-			resp, err := client.ListComputeCapacityTopologies(ctx, cctRequest)
-			requestLatency.WithLabelValues("ListComputeCapacityTopologies", resp.HTTPResponse().Status).Observe(time.Since(timeStart).Seconds())
-			if err != nil {
-				if resp.HTTPResponse().StatusCode == http.StatusNotFound {
-					return cct, fmt.Errorf("%v for getting ComputeCapacityTopology in %s: %v", resp.HTTPResponse().StatusCode, *ad.Name, err)
-				} else {
-					return cct, fmt.Errorf("unable to get ComputeCapacity Topologies in %s : %v", *ad.Name, err)
-				}
-			}
-			cct = append(cct, resp.Items...)
-			klog.V(4).Infof("Received computeCapacityTopology %d groups; processed %d", len(resp.Items), len(cct))
-			if resp.OpcNextPage != nil {
-				cctRequest.Page = resp.OpcNextPage
-			} else {
-				break
-			}
+		requestLatency.WithLabelValues("ListComputeHosts", resp.HTTPResponse().Status).Observe(time.Since(timeStart).Seconds())
+		hosts = append(hosts, resp.Items...)
+
+		if resp.OpcNextPage != nil {
+			req.Page = resp.OpcNextPage
+		} else {
+			break
 		}
 	}
-	return cct, nil
+	return hosts, nil
 }
-func getBMHSummaryPerComputeCapacityTopology(ctx context.Context, client Client, topologyID string) (bmhSummary []core.ComputeBareMetalHostSummary, err error) {
-	compartmentId := client.TenancyOCID()
-	request := core.ListComputeCapacityTopologyComputeBareMetalHostsRequest{
-		ComputeCapacityTopologyId: &topologyID,
-		CompartmentId:             &compartmentId,
+// getLocalBlockMap fills blockMap with ComputeLocalBlockId -> ComputeGpuMemoryFabric Id entries for one availability domain.
+func getLocalBlockMap(ctx context.Context, client Client, availabilityDomain *string, blockMap map[string]string) error {
+	req := core.ListComputeGpuMemoryFabricsRequest{
+		CompartmentId:      client.TenancyOCID(),
+		AvailabilityDomain: availabilityDomain,
+		Limit:              client.Limit(),
 	}
+
 	for {
 		timeStart := time.Now()
-		response, err := client.ListComputeCapacityTopologyComputeBareMetalHosts(ctx, request)
-		requestLatency.WithLabelValues("ListComputeCapacityTopologyComputeBareMetalHosts", response.HTTPResponse().Status).Observe(time.Since(timeStart).Seconds())
+		resp, err := client.ListComputeGpuMemoryFabrics(ctx, req) // check err before touching resp.HTTPResponse(), which may be nil on failure
 		if err != nil {
-			klog.Errorln(err.Error())
-			break
+			return err
 		}
-		bmhSummary = append(bmhSummary, response.Items...)
+		requestLatency.WithLabelValues("ListComputeGpuMemoryFabrics", resp.HTTPResponse().Status).Observe(time.Since(timeStart).Seconds())
+ for _, fabrics := range resp.Items { + blockMap[*fabrics.ComputeLocalBlockId] = *fabrics.Id + } - if response.OpcNextPage != nil { - request.Page = response.OpcNextPage + if resp.OpcNextPage != nil { + req.Page = resp.OpcNextPage } else { break } } - return bmhSummary, nil + + return nil } -func getBareMetalHostSummaries(ctx context.Context, client Client) ([]core.ComputeBareMetalHostSummary, error) { - computeCapacityTopology, err := getComputeCapacityTopologies(ctx, client) +func getComputeHostInfo(ctx context.Context, client Client, hosts []core.ComputeHostSummary, blockMap map[string]string) ([]core.ComputeHostSummary, error) { + req := identity.ListAvailabilityDomainsRequest{ + CompartmentId: client.TenancyOCID(), + } + + timeStart := time.Now() + resp, err := client.ListAvailabilityDomains(ctx, req) if err != nil { - return nil, fmt.Errorf("unable to get compute capacity topologies: %s", err.Error()) + return nil, fmt.Errorf("unable to get availability domains: %v", err) } - klog.V(4).Infof("Received computeCapacityTopology for %d groups", len(computeCapacityTopology)) + requestLatency.WithLabelValues("ListAvailabilityDomains", resp.HTTPResponse().Status).Observe(time.Since(timeStart).Seconds()) - var bareMetalHostSummaries []core.ComputeBareMetalHostSummary - for _, cct := range computeCapacityTopology { - bareMetalHostSummary, err := getBMHSummaryPerComputeCapacityTopology(ctx, client, *cct.Id) + for _, ad := range resp.Items { + summary, err := getComputeHostSummary(ctx, client, ad.Name) if err != nil { - return nil, fmt.Errorf("unable to get bare metal hosts info: %s", err.Error()) + return nil, fmt.Errorf("unable to get hosts info: %v", err) + } + hosts = append(hosts, summary...) + + if err = getLocalBlockMap(ctx, client, ad.Name, blockMap); err != nil { + return nil, fmt.Errorf("unable to get local block map: %v", err) } - bareMetalHostSummaries = append(bareMetalHostSummaries, bareMetalHostSummary...) 
} - klog.V(4).Infof("Returning bareMetalHostSummaries for %d nodes", len(bareMetalHostSummaries)) - return bareMetalHostSummaries, nil + klog.V(4).Infof("Returning host info for %d nodes and %d blocks", len(hosts), len(blockMap)) + + return hosts, nil } -func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []topology.ComputeInstances) (*topology.Vertex, error) { +func toGraph(hosts []core.ComputeHostSummary, blockMap map[string]string, cis []topology.ComputeInstances) (*topology.Vertex, error) { instanceToNodeMap := make(map[string]string) for _, ci := range cis { for instance, node := range ci.Instances { @@ -152,18 +154,25 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []t nodes := make(map[string]*topology.Vertex) forest := make(map[string]*topology.Vertex) + domainMap := translate.NewDomainMap() + levelWiseSwitchCount := map[level]int{localBlockLevel: 0, networkBlockLevel: 0, hpcIslandLevel: 0} - bareMetalHostSummaries = filterAndSort(bareMetalHostSummaries, instanceToNodeMap) - for _, bmhSummary := range bareMetalHostSummaries { - nodeName := instanceToNodeMap[*bmhSummary.InstanceId] - delete(instanceToNodeMap, *bmhSummary.InstanceId) + hosts = filterAndSort(hosts, instanceToNodeMap) + for _, host := range hosts { + nodeName := instanceToNodeMap[*host.Id] + delete(instanceToNodeMap, *host.Id) instance := &topology.Vertex{ Name: nodeName, - ID: *bmhSummary.InstanceId, + ID: *host.Id, + } + + localBlockId := *host.LocalBlockId + + if blockDomain, ok := blockMap[localBlockId]; ok { + domainMap.AddHost(blockDomain, nodeName) } - localBlockId := *bmhSummary.ComputeLocalBlockId localBlock, ok := nodes[localBlockId] if !ok { levelWiseSwitchCount[localBlockLevel]++ @@ -176,7 +185,7 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []t } localBlock.Vertices[instance.ID] = instance - networkBlockId := *bmhSummary.ComputeNetworkBlockId + networkBlockId := *host.NetworkBlockId 
networkBlock, ok := nodes[networkBlockId]
 		if !ok {
 			levelWiseSwitchCount[networkBlockLevel]++
@@ -189,7 +198,7 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []t
 		}
 		networkBlock.Vertices[localBlockId] = localBlock
-		hpcIslandId := *bmhSummary.ComputeHpcIslandId
+		hpcIslandId := *host.HpcIslandId
 		hpcIsland, ok := nodes[hpcIslandId]
 		if !ok {
 			levelWiseSwitchCount[hpcIslandLevel]++
@@ -231,75 +240,66 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []t
 		Vertices: make(map[string]*topology.Vertex),
 	}
 	root.Vertices[topology.TopologyTree] = treeRoot
+	if len(domainMap) != 0 {
+		root.Vertices[topology.TopologyBlock] = domainMap.ToBlocks()
+	}
 	return root, nil
 }
-func filterAndSort(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, instanceToNodeMap map[string]string) []*core.ComputeBareMetalHostSummary {
-	var filtered []*core.ComputeBareMetalHostSummary
-	for _, bmh := range bareMetalHostSummaries {
-		if bmh.InstanceId == nil {
-			klog.V(5).Infof("Instance ID is nil for bmhSummary %s", bmh.String())
+// filterAndSort keeps the hosts that have Id, LocalBlockId, NetworkBlockId and
+// HpcIslandId set and whose Id is a key of instanceToNodeMap, then orders them
+// by HPC island, network block, local block and finally host Id.
+func filterAndSort(hosts []core.ComputeHostSummary, instanceToNodeMap map[string]string) []core.ComputeHostSummary {
+	var filtered []core.ComputeHostSummary
+	for _, host := range hosts {
+		if host.Id == nil {
+			klog.Warningf("InstanceID is nil for host %s", host.String())
 			continue
 		}
-		if bmh.ComputeLocalBlockId == nil {
-			klog.Warningf("ComputeLocalBlockId is nil for instance %q", *bmh.InstanceId)
-			missingAncestor.WithLabelValues("localBlock", *bmh.InstanceId).Add(float64(1))
+		if host.LocalBlockId == nil {
+			klog.Warningf("LocalBlockId is nil for instance %q", *host.Id)
+			missingAncestor.WithLabelValues("localBlock", *host.Id).Add(float64(1))
 			continue
 		}
-		if bmh.ComputeNetworkBlockId == nil {
-			klog.Warningf("ComputeNetworkBlockId is nil for instance %q", *bmh.InstanceId)
-			missingAncestor.WithLabelValues("networkBlock", *bmh.InstanceId).Add(float64(1))
+		if host.NetworkBlockId == nil {
+			klog.Warningf("NetworkBlockId is nil for instance %q", *host.Id)
+			missingAncestor.WithLabelValues("networkBlock", *host.Id).Add(float64(1))
 			continue
 		}
-		if bmh.ComputeHpcIslandId == nil {
-			klog.Warningf("ComputeHpcIslandId is nil for instance %q", *bmh.InstanceId)
-			missingAncestor.WithLabelValues("hpcIsland", *bmh.InstanceId).Add(float64(1))
+		if host.HpcIslandId == nil {
+			klog.Warningf("HpcIslandId is nil for instance %q", *host.Id)
+			missingAncestor.WithLabelValues("hpcIsland", *host.Id).Add(float64(1))
 			continue
 		}
-		if _, ok := instanceToNodeMap[*bmh.InstanceId]; ok {
-			klog.V(4).Infof("Adding bmhSummary %s", bmh.String())
-			filtered = append(filtered, bmh)
+		if _, ok := instanceToNodeMap[*host.Id]; ok {
+			klog.V(4).Infof("Adding host %s", host.String())
+			filtered = append(filtered, host)
 		} else {
-			klog.V(4).Infof("Skipping bmhSummary %s", bmh.String())
+			klog.V(4).Infof("Skipping host %s", host.String())
 		}
 	}
 	sort.Slice(filtered, func(i, j int) bool {
-		if filtered[i].ComputeHpcIslandId != filtered[j].ComputeHpcIslandId {
-			return *filtered[i].ComputeHpcIslandId < *filtered[j].ComputeHpcIslandId
+		// The ID fields are *string: compare the pointed-to values, because
+		// comparing the pointers only tests whether they are the same address.
+		if *filtered[i].HpcIslandId != *filtered[j].HpcIslandId {
+			return *filtered[i].HpcIslandId < *filtered[j].HpcIslandId
 		}
-		if filtered[i].ComputeNetworkBlockId != filtered[j].ComputeNetworkBlockId {
-			return *filtered[i].ComputeNetworkBlockId < *filtered[j].ComputeNetworkBlockId
+		if *filtered[i].NetworkBlockId != *filtered[j].NetworkBlockId {
+			return *filtered[i].NetworkBlockId < *filtered[j].NetworkBlockId
 		}
-		if filtered[i].ComputeLocalBlockId != filtered[j].ComputeLocalBlockId {
-			return *filtered[i].ComputeLocalBlockId < *filtered[j].ComputeLocalBlockId
+		if *filtered[i].LocalBlockId != *filtered[j].LocalBlockId {
+			return *filtered[i].LocalBlockId < *filtered[j].LocalBlockId
 		}
-		return *filtered[i].InstanceId < *filtered[j].InstanceId
+		return *filtered[i].Id < *filtered[j].Id
 	})
 	return filtered
 }
-
-func generateInstanceTopology(ctx context.Context, factory ClientFactory, ci *topology.ComputeInstances, bareMetalHostSummaries []*core.ComputeBareMetalHostSummary) ([]*core.ComputeBareMetalHostSummary, error) {
-	client, err := factory(ci.Region)
-	if err != nil {
-		return nil, err
-	}
-
-	bmh, err := getBareMetalHostSummaries(ctx, client)
-	if err != nil {
-		return nil, fmt.Errorf("unable to populate compute capacity topology: %s", err.Error())
-	}
-
-	for _, bm := range bmh {
-		bareMetalHostSummaries = append(bareMetalHostSummaries, &bm)
-	}
-	return bareMetalHostSummaries, nil
-}
diff --git a/pkg/providers/oci/provider.go b/pkg/providers/oci/provider.go
index 3403358..e3700a6 100644
--- a/pkg/providers/oci/provider.go
+++ b/pkg/providers/oci/provider.go
@@ -37,23 +37,29 @@ type Provider struct {
 	clientFactory ClientFactory
 }
-type ClientFactory func(region string) (Client, error)
+type ClientFactory func(region string, pageSize *int) (Client, error)
 type Client interface {
-	TenancyOCID() string
-	ListAvailabilityDomains(ctx context.Context, request identity.ListAvailabilityDomainsRequest) (response identity.ListAvailabilityDomainsResponse, err error)
-	ListComputeCapacityTopologies(ctx context.Context, request core.ListComputeCapacityTopologiesRequest) (response core.ListComputeCapacityTopologiesResponse, err error)
-	ListComputeCapacityTopologyComputeBareMetalHosts(ctx context.Context, request core.ListComputeCapacityTopologyComputeBareMetalHostsRequest) (response core.ListComputeCapacityTopologyComputeBareMetalHostsResponse, err error)
+	TenancyOCID() *string
+	Limit() *int
+	ListAvailabilityDomains(context.Context, identity.ListAvailabilityDomainsRequest) (identity.ListAvailabilityDomainsResponse, error)
+	ListComputeHosts(context.Context, core.ListComputeHostsRequest) (core.ListComputeHostsResponse, error)
+	ListComputeGpuMemoryFabrics(context.Context, core.ListComputeGpuMemoryFabricsRequest) (core.ListComputeGpuMemoryFabricsResponse, error)
 }
 type ociClient struct {
 	identity.IdentityClient
 	core.ComputeClient
tenancyOCID string + limit *int } -func (c *ociClient) TenancyOCID() string { - return c.tenancyOCID +func (c *ociClient) TenancyOCID() *string { + return &c.tenancyOCID +} + +func (c *ociClient) Limit() *int { + return c.limit } func NamedLoader() (string, providers.Loader) { @@ -66,7 +72,7 @@ func Loader(ctx context.Context, config providers.Config) (providers.Provider, e return nil, err } - clientFactory := func(region string) (Client, error) { + clientFactory := func(region string, limit *int) (Client, error) { identityClient, err := identity.NewIdentityClientWithConfigurationProvider(provider) if err != nil { return nil, fmt.Errorf("unable to create identity client. Bailing out : %v", err) @@ -92,6 +98,7 @@ func Loader(ctx context.Context, config providers.Config) (providers.Provider, e IdentityClient: identityClient, ComputeClient: computeClient, tenancyOCID: tenacyOCID, + limit: limit, }, nil } @@ -145,13 +152,13 @@ func New(ociClientFactory ClientFactory) *Provider { } } -func (p *Provider) GenerateTopologyConfig(ctx context.Context, _ *int, instances []topology.ComputeInstances) (*topology.Vertex, error) { - cfg, err := GenerateInstanceTopology(ctx, p.clientFactory, instances) +func (p *Provider) GenerateTopologyConfig(ctx context.Context, pageSize *int, instances []topology.ComputeInstances) (*topology.Vertex, error) { + cfg, blockMap, err := GenerateInstanceTopology(ctx, p.clientFactory, pageSize, instances) if err != nil { return nil, err } - return toGraph(cfg, instances) + return toGraph(cfg, blockMap, instances) } // Engine support diff --git a/pkg/providers/oci/provider_sim.go b/pkg/providers/oci/provider_sim.go new file mode 100644 index 0000000..c81c56a --- /dev/null +++ b/pkg/providers/oci/provider_sim.go @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package oci + +import ( + "context" + "fmt" + "net/http" + + "github.com/agrea/ptr" + "github.com/oracle/oci-go-sdk/v65/core" + "github.com/oracle/oci-go-sdk/v65/identity" + + "github.com/NVIDIA/topograph/pkg/models" + "github.com/NVIDIA/topograph/pkg/providers" +) + +const ( + NAME_SIM = "oci-sim" +) + +type SimClient struct { + Model *models.Model +} + +var httpResponce = &http.Response{ + Status: "200 OK", + StatusCode: 200, +} + +func (c *SimClient) TenancyOCID() *string { + val := "simulation" + return &val +} + +func (c *SimClient) Limit() *int { return nil } + +func (c *SimClient) ListAvailabilityDomains(ctx context.Context, req identity.ListAvailabilityDomainsRequest) (identity.ListAvailabilityDomainsResponse, error) { + return identity.ListAvailabilityDomainsResponse{ + RawResponse: httpResponce, + Items: []identity.AvailabilityDomain{ + {Name: ptr.String("ad")}, + }, + }, nil +} + +func (c *SimClient) ListComputeHosts(ctx context.Context, req core.ListComputeHostsRequest) (core.ListComputeHostsResponse, error) { + resp := core.ListComputeHostsResponse{ + RawResponse: httpResponce, + ComputeHostCollection: core.ComputeHostCollection{ + Items: make([]core.ComputeHostSummary, 0, len(c.Model.Nodes)), + }, + } + + for _, node := range c.Model.Nodes { + host := core.ComputeHostSummary{Id: ptr.String(node.Name)} + for i := 0; i < len(node.NetLayers) && i < 3; i++ { + switch i { + case 0: + host.LocalBlockId = ptr.String(node.NetLayers[i]) + case 1: + host.NetworkBlockId = ptr.String(node.NetLayers[i]) + case 2: + host.HpcIslandId = 
ptr.String(node.NetLayers[i]) + } + } + resp.Items = append(resp.Items, host) + } + + return resp, nil +} + +func (c *SimClient) ListComputeGpuMemoryFabrics(ctx context.Context, req core.ListComputeGpuMemoryFabricsRequest) (core.ListComputeGpuMemoryFabricsResponse, error) { + blockMap := make(map[string]string) + + for _, node := range c.Model.Nodes { + if len(node.NVLink) != 0 && len(node.NetLayers) != 0 { + blockMap[node.NetLayers[0]] = node.NVLink + } + } + + resp := core.ListComputeGpuMemoryFabricsResponse{ + RawResponse: httpResponce, + ComputeGpuMemoryFabricCollection: core.ComputeGpuMemoryFabricCollection{ + Items: make([]core.ComputeGpuMemoryFabricSummary, 0, len(blockMap)), + }, + } + + for block, domain := range blockMap { + resp.Items = append(resp.Items, core.ComputeGpuMemoryFabricSummary{ + Id: ptr.String(domain), + ComputeLocalBlockId: ptr.String(block), + }) + } + + return resp, nil +} + +func NamedLoaderSim() (string, providers.Loader) { + return NAME_SIM, LoaderSim +} + +func LoaderSim(ctx context.Context, cfg providers.Config) (providers.Provider, error) { + p, err := providers.GetSimulationParams(cfg.Params) + if err != nil { + return nil, err + } + + csp_model, err := models.NewModelFromFile(p.ModelPath) + if err != nil { + return nil, fmt.Errorf("unable to load model file for simulation, %v", err) + } + simClient := &SimClient{Model: csp_model} + + clientFactory := func(region string, _ *int) (Client, error) { + return simClient, nil + } + + return New(clientFactory), nil +} diff --git a/pkg/registry/registry.go b/pkg/registry/registry.go index 1fee299..1a7ea73 100644 --- a/pkg/registry/registry.go +++ b/pkg/registry/registry.go @@ -37,6 +37,7 @@ var Providers = providers.NewRegistry( cw.NamedLoader, gcp.NamedLoader, oci.NamedLoader, + oci.NamedLoaderSim, provider_test.NamedLoader, ) diff --git a/tests/configs/topograph-local-config.yaml b/tests/configs/topograph-local-config.yaml new file mode 100644 index 0000000..82323d5 --- /dev/null +++ 
b/tests/configs/topograph-local-config.yaml @@ -0,0 +1,5 @@ +http: + port: 49021 + ssl: false + +request_aggregation_delay: 2s diff --git a/tests/configs/topograph-sim-config.yaml b/tests/configs/topograph-sim-config.yaml new file mode 100644 index 0000000..fc3d047 --- /dev/null +++ b/tests/configs/topograph-sim-config.yaml @@ -0,0 +1,7 @@ +http: + port: 49021 + ssl: false + +forward_service_url: localhost:49025 + +request_aggregation_delay: 2s diff --git a/tests/payloads/test-oci-sim-block.json b/tests/payloads/test-oci-sim-block.json new file mode 100644 index 0000000..2219bad --- /dev/null +++ b/tests/payloads/test-oci-sim-block.json @@ -0,0 +1,30 @@ +{ + "provider": { + "name": "oci-sim", + "params": { + "model_path": "tests/models/medium.yaml" + } + }, + "engine": { + "name": "slurm", + "params": { + "plugin": "topology/block", + "block_sizes": "2,4" + } + }, + "nodes": [ + { + "region": "R1", + "instances": { + "n11-1": "n11-1", + "n11-2": "n11-2", + "n12-1": "n12-1", + "n12-2": "n12-2", + "n13-1": "n13-1", + "n13-2": "n13-2", + "n14-1": "n14-1", + "n14-2": "n14-2" + } + } + ] +} diff --git a/tests/payloads/test-oci-sim-tree.json b/tests/payloads/test-oci-sim-tree.json new file mode 100644 index 0000000..719a3a5 --- /dev/null +++ b/tests/payloads/test-oci-sim-tree.json @@ -0,0 +1,26 @@ +{ + "provider": { + "name": "oci-sim", + "params": { + "model_path": "tests/models/medium.yaml" + } + }, + "engine": { + "name": "slurm" + }, + "nodes": [ + { + "region": "R1", + "instances": { + "n11-1": "n11-1", + "n11-2": "n11-2", + "n12-1": "n12-1", + "n12-2": "n12-2", + "n13-1": "n13-1", + "n13-2": "n13-2", + "n14-1": "n14-1", + "n14-2": "n14-2" + } + } + ] +}