|
| 1 | +/* |
| 2 | + * Copyright 2025 NVIDIA CORPORATION |
| 3 | + * SPDX-License-Identifier: Apache-2.0 |
| 4 | + */ |
| 5 | + |
| 6 | +package dra |
| 7 | + |
| 8 | +import ( |
| 9 | + "context" |
| 10 | + |
| 11 | + "k8s.io/client-go/kubernetes" |
| 12 | + "k8s.io/client-go/rest" |
| 13 | + |
| 14 | + "github.com/NVIDIA/topograph/internal/k8s" |
| 15 | + "github.com/NVIDIA/topograph/pkg/providers" |
| 16 | + "github.com/NVIDIA/topograph/pkg/topology" |
| 17 | +) |
| 18 | + |
// NAME is the name that NamedLoader returns alongside this package's Loader.
const NAME = "dra"
| 20 | + |
// Provider builds a topology graph from Kubernetes node labels and
// annotations. Instances are created by Loader.
type Provider struct {
	// config is the REST configuration obtained by Loader via rest.InClusterConfig.
	config *rest.Config
	// client is the clientset built from config; it is used to list cluster nodes.
	client *kubernetes.Clientset
}
| 25 | + |
// NamedLoader returns the provider name (NAME) paired with the Loader
// function that constructs the provider.
func NamedLoader() (string, providers.Loader) {
	return NAME, Loader
}
| 29 | + |
| 30 | +func Loader(ctx context.Context, config providers.Config) (providers.Provider, error) { |
| 31 | + cfg, err := rest.InClusterConfig() |
| 32 | + if err != nil { |
| 33 | + return nil, err |
| 34 | + } |
| 35 | + |
| 36 | + client, err := kubernetes.NewForConfig(cfg) |
| 37 | + if err != nil { |
| 38 | + return nil, err |
| 39 | + } |
| 40 | + |
| 41 | + return &Provider{ |
| 42 | + config: cfg, |
| 43 | + client: client, |
| 44 | + }, nil |
| 45 | +} |
| 46 | + |
| 47 | +func (p *Provider) GenerateTopologyConfig(ctx context.Context, _ *int, instances []topology.ComputeInstances) (*topology.Vertex, error) { |
| 48 | + regIndices := make(map[string]int) // map[region : index] |
| 49 | + for i, ci := range instances { |
| 50 | + regIndices[ci.Region] = i |
| 51 | + } |
| 52 | + |
| 53 | + nodes, err := k8s.GetNodes(ctx, p.client) |
| 54 | + if err != nil { |
| 55 | + return nil, err |
| 56 | + } |
| 57 | + |
| 58 | + domainMap := topology.NewDomainMap() |
| 59 | + for _, node := range nodes.Items { |
| 60 | + clusterID, ok := node.Labels["nvidia.com/gpu.clique"] |
| 61 | + if !ok { |
| 62 | + continue |
| 63 | + } |
| 64 | + |
| 65 | + region := node.Annotations[topology.KeyNodeRegion] |
| 66 | + indx, ok := regIndices[region] |
| 67 | + if !ok { |
| 68 | + continue |
| 69 | + } |
| 70 | + |
| 71 | + i2n := instances[indx].Instances |
| 72 | + if host, ok := i2n[node.Name]; ok { |
| 73 | + domainMap.AddHost(clusterID, node.Name, host) |
| 74 | + } |
| 75 | + } |
| 76 | + |
| 77 | + return toGraph(domainMap), nil |
| 78 | +} |
| 79 | + |
| 80 | +func toGraph(domainMap topology.DomainMap) *topology.Vertex { |
| 81 | + root := &topology.Vertex{ |
| 82 | + Vertices: make(map[string]*topology.Vertex), |
| 83 | + Metadata: make(map[string]string), |
| 84 | + } |
| 85 | + root.Vertices[topology.TopologyBlock] = domainMap.ToBlocks() |
| 86 | + |
| 87 | + return root |
| 88 | +} |
| 89 | + |
| 90 | +func GetNodeAnnotations(ctx context.Context, hostname string) (map[string]string, error) { |
| 91 | + annotations := map[string]string{ |
| 92 | + topology.KeyNodeInstance: hostname, |
| 93 | + topology.KeyNodeRegion: "local", |
| 94 | + } |
| 95 | + |
| 96 | + return annotations, nil |
| 97 | +} |
0 commit comments