Skip to content

Commit 0ce010a

Browse files
authored
make switch names in oci shorter (#5)
* make switch names in oci shorter Signed-off-by: Ajay Mishra <[email protected]>
1 parent d612876 commit 0ce010a

File tree

3 files changed

+163
-43
lines changed

3 files changed

+163
-43
lines changed

pkg/oci/instance_topology.go

Lines changed: 73 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222
"net/http"
23+
"sort"
2324
"time"
2425

2526
OCICommon "github.com/oracle/oci-go-sdk/v65/common"
@@ -30,6 +31,14 @@ import (
3031
"github.com/NVIDIA/topograph/pkg/common"
3132
)
3233

34+
// level is the tier of a switch in the generated topology. Its numeric value
// is used as the first number in the generated "Switch.<level>.<n>" names.
type level int

// Switch tiers, numbered starting at 1.
const (
	_ level = iota // zero is deliberately unused so the first tier is 1

	localBlockLevel
	networkBlockLevel
	hpcIslandLevel
)
41+
3342
func GenerateInstanceTopology(ctx context.Context, creds OCICommon.ConfigurationProvider, cis []common.ComputeInstances) ([]*core.ComputeBareMetalHostSummary, error) {
3443
var err error
3544
bareMetalHostSummaries := []*core.ComputeBareMetalHostSummary{}
@@ -144,73 +153,51 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []c
144153

145154
nodes := make(map[string]*common.Vertex)
146155
forest := make(map[string]*common.Vertex)
147-
156+
levelWiseSwitchCount := map[level]int{localBlockLevel: 0, networkBlockLevel: 0, hpcIslandLevel: 0}
157+
bareMetalHostSummaries = filterAndSort(bareMetalHostSummaries, instanceToNodeMap)
148158
for _, bmhSummary := range bareMetalHostSummaries {
149-
if bmhSummary.InstanceId == nil {
150-
klog.V(5).Infof("Skipped bmhSummary %s", bmhSummary.String())
151-
continue
152-
}
153-
nodeName, ok := instanceToNodeMap[*bmhSummary.InstanceId]
154-
if !ok {
155-
klog.V(5).Infof("Node not found for instance ID %s", *bmhSummary.InstanceId)
156-
continue
157-
}
158-
klog.V(4).Infof("Found node %q instance %q", nodeName, *bmhSummary.InstanceId)
159+
nodeName := instanceToNodeMap[*bmhSummary.InstanceId]
159160
delete(instanceToNodeMap, *bmhSummary.InstanceId)
160161

161162
instance := &common.Vertex{
162163
Name: nodeName,
163164
ID: *bmhSummary.InstanceId,
164165
}
165166

166-
localBlockId := "lb_nil"
167-
if bmhSummary.ComputeLocalBlockId != nil {
168-
localBlockId = *bmhSummary.ComputeLocalBlockId
169-
} else {
170-
klog.Warningf("ComputeLocalBlockId is nil for instance %q", *bmhSummary.InstanceId)
171-
missingAncestor.WithLabelValues("localBlock", nodeName).Add(float64(1))
172-
}
173-
167+
localBlockId := *bmhSummary.ComputeLocalBlockId
174168
localBlock, ok := nodes[localBlockId]
175169
if !ok {
170+
levelWiseSwitchCount[localBlockLevel]++
176171
localBlock = &common.Vertex{
177172
ID: localBlockId,
178173
Vertices: make(map[string]*common.Vertex),
174+
Name: fmt.Sprintf("Switch.%d.%d", localBlockLevel, levelWiseSwitchCount[localBlockLevel]),
179175
}
180176
nodes[localBlockId] = localBlock
181177
}
182178
localBlock.Vertices[instance.ID] = instance
183179

184-
networkBlockId := "nw_nil"
185-
if bmhSummary.ComputeNetworkBlockId != nil {
186-
networkBlockId = *bmhSummary.ComputeNetworkBlockId
187-
} else {
188-
klog.Warningf("ComputeNetworkBlockId is nil for instance %q", *bmhSummary.InstanceId)
189-
missingAncestor.WithLabelValues("networkBlock", nodeName).Add(float64(1))
190-
}
191-
180+
networkBlockId := *bmhSummary.ComputeNetworkBlockId
192181
networkBlock, ok := nodes[networkBlockId]
193182
if !ok {
183+
levelWiseSwitchCount[networkBlockLevel]++
194184
networkBlock = &common.Vertex{
195185
ID: networkBlockId,
196186
Vertices: make(map[string]*common.Vertex),
187+
Name: fmt.Sprintf("Switch.%d.%d", networkBlockLevel, levelWiseSwitchCount[networkBlockLevel]),
197188
}
198189
nodes[networkBlockId] = networkBlock
199190
}
200191
networkBlock.Vertices[localBlockId] = localBlock
201192

202-
hpcIslandId := "hpc_nil"
203-
if bmhSummary.ComputeHpcIslandId != nil {
204-
hpcIslandId = *bmhSummary.ComputeHpcIslandId
205-
} else {
206-
klog.Warningf("ComputeHpcIslandId is nil for instance %q", *bmhSummary.InstanceId)
207-
missingAncestor.WithLabelValues("hpcIsland", nodeName).Add(float64(1))
208-
}
193+
hpcIslandId := *bmhSummary.ComputeHpcIslandId
209194
hpcIsland, ok := nodes[hpcIslandId]
210195
if !ok {
196+
levelWiseSwitchCount[hpcIslandLevel]++
211197
hpcIsland = &common.Vertex{
212198
ID: hpcIslandId,
213199
Vertices: make(map[string]*common.Vertex),
200+
Name: fmt.Sprintf("Switch.%d.%d", hpcIslandLevel, levelWiseSwitchCount[hpcIslandLevel]),
214201
}
215202
nodes[hpcIslandId] = hpcIsland
216203
forest[hpcIslandId] = hpcIsland
@@ -244,6 +231,58 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []c
244231

245232
}
246233

234+
func filterAndSort(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, instanceToNodeMap map[string]string) []*core.ComputeBareMetalHostSummary {
235+
var filtered []*core.ComputeBareMetalHostSummary
236+
for _, bmh := range bareMetalHostSummaries {
237+
if bmh.InstanceId == nil {
238+
klog.V(5).Infof("Instance ID is nil for bmhSummary %s", bmh.String())
239+
continue
240+
}
241+
242+
if bmh.ComputeLocalBlockId == nil {
243+
klog.Warningf("ComputeLocalBlockId is nil for instance %q", *bmh.InstanceId)
244+
missingAncestor.WithLabelValues("localBlock", *bmh.InstanceId).Add(float64(1))
245+
continue
246+
}
247+
248+
if bmh.ComputeNetworkBlockId == nil {
249+
klog.Warningf("ComputeNetworkBlockId is nil for instance %q", *bmh.InstanceId)
250+
missingAncestor.WithLabelValues("networkBlock", *bmh.InstanceId).Add(float64(1))
251+
continue
252+
}
253+
254+
if bmh.ComputeHpcIslandId == nil {
255+
klog.Warningf("ComputeHpcIslandId is nil for instance %q", *bmh.InstanceId)
256+
missingAncestor.WithLabelValues("hpcIsland", *bmh.InstanceId).Add(float64(1))
257+
continue
258+
}
259+
260+
if _, ok := instanceToNodeMap[*bmh.InstanceId]; ok {
261+
klog.V(4).Infof("Adding bmhSummary %s", bmh.String())
262+
filtered = append(filtered, bmh)
263+
} else {
264+
klog.V(4).Infof("Skipping bmhSummary %s", bmh.String())
265+
}
266+
}
267+
268+
sort.Slice(filtered, func(i, j int) bool {
269+
if filtered[i].ComputeHpcIslandId != filtered[j].ComputeHpcIslandId {
270+
return *filtered[i].ComputeHpcIslandId < *filtered[j].ComputeHpcIslandId
271+
}
272+
273+
if filtered[i].ComputeNetworkBlockId != filtered[j].ComputeNetworkBlockId {
274+
return *filtered[i].ComputeNetworkBlockId < *filtered[j].ComputeNetworkBlockId
275+
}
276+
277+
if filtered[i].ComputeLocalBlockId != filtered[j].ComputeLocalBlockId {
278+
return *filtered[i].ComputeLocalBlockId < *filtered[j].ComputeLocalBlockId
279+
}
280+
281+
return *filtered[i].InstanceId < *filtered[j].InstanceId
282+
})
283+
return filtered
284+
}
285+
247286
func generateInstanceTopology(ctx context.Context, provider OCICommon.ConfigurationProvider, ci *common.ComputeInstances, bareMetalHostSummaries []*core.ComputeBareMetalHostSummary) ([]*core.ComputeBareMetalHostSummary, error) {
248287
identityClient, err := identity.NewIdentityClientWithConfigurationProvider(provider)
249288
if err != nil {

pkg/translate/output.go

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,15 @@ func ToSLURM(wr io.Writer, root *common.Vertex) error {
3131
leaves := make(map[string][]string)
3232
parents := []*common.Vertex{}
3333
queue := []*common.Vertex{root}
34+
idToName := make(map[string]string)
3435

3536
for len(queue) > 0 {
3637
v := queue[0]
3738
queue = queue[1:]
3839
if len(v.ID) != 0 {
3940
parents = append(parents, v)
4041
}
41-
42+
idToName[v.ID] = v.Name
4243
for _, w := range v.Vertices {
4344
if len(w.Vertices) == 0 { // it's a leaf; don't add to queue
4445
_, ok := leaves[v.ID]
@@ -55,14 +56,22 @@ func ToSLURM(wr io.Writer, root *common.Vertex) error {
5556

5657
for _, sw := range parents {
5758
if _, ok := leaves[sw.ID]; !ok {
58-
if err := writeSwitch(wr, sw); err != nil {
59-
return err
59+
err := writeSwitch(wr, sw)
60+
if err != nil {
61+
return fmt.Errorf("failed to write switch %s: %w", sw.ID, err)
6062
}
6163
}
6264
}
63-
65+
var comment, switchName string
6466
for sw, nodes := range leaves {
65-
_, err := wr.Write([]byte(fmt.Sprintf("SwitchName=%s Nodes=%s\n", sw, strings.Join(compress(nodes), ","))))
67+
if idToName[sw] != "" {
68+
comment = fmt.Sprintf("# %s=%s\n", idToName[sw], sw)
69+
switchName = idToName[sw]
70+
} else {
71+
comment = ""
72+
switchName = sw
73+
}
74+
_, err := wr.Write([]byte(fmt.Sprintf("%sSwitchName=%s Nodes=%s\n", comment, switchName, strings.Join(compress(nodes), ","))))
6675
if err != nil {
6776
return err
6877
}
@@ -78,10 +87,20 @@ func writeSwitch(wr io.Writer, v *common.Vertex) error {
7887

7988
arr := make([]string, 0, len(v.Vertices))
8089
for _, node := range v.Vertices {
81-
arr = append(arr, node.ID)
90+
if node.Name == "" {
91+
arr = append(arr, node.ID)
92+
} else {
93+
arr = append(arr, node.Name)
94+
}
8295
}
83-
84-
_, err := wr.Write([]byte(fmt.Sprintf("SwitchName=%s Switches=%s\n", v.ID, strings.Join(compress(arr), ","))))
96+
var comment string
97+
if v.Name == "" {
98+
comment = ""
99+
v.Name = v.ID
100+
} else {
101+
comment = fmt.Sprintf("# %s=%s\n", v.Name, v.ID)
102+
}
103+
_, err := wr.Write([]byte(fmt.Sprintf("%sSwitchName=%s Switches=%s\n", comment, v.Name, strings.Join(compress(arr), ","))))
85104
if err != nil {
86105
return err
87106
}
@@ -90,7 +109,7 @@ func writeSwitch(wr io.Writer, v *common.Vertex) error {
90109
}
91110

92111
// compress finds contiguous numerical suffixes in names and presents them as ranges.
93-
// example: ["eos0507", "eos0509", "eos0508"] -> ["eos0[507-509]
112+
// example: ["eos0507", "eos0509", "eos0508"] -> ["eos0[507-509]"]
94113
func compress(input []string) []string {
95114
ret := []string{}
96115
keys := []string{}

pkg/translate/output_test.go

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"bytes"
2121
"testing"
2222

23+
"github.com/NVIDIA/topograph/pkg/common"
2324
"github.com/stretchr/testify/require"
2425
)
2526

@@ -32,6 +33,17 @@ SwitchName=S3 Nodes=Node[304-306]
3233
testConfig2 = `SwitchName=S1 Switches=S[2-3]
3334
SwitchName=S3 Nodes=Node[304-306]
3435
SwitchName=S2 Nodes=Node[201-202],Node205
36+
`
37+
shortNameExpectedResult = `# switch.3.1=hpcislandid-1
38+
SwitchName=switch.3.1 Switches=switch.2.[1-2]
39+
# switch.2.1=network-block-1
40+
SwitchName=switch.2.1 Switches=switch.1.1
41+
# switch.2.2=network-block-2
42+
SwitchName=switch.2.2 Switches=switch.1.2
43+
# switch.1.1=local-block-1
44+
SwitchName=switch.1.1 Nodes=node-1
45+
# switch.1.2=local-block-2
46+
SwitchName=switch.1.2 Nodes=node-2
3547
`
3648
)
3749

@@ -48,6 +60,56 @@ func TestToSLURM(t *testing.T) {
4860
}
4961
}
5062

63+
func TestToSlurmNameShortener(t *testing.T) {
64+
v := &common.Vertex{
65+
Vertices: map[string]*common.Vertex{
66+
"hpcislandid-1": {
67+
ID: "hpcislandid-1",
68+
Name: "switch.3.1",
69+
Vertices: map[string]*common.Vertex{
70+
"network-block-1": {
71+
ID: "network-block-1",
72+
Name: "switch.2.1",
73+
Vertices: map[string]*common.Vertex{
74+
"local-block-1": {
75+
ID: "local-block-1",
76+
Name: "switch.1.1",
77+
Vertices: map[string]*common.Vertex{
78+
"node-1": {
79+
ID: "node-1-id",
80+
Name: "node-1",
81+
},
82+
},
83+
},
84+
},
85+
},
86+
"network-block-2": {
87+
ID: "network-block-2",
88+
Name: "switch.2.2",
89+
Vertices: map[string]*common.Vertex{
90+
"local-block-2": {
91+
ID: "local-block-2",
92+
Name: "switch.1.2",
93+
Vertices: map[string]*common.Vertex{
94+
"node-2": {
95+
ID: "node-2-id",
96+
Name: "node-2",
97+
},
98+
},
99+
},
100+
},
101+
},
102+
},
103+
},
104+
},
105+
}
106+
107+
buf := &bytes.Buffer{}
108+
err := ToSLURM(buf, v)
109+
require.NoError(t, err)
110+
require.Equal(t, shortNameExpectedResult, buf.String())
111+
}
112+
51113
func TestCompress(t *testing.T) {
52114
testCases := []struct {
53115
name string

0 commit comments

Comments
 (0)