Skip to content

Commit 2399940

Browse files
committed
feat: add option to select k8s nodes for topology config
Signed-off-by: Dmitry Shmulevich <[email protected]>
1 parent 00c087f commit 2399940

File tree

22 files changed

+252
-122
lines changed

22 files changed

+252
-122
lines changed

charts/topograph/Chart.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 0.1.0
18+
version: 0.2.0
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to
@@ -25,8 +25,8 @@ appVersion: "1.16.0"
2525

2626
dependencies:
2727
- name: node-data-broker
28-
version: 0.1.0
28+
version: 0.2.0
2929
repository: "file://charts/node-data-broker"
3030
- name: node-observer
31-
version: 0.1.0
31+
version: 0.2.0
3232
repository: "file://charts/node-observer"

charts/topograph/charts/node-data-broker/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 0.1.0
18+
version: 0.2.0
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to

charts/topograph/charts/node-observer/Chart.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ type: application
1515
# This is the chart version. This version number should be incremented each time you make changes
1616
# to the chart and its templates, including the app version.
1717
# Versions are expected to follow Semantic Versioning (https://semver.org/)
18-
version: 0.1.0
18+
version: 0.2.0
1919

2020
# This is the version number of the application being deployed. This version number should be
2121
# incremented each time you make changes to the application. Versions are not expected to

charts/topograph/charts/node-observer/templates/configmap.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ metadata:
77
data:
88
node-observer-config.yaml: |-
99
generateTopologyUrl: "{{ include "topograph.url" $ }}/v1/generate"
10-
params:
11-
{{- toYaml .Values.global.engineParams | nindent 6 }}
10+
provider:
11+
{{- toYaml .Values.global.provider | nindent 6 }}
12+
engine:
13+
{{- toYaml .Values.global.engine | nindent 6 }}
1214
trigger:
1315
{{- toYaml .Values.topograph.trigger | nindent 6 }}

charts/topograph/templates/configmap.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ data:
99
http:
1010
port: {{ .Values.global.service.port }}
1111
ssl: false
12-
provider: {{ .Values.global.provider }}
13-
engine: {{ .Values.global.engine }}
1412
requestAggregationDelay: {{ .Values.config.requestAggregationDelay }}
1513
{{- if .Values.config.credentialsSecretName }}
1614
credentialsPath: /etc/topograph/credentials/credentials.yaml

charts/topograph/values-slinky-block-example.yaml

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,25 @@
33
# Declare variables to be passed into your templates.
44

55
global:
6-
# provider: "aws", "oci", "gcp", "nebius", "netq", "infiniband-k8s", "dra" or "test"
7-
provider: aws
8-
# engine: "k8s" or "slinky"
9-
engine: slinky
10-
engineParams:
11-
namespace: slurm
12-
podSelector:
13-
matchLabels:
14-
app.kubernetes.io/component: compute
15-
plugin: topology/block
16-
block_sizes: 4
17-
topologyConfigPath: topology.conf
18-
topologyConfigmapName: slurm-config
6+
provider:
7+
# name: "aws", "oci", "gcp", "nebius", "netq", "infiniband-k8s", "dra" or "test"
8+
name: aws
9+
params:
10+
nodeSelector:
11+
slurmCluster: my-cluster
12+
engine:
13+
name: slinky
14+
params:
15+
namespace: slurm
16+
nodeSelector:
17+
slurmCluster: my-cluster
18+
podSelector:
19+
matchLabels:
20+
app.kubernetes.io/component: compute
21+
plugin: topology/block
22+
block_sizes: 4
23+
topologyConfigPath: topology.conf
24+
topologyConfigmapName: slurm-config
1925

2026
nodeSelector:
2127
dedicated: user-workload

charts/topograph/values-slinky-partition-example.yaml

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,29 +3,30 @@
33
# Declare variables to be passed into your templates.
44

55
global:
6-
# provider: "aws", "oci", "gcp", "nebius", "netq", "infiniband-k8s", "dra" or "test"
7-
provider: aws
8-
# engine: "k8s" or "slinky"
9-
engine: slinky
10-
engineParams:
11-
namespace: slurm
12-
podSelector:
13-
matchLabels:
14-
app.kubernetes.io/component: compute
15-
topologies:
16-
topo1:
17-
plugin: topology/block
18-
blockSizes: [2,4]
19-
topo2:
20-
plugin: topology/block
21-
blockSizes: [8,16]
22-
topo3:
23-
plugin: topology/tree
24-
topo-default:
25-
plugin: topology/flat
26-
clusterDefault: true
27-
topologyConfigPath: topology.conf
28-
topologyConfigmapName: slurm-config
6+
provider:
7+
# name: "aws", "oci", "gcp", "nebius", "netq", "infiniband-k8s", "dra" or "test"
8+
name: aws
9+
engine:
10+
name: slinky
11+
params:
12+
namespace: slurm
13+
podSelector:
14+
matchLabels:
15+
app.kubernetes.io/component: compute
16+
topologies:
17+
topo1:
18+
plugin: topology/block
19+
blockSizes: [2,4]
20+
topo2:
21+
plugin: topology/block
22+
blockSizes: [8,16]
23+
topo3:
24+
plugin: topology/tree
25+
topo-default:
26+
plugin: topology/flat
27+
clusterDefault: true
28+
topologyConfigPath: topology.conf
29+
topologyConfigmapName: slurm-config
2930

3031
nodeSelector:
3132
dedicated: user-workload

charts/topograph/values-slinky-tree-example.yaml

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,19 @@
33
# Declare variables to be passed into your templates.
44

55
global:
6-
# provider: "aws", "oci", "gcp", "nebius", "netq", "infiniband-k8s", "dra" or "test"
7-
provider: aws
8-
# engine: "k8s" or "slinky"
9-
engine: slinky
10-
engineParams:
11-
namespace: slurm
12-
podSelector:
13-
matchLabels:
14-
app.kubernetes.io/component: compute
15-
plugin: topology/tree
16-
topologyConfigPath: topology.conf
17-
topologyConfigmapName: slurm-config
6+
provider:
7+
# name: "aws", "oci", "gcp", "nebius", "netq", "infiniband-k8s", "dra" or "test"
8+
name: aws
9+
engine:
10+
name: slinky
11+
params:
12+
namespace: slurm
13+
podSelector:
14+
matchLabels:
15+
app.kubernetes.io/component: compute
16+
plugin: topology/tree
17+
topologyConfigPath: topology.conf
18+
topologyConfigmapName: slurm-config
1819

1920
nodeSelector:
2021
dedicated: user-workload

charts/topograph/values.yaml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
# Declare variables to be passed into your templates.
44

55
global:
6-
# provider: "aws", "oci", "gcp", "nebius", "netq", "infiniband-k8s", "dra" or "test".
7-
provider: test
8-
# engine: "k8s" or "slinky"
9-
engine: k8s
10-
# engineParams:
6+
provider:
7+
# name: "aws", "oci", "gcp", "nebius", "netq", "infiniband-k8s", "dra" or "test".
8+
name: test
9+
engine:
10+
# name: "k8s" or "slinky"
11+
name: k8s
1112

1213
service:
1314
type: ClusterIP

internal/k8s/utils.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,12 @@ import (
1919
"k8s.io/client-go/tools/remotecommand"
2020
)
2121

22-
func GetNodes(ctx context.Context, client *kubernetes.Clientset) (*corev1.NodeList, error) {
23-
nodes, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
22+
func GetNodes(ctx context.Context, client *kubernetes.Clientset, opt *metav1.ListOptions) (*corev1.NodeList, error) {
23+
if opt == nil {
24+
opt = &metav1.ListOptions{}
25+
}
26+
27+
nodes, err := client.CoreV1().Nodes().List(ctx, *opt)
2428
if err != nil {
2529
return nil, fmt.Errorf("failed to list node in the cluster: %v", err)
2630
}

0 commit comments

Comments
 (0)