Skip to content

Commit dbb02c3

Browse files
committed
Make maxNodesPerIMEXDomain configurable (default at 18)
Signed-off-by: Kevin Klues <[email protected]>
1 parent 2b52128 commit dbb02c3

File tree

6 files changed

+48
-23
lines changed

6 files changed

+48
-23
lines changed

cmd/compute-domain-controller/controller.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ type ManagerConfig struct {
3838
// imageName is the full image name to use when rendering templates
3939
imageName string
4040

41+
// maxNodesPerIMEXDomain is the maximum number of nodes allowed in a single IMEX domain
42+
maxNodesPerIMEXDomain int
43+
4144
// clientsets provides access to various Kubernetes API client interfaces
4245
clientsets flags.ClientSets
4346

@@ -67,12 +70,13 @@ func (c *Controller) Run(ctx context.Context) error {
6770
workQueue := workqueue.New(workqueue.DefaultControllerRateLimiter())
6871

6972
managerConfig := &ManagerConfig{
70-
driverName: c.config.driverName,
71-
driverNamespace: c.config.flags.namespace,
72-
additionalNamespaces: c.config.flags.additionalNamespaces.Value(),
73-
imageName: c.config.flags.imageName,
74-
clientsets: c.config.clientsets,
75-
workQueue: workQueue,
73+
driverName: c.config.driverName,
74+
driverNamespace: c.config.flags.namespace,
75+
additionalNamespaces: c.config.flags.additionalNamespaces.Value(),
76+
imageName: c.config.flags.imageName,
77+
maxNodesPerIMEXDomain: c.config.flags.maxNodesPerIMEXDomain,
78+
clientsets: c.config.clientsets,
79+
workQueue: workQueue,
7680
}
7781

7882
cdManager := NewComputeDomainManager(managerConfig)

cmd/compute-domain-controller/daemonset.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ type DaemonSetTemplateData struct {
5050
ComputeDomainLabelValue types.UID
5151
ResourceClaimTemplateName string
5252
ImageName string
53+
MaxNodesPerIMEXDomain int
5354
FeatureGates map[string]bool
5455
}
5556

@@ -195,6 +196,7 @@ func (m *DaemonSetManager) Create(ctx context.Context, cd *nvapi.ComputeDomain)
195196
ComputeDomainLabelValue: cd.UID,
196197
ResourceClaimTemplateName: rct.Name,
197198
ImageName: m.config.imageName,
199+
MaxNodesPerIMEXDomain: m.config.maxNodesPerIMEXDomain,
198200
FeatureGates: featuregates.ToMap(),
199201
}
200202

cmd/compute-domain-controller/main.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,19 @@ import (
4343
)
4444

4545
const (
46-
DriverName = "compute-domain.nvidia.com"
46+
DriverName = "compute-domain.nvidia.com"
47+
defaultMaxNodesPerIMEXDomain = 18
4748
)
4849

4950
type Flags struct {
5051
kubeClientConfig flags.KubeClientConfig
5152
loggingConfig *flags.LoggingConfig
5253
featureGateConfig *flags.FeatureGateConfig
5354

54-
podName string
55-
namespace string
56-
imageName string
55+
podName string
56+
namespace string
57+
imageName string
58+
maxNodesPerIMEXDomain int
5759

5860
httpEndpoint string
5961
metricsPath string
@@ -103,6 +105,13 @@ func newApp() *cli.App {
103105
Destination: &flags.imageName,
104106
EnvVars: []string{"IMAGE_NAME"},
105107
},
108+
&cli.IntFlag{
109+
Name: "max-nodes-per-imex-domain",
110+
Usage: "The maximum number of possible nodes per IMEX domain",
111+
Value: defaultMaxNodesPerIMEXDomain,
112+
EnvVars: []string{"MAX_NODES_PER_IMEX_DOMAIN"},
113+
Destination: &flags.maxNodesPerIMEXDomain,
114+
},
106115
&cli.StringFlag{
107116
Category: "HTTP server:",
108117
Name: "http-endpoint",

cmd/compute-domain-daemon/hostname.go

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,26 @@ import (
2929
)
3030

3131
const (
32-
maxHostnames = 18
3332
hostsFilePath = "/etc/hosts"
3433
hostnameFormat = "compute-domain-daemon-%d"
3534
)
3635

3736
// HostnameManager manages the allocation of static hostnames to IP addresses.
3837
type HostnameManager struct {
3938
sync.Mutex
40-
ipToHostname map[string]string
41-
cliqueID string
42-
nodesConfigPath string
39+
ipToHostname map[string]string
40+
cliqueID string
41+
maxNodesPerIMEXDomain int
42+
nodesConfigPath string
4343
}
4444

4545
// NewHostnameManager creates a new hostname manager.
46-
func NewHostnameManager(cliqueID string, nodesConfigPath string) *HostnameManager {
46+
func NewHostnameManager(cliqueID string, maxNodesPerIMEXDomain int, nodesConfigPath string) *HostnameManager {
4747
return &HostnameManager{
48-
ipToHostname: make(map[string]string),
49-
cliqueID: cliqueID,
50-
nodesConfigPath: nodesConfigPath,
48+
ipToHostname: make(map[string]string),
49+
cliqueID: cliqueID,
50+
maxNodesPerIMEXDomain: maxNodesPerIMEXDomain,
51+
nodesConfigPath: nodesConfigPath,
5152
}
5253
}
5354

@@ -118,7 +119,7 @@ func (m *HostnameManager) allocateHostname(ip string) (string, error) {
118119
}
119120

120121
// Find the next available hostname
121-
for i := 0; i < maxHostnames; i++ {
122+
for i := 0; i < m.maxNodesPerIMEXDomain; i++ {
122123
hostname := fmt.Sprintf(hostnameFormat, i)
123124
// Check if this hostname is already in use
124125
inUse := false
@@ -135,7 +136,7 @@ func (m *HostnameManager) allocateHostname(ip string) (string, error) {
135136
}
136137

137138
// If all hostnames are used, return an error
138-
return "", fmt.Errorf("no hostnames available (max: %d)", maxHostnames)
139+
return "", fmt.Errorf("no hostnames available (max: %d)", m.maxNodesPerIMEXDomain)
139140
}
140141

141142
// updateHostsFile updates the /etc/hosts file with current IP to hostname mappings.
@@ -205,13 +206,13 @@ func (m *HostnameManager) WriteNodesConfig() error {
205206
defer f.Close()
206207

207208
// Write static hostnames
208-
for i := 0; i < maxHostnames; i++ {
209+
for i := 0; i < m.maxNodesPerIMEXDomain; i++ {
209210
hostname := fmt.Sprintf(hostnameFormat, i)
210211
if _, err := fmt.Fprintf(f, "%s\n", hostname); err != nil {
211212
return fmt.Errorf("failed to write to nodes config file: %w", err)
212213
}
213214
}
214215

215-
klog.Infof("Created static nodes config file with %d hostnames using format %s", maxHostnames, hostnameFormat)
216+
klog.Infof("Created static nodes config file with %d hostnames using format %s", m.maxNodesPerIMEXDomain, hostnameFormat)
216217
return nil
217218
}

cmd/compute-domain-daemon/main.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ type Flags struct {
5151
computeDomainNamespace string
5252
nodeName string
5353
podIP string
54+
maxNodesPerIMEXDomain int
5455
loggingConfig *flags.LoggingConfig
5556
featureGateConfig *flags.FeatureGateConfig
5657
}
@@ -124,6 +125,12 @@ func newApp() *cli.App {
124125
EnvVars: []string{"POD_IP"},
125126
Destination: &flags.podIP,
126127
},
128+
&cli.IntFlag{
129+
Name: "max-nodes-per-imex-domain",
130+
Usage: "The maximum number of possible nodes per IMEX domain",
131+
EnvVars: []string{"MAX_NODES_PER_IMEX_DOMAIN"},
132+
Destination: &flags.maxNodesPerIMEXDomain,
133+
},
127134
}
128135
cliFlags = append(cliFlags, flags.featureGateConfig.Flags()...)
129136
cliFlags = append(cliFlags, flags.loggingConfig.Flags()...)
@@ -179,7 +186,7 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error {
179186
klog.Infof("config: %v", config)
180187

181188
// Prepare Hostname manager
182-
hostnameManager := NewHostnameManager(flags.cliqueID, nodesConfigPath)
189+
hostnameManager := NewHostnameManager(flags.cliqueID, flags.maxNodesPerIMEXDomain, nodesConfigPath)
183190

184191
// Create static nodes config file with hostnames
185192
if err := hostnameManager.WriteNodesConfig(); err != nil {

templates/compute-domain-daemon.tmpl.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ spec:
2626
image: {{ .ImageName }}
2727
command: ["compute-domain-daemon", "-v", "6", "run"]
2828
env:
29+
- name: MAX_NODES_PER_IMEX_DOMAIN
30+
value: "{{ .MaxNodesPerIMEXDomain }}"
2931
- name: NODE_NAME
3032
valueFrom:
3133
fieldRef:

0 commit comments

Comments
 (0)