Skip to content

Commit 03b41e2

Browse files
committed
Make maxNodesPerIMEXDomain configurable (defaults to 18)
Signed-off-by: Kevin Klues <[email protected]>
1 parent b8b2a06 commit 03b41e2

File tree

6 files changed

+47
-22
lines changed

6 files changed

+47
-22
lines changed

cmd/compute-domain-controller/controller.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ type ManagerConfig struct {
3838
// imageName is the full image name to use when rendering templates
3939
imageName string
4040

41+
// maxNodesPerIMEXDomain is the maximum number of nodes that may be allocated to a single IMEX domain
42+
maxNodesPerIMEXDomain int
43+
4144
// clientsets provides access to various Kubernetes API client interfaces
4245
clientsets flags.ClientSets
4346

@@ -63,11 +66,12 @@ func (c *Controller) Run(ctx context.Context) error {
6366
workQueue := workqueue.New(workqueue.DefaultControllerRateLimiter())
6467

6568
managerConfig := &ManagerConfig{
66-
driverName: c.config.driverName,
67-
driverNamespace: c.config.flags.namespace,
68-
imageName: c.config.flags.imageName,
69-
clientsets: c.config.clientsets,
70-
workQueue: workQueue,
69+
driverName: c.config.driverName,
70+
driverNamespace: c.config.flags.namespace,
71+
imageName: c.config.flags.imageName,
72+
maxNodesPerIMEXDomain: c.config.flags.maxNodesPerIMEXDomain,
73+
clientsets: c.config.clientsets,
74+
workQueue: workQueue,
7175
}
7276

7377
cdManager := NewComputeDomainManager(managerConfig)

cmd/compute-domain-controller/daemonset.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ type DaemonSetTemplateData struct {
4949
ComputeDomainLabelValue types.UID
5050
ResourceClaimTemplateName string
5151
ImageName string
52+
MaxNodesPerIMEXDomain int
5253
}
5354

5455
type DaemonSetManager struct {
@@ -181,6 +182,7 @@ func (m *DaemonSetManager) Create(ctx context.Context, namespace string, cd *nva
181182
ComputeDomainLabelValue: cd.UID,
182183
ResourceClaimTemplateName: rct.Name,
183184
ImageName: m.config.imageName,
185+
MaxNodesPerIMEXDomain: m.config.maxNodesPerIMEXDomain,
184186
}
185187

186188
tmpl, err := template.ParseFiles(DaemonSetTemplatePath)

cmd/compute-domain-controller/main.go

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,16 +43,18 @@ import (
4343
)
4444

4545
const (
46-
DriverName = "compute-domain.nvidia.com"
46+
DriverName = "compute-domain.nvidia.com"
47+
defaultMaxNodesPerIMEXDomain = 18
4748
)
4849

4950
type Flags struct {
5051
kubeClientConfig flags.KubeClientConfig
5152
loggingConfig *flags.LoggingConfig
5253

53-
podName string
54-
namespace string
55-
imageName string
54+
podName string
55+
namespace string
56+
imageName string
57+
maxNodesPerIMEXDomain int
5658

5759
httpEndpoint string
5860
metricsPath string
@@ -99,6 +101,13 @@ func newApp() *cli.App {
99101
Destination: &flags.imageName,
100102
EnvVars: []string{"IMAGE_NAME"},
101103
},
104+
&cli.IntFlag{
105+
Name: "max-nodes-per-imex-domain",
106+
Usage: "The maximum number of possible nodes per IMEX domain",
107+
Value: defaultMaxNodesPerIMEXDomain,
108+
EnvVars: []string{"MAX_NODES_PER_IMEX_DOMAIN"},
109+
Destination: &flags.maxNodesPerIMEXDomain,
110+
},
102111
&cli.StringFlag{
103112
Category: "HTTP server:",
104113
Name: "http-endpoint",

cmd/compute-domain-daemon/hostname.go

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,26 @@ import (
2929
)
3030

3131
const (
32-
maxHostnames = 18
3332
hostsFilePath = "/etc/hosts"
3433
hostnameFormat = "compute-domain-daemon-%d"
3534
)
3635

3736
// HostnameManager manages the allocation of static hostnames to IP addresses.
3837
type HostnameManager struct {
3938
sync.Mutex
40-
ipToHostname map[string]string
41-
cliqueID string
42-
nodesConfigPath string
39+
ipToHostname map[string]string
40+
cliqueID string
41+
maxNodesPerIMEXDomain int
42+
nodesConfigPath string
4343
}
4444

4545
// NewHostnameManager creates a new hostname manager.
46-
func NewHostnameManager(cliqueID string, nodesConfigPath string) *HostnameManager {
46+
func NewHostnameManager(cliqueID string, maxNodesPerIMEXDomain int, nodesConfigPath string) *HostnameManager {
4747
return &HostnameManager{
48-
ipToHostname: make(map[string]string),
49-
cliqueID: cliqueID,
50-
nodesConfigPath: nodesConfigPath,
48+
ipToHostname: make(map[string]string),
49+
cliqueID: cliqueID,
50+
maxNodesPerIMEXDomain: maxNodesPerIMEXDomain,
51+
nodesConfigPath: nodesConfigPath,
5152
}
5253
}
5354

@@ -118,7 +119,7 @@ func (m *HostnameManager) allocateHostname(ip string) (string, error) {
118119
}
119120

120121
// Find the next available hostname
121-
for i := 0; i < maxHostnames; i++ {
122+
for i := 0; i < m.maxNodesPerIMEXDomain; i++ {
122123
hostname := fmt.Sprintf(hostnameFormat, i)
123124
// Check if this hostname is already in use
124125
inUse := false
@@ -135,7 +136,7 @@ func (m *HostnameManager) allocateHostname(ip string) (string, error) {
135136
}
136137

137138
// If all hostnames are used, return an error
138-
return "", fmt.Errorf("no hostnames available (max: %d)", maxHostnames)
139+
return "", fmt.Errorf("no hostnames available (max: %d)", m.maxNodesPerIMEXDomain)
139140
}
140141

141142
// updateHostsFile updates the /etc/hosts file with current IP to hostname mappings.
@@ -205,13 +206,13 @@ func (m *HostnameManager) WriteNodesConfig() error {
205206
defer f.Close()
206207

207208
// Write static hostnames
208-
for i := 0; i < maxHostnames; i++ {
209+
for i := 0; i < m.maxNodesPerIMEXDomain; i++ {
209210
hostname := fmt.Sprintf(hostnameFormat, i)
210211
if _, err := fmt.Fprintf(f, "%s\n", hostname); err != nil {
211212
return fmt.Errorf("failed to write to nodes config file: %w", err)
212213
}
213214
}
214215

215-
klog.Infof("Created static nodes config file with %d hostnames using format %s", maxHostnames, hostnameFormat)
216+
klog.Infof("Created static nodes config file with %d hostnames using format %s", m.maxNodesPerIMEXDomain, hostnameFormat)
216217
return nil
217218
}

cmd/compute-domain-daemon/main.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ type Flags struct {
4848
computeDomainNamespace string
4949
nodeName string
5050
podIP string
51+
maxNodesPerIMEXDomain int
5152
loggingConfig *flags.LoggingConfig
5253
}
5354

@@ -119,6 +120,12 @@ func newApp() *cli.App {
119120
EnvVars: []string{"POD_IP"},
120121
Destination: &flags.podIP,
121122
},
123+
&cli.IntFlag{
124+
Name: "max-nodes-per-imex-domain",
125+
Usage: "The maximum number of possible nodes per IMEX domain",
126+
EnvVars: []string{"MAX_NODES_PER_IMEX_DOMAIN"},
127+
Destination: &flags.maxNodesPerIMEXDomain,
128+
},
122129
}
123130
cliFlags = append(cliFlags, flags.loggingConfig.Flags()...)
124131

@@ -173,7 +180,7 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error {
173180
klog.Infof("config: %v", config)
174181

175182
// Prepare Hostname manager
176-
hostnameManager := NewHostnameManager(flags.cliqueID, nodesConfigPath)
183+
hostnameManager := NewHostnameManager(flags.cliqueID, flags.maxNodesPerIMEXDomain, nodesConfigPath)
177184

178185
// Create static nodes config file with hostnames
179186
if err := hostnameManager.WriteNodesConfig(); err != nil {

templates/compute-domain-daemon.tmpl.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ spec:
2626
image: {{ .ImageName }}
2727
command: ["compute-domain-daemon", "-v", "6", "run"]
2828
env:
29+
- name: MAX_NODES_PER_IMEX_DOMAIN
30+
value: "{{ .MaxNodesPerIMEXDomain }}"
2931
- name: NODE_NAME
3032
valueFrom:
3133
fieldRef:

0 commit comments

Comments
 (0)