Skip to content

Commit 55c647a

Browse files
authored
Merge pull request #638 from jgehrcke/jp/imex-daemon-sigusr1
CD daemon: send SIGUSR1 to IMEX daemon on node updates
2 parents c10a367 + c6b8c80 commit 55c647a

File tree

4 files changed

+56
-15
lines changed

4 files changed

+56
-15
lines changed

cmd/compute-domain-daemon/controller.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,9 @@ func (c *Controller) Run(ctx context.Context) error {
111111
return nil
112112
}
113113

114-
// GetNodesUpdateChan() returns a channel that only ever yields a full set of nodes,
115-
// i.e. during startup this blocks until the expected number of nodes is present
116-
// in CD status.
114+
// GetNodesUpdateChan returns a channel that yields updates for the nodes
115+
// currently present in the CD status. This is only a complete set of nodes
116+
// (size `numNodes`) if IMEXDaemonsWithDNSNames=false.
117117
func (c *Controller) GetNodesUpdateChan() chan []*nvapi.ComputeDomainNode {
118118
return c.computeDomainManager.GetNodesUpdateChan()
119119
}

cmd/compute-domain-daemon/dnsnames.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,10 @@ func NewDNSNameManager(cliqueID string, maxNodesPerIMEXDomain int, nodesConfigPa
5757
}
5858
}
5959

60-
// UpdateDNSNameMappings updates the /etc/hosts file with any new IP to DNS name mappings.
61-
func (m *DNSNameManager) UpdateDNSNameMappings(nodes []*nvapi.ComputeDomainNode) error {
60+
// UpdateDNSNameMappings updates the /etc/hosts file with any new IP to DNS name
61+
// mappings. The boolean return value indicates whether the hosts file was
62+
// updated or not (it must be ignored when the returned error is non-nil).
63+
func (m *DNSNameManager) UpdateDNSNameMappings(nodes []*nvapi.ComputeDomainNode) (bool, error) {
6264
m.Lock()
6365
defer m.Unlock()
6466

@@ -78,7 +80,7 @@ func (m *DNSNameManager) UpdateDNSNameMappings(nodes []*nvapi.ComputeDomainNode)
7880
// Construct the DNS name from the node index
7981
dnsName, err := m.constructDNSName(node)
8082
if err != nil {
81-
return fmt.Errorf("failed to allocate DNS name for IP %s: %w", node.IPAddress, err)
83+
return false, fmt.Errorf("failed to allocate DNS name for IP %s: %w", node.IPAddress, err)
8284
}
8385

8486
// Assign the IP -> DNS name mapping
@@ -87,14 +89,14 @@ func (m *DNSNameManager) UpdateDNSNameMappings(nodes []*nvapi.ComputeDomainNode)
8789

8890
// If the existing ipToDNSName mappings are unchanged, exit early
8991
if maps.Equal(ipToDNSName, m.ipToDNSName) {
90-
return nil
92+
return false, nil
9193
}
9294

9395
// Otherwise, update the cached ipToDNSName mapping
9496
m.ipToDNSName = ipToDNSName
9597

96-
// And updated the hosts file with new mappings
97-
return m.updateHostsFile()
98+
// And update the hosts file with the new mapping
99+
return true, m.updateHostsFile()
98100
}
99101

100102
// LogDNSNameMappings logs the current compute-domain-daemon mappings from memory.

cmd/compute-domain-daemon/main.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,13 +322,37 @@ func IMEXDaemonUpdateLoopWithDNSNames(ctx context.Context, controller *Controlle
322322
klog.Infof("shutdown: stop IMEXDaemonUpdateLoopWithDNSNames")
323323
return nil
324324
case nodes := <-controller.GetNodesUpdateChan():
325-
if err := dnsNameManager.UpdateDNSNameMappings(nodes); err != nil {
325+
updated, err := dnsNameManager.UpdateDNSNameMappings(nodes)
326+
if err != nil {
326327
return fmt.Errorf("failed to update DNS name => IP mappings: %w", err)
327328
}
328-
if err := processManager.EnsureStarted(); err != nil {
329+
330+
fresh, err := processManager.EnsureStarted()
331+
if err != nil {
329332
return fmt.Errorf("failed to ensure IMEX daemon is started: %w", err)
330333
}
334+
331335
dnsNameManager.LogDNSNameMappings()
336+
337+
// Skip sending SIGUSR1 when the process is fresh (has newly been
338+
// created) or when this was a no-op update. TODO: review skipping
339+
// this also if the new set of IP addresses only strictly removes
340+
// addresses compared to the old set (then we don't need to force
341+
// the daemon to re-resolve & re-connect).
342+
if !updated || fresh {
343+
continue
344+
}
345+
346+
// Actively ask the IMEX daemon to re-read its config and to
347+
// re-connect to its peers (involving DNS name re-resolution).
348+
klog.Infof("updated DNS/IP mapping, old process: send SIGUSR1")
349+
if err := processManager.Signal(syscall.SIGUSR1); err != nil {
350+
// Only log (ignore this error for now: if the process went away
351+
// unexpectedly, the process manager will handle that. If any
352+
// other error resulted in bad signal delivery, we may get away
353+
// with it).
354+
klog.Errorf("failed to send SIGUSR1 to child process: %s", err)
355+
}
332356
}
333357
}
334358
}

cmd/compute-domain-daemon/process.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,27 @@ func (m *ProcessManager) Restart() error {
5656
return m.start()
5757
}
5858

59-
// EnsureStarted starts the process if it is not already running. If the process is already started, this is a no-op.
60-
func (m *ProcessManager) EnsureStarted() error {
59+
// EnsureStarted starts the process if it is not already running. If the process
60+
// is already started, this is a no-op. The boolean return value indicates
61+
// `new`, i.e. it is `true` if the process was _newly_ started. It must be
62+
// ignored when the returned error is non-nil.
63+
func (m *ProcessManager) EnsureStarted() (bool, error) {
6164
if m.handle != nil {
62-
return nil
65+
return false, nil
6366
}
64-
return m.start()
67+
return true, m.start()
68+
}
69+
70+
// Signal attempts to send the provided signal to the managed child process.
71+
// Any error is emitted to the caller and must be handled there.
72+
func (m *ProcessManager) Signal(s os.Signal) error {
73+
m.Lock()
74+
defer m.Unlock()
75+
76+
if m.handle == nil {
77+
return fmt.Errorf("pm: sending signal %s failed: not started", s)
78+
}
79+
return m.handle.Process.Signal(s)
6580
}
6681

6782
func (m *ProcessManager) start() error {

0 commit comments

Comments
 (0)