Skip to content

Commit 753eb2d

Browse files
authored
Merge pull request #17 from jumpstarter-dev/bootc-order
Handle bootc last, and always apply configuration changes
2 parents e7d3ae4 + a9e6222 commit 753eb2d

File tree

2 files changed

+121
-58
lines changed

2 files changed

+121
-58
lines changed

internal/exporter/host/host.go

Lines changed: 44 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -127,44 +127,6 @@ func (e *ExporterHostSyncer) filterExporterInstances(hostName string, exporterIn
127127
return exporterInstances
128128
}
129129

130-
// handleBootcUpgrade handles bootc upgrade checking and execution
131-
func (e *ExporterHostSyncer) handleBootcUpgrade(hostSsh ssh.HostManager) (bool, error) {
132-
// Check if bootc upgrade service is already running
133-
statusCmd, _ := hostSsh.RunHostCommand("systemctl is-active bootc-fetch-apply-updates.service bootc-fetch-apply-updates.timer")
134-
if statusCmd != nil {
135-
statuses := strings.Fields(statusCmd.Stdout)
136-
if len(statuses) == 2 &&
137-
(statuses[0] == "active" || statuses[0] == "activating" ||
138-
statuses[1] == "active" || statuses[1] == "activating") {
139-
fmt.Printf(" ⚠️ Bootc upgrade in progress, skipping exporter instances for this host\n")
140-
return true, nil // skip = true
141-
}
142-
}
143-
144-
// Check booted image
145-
bootcStdout, err := hostSsh.RunHostCommand("[ -f /run/ostree-booted ] && bootc upgrade --check")
146-
if err == nil && bootcStdout != nil && bootcStdout.ExitCode == 0 && bootcStdout.Stdout != "" {
147-
if strings.HasPrefix(bootcStdout.Stdout, "No changes") {
148-
if e.dryRun {
149-
fmt.Printf(" ✅ Bootc image is up to date\n")
150-
}
151-
} else if e.dryRun {
152-
fmt.Printf(" 📄 Would upgrade bootc image\n")
153-
} else {
154-
// Trigger bootc upgrade timer now. Assuming it uses manual activation (e.g. OnActiveSec=0, RandomizedDelaySec=1h, RemainAfterElapse=false)
155-
_, err := hostSsh.RunHostCommand("systemctl restart bootc-fetch-apply-updates.timer")
156-
if err != nil {
157-
return false, fmt.Errorf("error triggering bootc upgrade service: %w", err)
158-
}
159-
fmt.Printf(" ✅ Bootc upgrade started, skipping exporter instances for this host\n")
160-
return true, nil // skip = true
161-
}
162-
} else {
163-
fmt.Printf(" ℹ️ Not a bootc managed host\n")
164-
}
165-
return false, nil // skip = false
166-
}
167-
168130
// processExporterInstance processes a single exporter instance
169131
func (e *ExporterHostSyncer) processExporterInstance(exporterInstance *api.ExporterInstance, hostSsh ssh.HostManager) error {
170132
if isDead, deadAnnotation := isExporterInstanceDead(exporterInstance); isDead {
@@ -222,8 +184,17 @@ func (e *ExporterHostSyncer) calculateBackoffDelay(attempts int) time.Duration {
222184
return delay
223185
}
224186

225-
// processExporterInstances processes exporter instances and adds failures to global retry queue
226-
func (e *ExporterHostSyncer) processExporterInstances(exporterInstances []*api.ExporterInstance, hostSsh ssh.HostManager, hostName string, retryQueue *[]RetryItem) {
187+
// addToRetryQueue increments attempts and adds a retry item to the next retry queue
188+
func (e *ExporterHostSyncer) addToRetryQueue(retryItem *RetryItem, err error, nextRetryQueue *[]RetryItem) {
189+
retryItem.Attempts++
190+
retryItem.LastError = err
191+
retryItem.LastAttemptTime = time.Now()
192+
*nextRetryQueue = append(*nextRetryQueue, *retryItem)
193+
}
194+
195+
// processExporterInstancesAndBootc processes exporter instances, then handles the host's bootc upgrade last; failures of either are added to the global retry queue
196+
func (e *ExporterHostSyncer) processExporterInstancesAndBootc(exporterInstances []*api.ExporterInstance, hostSsh ssh.HostManager, hostName string, retryQueue *[]RetryItem) {
197+
227198
for _, exporterInstance := range exporterInstances {
228199
if err := e.processExporterInstance(exporterInstance, hostSsh); err != nil {
229200
fmt.Printf(" ❌ Failed to process %s: %v\n", exporterInstance.Name, err)
@@ -237,6 +208,19 @@ func (e *ExporterHostSyncer) processExporterInstances(exporterInstances []*api.E
237208
})
238209
}
239210
}
211+
212+
if err := hostSsh.HandleBootcUpgrade(e.dryRun); err != nil {
213+
// Log the bootc error and queue a host-level retry (ExporterInstance is nil for bootc items)
214+
fmt.Printf(" ⚠️ Bootc upgrade error: %v\n", err)
215+
*retryQueue = append(*retryQueue, RetryItem{
216+
ExporterInstance: nil,
217+
HostSSH: hostSsh,
218+
HostName: hostName,
219+
Attempts: 1,
220+
LastError: err,
221+
LastAttemptTime: time.Now(),
222+
})
223+
}
240224
}
241225

242226
// processGlobalRetryQueue processes the global retry queue with exponential backoff
@@ -272,18 +256,26 @@ func (e *ExporterHostSyncer) processGlobalRetryQueue(retryQueue []RetryItem) err
272256

273257
// Second pass: retry items that are ready
274258
for _, retryItem := range itemsToRetry {
275-
fmt.Printf("🔄 Retrying %s on %s (attempt %d/%d)...\n",
276-
retryItem.ExporterInstance.Name, retryItem.HostName, retryItem.Attempts+1, e.retryConfig.MaxAttempts)
277-
278-
if err := e.processExporterInstance(retryItem.ExporterInstance, retryItem.HostSSH); err != nil {
279-
// Still failed, increment attempts and add to next retry queue
280-
retryItem.Attempts++
281-
retryItem.LastError = err
282-
retryItem.LastAttemptTime = time.Now()
283-
nextRetryQueue = append(nextRetryQueue, retryItem)
284-
fmt.Printf("❌ Retry failed for %s on %s: %v\n", retryItem.ExporterInstance.Name, retryItem.HostName, err)
259+
if retryItem.ExporterInstance == nil {
260+
fmt.Printf("🔄 Retrying bootc upgrade on %s (attempt %d/%d)...\n",
261+
retryItem.HostName, retryItem.Attempts+1, e.retryConfig.MaxAttempts)
262+
if err := retryItem.HostSSH.HandleBootcUpgrade(e.dryRun); err != nil {
263+
fmt.Printf("❌ Retry failed for bootc upgrade on %s: %v\n", retryItem.HostName, err)
264+
e.addToRetryQueue(&retryItem, err, &nextRetryQueue)
265+
} else {
266+
fmt.Printf("✅ Retry succeeded for bootc upgrade on %s\n", retryItem.HostName)
267+
}
285268
} else {
286-
fmt.Printf("✅ Retry succeeded for %s on %s\n", retryItem.ExporterInstance.Name, retryItem.HostName)
269+
fmt.Printf("🔄 Retrying instance %s on %s (attempt %d/%d)...\n",
270+
retryItem.ExporterInstance.Name, retryItem.HostName, retryItem.Attempts+1, e.retryConfig.MaxAttempts)
271+
272+
if err := e.processExporterInstance(retryItem.ExporterInstance, retryItem.HostSSH); err != nil {
273+
// Still failed, increment attempts and add to next retry queue
274+
fmt.Printf("❌ Retry failed for %s on %s: %v\n", retryItem.ExporterInstance.Name, retryItem.HostName, err)
275+
e.addToRetryQueue(&retryItem, err, &nextRetryQueue)
276+
} else {
277+
fmt.Printf("✅ Retry succeeded for %s on %s\n", retryItem.ExporterInstance.Name, retryItem.HostName)
278+
}
287279
}
288280
}
289281

@@ -359,14 +351,8 @@ func (e *ExporterHostSyncer) SyncExporterHosts() error {
359351
fmt.Printf(" ✅ Connection: %s\n", status)
360352
}
361353

362-
if skip, err := e.handleBootcUpgrade(hostSsh); err != nil {
363-
return err
364-
} else if skip {
365-
continue
366-
}
367-
368354
// Process each exporter instance and add failures to global retry queue
369-
e.processExporterInstances(exporterInstances, hostSsh, host.Name, &retryQueue)
355+
e.processExporterInstancesAndBootc(exporterInstances, hostSsh, host.Name, &retryQueue)
370356
}
371357

372358
// Second pass: retry all failed instances globally

internal/exporter/ssh/ssh.go

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,24 @@ import (
1919
"golang.org/x/crypto/ssh/agent"
2020
)
2121

22+
// BootcStatus represents the status of bootc upgrade
// on a host, as classified by SSHHostManager.GetBootcStatus.
23+
type BootcStatus int
24+
25+
const (
26+
// BOOTC_UP_TO_DATE: `bootc upgrade --check` succeeded and its output starts with "No changes".
BOOTC_UP_TO_DATE BootcStatus = iota
27+
// BOOTC_UPDATING: bootc-fetch-apply-updates.service or .timer is reported as "active" or "activating".
BOOTC_UPDATING
28+
// BOOTC_WILL_UPDATE: the check succeeded with output other than "No changes"; HandleBootcUpgrade triggers the upgrade in this state.
BOOTC_WILL_UPDATE
29+
// BOOTC_NOT_MANAGED: the check command failed, exited non-zero or printed nothing (the command also requires /run/ostree-booted to exist).
BOOTC_NOT_MANAGED
30+
)
31+
2232
// HostManager is the set of host operations the exporter host syncer calls
// through its hostSsh value. SSHHostManager provides the implementations
// visible in this file.
type HostManager interface {
2333
Status() (string, error)
2434
NeedsUpdate() (bool, error)
2535
Diff() (string, error)
2636
// Apply reconciles the exporter configuration on the host.
// NOTE(review): from the visible part of SSHHostManager.Apply, dryRun appears to report changes without applying them — confirm.
Apply(exporterConfig *v1alpha1.ExporterConfigTemplate, dryRun bool) error
2737
// RunHostCommand runs command on the host and returns its result; callers read Stdout and ExitCode from it.
RunHostCommand(command string) (*CommandResult, error)
38+
// GetBootcStatus classifies the host's bootc state as one of the BootcStatus constants.
GetBootcStatus() BootcStatus
39+
// HandleBootcUpgrade prints the bootc state and, unless dryRun is set, triggers a pending upgrade.
HandleBootcUpgrade(dryRun bool) error
2840
}
2941

3042
// CommandResult represents the result of running a command via SSH
@@ -202,6 +214,18 @@ func (m *SSHHostManager) Apply(exporterConfig *v1alpha1.ExporterConfigTemplate,
202214
return fmt.Errorf("failed to reconcile exporter config file: %w", err)
203215
}
204216

217+
if m.GetBootcStatus() == BOOTC_UPDATING {
218+
if dryRun {
219+
fmt.Printf(" 📄 Bootc upgrade in progress, would skip exporter service restarts/container updates\n")
220+
} else {
221+
fmt.Printf(" ⚠️ Bootc upgrade in progress, skipping exporter service restarts/container updates\n")
222+
return nil
223+
}
224+
}
225+
226+
// Only if bootc is not updating, we restart/start services and pull containers
227+
// otherwise it's too much pressure on the system
228+
205229
if changedExporterConfig || changedContainer || changedService {
206230
if !dryRun {
207231
// Apply the changes: reload systemd, enable service and restart the exporter
@@ -386,6 +410,59 @@ func (m *SSHHostManager) reconcileFile(path string, content string, dryRun bool)
386410
return true, nil
387411
}
388412

413+
// GetBootcStatus checks the bootc status and returns the appropriate BootcStatus enum
414+
func (m *SSHHostManager) GetBootcStatus() BootcStatus {
415+
// Check if bootc upgrade service is already running
416+
statusCmd, _ := m.RunHostCommand("systemctl is-active bootc-fetch-apply-updates.service bootc-fetch-apply-updates.timer")
417+
if statusCmd != nil {
418+
statuses := strings.Fields(statusCmd.Stdout)
419+
if len(statuses) == 2 &&
420+
(statuses[0] == "active" || statuses[0] == "activating" ||
421+
statuses[1] == "active" || statuses[1] == "activating") {
422+
return BOOTC_UPDATING
423+
}
424+
}
425+
426+
// Check booted image
427+
bootcStdout, err := m.RunHostCommand("[ -f /run/ostree-booted ] && bootc upgrade --check")
428+
if err == nil && bootcStdout != nil && bootcStdout.ExitCode == 0 && bootcStdout.Stdout != "" {
429+
if strings.HasPrefix(bootcStdout.Stdout, "No changes") {
430+
return BOOTC_UP_TO_DATE
431+
} else {
432+
return BOOTC_WILL_UPDATE
433+
}
434+
}
435+
436+
return BOOTC_NOT_MANAGED
437+
}
438+
439+
// HandleBootcUpgrade handles bootc upgrade checking and execution
440+
func (m *SSHHostManager) HandleBootcUpgrade(dryRun bool) error {
441+
status := m.GetBootcStatus()
442+
443+
switch status {
444+
case BOOTC_UPDATING:
445+
fmt.Printf(" ⚠️ Bootc upgrade in progress\n")
446+
case BOOTC_UP_TO_DATE:
447+
fmt.Printf(" ✅ Bootc image is up to date\n")
448+
case BOOTC_WILL_UPDATE:
449+
if dryRun {
450+
fmt.Printf(" 📄 Would upgrade bootc image\n")
451+
} else {
452+
// Trigger bootc upgrade timer now. Assuming it uses manual activation (e.g. OnActiveSec=0, RandomizedDelaySec=1h, RemainAfterElapse=false)
453+
_, err := m.RunHostCommand("systemctl restart bootc-fetch-apply-updates.timer")
454+
if err != nil {
455+
return fmt.Errorf("error triggering bootc upgrade service: %w", err)
456+
}
457+
fmt.Printf(" ✅ Bootc upgrade started\n")
458+
return nil
459+
}
460+
case BOOTC_NOT_MANAGED:
461+
fmt.Printf(" ℹ️ Not a bootc managed host\n")
462+
}
463+
return nil
464+
}
465+
389466
func (m *SSHHostManager) createSshClient() (*ssh.Client, error) {
390467

391468
port := 22

0 commit comments

Comments
 (0)