@@ -127,44 +127,6 @@ func (e *ExporterHostSyncer) filterExporterInstances(hostName string, exporterIn
127127 return exporterInstances
128128}
129129
130- // handleBootcUpgrade inspects the host's bootc update units and image state over hostSsh and, when not in dry-run mode, restarts the bootc update timer if the check output does not start with "No changes". It returns skip=true when the caller should skip this host's exporter instances (an upgrade is already running or was just triggered); the error is non-nil only when restarting the timer fails.
131- func (e * ExporterHostSyncer ) handleBootcUpgrade (hostSsh ssh.HostManager ) (bool , error ) {
132- // Check whether the bootc update service or timer is already running. The command error is discarded and only stdout is inspected (presumably because `systemctl is-active` exits non-zero for inactive units; confirm).
133- statusCmd , _ := hostSsh .RunHostCommand ("systemctl is-active bootc-fetch-apply-updates.service bootc-fetch-apply-updates.timer" )
134- if statusCmd != nil {
135- statuses := strings .Fields (statusCmd .Stdout ) // one status word is expected per queried unit (service, then timer)
136- if len (statuses ) == 2 &&
137- (statuses [0 ] == "active" || statuses [0 ] == "activating" ||
138- statuses [1 ] == "active" || statuses [1 ] == "activating" ) {
139- fmt .Printf (" ⚠️ Bootc upgrade in progress, skipping exporter instances for this host\n " )
140- return true , nil // skip = true
141- }
142- }
143-
144- // Run `bootc upgrade --check`, guarded by the /run/ostree-booted marker file so the check only runs where that file exists
145- bootcStdout , err := hostSsh .RunHostCommand ("[ -f /run/ostree-booted ] && bootc upgrade --check" )
146- if err == nil && bootcStdout != nil && bootcStdout .ExitCode == 0 && bootcStdout .Stdout != "" { // any command error, non-zero exit, or empty output falls through to the "Not a bootc managed host" branch below
147- if strings .HasPrefix (bootcStdout .Stdout , "No changes" ) { // output starting with "No changes" is treated as up to date; this is only reported in dry-run mode
148- if e .dryRun {
149- fmt .Printf (" ✅ Bootc image is up to date\n " )
150- }
151- } else if e .dryRun {
152- fmt .Printf (" 📄 Would upgrade bootc image\n " )
153- } else {
154- // Trigger bootc upgrade timer now. Assuming it uses manual activation (e.g. OnActiveSec=0, RandomizedDelaySec=1h, RemainAfterElapse=false)
155- _ , err := hostSsh .RunHostCommand ("systemctl restart bootc-fetch-apply-updates.timer" )
156- if err != nil {
157- return false , fmt .Errorf ("error triggering bootc upgrade service: %w" , err )
158- }
159- fmt .Printf (" ✅ Bootc upgrade started, skipping exporter instances for this host\n " )
160- return true , nil // skip = true
161- }
162- } else {
163- fmt .Printf (" ℹ️ Not a bootc managed host\n " )
164- }
165- return false , nil // skip = false
166- }
167-
168130// processExporterInstance processes a single exporter instance
169131func (e * ExporterHostSyncer ) processExporterInstance (exporterInstance * api.ExporterInstance , hostSsh ssh.HostManager ) error {
170132 if isDead , deadAnnotation := isExporterInstanceDead (exporterInstance ); isDead {
@@ -222,8 +184,17 @@ func (e *ExporterHostSyncer) calculateBackoffDelay(attempts int) time.Duration {
222184 return delay
223185}
224186
225- // processExporterInstances processes exporter instances and adds failures to global retry queue
226- func (e * ExporterHostSyncer ) processExporterInstances (exporterInstances []* api.ExporterInstance , hostSsh ssh.HostManager , hostName string , retryQueue * []RetryItem ) {
187+ // addToRetryQueue records a failed attempt on retryItem (increments Attempts, stores err as LastError, stamps LastAttemptTime with time.Now()) and appends the updated item, by value, to *nextRetryQueue. The caller's retryItem is modified through the pointer.
188+ func (e * ExporterHostSyncer ) addToRetryQueue (retryItem * RetryItem , err error , nextRetryQueue * []RetryItem ) {
189+ retryItem .Attempts ++ // count the attempt that just failed
190+ retryItem .LastError = err
191+ retryItem .LastAttemptTime = time .Now ()
192+ * nextRetryQueue = append (* nextRetryQueue , * retryItem ) // dereference: the queue holds RetryItem values, not pointers
193+ }
194+
195+ // processExporterInstancesAndBootc processes each exporter instance and then runs the host's bootc upgrade handling; failures of either kind are added to the global retry queue
196+ func (e * ExporterHostSyncer ) processExporterInstancesAndBootc (exporterInstances []* api.ExporterInstance , hostSsh ssh.HostManager , hostName string , retryQueue * []RetryItem ) {
197+
227198 for _ , exporterInstance := range exporterInstances {
228199 if err := e .processExporterInstance (exporterInstance , hostSsh ); err != nil {
229200 fmt .Printf (" ❌ Failed to process %s: %v\n " , exporterInstance .Name , err )
@@ -237,6 +208,19 @@ func (e *ExporterHostSyncer) processExporterInstances(exporterInstances []*api.E
237208 })
238209 }
239210 }
211+
212+ if err := hostSsh .HandleBootcUpgrade (e .dryRun ); err != nil {
213+ // Log the bootc upgrade failure and queue a host-level retry (ExporterInstance is nil for bootc retry items)
214+ fmt .Printf (" ⚠️ Bootc upgrade error: %v\n " , err )
215+ * retryQueue = append (* retryQueue , RetryItem {
216+ ExporterInstance : nil ,
217+ HostSSH : hostSsh ,
218+ HostName : hostName ,
219+ Attempts : 1 ,
220+ LastError : err ,
221+ LastAttemptTime : time .Now (),
222+ })
223+ }
240224}
241225
242226// processGlobalRetryQueue processes the global retry queue with exponential backoff
@@ -272,18 +256,26 @@ func (e *ExporterHostSyncer) processGlobalRetryQueue(retryQueue []RetryItem) err
272256
273257 // Second pass: retry items that are ready
274258 for _ , retryItem := range itemsToRetry {
275- fmt .Printf ("🔄 Retrying %s on %s (attempt %d/%d)...\n " ,
276- retryItem .ExporterInstance .Name , retryItem .HostName , retryItem .Attempts + 1 , e .retryConfig .MaxAttempts )
277-
278- if err := e .processExporterInstance (retryItem .ExporterInstance , retryItem .HostSSH ); err != nil {
279- // Still failed, increment attempts and add to next retry queue
280- retryItem .Attempts ++
281- retryItem .LastError = err
282- retryItem .LastAttemptTime = time .Now ()
283- nextRetryQueue = append (nextRetryQueue , retryItem )
284- fmt .Printf ("❌ Retry failed for %s on %s: %v\n " , retryItem .ExporterInstance .Name , retryItem .HostName , err )
259+ if retryItem .ExporterInstance == nil {
260+ fmt .Printf ("🔄 Retrying bootc upgrade on %s (attempt %d/%d)...\n " ,
261+ retryItem .HostName , retryItem .Attempts + 1 , e .retryConfig .MaxAttempts )
262+ if err := retryItem .HostSSH .HandleBootcUpgrade (e .dryRun ); err != nil {
263+ fmt .Printf ("❌ Retry failed for bootc upgrade on %s: %v\n " , retryItem .HostName , err )
264+ e .addToRetryQueue (& retryItem , err , & nextRetryQueue )
265+ } else {
266+ fmt .Printf ("✅ Retry succeeded for bootc upgrade on %s\n " , retryItem .HostName )
267+ }
285268 } else {
286- fmt .Printf ("✅ Retry succeeded for %s on %s\n " , retryItem .ExporterInstance .Name , retryItem .HostName )
269+ fmt .Printf ("🔄 Retrying instance %s on %s (attempt %d/%d)...\n " ,
270+ retryItem .ExporterInstance .Name , retryItem .HostName , retryItem .Attempts + 1 , e .retryConfig .MaxAttempts )
271+
272+ if err := e .processExporterInstance (retryItem .ExporterInstance , retryItem .HostSSH ); err != nil {
273+ // Still failed, increment attempts and add to next retry queue
274+ fmt .Printf ("❌ Retry failed for %s on %s: %v\n " , retryItem .ExporterInstance .Name , retryItem .HostName , err )
275+ e .addToRetryQueue (& retryItem , err , & nextRetryQueue )
276+ } else {
277+ fmt .Printf ("✅ Retry succeeded for %s on %s\n " , retryItem .ExporterInstance .Name , retryItem .HostName )
278+ }
287279 }
288280 }
289281
@@ -359,14 +351,8 @@ func (e *ExporterHostSyncer) SyncExporterHosts() error {
359351 fmt .Printf (" ✅ Connection: %s\n " , status )
360352 }
361353
362- if skip , err := e .handleBootcUpgrade (hostSsh ); err != nil {
363- return err
364- } else if skip {
365- continue
366- }
367-
368354 // Process each exporter instance and add failures to global retry queue
369- e .processExporterInstances (exporterInstances , hostSsh , host .Name , & retryQueue )
355+ e .processExporterInstancesAndBootc (exporterInstances , hostSsh , host .Name , & retryQueue )
370356 }
371357
372358 // Second pass: retry all failed instances globally
0 commit comments