Skip to content

Commit 0827e1e

Browse files
fix: Don't propagate partial recovery healthy events to ND and FR (#296)
1 parent b992a6c commit 0827e1e

File tree

2 files changed

+15
-6
lines changed

2 files changed

+15
-6
lines changed

fault-quarantine/pkg/reconciler/reconciler.go

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -434,10 +434,19 @@ func (r *Reconciler) handleAlreadyQuarantinedNode(
434434
return nil
435435
}
436436

437-
// Event will modify annotations, proceed with quarantine handling and propagate to ND/FR
438-
var status model.Status
437+
// Event will modify FQ annotations, proceed with quarantine handling
438+
stayQuarantined := r.handleQuarantinedNode(ctx, event, ruleSetEvals)
439439

440-
if r.handleQuarantinedNode(ctx, event, ruleSetEvals) {
440+
// Partial recovery: healthy event that doesn't fully unquarantine the node should
441+
// not be propagated to ND/FR
442+
if event.IsHealthy && stayQuarantined {
443+
return nil
444+
}
445+
446+
var status model.Status
447+
if stayQuarantined {
448+
// Only for an unhealthy event, set status to AlreadyQuarantined and
449+
// propagate to ND/FR
441450
status = model.AlreadyQuarantined
442451
} else {
443452
status = model.UnQuarantined

fault-quarantine/pkg/reconciler/reconciler_e2e_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -735,11 +735,11 @@ func TestE2E_EntityLevelTracking(t *testing.T) {
735735
model.StatusInProgress,
736736
)
737737

738-
t.Log("Verify status is AlreadyQuarantined (partial recovery, node stays quarantined)")
738+
t.Log("Verify status is nil (partial recovery not propagated to ND/FR)")
739739
require.Eventually(t, func() bool {
740740
status := getStatus(eventID3)
741-
return status != nil && *status == model.AlreadyQuarantined
742-
}, statusCheckTimeout, statusCheckPollInterval, "Status should be AlreadyQuarantined for partial recovery")
741+
return status == nil
742+
}, statusCheckTimeout, statusCheckPollInterval, "Status should be nil for partial recovery")
743743

744744
require.Eventually(t, func() bool {
745745
node, _ := e2eTestClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})

0 commit comments

Comments
 (0)