@@ -30,6 +30,7 @@ import (
3030 "github.com/nvidia/nvsentinel/health-monitors/csp-health-monitor/pkg/model"
3131 "github.com/stretchr/testify/assert"
3232 "github.com/stretchr/testify/mock"
33+ "github.com/stretchr/testify/require"
3334 v1 "k8s.io/api/core/v1"
3435 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3536 "k8s.io/client-go/kubernetes"
@@ -145,8 +146,11 @@ func createTestClient(t *testing.T) (*AWSClient, *MockAWSHealthClient, kubernete
145146 nodeInformer .Stop ()
146147 })
147148
148- // Wait for the informer to sync
149- time .Sleep (100 * time .Millisecond )
149+ require .Eventually (t , func () bool {
150+ instanceIDs := nodeInformer .GetInstanceIDs ()
151+ _ , exists := instanceIDs [testInstanceID ]
152+ return exists
153+ }, 5 * time .Second , 50 * time .Millisecond , "Node should be tracked by informer" )
150154
151155 client := & AWSClient {
152156 config : config.AWSConfig {
@@ -295,8 +299,13 @@ func TestMultipleAffectedEntities(t *testing.T) {
295299 })
296300 }
297301
298- // Wait for informer to sync the new nodes
299- time .Sleep (200 * time .Millisecond )
302+ require .Eventually (t , func () bool {
303+ instanceIDs := client .nodeInformer .GetInstanceIDs ()
304+ return len (instanceIDs ) == 3 &&
305+ instanceIDs [testInstanceID ] == testNodeName &&
306+ instanceIDs [testInstanceID1 ] == testNodeName1 &&
307+ instanceIDs [testInstanceID2 ] == testNodeName2
308+ }, 5 * time .Second , 50 * time .Millisecond , "All 3 nodes should be tracked by informer" )
300309
301310 startTime := time .Now ().Add (24 * time .Hour )
302311 endTime := startTime .Add (2 * time .Hour )
@@ -361,9 +370,7 @@ func TestMultipleAffectedEntities(t *testing.T) {
361370 receivedEvents := 0
362371 affectedNodes := make (map [string ]bool )
363372
364- // Collect all events from channel (with timeout protection)
365- timeout := time .After (2 * time .Second )
366- for {
373+ require .Eventually (t , func () bool {
367374 select {
368375 case event := <- eventChan :
369376 var expectedEntityArn string
@@ -381,18 +388,12 @@ func TestMultipleAffectedEntities(t *testing.T) {
381388 affectedNodes [event .NodeName ] = true
382389 assert .Equal (t , model .StatusDetected , event .Status )
383390 assert .Equal (t , expectedEntityArn , event .EventID )
384- case <- timeout :
385- // Break out of the loop after timeout
386- goto checkResults
387391 default :
388- if receivedEvents >= 3 {
389- goto checkResults
390- }
391- time .Sleep (10 * time .Millisecond ) // Small sleep to prevent CPU spin
392+ // No events available
392393 }
393- }
394+ return receivedEvents >= 3
395+ }, 5 * time .Second , 50 * time .Millisecond , "Should receive 3 maintenance events" )
394396
395- checkResults:
396397 assert .Equal (t , 3 , receivedEvents , "Should have received 3 maintenance events" )
397398 assert .Equal (t , 3 , len (affectedNodes ), "Should have affected 3 distinct nodes" )
398399}
@@ -492,8 +493,13 @@ func TestCompletedEvent(t *testing.T) {
492493 assert .NoError (t , err )
493494 defer testK8sClient .CoreV1 ().Nodes ().Delete (context .Background (), testNodeName2 , metav1.DeleteOptions {})
494495
495- // Wait for the informer to sync the new nodes
496- time .Sleep (200 * time .Millisecond )
496+ require .Eventually (t , func () bool {
497+ instanceIDs := client .nodeInformer .GetInstanceIDs ()
498+ return len (instanceIDs ) == 3 &&
499+ instanceIDs [testInstanceID ] == testNodeName &&
500+ instanceIDs [testInstanceID1 ] == testNodeName1 &&
501+ instanceIDs [testInstanceID2 ] == testNodeName2
502+ }, 5 * time .Second , 50 * time .Millisecond , "All 3 nodes should be tracked by informer" )
497503
498504 eventChan := make (chan model.MaintenanceEvent , 10 )
499505
@@ -670,25 +676,18 @@ func TestInstanceFiltering(t *testing.T) {
670676
671677 receivedEvents := 0
672678
673- timeout := time .After (1 * time .Second )
674- for {
679+ require .Eventually (t , func () bool {
675680 select {
676681 case event := <- eventChan :
677682 receivedEvents ++
678683 // Verify it's for our cluster's instance
679684 assert .Equal (t , testInstanceID , event .ResourceID )
680685 assert .Equal (t , testNodeName , event .NodeName )
681- case <- timeout :
682- goto checkResults
683686 default :
684- if receivedEvents >= 1 {
685- goto checkResults
686- }
687- time .Sleep (10 * time .Millisecond )
688687 }
689- }
688+ return receivedEvents >= 1
689+ }, 5 * time .Second , 50 * time .Millisecond , "Should receive event for our cluster's instance" )
690690
691- checkResults:
692691 assert .Equal (t , 1 , receivedEvents , "Should receive event only for our cluster's instance" )
693692}
694693
@@ -872,8 +871,12 @@ func TestIgnoredEventTypes(t *testing.T) {
872871 _ = k8sClient .CoreV1 ().Nodes ().Delete (context .Background (), testNodeNameIgnored , metav1.DeleteOptions {})
873872 })
874873
875- // Wait for informer to sync
876- time .Sleep (200 * time .Millisecond )
874+ require .Eventually (t , func () bool {
875+ instanceIDs := client .nodeInformer .GetInstanceIDs ()
876+ return len (instanceIDs ) == 2 &&
877+ instanceIDs [testInstanceID ] == testNodeName &&
878+ instanceIDs [testInstanceIDIgnored ] == testNodeNameIgnored
879+ }, 5 * time .Second , 50 * time .Millisecond , "Both nodes should be tracked by informer" )
877880
878881 instanceStopEventArn := fmt .Sprintf (
879882 "arn:aws:health:%s::event/%s/%s/test-event-stop" ,
0 commit comments