@@ -153,6 +153,7 @@ func TestReconciler_ProcessEvent(t *testing.T) {
153153 mongoFindOneResponse map [string ]interface {}
154154 expectError bool
155155 expectedNodeLabel * string
156+ numReconciles int
156157 validateFunc func (t * testing.T , client kubernetes.Interface , ctx context.Context , nodeName string , err error )
157158 }{
158159 {
@@ -254,39 +255,58 @@ func TestReconciler_ProcessEvent(t *testing.T) {
254255 },
255256 },
256257 {
257- name : "AllowCompletion mode waits for pods to complete" ,
258+ name : "AllowCompletion mode waits for pods to complete and verifies consistent pod ordering " ,
258259 nodeName : "completion-node" ,
259260 namespaces : []string {"completion-test" },
260261 nodeQuarantined : model .Quarantined ,
261262 pods : []* v1.Pod {
262263 {
263- ObjectMeta : metav1.ObjectMeta {Name : "running-pod" , Namespace : "completion-test" },
264+ ObjectMeta : metav1.ObjectMeta {Name : "running-pod-3" , Namespace : "completion-test" },
265+ Spec : v1.PodSpec {NodeName : "completion-node" , Containers : []v1.Container {{Name : "c" , Image : "nginx" }}},
266+ Status : v1.PodStatus {Phase : v1 .PodRunning },
267+ },
268+ {
269+ ObjectMeta : metav1.ObjectMeta {Name : "running-pod-1" , Namespace : "completion-test" },
270+ Spec : v1.PodSpec {NodeName : "completion-node" , Containers : []v1.Container {{Name : "c" , Image : "nginx" }}},
271+ Status : v1.PodStatus {Phase : v1 .PodRunning },
272+ },
273+ {
274+ ObjectMeta : metav1.ObjectMeta {Name : "running-pod-2" , Namespace : "completion-test" },
264275 Spec : v1.PodSpec {NodeName : "completion-node" , Containers : []v1.Container {{Name : "c" , Image : "nginx" }}},
265276 Status : v1.PodStatus {Phase : v1 .PodRunning },
266277 },
267278 },
268279 expectError : true ,
269280 expectedNodeLabel : ptr .To (string (statemanager .DrainingLabelValue )),
281+ numReconciles : 5 ,
270282 validateFunc : func (t * testing.T , client kubernetes.Interface , ctx context.Context , nodeName string , err error ) {
271283 assert .Error (t , err )
272284 assert .Contains (t , err .Error (), "waiting for pods to complete" )
273285
274286 nodeEvents , err := client .CoreV1 ().Events (metav1 .NamespaceDefault ).List (ctx , metav1.ListOptions {FieldSelector : fmt .Sprintf ("involvedObject.name=%s,involvedObject.kind=Node" , nodeName )})
275287 require .NoError (t , err )
276- require .Len (t , nodeEvents .Items , 1 )
288+ require .Len (t , nodeEvents .Items , 1 , "only one event should be created despite multiple reconciliations" )
277289 require .Equal (t , nodeEvents .Items [0 ].Reason , "AwaitingPodCompletion" )
278- require .Equal (t , nodeEvents .Items [0 ].Message , "Waiting for following pods to finish: [completion-test/running-pod]" )
290+ expectedMessage := "Waiting for following pods to finish: [completion-test/running-pod-1 completion-test/running-pod-2 completion-test/running-pod-3]"
291+ require .Equal (t , expectedMessage , nodeEvents .Items [0 ].Message , "pod list should be in sorted order" )
279292
280- pod , err := client .CoreV1 ().Pods ("completion-test" ).Get (ctx , "running-pod" , metav1.GetOptions {})
281- require .NoError (t , err )
282- pod .Status .Phase = v1 .PodSucceeded
283- _ , err = client .CoreV1 ().Pods ("completion-test" ).UpdateStatus (ctx , pod , metav1.UpdateOptions {})
284- require .NoError (t , err )
293+ for _ , podName := range []string {"running-pod-1" , "running-pod-2" , "running-pod-3" } {
294+ pod , err := client .CoreV1 ().Pods ("completion-test" ).Get (ctx , podName , metav1.GetOptions {})
295+ require .NoError (t , err )
296+ pod .Status .Phase = v1 .PodSucceeded
297+ _ , err = client .CoreV1 ().Pods ("completion-test" ).UpdateStatus (ctx , pod , metav1.UpdateOptions {})
298+ require .NoError (t , err )
299+ }
285300
286301 require .Eventually (t , func () bool {
287- updatedPod , err := client .CoreV1 ().Pods ("completion-test" ).Get (ctx , "running-pod" , metav1.GetOptions {})
288- return err == nil && updatedPod .Status .Phase == v1 .PodSucceeded
289- }, 30 * time .Second , 1 * time .Second , "pod status should be updated to succeeded" )
302+ for _ , podName := range []string {"running-pod-1" , "running-pod-2" , "running-pod-3" } {
303+ updatedPod , err := client .CoreV1 ().Pods ("completion-test" ).Get (ctx , podName , metav1.GetOptions {})
304+ if err != nil || updatedPod .Status .Phase != v1 .PodSucceeded {
305+ return false
306+ }
307+ }
308+ return true
309+ }, 30 * time .Second , 1 * time .Second , "all pod statuses should be updated to succeeded" )
290310 },
291311 },
292312 {
@@ -420,11 +440,19 @@ func TestReconciler_ProcessEvent(t *testing.T) {
420440 beforeReceived := getCounterValue (t , metrics .TotalEventsReceived )
421441 beforeDuration := getHistogramCount (t , metrics .EventHandlingDuration )
422442
423- err := processHealthEvent (setup .ctx , t , setup .reconciler , setup .mockCollection , healthEventOptions {
424- nodeName : tt .nodeName ,
425- nodeQuarantined : tt .nodeQuarantined ,
426- drainForce : tt .drainForce ,
427- })
443+ numReconciles := tt .numReconciles
444+ if numReconciles == 0 {
445+ numReconciles = 1
446+ }
447+
448+ var err error
449+ for i := 0 ; i < numReconciles ; i ++ {
450+ err = processHealthEvent (setup .ctx , t , setup .reconciler , setup .mockCollection , healthEventOptions {
451+ nodeName : tt .nodeName ,
452+ nodeQuarantined : tt .nodeQuarantined ,
453+ drainForce : tt .drainForce ,
454+ })
455+ }
428456
429457 afterReceived := getCounterValue (t , metrics .TotalEventsReceived )
430458 afterDuration := getHistogramCount (t , metrics .EventHandlingDuration )
@@ -1195,136 +1223,6 @@ func TestReconciler_MultipleEventsOnNodeCancelledByUnQuarantine(t *testing.T) {
11951223
11961224 pod2 , err := setup .client .CoreV1 ().Pods ("timeout-test" ).Get (setup .ctx , "pod-2" , metav1.GetOptions {})
11971225 require .NoError (t , err )
1198- return metric .Counter .GetValue ()
1199- }
1200-
1201- func getCounterVecValue (t * testing.T , counterVec * prometheus.CounterVec , labelValues ... string ) float64 {
1202- t .Helper ()
1203- counter , err := counterVec .GetMetricWithLabelValues (labelValues ... )
1204- require .NoError (t , err )
1205- metric := & dto.Metric {}
1206- err = counter .Write (metric )
1207- require .NoError (t , err )
1208- return metric .Counter .GetValue ()
1209- }
1210-
1211- func getGaugeValue (t * testing.T , gauge prometheus.Gauge ) float64 {
1212- t .Helper ()
1213- metric := & dto.Metric {}
1214- err := gauge .Write (metric )
1215- require .NoError (t , err )
1216- return metric .Gauge .GetValue ()
1217- }
1218-
1219- func getGaugeVecValue (t * testing.T , gaugeVec * prometheus.GaugeVec , labelValues ... string ) float64 {
1220- t .Helper ()
1221- gauge , err := gaugeVec .GetMetricWithLabelValues (labelValues ... )
1222- require .NoError (t , err )
1223- metric := & dto.Metric {}
1224- err = gauge .Write (metric )
1225- require .NoError (t , err )
1226- return metric .Gauge .GetValue ()
1227- }
1228-
1229- func getHistogramCount (t * testing.T , histogram prometheus.Histogram ) uint64 {
1230- t .Helper ()
1231- metric := & dto.Metric {}
1232- err := histogram .Write (metric )
1233- require .NoError (t , err )
1234- return metric .Histogram .GetSampleCount ()
1226+ assert .Nil (t , pod2 .DeletionTimestamp , "pod-2 should not be deleted" )
12351227}
12361228
1237- // TestReconciler_PodListSortingConsistentEvents verifies that pod list sorting prevents duplicate Kubernetes events by ensuring consistent event messages across multiple reconciliation calls.
1238- func TestReconciler_PodListSortingConsistentEvents (t * testing.T ) {
1239- setup := setupDirectTest (t , []config.UserNamespace {
1240- {Name : "test-*" , Mode : config .ModeAllowCompletion },
1241- }, false )
1242-
1243- nodeName := "test-node-sorting"
1244- createNode (setup .ctx , t , setup .client , nodeName )
1245- createNamespace (setup .ctx , t , setup .client , "test-sorting" )
1246-
1247- for _ , podName := range []string {"ubuntu-gpu-deployment-64c54f5b4c-2c4c5" , "alloy-metrics-55d6fbdbd-vrp2h" , "alloy-gateway-0" } {
1248- createPod (setup .ctx , t , setup .client , "test-sorting" , podName , nodeName , v1 .PodRunning )
1249- }
1250-
1251- // Wait for all pods to be visible in the informer cache
1252- require .Eventually (t , func () bool {
1253- pods , err := setup .informersInstance .FindEvictablePodsInNamespaceAndNode ("test-sorting" , nodeName )
1254- if err != nil {
1255- return false
1256- }
1257- return len (pods ) == 3
1258- }, 30 * time .Second , 100 * time .Millisecond , "All 3 pods should be visible in informer cache before test starts" )
1259-
1260- const numIterations = 5
1261- var referenceMessage string
1262- var mismatchFound bool
1263- var mismatchIteration int
1264- var mismatchMessage string
1265-
1266- for i := 0 ; i < numIterations ; i ++ {
1267- setup .client .CoreV1 ().Events (metav1 .NamespaceDefault ).DeleteCollection (
1268- setup .ctx , metav1.DeleteOptions {}, metav1.ListOptions {})
1269-
1270- // Wait for events to be deleted
1271- require .Eventually (t , func () bool {
1272- events , _ := setup .client .CoreV1 ().Events (metav1 .NamespaceDefault ).List (
1273- setup .ctx ,
1274- metav1.ListOptions {
1275- FieldSelector : fmt .Sprintf ("involvedObject.name=%s,involvedObject.kind=Node" , nodeName ),
1276- },
1277- )
1278- return len (events .Items ) == 0
1279- }, 5 * time .Second , 50 * time .Millisecond , "Events should be deleted before next iteration" )
1280-
1281- err := processHealthEvent (setup .ctx , t , setup .reconciler , setup .mockCollection , healthEventOptions {
1282- nodeName : nodeName ,
1283- nodeQuarantined : model .Quarantined ,
1284- })
1285-
1286- assert .Error (t , err )
1287-
1288- // Wait for new event to be created with the pod list
1289- var currentMessage string
1290- require .Eventually (t , func () bool {
1291- events , err := setup .client .CoreV1 ().Events (metav1 .NamespaceDefault ).List (
1292- setup .ctx ,
1293- metav1.ListOptions {
1294- FieldSelector : fmt .Sprintf ("involvedObject.name=%s,involvedObject.kind=Node" , nodeName ),
1295- },
1296- )
1297- if err != nil || len (events .Items ) == 0 {
1298- return false
1299- }
1300- msg := events .Items [len (events .Items )- 1 ].Message
1301- if len (msg ) > 0 && msg != "" {
1302- currentMessage = msg
1303- return true
1304- }
1305- return false
1306- }, 5 * time .Second , 50 * time .Millisecond , "Event with pod list should be created" )
1307-
1308- if i == 0 {
1309- referenceMessage = currentMessage
1310- continue
1311- }
1312-
1313- if currentMessage != referenceMessage {
1314- mismatchFound = true
1315- mismatchIteration = i
1316- mismatchMessage = currentMessage
1317- break
1318- }
1319- }
1320-
1321- require .NotEmpty (t , referenceMessage , "No event messages were generated" )
1322-
1323- if mismatchFound {
1324- assert .Fail (t , fmt .Sprintf ("FAIL: Iteration %d produced different message. sort.Strings() missing from reconciler.go:252\n Expected: %s\n Got: %s" ,
1325- mismatchIteration , referenceMessage , mismatchMessage ))
1326- }
1327-
1328- expectedMessage := "Waiting for following pods to finish: [test-sorting/alloy-gateway-0 test-sorting/alloy-metrics-55d6fbdbd-vrp2h test-sorting/ubuntu-gpu-deployment-64c54f5b4c-2c4c5]"
1329- assert .Equal (t , expectedMessage , referenceMessage , "FAIL: Pods not in same order" )
1330- }
0 commit comments