
Commit b12dcb9

test: integrate pod sorting verification into existing table-driven test
- Update 'AllowCompletion mode' test to verify consistent pod ordering
- Add numReconciles field to support multiple reconciliation cycles
- Test with 3 pods in non-alphabetical order to validate sorting
- Verify only 1 event created despite 5 reconciliations (deduplication works)
- Remove standalone TestReconciler_PodListSortingConsistentEvents test

Signed-off-by: Gyan172004 <[email protected]>
1 parent ff9a82c commit b12dcb9
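
Context for the change: the deduplication this commit tests only works if every reconciliation produces an identical event message, which is why the reconciler sorts the pod list before formatting it (the removed standalone test below points at sort.Strings() in reconciler.go). A minimal, self-contained sketch of that idea; buildAwaitingPodsMessage is an illustrative name, not the project's actual function:

package main

import (
	"fmt"
	"sort"
)

// buildAwaitingPodsMessage is a hypothetical stand-in for how the reconciler
// could format its "AwaitingPodCompletion" message: sorting the namespace/name
// keys makes the same set of pods yield the same string, regardless of the
// order the informer cache returned them in.
func buildAwaitingPodsMessage(podKeys []string) string {
	sort.Strings(podKeys)
	return fmt.Sprintf("Waiting for following pods to finish: %v", podKeys)
}

func main() {
	// Pods listed in non-alphabetical order, as in the updated test case.
	pods := []string{
		"completion-test/running-pod-3",
		"completion-test/running-pod-1",
		"completion-test/running-pod-2",
	}
	fmt.Println(buildAwaitingPodsMessage(pods))
	// Prints: Waiting for following pods to finish:
	// [completion-test/running-pod-1 completion-test/running-pod-2 completion-test/running-pod-3]
}

With a stable message, the five reconciliations driven by numReconciles all describe the same condition, and the updated test can require that exactly one "AwaitingPodCompletion" event ends up on the node.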

File tree

2 files changed: +47, -149 lines


node-drainer/pkg/reconciler/reconciler.go

Lines changed: 1 addition & 1 deletion
@@ -19,8 +19,8 @@ import (
     "encoding/json"
     "fmt"
     "log/slog"
-    "strings"
     "sort"
+    "strings"
     "sync"
     "time"

node-drainer/pkg/reconciler/reconciler_integration_test.go

Lines changed: 46 additions & 148 deletions
@@ -153,6 +153,7 @@ func TestReconciler_ProcessEvent(t *testing.T) {
         mongoFindOneResponse map[string]interface{}
         expectError          bool
         expectedNodeLabel    *string
+        numReconciles        int
         validateFunc         func(t *testing.T, client kubernetes.Interface, ctx context.Context, nodeName string, err error)
     }{
         {
@@ -254,39 +255,58 @@ func TestReconciler_ProcessEvent(t *testing.T) {
             },
         },
         {
-            name:            "AllowCompletion mode waits for pods to complete",
+            name:            "AllowCompletion mode waits for pods to complete and verifies consistent pod ordering",
             nodeName:        "completion-node",
             namespaces:      []string{"completion-test"},
             nodeQuarantined: model.Quarantined,
             pods: []*v1.Pod{
                 {
-                    ObjectMeta: metav1.ObjectMeta{Name: "running-pod", Namespace: "completion-test"},
+                    ObjectMeta: metav1.ObjectMeta{Name: "running-pod-3", Namespace: "completion-test"},
+                    Spec:       v1.PodSpec{NodeName: "completion-node", Containers: []v1.Container{{Name: "c", Image: "nginx"}}},
+                    Status:     v1.PodStatus{Phase: v1.PodRunning},
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "running-pod-1", Namespace: "completion-test"},
+                    Spec:       v1.PodSpec{NodeName: "completion-node", Containers: []v1.Container{{Name: "c", Image: "nginx"}}},
+                    Status:     v1.PodStatus{Phase: v1.PodRunning},
+                },
+                {
+                    ObjectMeta: metav1.ObjectMeta{Name: "running-pod-2", Namespace: "completion-test"},
                     Spec:       v1.PodSpec{NodeName: "completion-node", Containers: []v1.Container{{Name: "c", Image: "nginx"}}},
                     Status:     v1.PodStatus{Phase: v1.PodRunning},
                 },
             },
             expectError:       true,
             expectedNodeLabel: ptr.To(string(statemanager.DrainingLabelValue)),
+            numReconciles:     5,
             validateFunc: func(t *testing.T, client kubernetes.Interface, ctx context.Context, nodeName string, err error) {
                 assert.Error(t, err)
                 assert.Contains(t, err.Error(), "waiting for pods to complete")

                 nodeEvents, err := client.CoreV1().Events(metav1.NamespaceDefault).List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Node", nodeName)})
                 require.NoError(t, err)
-                require.Len(t, nodeEvents.Items, 1)
+                require.Len(t, nodeEvents.Items, 1, "only one event should be created despite multiple reconciliations")
                 require.Equal(t, nodeEvents.Items[0].Reason, "AwaitingPodCompletion")
-                require.Equal(t, nodeEvents.Items[0].Message, "Waiting for following pods to finish: [completion-test/running-pod]")
+                expectedMessage := "Waiting for following pods to finish: [completion-test/running-pod-1 completion-test/running-pod-2 completion-test/running-pod-3]"
+                require.Equal(t, expectedMessage, nodeEvents.Items[0].Message, "pod list should be in sorted order")

-                pod, err := client.CoreV1().Pods("completion-test").Get(ctx, "running-pod", metav1.GetOptions{})
-                require.NoError(t, err)
-                pod.Status.Phase = v1.PodSucceeded
-                _, err = client.CoreV1().Pods("completion-test").UpdateStatus(ctx, pod, metav1.UpdateOptions{})
-                require.NoError(t, err)
+                for _, podName := range []string{"running-pod-1", "running-pod-2", "running-pod-3"} {
+                    pod, err := client.CoreV1().Pods("completion-test").Get(ctx, podName, metav1.GetOptions{})
+                    require.NoError(t, err)
+                    pod.Status.Phase = v1.PodSucceeded
+                    _, err = client.CoreV1().Pods("completion-test").UpdateStatus(ctx, pod, metav1.UpdateOptions{})
+                    require.NoError(t, err)
+                }

                 require.Eventually(t, func() bool {
-                    updatedPod, err := client.CoreV1().Pods("completion-test").Get(ctx, "running-pod", metav1.GetOptions{})
-                    return err == nil && updatedPod.Status.Phase == v1.PodSucceeded
-                }, 30*time.Second, 1*time.Second, "pod status should be updated to succeeded")
+                    for _, podName := range []string{"running-pod-1", "running-pod-2", "running-pod-3"} {
+                        updatedPod, err := client.CoreV1().Pods("completion-test").Get(ctx, podName, metav1.GetOptions{})
+                        if err != nil || updatedPod.Status.Phase != v1.PodSucceeded {
+                            return false
+                        }
+                    }
+                    return true
+                }, 30*time.Second, 1*time.Second, "all pod statuses should be updated to succeeded")
             },
         },
         {
@@ -420,11 +440,19 @@ func TestReconciler_ProcessEvent(t *testing.T) {
             beforeReceived := getCounterValue(t, metrics.TotalEventsReceived)
             beforeDuration := getHistogramCount(t, metrics.EventHandlingDuration)

-            err := processHealthEvent(setup.ctx, t, setup.reconciler, setup.mockCollection, healthEventOptions{
-                nodeName:        tt.nodeName,
-                nodeQuarantined: tt.nodeQuarantined,
-                drainForce:      tt.drainForce,
-            })
+            numReconciles := tt.numReconciles
+            if numReconciles == 0 {
+                numReconciles = 1
+            }
+
+            var err error
+            for i := 0; i < numReconciles; i++ {
+                err = processHealthEvent(setup.ctx, t, setup.reconciler, setup.mockCollection, healthEventOptions{
+                    nodeName:        tt.nodeName,
+                    nodeQuarantined: tt.nodeQuarantined,
+                    drainForce:      tt.drainForce,
+                })
+            }

             afterReceived := getCounterValue(t, metrics.TotalEventsReceived)
             afterDuration := getHistogramCount(t, metrics.EventHandlingDuration)
@@ -1195,136 +1223,6 @@ func TestReconciler_MultipleEventsOnNodeCancelledByUnQuarantine(t *testing.T) {

     pod2, err := setup.client.CoreV1().Pods("timeout-test").Get(setup.ctx, "pod-2", metav1.GetOptions{})
     require.NoError(t, err)
-    return metric.Counter.GetValue()
-}
-
-func getCounterVecValue(t *testing.T, counterVec *prometheus.CounterVec, labelValues ...string) float64 {
-    t.Helper()
-    counter, err := counterVec.GetMetricWithLabelValues(labelValues...)
-    require.NoError(t, err)
-    metric := &dto.Metric{}
-    err = counter.Write(metric)
-    require.NoError(t, err)
-    return metric.Counter.GetValue()
-}
-
-func getGaugeValue(t *testing.T, gauge prometheus.Gauge) float64 {
-    t.Helper()
-    metric := &dto.Metric{}
-    err := gauge.Write(metric)
-    require.NoError(t, err)
-    return metric.Gauge.GetValue()
-}
-
-func getGaugeVecValue(t *testing.T, gaugeVec *prometheus.GaugeVec, labelValues ...string) float64 {
-    t.Helper()
-    gauge, err := gaugeVec.GetMetricWithLabelValues(labelValues...)
-    require.NoError(t, err)
-    metric := &dto.Metric{}
-    err = gauge.Write(metric)
-    require.NoError(t, err)
-    return metric.Gauge.GetValue()
-}
-
-func getHistogramCount(t *testing.T, histogram prometheus.Histogram) uint64 {
-    t.Helper()
-    metric := &dto.Metric{}
-    err := histogram.Write(metric)
-    require.NoError(t, err)
-    return metric.Histogram.GetSampleCount()
+    assert.Nil(t, pod2.DeletionTimestamp, "pod-2 should not be deleted")
 }

-// TestReconciler_PodListSortingConsistentEvents verifies that pod list sorting prevents duplicate Kubernetes events by ensuring consistent event messages across multiple reconciliation calls.
-func TestReconciler_PodListSortingConsistentEvents(t *testing.T) {
-    setup := setupDirectTest(t, []config.UserNamespace{
-        {Name: "test-*", Mode: config.ModeAllowCompletion},
-    }, false)
-
-    nodeName := "test-node-sorting"
-    createNode(setup.ctx, t, setup.client, nodeName)
-    createNamespace(setup.ctx, t, setup.client, "test-sorting")
-
-    for _, podName := range []string{"ubuntu-gpu-deployment-64c54f5b4c-2c4c5", "alloy-metrics-55d6fbdbd-vrp2h", "alloy-gateway-0"} {
-        createPod(setup.ctx, t, setup.client, "test-sorting", podName, nodeName, v1.PodRunning)
-    }
-
-    // Wait for all pods to be visible in the informer cache
-    require.Eventually(t, func() bool {
-        pods, err := setup.informersInstance.FindEvictablePodsInNamespaceAndNode("test-sorting", nodeName)
-        if err != nil {
-            return false
-        }
-        return len(pods) == 3
-    }, 30*time.Second, 100*time.Millisecond, "All 3 pods should be visible in informer cache before test starts")
-
-    const numIterations = 5
-    var referenceMessage string
-    var mismatchFound bool
-    var mismatchIteration int
-    var mismatchMessage string
-
-    for i := 0; i < numIterations; i++ {
-        setup.client.CoreV1().Events(metav1.NamespaceDefault).DeleteCollection(
-            setup.ctx, metav1.DeleteOptions{}, metav1.ListOptions{})
-
-        // Wait for events to be deleted
-        require.Eventually(t, func() bool {
-            events, _ := setup.client.CoreV1().Events(metav1.NamespaceDefault).List(
-                setup.ctx,
-                metav1.ListOptions{
-                    FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Node", nodeName),
-                },
-            )
-            return len(events.Items) == 0
-        }, 5*time.Second, 50*time.Millisecond, "Events should be deleted before next iteration")
-
-        err := processHealthEvent(setup.ctx, t, setup.reconciler, setup.mockCollection, healthEventOptions{
-            nodeName:        nodeName,
-            nodeQuarantined: model.Quarantined,
-        })
-
-        assert.Error(t, err)
-
-        // Wait for new event to be created with the pod list
-        var currentMessage string
-        require.Eventually(t, func() bool {
-            events, err := setup.client.CoreV1().Events(metav1.NamespaceDefault).List(
-                setup.ctx,
-                metav1.ListOptions{
-                    FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Node", nodeName),
-                },
-            )
-            if err != nil || len(events.Items) == 0 {
-                return false
-            }
-            msg := events.Items[len(events.Items)-1].Message
-            if len(msg) > 0 && msg != "" {
-                currentMessage = msg
-                return true
-            }
-            return false
-        }, 5*time.Second, 50*time.Millisecond, "Event with pod list should be created")
-
-        if i == 0 {
-            referenceMessage = currentMessage
-            continue
-        }
-
-        if currentMessage != referenceMessage {
-            mismatchFound = true
-            mismatchIteration = i
-            mismatchMessage = currentMessage
-            break
-        }
-    }
-
-    require.NotEmpty(t, referenceMessage, "No event messages were generated")
-
-    if mismatchFound {
-        assert.Fail(t, fmt.Sprintf("FAIL: Iteration %d produced different message. sort.Strings() missing from reconciler.go:252\nExpected: %s\nGot: %s",
-            mismatchIteration, referenceMessage, mismatchMessage))
-    }
-
-    expectedMessage := "Waiting for following pods to finish: [test-sorting/alloy-gateway-0 test-sorting/alloy-metrics-55d6fbdbd-vrp2h test-sorting/ubuntu-gpu-deployment-64c54f5b4c-2c4c5]"
-    assert.Equal(t, expectedMessage, referenceMessage, "FAIL: Pods not in same order")
-}
