Skip to content

Commit 3376a27

Browse files
authored
Merge branch 'main' into 192-helm
2 parents 53665d5 + 1f43b53 commit 3376a27

File tree

3 files changed

+52
-17
lines changed

3 files changed

+52
-17
lines changed

node-drainer/pkg/informers/informers.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"log/slog"
2121
"path/filepath"
2222
"regexp"
23+
"sort"
2324
"sync"
2425
"time"
2526

@@ -522,6 +523,8 @@ func (i *Informers) DeletePodsAfterTimeout(ctx context.Context, nodeName string,
522523
podNames = append(podNames, pod.Name)
523524
}
524525

526+
sort.Strings(podNames)
527+
525528
message := fmt.Sprintf(
526529
"Waiting for following pods to finish: %v in namespace: %v or they will be force deleted on: %s",
527530
podNames, namespaces, deleteDateTimeUTC,

node-drainer/pkg/reconciler/reconciler.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"encoding/json"
2020
"fmt"
2121
"log/slog"
22+
"sort"
2223
"strings"
2324
"sync"
2425
"time"
@@ -391,6 +392,8 @@ func (r *Reconciler) executeCheckCompletion(ctx context.Context,
391392
}
392393

393394
if !allPodsComplete {
395+
sort.Strings(remainingPods)
396+
394397
message := fmt.Sprintf("Waiting for following pods to finish: %v", remainingPods)
395398
reason := "AwaitingPodCompletion"
396399

node-drainer/pkg/reconciler/reconciler_integration_test.go

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ func TestReconciler_ProcessEvent(t *testing.T) {
153153
mongoFindOneResponse map[string]interface{}
154154
expectError bool
155155
expectedNodeLabel *string
156+
numReconciles int
156157
validateFunc func(t *testing.T, client kubernetes.Interface, ctx context.Context, nodeName string, err error)
157158
}{
158159
{
@@ -254,39 +255,58 @@ func TestReconciler_ProcessEvent(t *testing.T) {
254255
},
255256
},
256257
{
257-
name: "AllowCompletion mode waits for pods to complete",
258+
name: "AllowCompletion mode waits for pods to complete and verifies consistent pod ordering",
258259
nodeName: "completion-node",
259260
namespaces: []string{"completion-test"},
260261
nodeQuarantined: model.Quarantined,
261262
pods: []*v1.Pod{
262263
{
263-
ObjectMeta: metav1.ObjectMeta{Name: "running-pod", Namespace: "completion-test"},
264+
ObjectMeta: metav1.ObjectMeta{Name: "running-pod-3", Namespace: "completion-test"},
265+
Spec: v1.PodSpec{NodeName: "completion-node", Containers: []v1.Container{{Name: "c", Image: "nginx"}}},
266+
Status: v1.PodStatus{Phase: v1.PodRunning},
267+
},
268+
{
269+
ObjectMeta: metav1.ObjectMeta{Name: "running-pod-1", Namespace: "completion-test"},
270+
Spec: v1.PodSpec{NodeName: "completion-node", Containers: []v1.Container{{Name: "c", Image: "nginx"}}},
271+
Status: v1.PodStatus{Phase: v1.PodRunning},
272+
},
273+
{
274+
ObjectMeta: metav1.ObjectMeta{Name: "running-pod-2", Namespace: "completion-test"},
264275
Spec: v1.PodSpec{NodeName: "completion-node", Containers: []v1.Container{{Name: "c", Image: "nginx"}}},
265276
Status: v1.PodStatus{Phase: v1.PodRunning},
266277
},
267278
},
268279
expectError: true,
269280
expectedNodeLabel: ptr.To(string(statemanager.DrainingLabelValue)),
281+
numReconciles: 5,
270282
validateFunc: func(t *testing.T, client kubernetes.Interface, ctx context.Context, nodeName string, err error) {
271283
assert.Error(t, err)
272284
assert.Contains(t, err.Error(), "waiting for pods to complete")
273285

274286
nodeEvents, err := client.CoreV1().Events(metav1.NamespaceDefault).List(ctx, metav1.ListOptions{FieldSelector: fmt.Sprintf("involvedObject.name=%s,involvedObject.kind=Node", nodeName)})
275287
require.NoError(t, err)
276-
require.Len(t, nodeEvents.Items, 1)
288+
require.Len(t, nodeEvents.Items, 1, "only one event should be created despite multiple reconciliations")
277289
require.Equal(t, nodeEvents.Items[0].Reason, "AwaitingPodCompletion")
278-
require.Equal(t, nodeEvents.Items[0].Message, "Waiting for following pods to finish: [completion-test/running-pod]")
290+
expectedMessage := "Waiting for following pods to finish: [completion-test/running-pod-1 completion-test/running-pod-2 completion-test/running-pod-3]"
291+
require.Equal(t, expectedMessage, nodeEvents.Items[0].Message, "pod list should be in sorted order")
279292

280-
pod, err := client.CoreV1().Pods("completion-test").Get(ctx, "running-pod", metav1.GetOptions{})
281-
require.NoError(t, err)
282-
pod.Status.Phase = v1.PodSucceeded
283-
_, err = client.CoreV1().Pods("completion-test").UpdateStatus(ctx, pod, metav1.UpdateOptions{})
284-
require.NoError(t, err)
293+
for _, podName := range []string{"running-pod-1", "running-pod-2", "running-pod-3"} {
294+
pod, err := client.CoreV1().Pods("completion-test").Get(ctx, podName, metav1.GetOptions{})
295+
require.NoError(t, err)
296+
pod.Status.Phase = v1.PodSucceeded
297+
_, err = client.CoreV1().Pods("completion-test").UpdateStatus(ctx, pod, metav1.UpdateOptions{})
298+
require.NoError(t, err)
299+
}
285300

286301
require.Eventually(t, func() bool {
287-
updatedPod, err := client.CoreV1().Pods("completion-test").Get(ctx, "running-pod", metav1.GetOptions{})
288-
return err == nil && updatedPod.Status.Phase == v1.PodSucceeded
289-
}, 30*time.Second, 1*time.Second, "pod status should be updated to succeeded")
302+
for _, podName := range []string{"running-pod-1", "running-pod-2", "running-pod-3"} {
303+
updatedPod, err := client.CoreV1().Pods("completion-test").Get(ctx, podName, metav1.GetOptions{})
304+
if err != nil || updatedPod.Status.Phase != v1.PodSucceeded {
305+
return false
306+
}
307+
}
308+
return true
309+
}, 30*time.Second, 1*time.Second, "all pod statuses should be updated to succeeded")
290310
},
291311
},
292312
{
@@ -420,11 +440,19 @@ func TestReconciler_ProcessEvent(t *testing.T) {
420440
beforeReceived := getCounterValue(t, metrics.TotalEventsReceived)
421441
beforeDuration := getHistogramCount(t, metrics.EventHandlingDuration)
422442

423-
err := processHealthEvent(setup.ctx, t, setup.reconciler, setup.mockCollection, healthEventOptions{
424-
nodeName: tt.nodeName,
425-
nodeQuarantined: tt.nodeQuarantined,
426-
drainForce: tt.drainForce,
427-
})
443+
numReconciles := tt.numReconciles
444+
if numReconciles == 0 {
445+
numReconciles = 1
446+
}
447+
448+
var err error
449+
for i := 0; i < numReconciles; i++ {
450+
err = processHealthEvent(setup.ctx, t, setup.reconciler, setup.mockCollection, healthEventOptions{
451+
nodeName: tt.nodeName,
452+
nodeQuarantined: tt.nodeQuarantined,
453+
drainForce: tt.drainForce,
454+
})
455+
}
428456

429457
afterReceived := getCounterValue(t, metrics.TotalEventsReceived)
430458
afterDuration := getHistogramCount(t, metrics.EventHandlingDuration)
@@ -1197,3 +1225,4 @@ func TestReconciler_MultipleEventsOnNodeCancelledByUnQuarantine(t *testing.T) {
11971225
require.NoError(t, err)
11981226
assert.Nil(t, pod2.DeletionTimestamp, "pod-2 should not be deleted")
11991227
}
1228+

0 commit comments

Comments (0)