|
4 | 4 | package status_updater |
5 | 5 |
|
6 | 6 | import ( |
| 7 | + "context" |
| 8 | + "errors" |
7 | 9 | "strconv" |
8 | 10 | "sync" |
9 | 11 | "testing" |
| 12 | + "time" |
10 | 13 |
|
11 | 14 | . "github.com/onsi/ginkgo/v2" |
12 | 15 | . "github.com/onsi/gomega" |
13 | 16 |
|
14 | 17 | v1 "k8s.io/api/core/v1" |
| 18 | + apierrors "k8s.io/apimachinery/pkg/api/errors" |
15 | 19 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" |
16 | 20 | "k8s.io/apimachinery/pkg/runtime" |
| 21 | + "k8s.io/apimachinery/pkg/runtime/schema" |
17 | 22 | "k8s.io/client-go/kubernetes/fake" |
18 | 23 | fakecorev1 "k8s.io/client-go/kubernetes/typed/core/v1/fake" |
19 | 24 | faketesting "k8s.io/client-go/testing" |
20 | 25 | "k8s.io/client-go/tools/record" |
21 | 26 |
|
22 | 27 | kubeaischedfake "github.com/NVIDIA/KAI-scheduler/pkg/apis/client/clientset/versioned/fake" |
| 28 | + fakeschedulingv2alpha2 "github.com/NVIDIA/KAI-scheduler/pkg/apis/client/clientset/versioned/typed/scheduling/v2alpha2/fake" |
| 29 | + enginev2alpha2 "github.com/NVIDIA/KAI-scheduler/pkg/apis/scheduling/v2alpha2" |
23 | 30 | ) |
24 | 31 |
|
25 | 32 | const ( |
@@ -76,4 +83,91 @@ var _ = Describe("Status Updater Concurrency - large scale: increase queue size" |
76 | 83 | close(signalCh) |
77 | 84 | wg.Wait() |
78 | 85 | }) |
| 86 | + |
| 87 | + It("updatePodGroup - No retry after conflict error", func() { |
| 88 | + updateStatusCalls := 0 |
| 89 | + patchCalls := 0 |
| 90 | + |
| 91 | + // Set up reactor to return conflict error on UpdateStatus calls |
| 92 | + kubeAiSchedClient.SchedulingV2alpha2().(*fakeschedulingv2alpha2.FakeSchedulingV2alpha2).PrependReactor( |
| 93 | + "update", "podgroups", func(action faketesting.Action) (handled bool, ret runtime.Object, err error) { |
| 94 | + if updateAction, ok := action.(faketesting.UpdateAction); ok { |
| 95 | + if updateAction.GetSubresource() == "status" { |
| 96 | + updateStatusCalls++ |
| 97 | + // Return a conflict error to simulate resource version mismatch |
| 98 | + return true, nil, apierrors.NewConflict( |
| 99 | + schema.GroupResource{Group: "scheduling.run.ai", Resource: "podgroups"}, |
| 100 | + "test-pg", |
| 101 | + errors.New("the object has been modified; please apply your changes to the latest version"), |
| 102 | + ) |
| 103 | + } |
| 104 | + } |
| 105 | + return false, nil, nil |
| 106 | + }, |
| 107 | + ) |
| 108 | + |
| 109 | + // Track patch calls separately |
| 110 | + kubeAiSchedClient.SchedulingV2alpha2().(*fakeschedulingv2alpha2.FakeSchedulingV2alpha2).PrependReactor( |
| 111 | + "patch", "podgroups", func(action faketesting.Action) (handled bool, ret runtime.Object, err error) { |
| 112 | + patchCalls++ |
| 113 | + return false, nil, nil |
| 114 | + }, |
| 115 | + ) |
| 116 | + |
| 117 | + job := &enginev2alpha2.PodGroup{ |
| 118 | + ObjectMeta: metav1.ObjectMeta{ |
| 119 | + Name: "test-pg", |
| 120 | + Namespace: "test-ns", |
| 121 | + UID: "test-uid", |
| 122 | + }, |
| 123 | + Status: enginev2alpha2.PodGroupStatus{ |
| 124 | + SchedulingConditions: []enginev2alpha2.SchedulingCondition{ |
| 125 | + { |
| 126 | + TransitionID: "1", |
| 127 | + Type: enginev2alpha2.UnschedulableOnNodePool, |
| 128 | + NodePool: "test", |
| 129 | + Reason: "test", |
| 130 | + Message: "test", |
| 131 | + Status: v1.ConditionTrue, |
| 132 | + }, |
| 133 | + }, |
| 134 | + }, |
| 135 | + } |
| 136 | + |
| 137 | + key := statusUpdater.keyForPodGroupPayload(job.Name, job.Namespace, job.UID) |
| 138 | + updateData := &inflightUpdate{ |
| 139 | + object: job, |
| 140 | + patchData: nil, // No patch data, only status update |
| 141 | + updateStatus: true, |
| 142 | + subResources: nil, |
| 143 | + } |
| 144 | + |
| 145 | + // Store the inflight update |
| 146 | + statusUpdater.inFlightPodGroups.Store(key, updateData) |
| 147 | + |
| 148 | + statusUpdater.Run(make(chan struct{})) |
| 149 | + |
| 150 | + // Call updatePodGroup directly |
| 151 | + ctx := context.Background() |
| 152 | + statusUpdater.updatePodGroup(ctx, key, updateData) |
| 153 | + |
| 154 | + // Verify UpdateStatus was called once |
| 155 | + Expect(updateStatusCalls).To(Equal(1), "UpdateStatus should be called once") |
| 156 | + |
| 157 | + // Verify Patch was not called (no patchData provided) |
| 158 | + Expect(patchCalls).To(Equal(0), "Patch should not be called when no patchData is provided") |
| 159 | + |
| 160 | + // Verify it's not in the applied cache (since the update failed with conflict) |
| 161 | + _, appliedExists := statusUpdater.appliedPodGroupUpdates.Load(key) |
| 162 | + Expect(appliedExists).To(BeFalse(), "Update should not be in applied cache after conflict error") |
| 163 | + |
| 164 | + // The key behavior: Verify the queue is empty (no retry was queued) |
| 165 | + // When a conflict error occurs, the function returns early without calling pushToUpdateQueue |
| 166 | + select { |
| 167 | + case <-statusUpdater.updateQueueOut: |
| 168 | + Fail("Update queue should be empty - no retry should be queued for conflict errors") |
| 169 | + case <-time.After(100 * time.Millisecond): |
| 170 | + // Expected - queue is empty, meaning no retry was scheduled |
| 171 | + } |
| 172 | + }) |
79 | 173 | }) |
0 commit comments