Skip to content

Commit 87a6227

Browse files
authored
Merge pull request #1838 from rahulait/add-retry-for-conflicts
[improvement]: add logic to retry if update fails due to conflict
2 parents 109f67e + a2308d4 commit 87a6227

File tree

5 files changed

+150
-14
lines changed

5 files changed

+150
-14
lines changed

internal/conditions/clusterpolicy.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"k8s.io/apimachinery/pkg/api/meta"
2424
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2525
"k8s.io/apimachinery/pkg/types"
26+
"k8s.io/client-go/util/retry"
2627
"sigs.k8s.io/controller-runtime/pkg/client"
2728
"sigs.k8s.io/controller-runtime/pkg/log"
2829

@@ -55,14 +56,12 @@ func (u *clusterPolicyUpdater) SetConditionsError(ctx context.Context, cr any, r
5556
return u.setConditions(ctx, clusterPolicyCr, Error, reason, message)
5657
}
5758

58-
func (u *clusterPolicyUpdater) setConditions(ctx context.Context, cr *nvidiav1.ClusterPolicy, statusType, reason, message string) error {
59-
reqLogger := log.FromContext(ctx)
59+
// updateConditions updates the conditions of the ClusterPolicy CR
60+
func (u *clusterPolicyUpdater) updateConditions(ctx context.Context, cr *nvidiav1.ClusterPolicy, statusType, reason, message string) error {
6061
// Fetch latest instance and update state to avoid version mismatch
6162
instance := &nvidiav1.ClusterPolicy{}
62-
err := u.client.Get(ctx, types.NamespacedName{Name: cr.Name}, instance)
63-
if err != nil {
64-
reqLogger.Error(err, "Failed to get ClusterPolicy instance for status update", "name", cr.Name)
65-
return err
63+
if err := u.client.Get(ctx, types.NamespacedName{Name: cr.Name}, instance); err != nil {
64+
return fmt.Errorf("failed to get ClusterPolicy instance for status update: %w", err)
6665
}
6766

6867
switch statusType {
@@ -93,9 +92,23 @@ func (u *clusterPolicyUpdater) setConditions(ctx context.Context, cr *nvidiav1.C
9392
Message: message,
9493
})
9594
default:
96-
reqLogger.Error(nil, "Unknown status type provided", "statusType", statusType)
9795
return fmt.Errorf("unknown status type provided: %s", statusType)
9896
}
9997

10098
return u.client.Status().Update(ctx, instance)
10199
}
100+
101+
// setConditions updates the conditions of the ClusterPolicy CR
102+
// with retry on conflict to handle version mismatches
103+
func (u *clusterPolicyUpdater) setConditions(ctx context.Context, cr *nvidiav1.ClusterPolicy, statusType, reason, message string) error {
104+
reqLogger := log.FromContext(ctx)
105+
106+
err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
107+
return u.updateConditions(ctx, cr, statusType, reason, message)
108+
})
109+
110+
if err != nil {
111+
reqLogger.Error(err, "Failed to update ClusterPolicy status after retries", "name", cr.Name)
112+
}
113+
return err
114+
}

internal/conditions/nvidiadriver.go

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"k8s.io/apimachinery/pkg/api/meta"
2424
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2525
"k8s.io/apimachinery/pkg/types"
26+
"k8s.io/client-go/util/retry"
2627
"sigs.k8s.io/controller-runtime/pkg/client"
2728
"sigs.k8s.io/controller-runtime/pkg/log"
2829

@@ -61,14 +62,12 @@ func (u *nvDriverUpdater) SetConditionsError(ctx context.Context, cr any, reason
6162
return u.setConditions(ctx, nvDriverCr, Error, reason, message)
6263
}
6364

64-
func (u *nvDriverUpdater) setConditions(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver, statusType, reason, message string) error {
65-
reqLogger := log.FromContext(ctx)
65+
// updateConditions updates the conditions of the NVIDIADriver CR
66+
func (u *nvDriverUpdater) updateConditions(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver, statusType, reason, message string) error {
6667
// Fetch latest instance and update state to avoid version mismatch
6768
instance := &nvidiav1alpha1.NVIDIADriver{}
68-
err := u.client.Get(ctx, types.NamespacedName{Name: cr.Name}, instance)
69-
if err != nil {
70-
reqLogger.Error(err, "Failed to get NVIDIADriver instance for status update", "name", cr.Name)
71-
return err
69+
if err := u.client.Get(ctx, types.NamespacedName{Name: cr.Name}, instance); err != nil {
70+
return fmt.Errorf("failed to get NVIDIADriver instance for status update: %w", err)
7271
}
7372

7473
switch statusType {
@@ -107,9 +106,23 @@ func (u *nvDriverUpdater) setConditions(ctx context.Context, cr *nvidiav1alpha1.
107106
instance.Status.State = nvidiav1alpha1.NotReady
108107
}
109108
default:
110-
reqLogger.Error(nil, "Unknown status type provided", "statusType", statusType)
111109
return fmt.Errorf("unknown status type provided: %s", statusType)
112110
}
113111

114112
return u.client.Status().Update(ctx, instance)
115113
}
114+
115+
// setConditions updates the conditions of the NVIDIADriver CR
116+
// with retry on conflict to handle version mismatches
117+
func (u *nvDriverUpdater) setConditions(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver, statusType, reason, message string) error {
118+
reqLogger := log.FromContext(ctx)
119+
120+
err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
121+
return u.updateConditions(ctx, cr, statusType, reason, message)
122+
})
123+
124+
if err != nil {
125+
reqLogger.Error(err, "Failed to update NVIDIADriver status after retries", "name", cr.Name)
126+
}
127+
return err
128+
}

vendor/k8s.io/client-go/util/retry/OWNERS

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/k8s.io/client-go/util/retry/util.go

Lines changed: 105 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/modules.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -990,6 +990,7 @@ k8s.io/client-go/util/flowcontrol
990990
k8s.io/client-go/util/homedir
991991
k8s.io/client-go/util/jsonpath
992992
k8s.io/client-go/util/keyutil
993+
k8s.io/client-go/util/retry
993994
k8s.io/client-go/util/workqueue
994995
# k8s.io/component-base v0.34.1
995996
## explicit; go 1.24.0

0 commit comments

Comments
 (0)