Skip to content

Commit fb0b257

Browse files
committed
Ensure we always have the *latest* ComputeDomain object when modifying
If we fail in the workqueue loop, we could end up trying to modify a stale object forever unless we resync it. Signed-off-by: Kevin Klues <[email protected]>
1 parent f0f8a7a commit fb0b257

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

cmd/compute-domain-daemon/computedomain.go

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,14 @@ func (m *ComputeDomainManager) Start(ctx context.Context) (rerr error) {
8787
}
8888
}()
8989

90-
_, err := m.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
90+
err := m.informer.AddIndexers(cache.Indexers{
91+
"uid": uidIndexer[*nvapi.ComputeDomain],
92+
})
93+
if err != nil {
94+
return fmt.Errorf("error adding indexer for ComputeDomain UID: %w", err)
95+
}
96+
97+
_, err = m.informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
9198
AddFunc: func(obj any) {
9299
m.config.workQueue.Enqueue(obj, m.onAddOrUpdate)
93100
},
@@ -133,13 +140,43 @@ func (m *ComputeDomainManager) Stop() error {
133140
return nil
134141
}
135142

143+
// Get gets the ComputeDomain by UID from the informer cache.
144+
func (m *ComputeDomainManager) Get(uid string) (*nvapi.ComputeDomain, error) {
145+
objs, err := m.informer.GetIndexer().ByIndex("uid", uid)
146+
if err != nil {
147+
return nil, fmt.Errorf("error retrieving ComputeDomain by UID: %w", err)
148+
}
149+
if len(objs) == 0 {
150+
return nil, nil
151+
}
152+
if len(objs) != 1 {
153+
return nil, fmt.Errorf("multiple ComputeDomains with the same UID")
154+
}
155+
cd, ok := objs[0].(*nvapi.ComputeDomain)
156+
if !ok {
157+
return nil, fmt.Errorf("error casting to ComputeDomain")
158+
}
159+
return cd, nil
160+
}
161+
136162
// onAddOrUpdate handles the addition or update of a ComputeDomain.
137163
func (m *ComputeDomainManager) onAddOrUpdate(ctx context.Context, obj any) error {
138-
cd, ok := obj.(*nvapi.ComputeDomain)
164+
// Cast the object to a ComputeDomain object
165+
o, ok := obj.(*nvapi.ComputeDomain)
139166
if !ok {
140167
return fmt.Errorf("failed to cast to ComputeDomain")
141168
}
142169

170+
// Get the latest ComputeDomain object from the informer cache since we
171+
// plan to update it later and always *must* have the latest version.
172+
cd, err := m.Get(string(o.GetUID()))
173+
if err != nil {
174+
return fmt.Errorf("error getting latest ComputeDomain: %w", err)
175+
}
176+
if cd == nil {
177+
return nil
178+
}
179+
143180
// Skip ComputeDomains that don't match on UUID
144181
if string(cd.UID) != m.config.computeDomainUUID {
145182
klog.Errorf("ComputeDomain processed with non-matching UID (%v, %v)", cd.UID, m.config.computeDomainUUID)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
/*
2+
* Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"fmt"
21+
22+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
23+
)
24+
25+
func uidIndexer[T metav1.ObjectMetaAccessor](obj any) ([]string, error) {
26+
d, ok := obj.(T)
27+
if !ok {
28+
return nil, fmt.Errorf("expected a %T but got %T", *new(T), obj)
29+
}
30+
return []string{string(d.GetObjectMeta().GetUID())}, nil
31+
}

0 commit comments

Comments
 (0)