@@ -22,6 +22,7 @@ import (
2222 "sync"
2323 "time"
2424
25+ corev1 "k8s.io/api/core/v1"
2526 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2627 "k8s.io/client-go/tools/cache"
2728 "k8s.io/klog/v2"
@@ -64,7 +65,7 @@ func NewComputeDomainManager(config *ManagerConfig) *ComputeDomainManager {
6465 factory := nvinformers .NewSharedInformerFactory (config .clientsets .Nvidia , informerResyncPeriod )
6566 informer := factory .Resource ().V1beta1 ().ComputeDomains ().Informer ()
6667
67- klog .Infof ("Creating new ComputeDomainManager with config %+v " , config )
68+ klog .Infof ("Creating new ComputeDomainManager for %s/%s " , config . driverName , config . driverNamespace )
6869 m := & ComputeDomainManager {
6970 config : config ,
7071 factory : factory ,
@@ -149,7 +150,7 @@ func (m *ComputeDomainManager) Get(uid string) (*nvapi.ComputeDomain, error) {
149150 return nil , fmt .Errorf ("error retrieving ComputeDomain by UID: %w" , err )
150151 }
151152 if len (cds ) == 0 {
152- klog .Infof ("No ComputeDomain found with UID: %s" , uid )
153+ klog .V ( 2 ). Infof ("No ComputeDomain found with UID: %s" , uid )
153154 return nil , nil
154155 }
155156 if len (cds ) != 1 {
@@ -169,7 +170,7 @@ func (m *ComputeDomainManager) RemoveFinalizer(ctx context.Context, uid string)
169170 return fmt .Errorf ("error retrieving ComputeDomain: %w" , err )
170171 }
171172 if cd == nil {
172- klog .Infof ("ComputeDomain with UID %s not found, nothing to do" , uid )
173+ klog .V ( 2 ). Infof ("ComputeDomain with UID %s not found, nothing to do" , uid )
173174 return nil
174175 }
175176
@@ -185,16 +186,31 @@ func (m *ComputeDomainManager) RemoveFinalizer(ctx context.Context, uid string)
185186 }
186187 }
187188 if len (cd .Finalizers ) == len (newCD .Finalizers ) {
188- klog .Infof ("Finalizer not found on ComputeDomain %s/%s, nothing to do" , cd .Namespace , cd .Name )
189+ klog .V ( 2 ). Infof ("Finalizer not found on ComputeDomain %s/%s, nothing to do" , cd .Namespace , cd .Name )
189190 return nil
190191 }
191192
192193 if _ , err = m .config .clientsets .Nvidia .ResourceV1beta1 ().ComputeDomains (cd .Namespace ).Update (ctx , newCD , metav1.UpdateOptions {}); err != nil {
193194 return fmt .Errorf ("error updating ComputeDomain: %w" , err )
194195 }
196+
195197 return nil
196198}
197199
200+ // logNodesWithComputeDomainLabel logs nodes that have a ComputeDomain label and returns their names.
201+ func (m * ComputeDomainManager ) logNodesWithComputeDomainLabel (nodes * corev1.NodeList , cdUID string ) []string {
202+ if len (nodes .Items ) == 0 {
203+ klog .Infof ("No nodes found with label for ComputeDomain with UID %s" , cdUID )
204+ return nil
205+ }
206+
207+ nodeNames := []string {}
208+ for _ , node := range nodes .Items {
209+ nodeNames = append (nodeNames , node .Name )
210+ }
211+ return nodeNames
212+ }
213+
198214// AssertWorkloadsCompleted ensures that all workloads associated with a ComputeDomain have completed.
199215//
200216// TODO: We should probably also check to ensure that all ResourceClaims
@@ -219,38 +235,9 @@ func (m *ComputeDomainManager) AssertWorkloadsCompleted(ctx context.Context, cdU
219235 }
220236
221237 if len (nodes .Items ) != 0 {
222- // show nodes with labels
223- nodeNames := []string {}
224- for _ , node := range nodes .Items {
225- nodeNames = append (nodeNames , node .Name )
226- }
227- klog .Errorf ("Found %d nodes with label for ComputeDomain with UID %s: %v" , len (nodes .Items ), cdUID , nodeNames )
228- return fmt .Errorf ("nodes exist with label for ComputeDomain %s" , cdUID )
238+ nodeNames := m .logNodesWithComputeDomainLabel (nodes , cdUID )
239+ return fmt .Errorf ("nodes %v with label for ComputeDomain %s" , nodeNames , cdUID )
229240 }
230-
231- // check if all resource claims for workloads are gone
232- cd , err := m .Get (cdUID )
233- if err != nil {
234- return fmt .Errorf ("error retrieving ComputeDomain: %w" , err )
235- }
236-
237- resourceClaims , err := m .config .clientsets .Core .ResourceV1beta1 ().ResourceClaims (cd .Namespace ).List (ctx , metav1.ListOptions {
238- LabelSelector : metav1 .FormatLabelSelector (labelSelector ),
239- })
240- if err != nil {
241- return fmt .Errorf ("error retrieving ResourceClaims: %w" , err )
242- }
243-
244- if len (resourceClaims .Items ) != 0 {
245- claimNames := []string {}
246- for _ , claim := range resourceClaims .Items {
247- claimNames = append (claimNames , claim .Name )
248- }
249- klog .Errorf ("Found %d ResourceClaims for ComputeDomain with UID %s: %v" ,
250- len (resourceClaims .Items ), cdUID , claimNames )
251- return fmt .Errorf ("ResourceClaims exist for ComputeDomain %s" , cdUID )
252- }
253-
254241 return nil
255242}
256243
0 commit comments