Skip to content

Commit a484a4b

Browse files
authored
Handle unset usage lister client gracefully (#388)
* Handle unset usage lister client gracefully
1 parent 2e4e73a commit a484a4b

File tree

5 files changed: 21 additions and 6 deletions.

pkg/scheduler/cache/cache.go

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,9 @@ func newSchedulerCache(schedulerCacheParams *SchedulerCacheParams) *SchedulerCac
158158
sc.podLister = sc.informerFactory.Core().V1().Pods().Lister()
159159
sc.podGroupLister = sc.kubeAiSchedulerInformerFactory.Scheduling().V2alpha2().PodGroups().Lister()
160160

161-
sc.usageLister = usagedb.NewUsageLister(schedulerCacheParams.UsageDBClient, nil, nil, nil)
161+
if schedulerCacheParams.UsageDBClient != nil {
162+
sc.usageLister = usagedb.NewUsageLister(schedulerCacheParams.UsageDBClient, nil, nil, nil)
163+
}
162164

163165
clusterInfo, err := cluster_info.New(sc.informerFactory, sc.kubeAiSchedulerInformerFactory, sc.kueueInformerFactory, sc.usageLister, sc.schedulingNodePoolParams,
164166
sc.restrictNodeScheduling, &sc.K8sClusterPodAffinityInfo, sc.scheduleCSIStorage, sc.fullHierarchyFairness, sc.StatusUpdater)
@@ -194,15 +196,19 @@ func (sc *SchedulerCache) Run(stopCh <-chan struct{}) {
194196
sc.kueueInformerFactory.Start(stopCh)
195197
sc.StatusUpdater.Run(stopCh)
196198

197-
sc.usageLister.Start(stopCh)
199+
if sc.usageLister != nil {
200+
sc.usageLister.Start(stopCh)
201+
}
198202
}
199203

200204
func (sc *SchedulerCache) WaitForCacheSync(stopCh <-chan struct{}) {
201205
sc.informerFactory.WaitForCacheSync(stopCh)
202206
sc.kubeAiSchedulerInformerFactory.WaitForCacheSync(stopCh)
203207
sc.kueueInformerFactory.WaitForCacheSync(stopCh)
204208

205-
sc.usageLister.WaitForCacheSync(stopCh)
209+
if sc.usageLister != nil {
210+
sc.usageLister.WaitForCacheSync(stopCh)
211+
}
206212
}
207213

208214
func (sc *SchedulerCache) Evict(evictedPod *v1.Pod, evictedPodGroup *podgroup_info.PodGroupInfo,

pkg/scheduler/cache/cluster_info/cluster_info.go

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,7 @@ type ClusterInfo struct {
5959
includeCSIStorageObjects bool
6060
nodePoolSelector labels.Selector
6161
fairnessLevelType FairnessLevelType
62+
collectUsageData bool
6263
}
6364

6465
type FairnessLevelType string
@@ -106,6 +107,7 @@ func New(
106107
nodePoolSelector: nodePoolSelector,
107108
fairnessLevelType: fairnessLevelType,
108109
podGroupSync: podGroupSync,
110+
collectUsageData: usageLister != nil,
109111
}, nil
110112
}
111113

@@ -150,10 +152,9 @@ func (c *ClusterInfo) Snapshot() (*api.ClusterInfo, error) {
150152

151153
usage, usageErr := c.snapshotQueueResourceUsage()
152154
if usageErr != nil {
153-
log.InfraLogger.Warningf("error snapshotting queue resource usage: %c", usageErr)
155+
log.InfraLogger.V(2).Warnf("error snapshotting queue resource usage: %c", usageErr)
154156
}
155157
if usage == nil {
156-
log.InfraLogger.Warningf("resource usage is nil, using 0 values for all queues")
157158
usage = queue_info.NewClusterUsage()
158159
}
159160
snapshot.QueueResourceUsage = *usage

pkg/scheduler/cache/cluster_info/data_lister/kubernetes_lister.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -112,7 +112,7 @@ func (k *k8sLister) ListQueues() ([]*enginev2.Queue, error) {
112112

113113
func (k *k8sLister) ListResourceUsage() (*queue_info.ClusterUsage, error) {
114114
if k.usageLister == nil {
115-
return nil, fmt.Errorf("usage lister is not set")
115+
return queue_info.NewClusterUsage(), fmt.Errorf("usage lister is not set")
116116
}
117117

118118
return k.usageLister.GetResourceUsage()

pkg/scheduler/cache/cluster_info/queue.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -78,6 +78,10 @@ func (c *ClusterInfo) snapshotQueues() (map[common_info.QueueID]*queue_info.Queu
7878
}
7979

8080
func (c *ClusterInfo) snapshotQueueResourceUsage() (*queue_info.ClusterUsage, error) {
81+
if !c.collectUsageData {
82+
return nil, nil
83+
}
84+
8185
return c.dataLister.ListResourceUsage()
8286
}
8387

pkg/scheduler/cache/usagedb/usagedb.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,10 @@ func (l *UsageLister) GetResourceUsage() (*queue_info.ClusterUsage, error) {
6565
l.lastUsageDataMutex.RLock()
6666
defer l.lastUsageDataMutex.RUnlock()
6767

68+
if l.client == nil {
69+
return nil, fmt.Errorf("client is not set")
70+
}
71+
6872
if l.lastUsageDataTime == nil {
6973
return nil, fmt.Errorf("usage data is not available")
7074
}

0 commit comments

Comments
 (0)