@@ -34,18 +34,14 @@ type Interface interface {
3434}
3535
3636const (
37- resourceReservation = "runai-reservation"
38- namespace = "runai-reservation"
39- serviceAccountName = "runai-reservation"
40- scalingPodsNamespace = "runai-scale-adjust"
41- gpuIndexAnnotationName = "run.ai/reserve_for_gpu_index"
42- numberOfGPUsToReserve = 1
43- appLabelValue = resourceReservation
44- gpuReservationPodPrefix = resourceReservation + "-gpu"
45- runaiResourceReservationAppLabelName = "app.runai.resource.reservation"
46- reservationPodRandomCharacters = 5
47- unknownGpuIndicator = "-1"
48- nodeIndex = "runai-node"
37+ resourceReservation = "resource-reservation"
38+ gpuReservationPodPrefix = "gpu-reservation"
39+ scalingPodsNamespace = "runai-scale-adjust"
40+ gpuIndexAnnotationName = "run.ai/reserve_for_gpu_index"
41+ numberOfGPUsToReserve = 1
42+ reservationPodRandomCharacters = 5
43+ unknownGpuIndicator = "-1"
44+ nodeIndex = "runai-node"
4945)
5046
5147type service struct {
@@ -54,20 +50,29 @@ type service struct {
5450 reservationPodImage string
5551 allocationTimeout time.Duration
5652 gpuGroupMutex * group_mutex.GroupMutex
53+ namespace string
54+ serviceAccountName string
55+ appLabelValue string
5756}
5857
5958func NewService (
6059 fakeGPuNodes bool ,
6160 kubeClient client.WithWatch ,
6261 reservationPodImage string ,
6362 allocationTimeout time.Duration ,
63+ namespace string ,
64+ serviceAccountName string ,
65+ appLabelValue string ,
6466) * service {
6567 return & service {
6668 fakeGPuNodes : fakeGPuNodes ,
6769 kubeClient : kubeClient ,
6870 reservationPodImage : reservationPodImage ,
6971 allocationTimeout : allocationTimeout ,
7072 gpuGroupMutex : group_mutex .NewGroupMutex (),
73+ namespace : namespace ,
74+ serviceAccountName : serviceAccountName ,
75+ appLabelValue : appLabelValue ,
7176 }
7277}
7378
@@ -157,7 +162,7 @@ func (rsc *service) syncForPods(ctx context.Context, pods []*v1.Pod, gpuGroupToS
157162 fractionPods := map [string ][]* v1.Pod {}
158163
159164 for _ , pod := range pods {
160- if pod .Namespace == namespace {
165+ if pod .Namespace == rsc . namespace {
161166 reservationPods [gpuGroupToSync ] = pod
162167 continue
163168 }
@@ -301,7 +306,7 @@ func (rsc *service) findGPUIndexByGroup(gpuGroup string) (
301306) {
302307 pods := & v1.PodList {}
303308 err = rsc .kubeClient .List (context .Background (), pods ,
304- client .InNamespace (namespace ),
309+ client .InNamespace (rsc . namespace ),
305310 client.MatchingLabels {constants .GPUGroup : gpuGroup })
306311 if err != nil {
307312 return "" , err
@@ -334,7 +339,7 @@ func (rsc *service) createGPUReservationPodAndGetIndex(ctx context.Context, node
334339 logger .Error (deleteErr , "failed to delete reservation pod" , "name" , pod .Name )
335340 }
336341 return unknownGpuIndicator , fmt .Errorf (
337- "failed waiting for GPU reservation pod to allocate: %v/%v" , nodeName , pod .Name )
342+ "failed waiting for GPU reservation pod to allocate: %v/%v" , rsc . namespace , pod .Name )
338343 }
339344
340345 return gpuIndex , err
@@ -385,16 +390,16 @@ func (rsc *service) createGPUReservationPod(ctx context.Context, nodeName, gpuGr
385390 },
386391 }
387392
388- pod , err := rsc .createResourceReservationPod (nodeName , gpuGroup , podName , gpuReservationPodPrefix , resources )
393+ pod , err := rsc .createResourceReservationPod (nodeName , gpuGroup , podName , resources )
389394 if err != nil {
390- logger .Error (err , "Failed to created GPU reservation pod on node" ,
391- "nodeName" , nodeName , "namespace" , namespace , "name" , podName )
395+ logger .Error (err , "Failed to create GPU reservation pod on node" ,
396+ "nodeName" , nodeName , "namespace" , rsc . namespace , "name" , podName )
392397 return nil , err
393398 }
394399
395400 logger .Info (
396401 "Successfully created GPU resource reservation pod" ,
397- "nodeName" , nodeName , "namespace" , namespace , "name" , podName )
402+ "nodeName" , nodeName , "namespace" , rsc . namespace , "name" , podName )
398403 return pod , nil
399404}
400405
@@ -405,7 +410,7 @@ func (rsc *service) waitForGPUReservationPodAllocation(
405410 pods := & v1.PodList {}
406411 watcher , err := rsc .kubeClient .Watch (
407412 ctx , pods ,
408- client .InNamespace (namespace ),
413+ client .InNamespace (rsc . namespace ),
409414 client.MatchingFields {"metadata.name" : gpuReservationPodName },
410415 )
411416 if err != nil {
@@ -432,25 +437,23 @@ func (rsc *service) waitForGPUReservationPodAllocation(
432437}
433438
434439func (rsc * service ) createResourceReservationPod (
435- nodeName , gpuGroup , podName , appName string ,
436- resources v1.ResourceRequirements ,
440+ nodeName , gpuGroup , podName string , resources v1.ResourceRequirements ,
437441) (* v1.Pod , error ) {
438442 podSpec := & v1.Pod {
439443 ObjectMeta : metav1.ObjectMeta {
440444 Name : podName ,
441- Namespace : namespace ,
445+ Namespace : rsc . namespace ,
442446 Labels : map [string ]string {
443- constants .AppLabelName : appLabelValue ,
444- constants .GPUGroup : gpuGroup ,
445- runaiResourceReservationAppLabelName : appName ,
447+ constants .AppLabelName : rsc .appLabelValue ,
448+ constants .GPUGroup : gpuGroup ,
446449 },
447450 Annotations : map [string ]string {
448451 karpenterv1 .DoNotDisruptAnnotationKey : "true" ,
449452 },
450453 },
451454 Spec : v1.PodSpec {
452455 NodeName : nodeName ,
453- ServiceAccountName : serviceAccountName ,
456+ ServiceAccountName : rsc . serviceAccountName ,
454457 Containers : []v1.Container {
455458 {
456459 Name : resourceReservation ,
0 commit comments