Skip to content

Commit 7e1cffe

Browse files
authored
Cluster autoscaler adjustment for GPU sharing pods (#119)
* added node scale adjuster code * added node scale adjuster deployment * Added autoscaling flag to helm chart values * Added autoscaling README doc * Changed autoscaling to clusterAutoscaling * Added node-scale-adjuster arguments reference to autoscaling doc
1 parent 72309d1 commit 7e1cffe

40 files changed

+2977
-10
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ KUSTOMIZE ?= $(LOCALBIN)/kustomize
1616

1717
# Space separated list of services to build by default
1818
# SERVICE_NAMES := service1 service2 service3
19-
SERVICE_NAMES := podgrouper scheduler binder webhookmanager resourcereservation snapshot-tool
19+
SERVICE_NAMES := podgrouper scheduler binder webhookmanager resourcereservation snapshot-tool scalingpod nodescaleadjuster
2020

2121

2222
lint: fmt-go vet-go lint-go
@@ -62,6 +62,7 @@ manifests: controller-gen kustomize ## Generate ClusterRole and CustomResourceDe
6262
$(CONTROLLER_GEN) rbac:roleName=kai-binder,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/binder/..." paths="./cmd/binder/..." output:stdout > deployments/kai-scheduler/templates/rbac/binder.yaml
6363
$(CONTROLLER_GEN) rbac:roleName=kai-resource-reservation,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/resourcereservation/..." paths="./cmd/resourcereservation/..." output:stdout > deployments/kai-scheduler/templates/rbac/resourcereservation.yaml
6464
$(CONTROLLER_GEN) rbac:roleName=kai-scheduler,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/scheduler/..." paths="./cmd/scheduler/..." output:stdout > deployments/kai-scheduler/templates/rbac/scheduler.yaml
65+
$(CONTROLLER_GEN) rbac:roleName=kai-node-scale-adjuster,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/nodescaleadjuster/..." paths="./cmd/nodescaleadjuster/..." output:stdout > deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml
6566

6667
$(CONTROLLER_GEN) rbac:roleName=kai-webhookmanager,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/webhookmanager/..." paths="./cmd/webhookmanager/..." output:stdout > deployments/kustomization/webhookmanager-clusterrole/resource.yaml
6768
$(KUSTOMIZE) build deployments/kustomization/webhookmanager-clusterrole > deployments/kai-scheduler/templates/rbac/webhookmanager.yaml

cmd/binder/app/app.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ func New() (*App, error) {
122122
rrs := resourcereservation.NewService(options.FakeGPUNodes, clientWithWatch, options.ResourceReservationPodImage,
123123
time.Duration(options.ResourceReservationAllocationTimeout)*time.Second,
124124
options.ResourceReservationNamespace, options.ResourceReservationServiceAccount,
125-
options.ResourceReservationAppLabel)
125+
options.ResourceReservationAppLabel, options.ScalingPodNamespace)
126126

127127
reconcilerParams := &controllers.ReconcilerParams{
128128
MaxConcurrentReconciles: options.MaxConcurrentReconciles,

cmd/binder/app/options.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ type Options struct {
1616
ResourceReservationPodImage string
1717
ResourceReservationAppLabel string
1818
ResourceReservationAllocationTimeout int
19+
ScalingPodNamespace string
1920
QPS float64
2021
Burst int
2122
MaxConcurrentReconciles int
@@ -54,6 +55,9 @@ func InitOptions() *Options {
5455
fs.IntVar(&options.ResourceReservationAllocationTimeout,
5556
"resource-reservation-allocation-timeout", 40,
5657
"Resource reservation allocation timeout in seconds")
58+
fs.StringVar(&options.ScalingPodNamespace,
59+
"scale-adjust-namespace", "kai-scale-adjust",
60+
"Scaling pods namespace")
5761
fs.Float64Var(&options.QPS,
5862
"qps", 50,
5963
"Queries per second to the K8s API server")

cmd/nodescaleadjuster/app/app.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package app
5+
6+
import (
7+
"flag"
8+
"log"
9+
10+
"go.uber.org/zap/zapcore"
11+
v1 "k8s.io/api/core/v1"
12+
"k8s.io/apimachinery/pkg/runtime"
13+
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
14+
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
15+
ctrl "sigs.k8s.io/controller-runtime"
16+
"sigs.k8s.io/controller-runtime/pkg/healthz"
17+
"sigs.k8s.io/controller-runtime/pkg/log/zap"
18+
19+
"github.com/NVIDIA/KAI-scheduler/pkg/nodescaleadjuster/consts"
20+
"github.com/NVIDIA/KAI-scheduler/pkg/nodescaleadjuster/controller"
21+
"github.com/NVIDIA/KAI-scheduler/pkg/nodescaleadjuster/scale_adjuster"
22+
"github.com/NVIDIA/KAI-scheduler/pkg/nodescaleadjuster/scaler"
23+
)
24+
25+
var (
26+
scheme = runtime.NewScheme()
27+
setupLog = ctrl.Log.WithName("setup")
28+
)
29+
30+
func init() {
31+
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
32+
utilruntime.Must(v1.AddToScheme(scheme))
33+
// +kubebuilder:scaffold:scheme
34+
}
35+
36+
func Run() error {
37+
options := NewOptions()
38+
options.AddFlags()
39+
40+
opts := zap.Options{
41+
Development: true,
42+
TimeEncoder: zapcore.ISO8601TimeEncoder,
43+
}
44+
opts.BindFlags(flag.CommandLine)
45+
flag.Parse()
46+
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
47+
48+
log.Println("Node scale adjuster started")
49+
50+
clientConfig := ctrl.GetConfigOrDie()
51+
mgr, err := ctrl.NewManager(clientConfig, ctrl.Options{
52+
Scheme: scheme,
53+
})
54+
if err != nil {
55+
setupLog.Error(err, "unable to start manager")
56+
return err
57+
}
58+
59+
nodeScaler := scaler.NewScaler(mgr.GetClient(), options.ScalingPodImage, options.ScalingPodNamespace,
60+
options.ScalingPodAppLabel, options.ScalingPodServiceAccount)
61+
62+
scaleAdjuster := scale_adjuster.NewScaleAdjuster(
63+
mgr.GetClient(),
64+
nodeScaler,
65+
options.ScalingPodNamespace,
66+
consts.DefaultCoolDownSeconds,
67+
options.GPUMemoryToFractionRatio,
68+
options.SchedulerName)
69+
70+
podReconciler := &controller.PodReconciler{
71+
ScaleAdjuster: scaleAdjuster,
72+
SchedulerName: options.SchedulerName,
73+
NodeScaleNamespace: options.ScalingPodImage,
74+
Client: mgr.GetClient(),
75+
Scheme: mgr.GetScheme(),
76+
}
77+
78+
if err = podReconciler.SetupWithManager(mgr); err != nil {
79+
setupLog.Error(err, "unable to create controller", "controller", "Pod")
80+
return err
81+
}
82+
83+
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
84+
setupLog.Error(err, "unable to set up health check")
85+
return err
86+
}
87+
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
88+
setupLog.Error(err, "unable to set up ready check")
89+
return err
90+
}
91+
92+
setupLog.Info("starting manager")
93+
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
94+
setupLog.Error(err, "problem running manager")
95+
return err
96+
}
97+
98+
return nil
99+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package app
5+
6+
import (
7+
"flag"
8+
"fmt"
9+
10+
"github.com/NVIDIA/KAI-scheduler/pkg/nodescaleadjuster/consts"
11+
)
12+
13+
// Options holds the command-line configuration of the node scale adjuster.
type Options struct {
	// ScalingPodImage is the image to use for the scaling pod.
	ScalingPodImage string
	// ScalingPodNamespace is the namespace to use for the scaling pods.
	ScalingPodNamespace string
	// SchedulerName is the scheduler name.
	SchedulerName string
	// GPUMemoryToFractionRatio is the ratio of GPU memory to fraction.
	GPUMemoryToFractionRatio float64
	// ScalingPodAppLabel is the scaling pod app label.
	ScalingPodAppLabel string
	// ScalingPodServiceAccount is the scaling pod service account name.
	ScalingPodServiceAccount string
}
21+
22+
// NewOptions creates a new Options
23+
func NewOptions() *Options {
24+
s := Options{}
25+
return &s
26+
}
27+
28+
// AddFlags adds flags for a specific CMServer to the specified FlagSet
29+
func (s *Options) AddFlags() {
30+
flag.StringVar(&s.ScalingPodImage,
31+
"scaling-pod-image", consts.DefaultScalingPodImage,
32+
"The image to use for the scaling pod, defaults to "+consts.DefaultScalingPodImage)
33+
flag.StringVar(&s.ScalingPodNamespace,
34+
"scale-adjust-namespace", "kai-scale-adjust",
35+
"The namespace to use for the scaling pods, defaults to kai-scale-adjust")
36+
flag.StringVar(&s.SchedulerName,
37+
"scheduler-name", "kai-scheduler",
38+
"Scheduler name, defaults to kai-scheduler")
39+
flag.StringVar(&s.ScalingPodAppLabel,
40+
"scaling-pod-app-label", "scaling-pod",
41+
"Scaling pod app label")
42+
flag.StringVar(&s.ScalingPodServiceAccount,
43+
"scaling-pod-service-account", "scaling-pod",
44+
"Scaling pod service account name")
45+
flag.Float64Var(&s.GPUMemoryToFractionRatio,
46+
"gpu-memory-to-fraction-ratio", consts.DefaultGPUMemoryToFractionRatio,
47+
fmt.Sprintf("The ratio of GPU memory to fraction, defaults to %f", consts.DefaultGPUMemoryToFractionRatio))
48+
}

cmd/nodescaleadjuster/main.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package main
5+
6+
import (
7+
"fmt"
8+
"os"
9+
10+
"github.com/NVIDIA/KAI-scheduler/cmd/nodescaleadjuster/app"
11+
)
12+
13+
func main() {
14+
if err := app.Run(); err != nil {
15+
fmt.Printf("Error while running the app: %v", err)
16+
os.Exit(1)
17+
}
18+
}

cmd/scalingpod/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package main
5+
6+
// main does nothing: the scaling pod binary returns as soon as it starts.
// NOTE(review): presumably the pod only has to exist (not run any workload)
// for the cluster autoscaler adjustment to work; confirm with the scaler code.
func main() {}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Copyright 2025 NVIDIA CORPORATION
2+
# SPDX-License-Identifier: Apache-2.0
3+
---
4+
apiVersion: rbac.authorization.k8s.io/v1
5+
kind: ClusterRoleBinding
6+
metadata:
7+
name: kai-node-scale-adjuster
8+
subjects:
9+
- kind: ServiceAccount
10+
name: node-scale-adjuster
11+
namespace: {{ .Release.Namespace }}
12+
roleRef:
13+
kind: ClusterRole
14+
name: kai-node-scale-adjuster
15+
apiGroup: rbac.authorization.k8s.io
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2025 NVIDIA CORPORATION
2+
# SPDX-License-Identifier: Apache-2.0
3+
---
4+
apiVersion: rbac.authorization.k8s.io/v1
5+
kind: ClusterRole
6+
metadata:
7+
name: kai-node-scale-adjuster
8+
rules:
9+
- apiGroups:
10+
- ""
11+
resources:
12+
- pods
13+
verbs:
14+
- create
15+
- delete
16+
- get
17+
- list
18+
- patch
19+
- update
20+
- watch
21+
- apiGroups:
22+
- ""
23+
resources:
24+
- pods/finalizers
25+
verbs:
26+
- create
27+
- patch
28+
- update
29+
- apiGroups:
30+
- ""
31+
resources:
32+
- pods/status
33+
verbs:
34+
- get
35+
- patch
36+
- update

0 commit comments

Comments
 (0)