Skip to content

Commit 108083b

Browse files
authored
Roman/podgroup controller (#215)
* Added PodGroupController code * Added PodGroupController deployment
1 parent 34b71a5 commit 108083b

29 files changed

+3373
-1
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ KUSTOMIZE ?= $(LOCALBIN)/kustomize
1616

1717
# Space separated list of services to build by default
1818
# SERVICE_NAMES := service1 service2 service3
19-
SERVICE_NAMES := podgrouper scheduler binder webhookmanager resourcereservation snapshot-tool scalingpod nodescaleadjuster
19+
SERVICE_NAMES := podgrouper scheduler binder webhookmanager resourcereservation snapshot-tool scalingpod nodescaleadjuster podgroupcontroller
2020

2121

2222
lint: fmt-go vet-go lint-go
@@ -63,6 +63,7 @@ manifests: controller-gen kustomize ## Generate ClusterRole and CustomResourceDe
6363
$(CONTROLLER_GEN) rbac:roleName=kai-resource-reservation,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/resourcereservation/..." paths="./cmd/resourcereservation/..." output:stdout > deployments/kai-scheduler/templates/rbac/resourcereservation.yaml
6464
$(CONTROLLER_GEN) rbac:roleName=kai-scheduler,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/scheduler/..." paths="./cmd/scheduler/..." output:stdout > deployments/kai-scheduler/templates/rbac/scheduler.yaml
6565
$(CONTROLLER_GEN) rbac:roleName=kai-node-scale-adjuster,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/nodescaleadjuster/..." paths="./cmd/nodescaleadjuster/..." output:stdout > deployments/kai-scheduler/templates/rbac/nodescaleadjuster.yaml
66+
$(CONTROLLER_GEN) rbac:roleName=kai-podgroup-controller,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/podgroupcontroller/..." paths="./cmd/podgroupcontroller/..." output:stdout > deployments/kai-scheduler/templates/rbac/podgroupcontroller.yaml
6667

6768
$(CONTROLLER_GEN) rbac:roleName=kai-webhookmanager,headerFile="./hack/boilerplate.yaml.txt" paths="./pkg/webhookmanager/..." paths="./cmd/webhookmanager/..." output:stdout > deployments/kustomization/webhookmanager-clusterrole/resource.yaml
6869
$(KUSTOMIZE) build deployments/kustomization/webhookmanager-clusterrole > deployments/kai-scheduler/templates/rbac/webhookmanager.yaml

cmd/podgroupcontroller/app/app.go

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package app
5+
6+
import (
7+
"flag"
8+
9+
"github.com/NVIDIA/KAI-scheduler/pkg/apis/scheduling/v2alpha2"
10+
"github.com/NVIDIA/KAI-scheduler/pkg/podgroupcontroller/controllers"
11+
12+
"go.uber.org/zap/zapcore"
13+
v1 "k8s.io/api/core/v1"
14+
schedulingv1 "k8s.io/api/scheduling/v1"
15+
"k8s.io/apimachinery/pkg/fields"
16+
"sigs.k8s.io/controller-runtime/pkg/client"
17+
18+
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
19+
// to ensure that exec-entrypoint and run can make use of them.
20+
_ "k8s.io/client-go/plugin/pkg/client/auth"
21+
22+
"k8s.io/apimachinery/pkg/runtime"
23+
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
24+
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
25+
ctrl "sigs.k8s.io/controller-runtime"
26+
"sigs.k8s.io/controller-runtime/pkg/healthz"
27+
"sigs.k8s.io/controller-runtime/pkg/log/zap"
28+
29+
"sigs.k8s.io/controller-runtime/pkg/cache"
30+
// +kubebuilder:scaffold:imports
31+
)
32+
33+
const (
34+
schedulerNameField = "spec.schedulerName"
35+
)
36+
37+
var (
38+
scheme = runtime.NewScheme()
39+
setupLog = ctrl.Log.WithName("setup")
40+
)
41+
42+
func init() {
43+
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
44+
utilruntime.Must(v2alpha2.AddToScheme(scheme))
45+
46+
// +kubebuilder:scaffold:scheme
47+
}
48+
49+
func Run() error {
50+
options := InitOptions()
51+
opts := zap.Options{
52+
Development: true,
53+
TimeEncoder: zapcore.ISO8601TimeEncoder,
54+
Level: zapcore.Level(-1 * options.LogLevel),
55+
}
56+
opts.BindFlags(flag.CommandLine)
57+
flag.Parse()
58+
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
59+
60+
clientConfig := ctrl.GetConfigOrDie()
61+
clientConfig.QPS = float32(options.Qps)
62+
clientConfig.Burst = options.Burst
63+
64+
schedulerSelector := fields.Set{schedulerNameField: options.SchedulerName}.AsSelector()
65+
cacheOptions := cache.Options{}
66+
cacheOptions.ByObject = map[client.Object]cache.ByObject{
67+
&v1.Pod{}: {Field: schedulerSelector},
68+
&v1.Node{}: {}, // TODO: filter by strict/non-strict runai nodes
69+
&schedulingv1.PriorityClass{}: {},
70+
&v2alpha2.PodGroup{}: {},
71+
}
72+
73+
mgr, err := ctrl.NewManager(clientConfig, ctrl.Options{
74+
Scheme: scheme,
75+
Cache: cacheOptions,
76+
HealthProbeBindAddress: options.ProbeAddr,
77+
LeaderElection: options.EnableLeaderElection,
78+
LeaderElectionID: "3f770c00.run.ai",
79+
// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
80+
// when the Manager ends. This requires the binary to immediately end when the
81+
// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly
82+
// speeds up voluntary leader transitions as the new leader don't have to wait
83+
// LeaseDuration time first.
84+
//
85+
// In the default scaffold provided, the program ends immediately after
86+
// the manager stops, so would be fine to enable this option. However,
87+
// if you are doing or is intended to do any operation such as perform cleanups
88+
// after the manager stops then its usage might be unsafe.
89+
// LeaderElectionReleaseOnCancel: true,
90+
})
91+
if err != nil {
92+
setupLog.Error(err, "unable to start manager")
93+
return err
94+
}
95+
96+
configs := controllers.Configs{
97+
MaxConcurrentReconciles: options.MaxConcurrentReconciles,
98+
}
99+
if err = (&controllers.PodGroupReconciler{
100+
Client: mgr.GetClient(),
101+
Scheme: mgr.GetScheme(),
102+
}).SetupWithManager(mgr, configs); err != nil {
103+
setupLog.Error(err, "unable to create controller", "controller", "Pod")
104+
return err
105+
}
106+
// +kubebuilder:scaffold:builder
107+
108+
if err = mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
109+
setupLog.Error(err, "unable to set up health check")
110+
return err
111+
}
112+
if err = mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
113+
setupLog.Error(err, "unable to set up ready check")
114+
return err
115+
}
116+
117+
setupLog.Info("starting manager")
118+
if err = mgr.Start(ctrl.SetupSignalHandler()); err != nil {
119+
setupLog.Error(err, "problem running manager")
120+
return err
121+
}
122+
123+
return nil
124+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package app
5+
6+
import (
7+
"flag"
8+
)
9+
10+
// Options holds the command-line configuration of the podgroup controller.
type Options struct {
	MetricsAddr             string // address for the metrics endpoint
	EnableLeaderElection    bool   // run with leader election enabled
	ProbeAddr               string // address for health/readiness probes
	Qps                     int    // client-side QPS towards the API server
	Burst                   int    // client-side burst towards the API server
	MaxConcurrentReconciles int    // reconciler parallelism
	LogLevel                int    // log verbosity; higher is more verbose
	SchedulerName           string // scheduler whose pods are watched
}

// InitOptions registers the controller's command-line flags on the default
// flag set and returns an Options pre-populated with default values.
// flag.Parse must be called afterwards to pick up user-supplied values.
func InitOptions() *Options {
	opts := &Options{}

	flag.StringVar(&opts.MetricsAddr, "metrics-bind-address", ":8080",
		"The address the metric endpoint binds to.")
	flag.StringVar(&opts.ProbeAddr, "health-probe-bind-address", ":8081",
		"The address the probe endpoint binds to.")
	flag.StringVar(&opts.SchedulerName, "scheduler-name", "kai-scheduler",
		"The name of the scheduler used to schedule pod groups")
	flag.BoolVar(&opts.EnableLeaderElection, "leader-elect", false,
		"Enable leader election for controller manager. "+
			"Enabling this will ensure there is only one active controller manager.")
	flag.IntVar(&opts.Qps, "qps", 50,
		"Queries per second to the K8s API server")
	flag.IntVar(&opts.Burst, "burst", 300,
		"Burst to the K8s API server")
	flag.IntVar(&opts.MaxConcurrentReconciles, "max-concurrent-reconciles", 10,
		"Max concurrent reconciles")
	flag.IntVar(&opts.LogLevel, "log-level", 3,
		"Log level")

	return opts
}

cmd/podgroupcontroller/main.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package main
5+
6+
import (
7+
"fmt"
8+
"os"
9+
10+
"github.com/NVIDIA/KAI-scheduler/cmd/podgroupcontroller/app"
11+
)
12+
13+
func main() {
14+
if err := app.Run(); err != nil {
15+
fmt.Printf("Error while running the app: %v", err)
16+
os.Exit(1)
17+
}
18+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Copyright 2025 NVIDIA CORPORATION
# SPDX-License-Identifier: Apache-2.0
---
# Grants the podgroup-controller ServiceAccount the kai-podgroup-controller
# ClusterRole cluster-wide.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kai-podgroup-controller
subjects:
  - kind: ServiceAccount
    name: podgroup-controller
    namespace: {{ .Release.Namespace }}
roleRef:
  kind: ClusterRole
  name: kai-podgroup-controller
  apiGroup: rbac.authorization.k8s.io
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright 2025 NVIDIA CORPORATION
# SPDX-License-Identifier: Apache-2.0
---
# Read access to nodes/pods/priorityclasses/podgroups plus write access to
# podgroups/status. Generated by controller-gen (see the Makefile manifests
# target); edit the kubebuilder RBAC markers, not this file.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kai-podgroup-controller
rules:
- apiGroups:
  - ""
  resources:
  - nodes
  - pods
  - pods/status
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - scheduling.k8s.io
  resources:
  - priorityclasses
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - scheduling.run.ai
  resources:
  - podgroups
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - scheduling.run.ai
  resources:
  - podgroups/status
  verbs:
  - get
  - list
  - patch
  - update
  - watch
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Copyright 2025 NVIDIA CORPORATION
# SPDX-License-Identifier: Apache-2.0

# ServiceAccount used by the podgroup-controller Deployment and bound to the
# kai-podgroup-controller ClusterRole.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: podgroup-controller
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright 2025 NVIDIA CORPORATION
# SPDX-License-Identifier: Apache-2.0

# Single-replica Deployment running the podgroup controller under its
# dedicated ServiceAccount. Image, args, resources and scheduling knobs come
# from chart values ({{/* .Values.podgroupcontroller / .Values.global */}}).
apiVersion: apps/v1
kind: Deployment
metadata:
  name: podgroup-controller
spec:
  replicas: 1
  selector:
    matchLabels:
      app: podgroup-controller
  template:
    metadata:
      labels:
        app: podgroup-controller
    spec:
      serviceAccountName: podgroup-controller
      containers:
        - name: podgroup-controller
          image: "{{ .Values.global.registry }}/{{ .Values.podgroupcontroller.image.name }}:{{ .Chart.Version }}"
          imagePullPolicy: {{ .Values.podgroupcontroller.image.pullPolicy }}
          {{- if .Values.podgroupcontroller.additionalArgs }}
          args:
            {{- toYaml .Values.podgroupcontroller.additionalArgs | nindent 12 }}
          {{- end }}
          {{- if .Values.podgroupcontroller.resources }}
          resources:
            {{- toYaml .Values.podgroupcontroller.resources | nindent 12 }}
          {{- end }}
          {{- if .Values.global.securityContext }}
          securityContext:
            {{- toYaml .Values.global.securityContext | nindent 12 }}
          {{- end }}
      {{- if .Values.global.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml .Values.global.imagePullSecrets | nindent 8 }}
      {{- end }}
      {{- if .Values.global.nodeSelector }}
      nodeSelector:
        {{- toYaml .Values.global.nodeSelector | nindent 8 }}
      {{- end }}
      {{- if .Values.global.affinity }}
      affinity:
        {{- toYaml .Values.global.affinity | nindent 8 }}
      {{- end }}
      {{- if .Values.global.tolerations }}
      tolerations:
        {{- toYaml .Values.global.tolerations | nindent 8 }}
      {{- end }}

deployments/kai-scheduler/values.yaml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,19 @@ podgrouper:
2525
cpu: "250m"
2626
memory: "128Mi"
2727

28+
# Values consumed by the podgroup-controller Deployment template.
podgroupcontroller:
  image:
    name: podgroupcontroller
    pullPolicy: IfNotPresent
  # Extra command-line arguments appended to the container.
  additionalArgs: []
  resources:
    limits:
      cpu: "500m"
      memory: "256Mi"
    requests:
      cpu: "250m"
      memory: "128Mi"
40+
2841
binder:
2942
name: binder
3043
image:

pkg/common/constants/constants.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package constants
66
const (
77
AppLabelName = "app"
88
GpuResource = "nvidia.com/gpu"
9+
NvidiaGpuMemory = "nvidia.com/gpu.memory"
910
UnlimitedResourceQuantity = float64(-1)
1011
DefaultQueuePriority = 100
1112
DefaultNodePoolName = "default"

0 commit comments

Comments
 (0)