Skip to content

Commit f0d0869

Browse files
propagate scheduler service namespace for leader election in HA (#236)
* propagate scheduler service namespace for leader election in HA
* Added namespace argument to scheduler deployment
* Update cmd/scheduler/app/options/options.go
  Co-authored-by: Roman Baron <[email protected]>
* Update cmd/scheduler/app/options/options.go
  Co-authored-by: Roman Baron <[email protected]>
* Fixed options test in scheduler service

---------

Co-authored-by: Roman Baron <[email protected]>
Co-authored-by: Roman Baron <[email protected]>
1 parent e7f65a6 commit f0d0869

File tree

4 files changed

+9
-6
lines changed

4 files changed

+9
-6
lines changed

cmd/scheduler/app/options/options.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ const (
1616
defaultSchedulerName = "kai-scheduler"
1717
defaultResourceReservationAppLabel = "kai-resource-reservation"
1818
defaultMetricsNamespace = "kai"
19+
defaultNamespace = "kai-scheduler"
1920
defaultSchedulerPeriod = time.Second
2021
defaultStalenessGracePeriod = 60 * time.Second
2122
defaultListenAddress = ":8080"
@@ -65,6 +66,7 @@ type ServerOption struct {
6566
CPUWorkerNodeLabelKey string
6667
GPUWorkerNodeLabelKey string
6768
MIGWorkerNodeLabelKey string
69+
Namspace string
6870

6971
QPS int
7072
Burst int
@@ -80,6 +82,8 @@ func NewServerOption() *ServerOption {
8082
func (s *ServerOption) AddFlags(fs *pflag.FlagSet) {
8183
// kai-scheduler will ignore pods with scheduler names other than specified with the option
8284
fs.StringVar(&s.SchedulerName, "scheduler-name", defaultSchedulerName, "The scheduler name in pod spec that handled by this scheduler")
85+
fs.StringVar(&s.Namspace, "namespace", defaultNamespace, "Scheduler service namespace")
86+
fs.StringVar(&s.MetricsNamespace, "metrics-namespace", defaultMetricsNamespace, "Metrics namespace")
8387
fs.StringVar(&s.ResourceReservationAppLabel, "resource-reservation-app-label", defaultResourceReservationAppLabel, "App label value of resource reservation pods")
8488
fs.BoolVar(&s.RestrictSchedulingNodes, "restrict-node-scheduling", false, "kai-scheduler will allocate jobs only to restricted nodes")
8589
fs.StringVar(&s.NodePoolLabelKey, "nodepool-label-key", defaultNodePoolLabelKey, "The label key by which to filter scheduling nodepool")
@@ -90,7 +94,6 @@ func (s *ServerOption) AddFlags(fs *pflag.FlagSet) {
9094
"Start a leader election client and gain leadership before "+
9195
"executing the main loop. Enable this when running replicated kai-scheduler for high availability")
9296
fs.BoolVar(&s.PrintVersion, "version", true, "Show version")
93-
fs.StringVar(&s.MetricsNamespace, "metrics-namespace", defaultMetricsNamespace, "Metrics namespace")
9497
fs.StringVar(&s.ListenAddress, "listen-address", defaultListenAddress, "The address to listen on for HTTP requests")
9598
fs.BoolVar(&s.EnableProfiler, "enable-profiler", false, "Enable profiler")
9699
fs.StringVar(&s.ProfilerApiPort, "profiler-port", defaultProfilerApiPort, "The port to listen for profiler api requests")

cmd/scheduler/app/options/options_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ func TestAddFlags(t *testing.T) {
2828
// This is a snapshot of expected options parsed by args.
2929
expected := &ServerOption{
3030
SchedulerName: defaultSchedulerName,
31+
Namspace: defaultNamespace,
32+
MetricsNamespace: defaultMetricsNamespace,
3133
ResourceReservationAppLabel: defaultResourceReservationAppLabel,
3234
SchedulePeriod: 5 * time.Minute,
3335
PrintVersion: true,
34-
MetricsNamespace: defaultMetricsNamespace,
3536
ListenAddress: defaultListenAddress,
3637
ProfilerApiPort: defaultProfilerApiPort,
3738
Verbosity: defaultVerbosityLevel,

cmd/scheduler/app/server.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@ const (
4141
leaseDuration = 15 * time.Second
4242
renewDeadline = 10 * time.Second
4343
retryPeriod = 5 * time.Second
44-
45-
lockObjectNamespace = ""
4644
)
4745

4846
var logFlushFreq = pflag.Duration("log-flush-frequency", 5*time.Second, "Maximum number of seconds between log flushes")
@@ -174,7 +172,7 @@ func Run(opt *options.ServerOption, config *restclient.Config, mux *http.ServeMu
174172

175173
// Prepare event clients.
176174
broadcaster := record.NewBroadcaster()
177-
broadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: leaderElectionClient.CoreV1().Events(lockObjectNamespace)})
175+
broadcaster.StartRecordingToSink(&corev1.EventSinkImpl{Interface: leaderElectionClient.CoreV1().Events(opt.Namspace)})
178176
eventRecorder := broadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: opt.SchedulerName})
179177

180178
hostname, err := os.Hostname()
@@ -185,7 +183,7 @@ func Run(opt *options.ServerOption, config *restclient.Config, mux *http.ServeMu
185183
id := hostname + "_" + string(uuid.NewUUID())
186184

187185
rl, err := resourcelock.New(resourcelock.LeasesResourceLock,
188-
lockObjectNamespace,
186+
opt.Namspace,
189187
opt.SchedulerName,
190188
leaderElectionClient.CoreV1(),
191189
leaderElectionClient.CoordinationV1(),

deployments/kai-scheduler/templates/services/scheduler.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ spec:
2424
imagePullPolicy: {{ .Values.scheduler.image.pullPolicy }}
2525
args:
2626
- "--scheduler-conf=/etc/config/config.yaml"
27+
- "--namespace={{ .Release.Namespace }}"
2728
- "--resource-reservation-app-label={{ .Values.global.resourceReservation.appLabel }}"
2829
{{- if .Values.scheduler.additionalArgs }}
2930
{{- toYaml .Values.scheduler.additionalArgs | nindent 12 }}

0 commit comments

Comments
 (0)