Skip to content

Commit 794b488

Browse files
committed
Fixed config serialization
1 parent 0d6314d commit 794b488

File tree

7 files changed

+170
-54
lines changed

7 files changed

+170
-54
lines changed

pkg/operator/operands/scheduler/resources_for_shard.go

Lines changed: 15 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -11,17 +11,18 @@ import (
1111

1212
"golang.org/x/exp/slices"
1313

14-
"gopkg.in/yaml.v3"
1514
v1 "k8s.io/api/apps/v1"
1615
corev1 "k8s.io/api/core/v1"
1716
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1817
"k8s.io/apimachinery/pkg/util/intstr"
1918
"sigs.k8s.io/controller-runtime/pkg/client"
19+
"sigs.k8s.io/yaml"
2020

2121
"github.com/NVIDIA/KAI-scheduler/cmd/scheduler/app/options"
2222
kaiv1 "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1"
2323
kaiConfigUtils "github.com/NVIDIA/KAI-scheduler/pkg/operator/config"
2424
"github.com/NVIDIA/KAI-scheduler/pkg/operator/operands/common"
25+
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/conf"
2526
"github.com/spf13/pflag"
2627
)
2728

@@ -125,7 +126,7 @@ func (s *SchedulerForShard) configMapForShard(
125126
APIVersion: "v1",
126127
}
127128
placementArguments := calculatePlacementArguments(shard.Spec.PlacementStrategy)
128-
innerConfig := config{}
129+
innerConfig := conf.SchedulerConfiguration{}
129130

130131
actions := []string{"allocate"}
131132
if placementArguments[gpuResource] != spreadStrategy && placementArguments[cpuResource] != spreadStrategy {
@@ -142,9 +143,9 @@ func (s *SchedulerForShard) configMapForShard(
142143
}
143144
}
144145

145-
innerConfig.Tiers = []tier{
146+
innerConfig.Tiers = []conf.Tier{
146147
{
147-
Plugins: []plugin{
148+
Plugins: []conf.PluginOption{
148149
{Name: "predicates"},
149150
{Name: "proportion", Arguments: proportionArgs},
150151
{Name: "priority"},
@@ -167,8 +168,8 @@ func (s *SchedulerForShard) configMapForShard(
167168

168169
innerConfig.Tiers[0].Plugins = append(
169170
innerConfig.Tiers[0].Plugins,
170-
plugin{Name: fmt.Sprintf("gpu%s", strings.Replace(placementArguments[gpuResource], "bin", "", 1))},
171-
plugin{
171+
conf.PluginOption{Name: fmt.Sprintf("gpu%s", strings.Replace(placementArguments[gpuResource], "bin", "", 1))},
172+
conf.PluginOption{
172173
Name: "nodeplacement",
173174
Arguments: placementArguments,
174175
},
@@ -177,7 +178,7 @@ func (s *SchedulerForShard) configMapForShard(
177178
if placementArguments[gpuResource] == binpackStrategy {
178179
innerConfig.Tiers[0].Plugins = append(
179180
innerConfig.Tiers[0].Plugins,
180-
plugin{Name: "gpusharingorder"},
181+
conf.PluginOption{Name: "gpusharingorder"},
181182
)
182183
}
183184

@@ -206,7 +207,7 @@ func (s *SchedulerForShard) configMapForShard(
206207
return schedulerConfig, nil
207208
}
208209

209-
func validateJobDepthMap(shard *kaiv1.SchedulingShard, innerConfig config, actions []string) error {
210+
func validateJobDepthMap(shard *kaiv1.SchedulingShard, innerConfig conf.SchedulerConfiguration, actions []string) error {
210211
for actionToConfigure := range shard.Spec.QueueDepthPerAction {
211212
if !slices.Contains(actions, actionToConfigure) {
212213
return fmt.Errorf(invalidJobDepthMapError, innerConfig.Actions, actionToConfigure)
@@ -305,20 +306,21 @@ func calculatePlacementArguments(placementStrategy *kaiv1.PlacementStrategy) map
305306
}
306307
}
307308

308-
func addMinRuntimePluginIfNeeded(plugins *[]plugin, minRuntime *kaiv1.MinRuntime) {
309+
func addMinRuntimePluginIfNeeded(plugins *[]conf.PluginOption, minRuntime *kaiv1.MinRuntime) {
309310
if minRuntime == nil || (minRuntime.PreemptMinRuntime == nil && minRuntime.ReclaimMinRuntime == nil) {
310311
return
311312
}
312313

313-
minRuntimePlugin := plugin{Name: "minruntime", Arguments: map[string]string{}}
314-
314+
minRuntimeArgs := make(map[string]string)
315315
if minRuntime.PreemptMinRuntime != nil {
316-
minRuntimePlugin.Arguments["defaultPreemptMinRuntime"] = *minRuntime.PreemptMinRuntime
316+
minRuntimeArgs["defaultPreemptMinRuntime"] = *minRuntime.PreemptMinRuntime
317317
}
318318
if minRuntime.ReclaimMinRuntime != nil {
319-
minRuntimePlugin.Arguments["defaultReclaimMinRuntime"] = *minRuntime.ReclaimMinRuntime
319+
minRuntimeArgs["defaultReclaimMinRuntime"] = *minRuntime.ReclaimMinRuntime
320320
}
321321

322+
minRuntimePlugin := conf.PluginOption{Name: "minruntime", Arguments: minRuntimeArgs}
323+
322324
*plugins = append(*plugins, minRuntimePlugin)
323325
}
324326

pkg/operator/operands/scheduler/resources_test.go

Lines changed: 118 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,15 +8,16 @@ import (
88
"fmt"
99
"strings"
1010
"testing"
11-
12-
"gopkg.in/yaml.v3"
11+
"time"
1312

1413
"github.com/spf13/pflag"
1514

1615
"github.com/NVIDIA/KAI-scheduler/cmd/scheduler/app/options"
1716
kaiv1 "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1"
1817
kaiv1qc "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/queue_controller"
1918
kaiv1scheduler "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1/scheduler"
19+
usagedbapi "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/cache/usagedb/api"
20+
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/conf"
2021

2122
"github.com/stretchr/testify/assert"
2223
"github.com/stretchr/testify/require"
@@ -25,6 +26,7 @@ import (
2526
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2627
"k8s.io/utils/ptr"
2728
"sigs.k8s.io/controller-runtime/pkg/client/fake"
29+
"sigs.k8s.io/yaml"
2830
)
2931

3032
func TestDeploymentForShard(t *testing.T) {
@@ -187,7 +189,7 @@ func TestValidateJobDepthMap(t *testing.T) {
187189

188190
for _, tt := range tests {
189191
t.Run(tt.name, func(t *testing.T) {
190-
innerConfig := config{
192+
innerConfig := conf.SchedulerConfiguration{
191193
Actions: strings.Join(tt.actions, ", "),
192194
}
193195

@@ -431,6 +433,59 @@ tiers:
431433
},
432434
expectedErr: true,
433435
},
436+
{
437+
name: "usage DB configuration",
438+
config: &kaiv1.Config{
439+
Spec: kaiv1.ConfigSpec{},
440+
},
441+
shard: &kaiv1.SchedulingShard{
442+
Spec: kaiv1.SchedulingShardSpec{
443+
UsageDBConfig: &usagedbapi.UsageDBConfig{
444+
ClientType: "prometheus",
445+
ConnectionString: "http://prometheus-operated.kai-scheduler.svc.cluster.local:9090",
446+
UsageParams: &usagedbapi.UsageParams{
447+
HalfLifePeriod: &metav1.Duration{Duration: 10 * time.Minute},
448+
WindowSize: &metav1.Duration{Duration: 10 * time.Minute},
449+
WindowType: ptr.To(usagedbapi.SlidingWindow),
450+
},
451+
},
452+
},
453+
},
454+
expected: map[string]string{
455+
"config.yaml": `actions: allocate,consolidation,reclaim,preempt,stalegangeviction
456+
tiers:
457+
- plugins:
458+
- name: predicates
459+
- name: proportion
460+
- name: priority
461+
- name: nodeavailability
462+
- name: resourcetype
463+
- name: podaffinity
464+
- name: elastic
465+
- name: kubeflow
466+
- name: ray
467+
- name: subgrouporder
468+
- name: taskorder
469+
- name: nominatednode
470+
- name: dynamicresources
471+
- name: minruntime
472+
- name: topology
473+
- name: snapshot
474+
- name: gpupack
475+
- name: nodeplacement
476+
arguments:
477+
cpu: binpack
478+
gpu: binpack
479+
- name: gpusharingorder
480+
usageDBConfig:
481+
clientType: prometheus
482+
connectionString: http://prometheus-operated.kai-scheduler.svc.cluster.local:9090
483+
usageParams:
484+
halfLifePeriod: 10m
485+
windowSize: 10m
486+
windowType: sliding`,
487+
},
488+
},
434489
}
435490

436491
for _, tt := range tests {
@@ -454,15 +509,15 @@ tiers:
454509
require.True(t, found, "ConfigMap missing config.yaml")
455510

456511
// Unmarshal expected YAML from test case
457-
var expectedConfig config
512+
var expectedConfig conf.SchedulerConfiguration
458513
if _, ok := tt.expected["config.yaml"]; !ok {
459514
t.Fatal("Test case must provide expected YAML for config.yaml")
460515
}
461516
err = yaml.Unmarshal([]byte(tt.expected["config.yaml"]), &expectedConfig)
462517
require.NoError(t, err, "Failed to unmarshal expected config")
463518

464519
// Unmarshal actual YAML from ConfigMap
465-
var actualConfig config
520+
var actualConfig conf.SchedulerConfiguration
466521
err = yaml.Unmarshal([]byte(actualYAML), &actualConfig)
467522
require.NoError(t, err, "Failed to unmarshal actual config")
468523

@@ -606,3 +661,61 @@ func TestServiceAccountForScheduler(t *testing.T) {
606661
})
607662
}
608663
}
664+
665+
func TestMarshalingShardVsConfig(t *testing.T) {
666+
shardSpecString := `
667+
spec:
668+
partitionLabelValue: ""
669+
placementStrategy:
670+
cpu: binpack
671+
gpu: binpack
672+
usageDBConfig:
673+
clientType: prometheus
674+
connectionString: http://prometheus-operated.kai-scheduler.svc.cluster.local:9090
675+
usageParams:
676+
halfLifePeriod: 10m
677+
windowSize: 10m
678+
windowType: sliding
679+
`
680+
681+
shardSpec := &kaiv1.SchedulingShardSpec{}
682+
err := yaml.Unmarshal([]byte(shardSpecString), shardSpec)
683+
assert.NoError(t, err)
684+
685+
configString := `actions: allocate,consolidation,reclaim,preempt,stalegangeviction
686+
tiers:
687+
- plugins:
688+
- name: predicates
689+
- name: proportion
690+
- name: priority
691+
- name: nodeavailability
692+
- name: resourcetype
693+
- name: podaffinity
694+
- name: elastic
695+
- name: kubeflow
696+
- name: ray
697+
- name: subgrouporder
698+
- name: taskorder
699+
- name: nominatednode
700+
- name: dynamicresources
701+
- name: minruntime
702+
- name: topology
703+
- name: snapshot
704+
- name: gpupack
705+
- name: nodeplacement
706+
arguments:
707+
cpu: binpack
708+
gpu: binpack
709+
- name: gpusharingorder
710+
usageDBConfig:
711+
clientType: prometheus
712+
connectionString: http://prometheus-operated.kai-scheduler.svc.cluster.local:9090
713+
usageParams:
714+
halfLifePeriod: 10m
715+
windowSize: 10m
716+
windowType: sliding
717+
`
718+
config := &conf.SchedulerConfiguration{}
719+
err = yaml.Unmarshal([]byte(configString), config)
720+
assert.NoError(t, err)
721+
}

pkg/operator/operands/scheduler/scheduler.go

Lines changed: 0 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ import (
1111

1212
kaiv1 "github.com/NVIDIA/KAI-scheduler/pkg/apis/kai/v1"
1313
"github.com/NVIDIA/KAI-scheduler/pkg/operator/operands/common"
14-
usagedbapi "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/cache/usagedb/api"
1514
)
1615

1716
const (
@@ -23,22 +22,6 @@ const (
2322
defaultResourceName = "scheduler"
2423
)
2524

26-
type config struct {
27-
Actions string `yaml:"actions"`
28-
Tiers []tier `yaml:"tiers,omitempty"`
29-
QueueDepthPerAction map[string]int `yaml:"queueDepthPerAction,omitempty"`
30-
UsageDBConfig *usagedbapi.UsageDBConfig `yaml:"usageDBConfig,omitempty"`
31-
}
32-
33-
type tier struct {
34-
Plugins []plugin `yaml:"plugins"`
35-
}
36-
37-
type plugin struct {
38-
Name string `yaml:"name"`
39-
Arguments map[string]string `yaml:"arguments,omitempty"`
40-
}
41-
4225
type SchedulerForShard struct {
4326
schedulingShard *kaiv1.SchedulingShard
4427

pkg/operator/operands/scheduler/scheduler_test.go

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,7 @@ var _ = Describe("Scheduler", func() {
137137
cm := cmObj.(*v1.ConfigMap)
138138

139139
Expect(err).To(BeNil())
140-
Expect(cm.Data["config.yaml"]).To(Equal(`actions: allocate, consolidation, reclaim, preempt, stalegangeviction
140+
Expect(cm.Data["config.yaml"]).To(MatchYAML(`actions: allocate, consolidation, reclaim, preempt, stalegangeviction
141141
tiers:
142142
- plugins:
143143
- name: predicates
@@ -176,7 +176,7 @@ tiers:
176176
cm := cmObj.(*v1.ConfigMap)
177177

178178
Expect(err).To(BeNil())
179-
Expect(cm.Data["config.yaml"]).To(Equal(`actions: allocate, reclaim, preempt, stalegangeviction
179+
Expect(cm.Data["config.yaml"]).To(MatchYAML(`actions: allocate, reclaim, preempt, stalegangeviction
180180
tiers:
181181
- plugins:
182182
- name: predicates

pkg/scheduler/conf/scheduler_conf.go

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -69,21 +69,21 @@ type PluginOption struct {
6969
// The name of Plugin
7070
Name string `yaml:"name" json:"name"`
7171
// JobOrderDisabled defines whether jobOrderFn is disabled
72-
JobOrderDisabled bool `yaml:"disableJobOrder" json:"disableJobOrder"`
72+
JobOrderDisabled bool `yaml:"disableJobOrder,omitempty" json:"disableJobOrder,omitempty"`
7373
// TaskOrderDisabled defines whether taskOrderFn is disabled
74-
TaskOrderDisabled bool `yaml:"disableTaskOrder" json:"disableTaskOrder"`
74+
TaskOrderDisabled bool `yaml:"disableTaskOrder,omitempty" json:"disableTaskOrder,omitempty"`
7575
// PreemptableDisabled defines whether preemptableFn is disabled
76-
PreemptableDisabled bool `yaml:"disablePreemptable" json:"disablePreemptable"`
76+
PreemptableDisabled bool `yaml:"disablePreemptable,omitempty" json:"disablePreemptable,omitempty"`
7777
// ReclaimableDisabled defines whether reclaimableFn is disabled
78-
ReclaimableDisabled bool `yaml:"disableReclaimable" json:"disableReclaimable"`
78+
ReclaimableDisabled bool `yaml:"disableReclaimable,omitempty" json:"disableReclaimable,omitempty"`
7979
// QueueOrderDisabled defines whether queueOrderFn is disabled
80-
QueueOrderDisabled bool `yaml:"disableQueueOrder" json:"disableQueueOrder"`
80+
QueueOrderDisabled bool `yaml:"disableQueueOrder,omitempty" json:"disableQueueOrder,omitempty"`
8181
// PredicateDisabled defines whether predicateFn is disabled
82-
PredicateDisabled bool `yaml:"disablePredicate" json:"disablePredicate"`
82+
PredicateDisabled bool `yaml:"disablePredicate,omitempty" json:"disablePredicate,omitempty"`
8383
// NodeOrderDisabled defines whether NodeOrderFn is disabled
84-
NodeOrderDisabled bool `yaml:"disableNodeOrder" json:"disableNodeOrder"`
84+
NodeOrderDisabled bool `yaml:"disableNodeOrder,omitempty" json:"disableNodeOrder,omitempty"`
8585
// Arguments defines the different arguments that can be given to different plugins
86-
Arguments map[string]string `yaml:"arguments" json:"arguments"`
86+
Arguments map[string]string `yaml:"arguments,omitempty" json:"arguments,omitempty"`
8787
}
8888

8989
type SchedulingNodePoolParams struct {

pkg/scheduler/conf_util/scheduler_conf_util.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ import (
2424
"io/ioutil"
2525
"strings"
2626

27-
"gopkg.in/yaml.v2"
27+
"sigs.k8s.io/yaml"
2828

2929
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/conf"
3030
"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/framework"

0 commit comments

Comments
 (0)