Skip to content

Commit d0d8cc3

Browse files
authored
Fair share simulator (#339)
* Added fair share simulator
1 parent f44935f commit d0d8cc3

File tree

6 files changed

+361
-2
lines changed

6 files changed

+361
-2
lines changed

.gitignore

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,7 @@ cover.out
55
.DS_Store
66
coverage/
77
*.test
8-
launch.json
8+
launch.json
9+
10+
cmd/fairshare-simulator/fairshare-simulator
11+
cmd/snapshot-tool/snapshot-tool

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ KUSTOMIZE ?= $(LOCALBIN)/kustomize
1616

1717
# Space-separated list of services to build by default
1818
# SERVICE_NAMES := service1 service2 service3
19-
SERVICE_NAMES := podgrouper scheduler binder webhookmanager resourcereservation snapshot-tool scalingpod nodescaleadjuster podgroupcontroller queuecontroller
19+
SERVICE_NAMES := podgrouper scheduler binder webhookmanager resourcereservation snapshot-tool scalingpod nodescaleadjuster podgroupcontroller queuecontroller fairshare-simulator
2020

2121

2222
lint: fmt-go vet-go lint-go

cmd/fairshare-simulator/README.md

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Fairshare Simulator
2+
3+
This is a simple HTTP server that simulates the fair share resource division algorithm used in the KAI Scheduler's proportion plugin.
4+
5+
## Building and Running
6+
7+
Build the simulator:
8+
9+
```bash
10+
go build .
11+
```
12+
13+
Run it:
14+
15+
```bash
16+
./fairshare-simulator -port=8080
17+
```
18+
19+
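The port is configurable with the `-port` flag and defaults to 8080. Pass `-enable-cors` to add CORS headers to every response and to answer `OPTIONS` preflight requests; it is disabled by default.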
20+
21+
## Usage
22+
23+
Send a POST request to `/simulate` with a JSON body containing the simulation parameters.
24+
25+
### Example Request
26+
27+
```http
28+
POST /simulate HTTP/1.1
29+
Content-Type: application/json
30+
31+
{
32+
"totalResource": {
33+
"GPU": 100,
34+
"CPU": 16000,
35+
"Memory": 32000000
36+
},
37+
"queues": [
38+
{
39+
"uid": "queue1",
40+
"name": "test-queue",
41+
"priority": 0,
42+
"resourceShare": {
43+
"gpu": {
44+
"deserved": 10,
45+
"request": 100,
46+
"overQuotaWeight": 3
47+
}
48+
}
49+
},
50+
{
51+
"uid": "queue2",
52+
"name": "test-queue2",
53+
"priority": 0,
54+
"resourceShare": {
55+
"gpu": {
56+
"deserved": 10,
57+
"request": 100,
58+
"overQuotaWeight": 1
59+
}
60+
}
61+
}
62+
]
63+
}
64+
```
65+
66+
### Response
67+
68+
The response is a JSON object keyed by queue `uid`, holding the computed GPU, CPU, and memory fair share for each queue:
69+
70+
```json
71+
{
72+
"queue1": {
73+
"gpu": 70,
74+
"cpu": 16000,
75+
"memory": 100000
76+
},
77+
"queue2": {
78+
"gpu": 30,
79+
"cpu": 16000,
80+
"memory": 100000
81+
}
82+
}
83+
```
84+
85+
(Note: Actual values depend on the input parameters and the simulation logic.)
86+
87+
This simulator uses the `SetResourcesShare` function from the proportion plugin to compute the fair shares.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
POST http://localhost:8080/simulate HTTP/1.1
2+
content-type: application/json
3+
4+
{
5+
"totalResource": {
6+
"GPU": 100,
7+
"CPU": 16000,
8+
"Memory": 32000000
9+
},
10+
"queues": [
11+
{
12+
"uid": "queue1",
13+
"name": "test-queue",
14+
"priority": 0,
15+
"resourceShare": {
16+
"gpu": {
17+
"deserved": 10,
18+
"request": 100,
19+
"overQuotaWeight": 3
20+
}
21+
}
22+
},
23+
{
24+
"uid": "queue2",
25+
"name": "test-queue2",
26+
"priority": 0,
27+
"resourceShare": {
28+
"gpu": {
29+
"deserved": 10,
30+
"request": 100,
31+
"overQuotaWeight": 1
32+
}
33+
}
34+
}
35+
]
36+
}

cmd/fairshare-simulator/main.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Copyright 2023 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package main
5+
6+
import (
	"encoding/json"
	"flag"
	"fmt"
	"log"
	"net/http"
	"time"

	"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/common_info"
	"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/plugins/proportion/resource_division"
	rs "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/plugins/proportion/resource_share"
)
17+
18+
type SimulateRequest struct {
19+
TotalResource rs.ResourceQuantities `json:"totalResource"`
20+
Queues []rs.QueueOverrides `json:"queues"`
21+
}
22+
23+
// QueueFairShare is the per-queue entry of the /simulate response. Each field
// carries the FairShare value reported for that resource after the simulation.
type QueueFairShare struct {
	GPU    float64 `json:"gpu"`
	CPU    float64 `json:"cpu"`
	Memory float64 `json:"memory"`
}
28+
29+
// server carries the runtime options shared by the HTTP handlers.
type server struct {
	// enableCors makes the handler emit CORS headers and answer OPTIONS
	// preflight requests.
	enableCors bool
}
32+
33+
func (s *server) enableCorsHeaders(w *http.ResponseWriter) {
34+
(*w).Header().Set("Access-Control-Allow-Origin", "*")
35+
(*w).Header().Set("Access-Control-Allow-Methods", "POST, OPTIONS")
36+
(*w).Header().Set("Access-Control-Allow-Headers", "Content-Type")
37+
}
38+
39+
func (s *server) simulateHandler(w http.ResponseWriter, r *http.Request) {
40+
if s.enableCors {
41+
s.enableCorsHeaders(&w)
42+
if r.Method == "OPTIONS" {
43+
w.WriteHeader(http.StatusOK)
44+
return
45+
}
46+
}
47+
48+
if r.Method != "POST" {
49+
http.Error(w, "Only POST allowed", http.StatusMethodNotAllowed)
50+
return
51+
}
52+
53+
var req SimulateRequest
54+
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
55+
http.Error(w, err.Error(), http.StatusBadRequest)
56+
return
57+
}
58+
59+
queues := SimulateSetResourcesShare(req.TotalResource, req.Queues)
60+
61+
resp := make(map[string]QueueFairShare)
62+
for id, qa := range queues {
63+
resp[string(id)] = QueueFairShare{
64+
GPU: qa.GPU.FairShare,
65+
CPU: qa.CPU.FairShare,
66+
Memory: qa.Memory.FairShare,
67+
}
68+
}
69+
70+
w.Header().Set("Content-Type", "application/json")
71+
err := json.NewEncoder(w).Encode(resp)
72+
if err != nil {
73+
http.Error(w, fmt.Sprintf("Failed to encode response: %v", err), http.StatusInternalServerError)
74+
return
75+
}
76+
}
77+
78+
func main() {
79+
var port = flag.Int("port", 8080, "Port to listen on")
80+
var enableCors = flag.Bool("enable-cors", false, "Enable CORS headers for cross-origin requests")
81+
flag.Parse()
82+
83+
s := &server{
84+
enableCors: *enableCors,
85+
}
86+
87+
http.HandleFunc("/simulate", s.simulateHandler)
88+
log.Printf("Starting server on port %d (CORS enabled: %v)...", *port, *enableCors)
89+
err := http.ListenAndServe(fmt.Sprintf(":%d", *port), nil)
90+
if err != nil {
91+
log.Fatalf("Failed to start server: %v", err)
92+
}
93+
}
94+
95+
func SimulateSetResourcesShare(totalResource rs.ResourceQuantities, queueOverrides []rs.QueueOverrides) map[common_info.QueueID]*rs.QueueAttributes {
96+
queues := make(map[common_info.QueueID]*rs.QueueAttributes)
97+
for _, qo := range queueOverrides {
98+
qa := qo.ToQueueAttributes()
99+
queues[qa.UID] = qa
100+
}
101+
resource_division.SetResourcesShare(totalResource, queues)
102+
return queues
103+
}
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Copyright 2025 NVIDIA CORPORATION
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package resource_share
5+
6+
import (
7+
"time"
8+
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
11+
consts "github.com/NVIDIA/KAI-scheduler/pkg/common/constants"
12+
common_info "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/common_info"
13+
"knative.dev/pkg/ptr"
14+
)
15+
16+
// defaultRequest is the Request value given to a resource share when the
// override does not specify one.
const defaultRequest = 100000
19+
20+
// ResourceShareOverrides lists the resource-share values a caller may set
// explicitly, for example through JSON. Every field is a pointer so that an
// omitted value (nil) can be told apart from an explicit zero.
type ResourceShareOverrides struct {
	Deserved                *float64 `json:"deserved"`
	FairShare               *float64 `json:"fairShare"`
	MaxAllowed              *float64 `json:"maxAllowed"`
	OverQuotaWeight         *float64 `json:"overQuotaWeight"`
	Allocated               *float64 `json:"allocated"`
	AllocatedNotPreemptible *float64 `json:"allocatedNotPreemptible"`
	Request                 *float64 `json:"request"`
	AbsoluteUsage           *float64 `json:"absoluteUsage"`
	VacantAdjustedUsage     *float64 `json:"vacantAdjustedUsage"`
}
31+
32+
func (r *ResourceShareOverrides) ResourceShare() *ResourceShare {
33+
rs := ResourceShare{
34+
Deserved: 0,
35+
FairShare: 0,
36+
MaxAllowed: consts.UnlimitedResourceQuantity,
37+
OverQuotaWeight: 1,
38+
Allocated: 0,
39+
AllocatedNotPreemptible: 0,
40+
Request: defaultRequest,
41+
}
42+
if r.Deserved != nil {
43+
rs.Deserved = *r.Deserved
44+
}
45+
if r.FairShare != nil {
46+
rs.FairShare = *r.FairShare
47+
}
48+
if r.MaxAllowed != nil {
49+
rs.MaxAllowed = *r.MaxAllowed
50+
}
51+
if r.OverQuotaWeight != nil {
52+
rs.OverQuotaWeight = *r.OverQuotaWeight
53+
}
54+
if r.Allocated != nil {
55+
rs.Allocated = *r.Allocated
56+
}
57+
if r.AllocatedNotPreemptible != nil {
58+
rs.AllocatedNotPreemptible = *r.AllocatedNotPreemptible
59+
}
60+
if r.Request != nil {
61+
rs.Request = *r.Request
62+
}
63+
return &rs
64+
}
65+
66+
type QueueResourceShareOverrides struct {
67+
GPU *ResourceShareOverrides `json:"gpu"`
68+
CPU *ResourceShareOverrides `json:"cpu"`
69+
Memory *ResourceShareOverrides `json:"memory"`
70+
}
71+
72+
func (q QueueResourceShareOverrides) ResourceShare() QueueResourceShare {
73+
if q.GPU == nil {
74+
q.GPU = &ResourceShareOverrides{}
75+
}
76+
if q.CPU == nil {
77+
q.CPU = &ResourceShareOverrides{
78+
Deserved: ptr.Float64(consts.UnlimitedResourceQuantity),
79+
}
80+
}
81+
if q.Memory == nil {
82+
q.Memory = &ResourceShareOverrides{
83+
Deserved: ptr.Float64(consts.UnlimitedResourceQuantity),
84+
}
85+
}
86+
rs := QueueResourceShare{
87+
GPU: *q.GPU.ResourceShare(),
88+
CPU: *q.CPU.ResourceShare(),
89+
Memory: *q.Memory.ResourceShare(),
90+
}
91+
return rs
92+
}
93+
94+
type QueueOverrides struct {
95+
UID common_info.QueueID `json:"uid"`
96+
Name string `json:"name"`
97+
ParentQueue common_info.QueueID `json:"parentQueue"`
98+
ChildQueues []common_info.QueueID `json:"childQueues"`
99+
CreationTimestamp *string `json:"creationTimestamp"`
100+
Priority *int `json:"priority"`
101+
ResourceShare QueueResourceShareOverrides `json:"resourceShare"`
102+
}
103+
104+
func (qo *QueueOverrides) ToQueueAttributes() *QueueAttributes {
105+
qa := &QueueAttributes{
106+
UID: qo.UID,
107+
Name: qo.Name,
108+
ParentQueue: qo.ParentQueue,
109+
ChildQueues: qo.ChildQueues,
110+
Priority: 0,
111+
QueueResourceShare: qo.ResourceShare.ResourceShare(),
112+
}
113+
114+
if qo.Priority != nil {
115+
qa.Priority = *qo.Priority
116+
}
117+
118+
if qo.CreationTimestamp != nil {
119+
t, err := time.Parse(time.RFC3339, *qo.CreationTimestamp)
120+
if err == nil {
121+
qa.CreationTimestamp = metav1.Time{Time: t}
122+
} else {
123+
qa.CreationTimestamp = metav1.Now()
124+
}
125+
} else {
126+
qa.CreationTimestamp = metav1.Now()
127+
}
128+
129+
return qa
130+
}

0 commit comments

Comments
 (0)