Skip to content

Commit 33d9b1f

Browse files
committed
Merge branch 'main' into strategy
2 parents 86c9103 + 9ae23a9 commit 33d9b1f

File tree

7 files changed

+661
-32
lines changed

7 files changed

+661
-32
lines changed

operator/api/v1alpha1/deployment_policy_types.go

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,6 @@
1616
* limitations under the License.
1717
*/
1818

19-
/*
20-
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
21-
* SPDX-License-Identifier: Apache-2.0
22-
*/
23-
2419
package v1alpha1
2520

2621
import (
@@ -109,6 +104,16 @@ type DeploymentBudget struct {
109104
Count *int `json:"count,omitempty"`
110105
}
111106

107+
// StrategyType names the kind of deployment strategy in use. It is a plain
// string type, so values compare and serialize as their string form.
type StrategyType string

// Known StrategyType values.
const (
	// StrategyTypeFixed is the "fixed" strategy.
	StrategyTypeFixed StrategyType = "fixed"
	// StrategyTypeLinear is the "linear" strategy.
	StrategyTypeLinear StrategyType = "linear"
	// StrategyTypeExponential is the "exponential" strategy.
	StrategyTypeExponential StrategyType = "exponential"
	// StrategyTypeUnknown is the "unknown" strategy.
	// NOTE(review): presumably a fallback for when no concrete strategy can
	// be determined; confirm against the code that produces it.
	StrategyTypeUnknown StrategyType = "unknown"
)
116+
112117
// DefaultCompartmentName is the reserved name of the default compartment.
const DefaultCompartmentName = "__default__"

operator/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

operator/internal/controller/cluster_state_v2.go

Lines changed: 101 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,6 +194,7 @@ type SkyhookNodes interface {
194194
AddCompartment(name string, compartment *wrapper.Compartment)
195195
AddCompartmentNode(name string, node wrapper.SkyhookNode)
196196
PersistCompartmentBatchStates() bool
197+
AssignNodeToCompartment(node wrapper.SkyhookNode) (string, error)
197198
}
198199

199200
var _ SkyhookNodes = &skyhookNodes{}
@@ -872,6 +873,106 @@ func (skyhook *skyhookNodes) AddCompartmentNode(name string, node wrapper.Skyhoo
872873
skyhook.compartments[name].AddNode(node)
873874
}
874875

876+
// compartmentMatch represents a compartment that matches a node
877+
type compartmentMatch struct {
878+
name string
879+
strategyType v1alpha1.StrategyType
880+
capacity int
881+
}
882+
883+
// countMatchingNodes counts how many nodes from allNodes match the given selector
884+
func (skyhook *skyhookNodes) countMatchingNodes(selector metav1.LabelSelector) (int, error) {
885+
labelSelector, err := metav1.LabelSelectorAsSelector(&selector)
886+
if err != nil {
887+
return 0, err
888+
}
889+
890+
count := 0
891+
for _, node := range skyhook.nodes {
892+
if labelSelector.Matches(labels.Set(node.GetNode().Labels)) {
893+
count++
894+
}
895+
}
896+
return count, nil
897+
}
898+
899+
// AssignNodeToCompartment assigns a single node to the appropriate compartment using overlap resolution.
900+
// When a node matches multiple compartments, it resolves using:
901+
// 1. Strategy safety order: Fixed is safer than Linear, which is safer than Exponential
902+
// 2. Tie-break on same strategy: Choose compartment with smaller effective ceiling (window)
903+
// 3. Final tie-break: Lexicographically by compartment name for determinism
904+
// Assignments are recalculated fresh on every reconcile based on current cluster state.
905+
func (skyhook *skyhookNodes) AssignNodeToCompartment(node wrapper.SkyhookNode) (string, error) {
906+
nodeLabels := labels.Set(node.GetNode().Labels)
907+
908+
matches := []compartmentMatch{}
909+
910+
// Collect all matching compartments (excluding default)
911+
for _, compartment := range skyhook.compartments {
912+
// Skip the default compartment - it's a fallback
913+
if compartment.Name == v1alpha1.DefaultCompartmentName {
914+
continue
915+
}
916+
917+
selector, err := metav1.LabelSelectorAsSelector(&compartment.Selector)
918+
if err != nil {
919+
return "", fmt.Errorf("invalid selector for compartment %s: %w", compartment.Name, err)
920+
}
921+
922+
if selector.Matches(nodeLabels) {
923+
// Count how many nodes in total match this compartment's selector
924+
matchedCount, err := skyhook.countMatchingNodes(compartment.Selector)
925+
if err != nil {
926+
return "", fmt.Errorf("error counting matching nodes for compartment %s: %w", compartment.Name, err)
927+
}
928+
929+
// Ensure at least 1 node for capacity calculation
930+
if matchedCount == 0 {
931+
matchedCount = 1
932+
}
933+
934+
stratType := wrapper.GetStrategyType(compartment.Strategy)
935+
capacity := wrapper.ComputeEffectiveCapacity(compartment.Budget, matchedCount)
936+
937+
matches = append(matches, compartmentMatch{
938+
name: compartment.Name,
939+
strategyType: stratType,
940+
capacity: capacity,
941+
})
942+
}
943+
}
944+
945+
// No matches - assign to default
946+
if len(matches) == 0 {
947+
return v1alpha1.DefaultCompartmentName, nil
948+
}
949+
950+
// Single match - return it
951+
if len(matches) == 1 {
952+
return matches[0].name, nil
953+
}
954+
955+
// Multiple matches - apply overlap resolution
956+
// Sort matches using the safety heuristic
957+
sort.Slice(matches, func(i, j int) bool {
958+
// 1. Strategy safety order: Fixed > Linear > Exponential
959+
if matches[i].strategyType != matches[j].strategyType {
960+
return wrapper.StrategyIsSafer(matches[i].strategyType, matches[j].strategyType)
961+
}
962+
963+
// 2. Tie-break on same strategy: smaller window (capacity)
964+
if matches[i].capacity != matches[j].capacity {
965+
return matches[i].capacity < matches[j].capacity
966+
}
967+
968+
// 3. Final tie-break: lexicographically by name for determinism
969+
return matches[i].name < matches[j].name
970+
})
971+
972+
// Return the safest compartment
973+
return matches[0].name, nil
974+
}
975+
875976
// cleanupNodeMap removes nodes from the given map that no longer exist in currentNodes
876977
// Returns false if nodeMap is nil, otherwise returns true if any nodes were removed
877978
func cleanupNodeMap[T any](nodeMap map[string]T, currentNodes map[string]struct{}) bool {

0 commit comments

Comments
 (0)