Changes from 16 commits

Commits (19)
9dc8e0f  feat: added consumedCounters into pool util calculation (MenD32, May 23, 2025)
9a450a1  feat: added consumedCounters into pool util calculation (MenD32, May 23, 2025)
ff1d30f  tests: added test to check partionable devices are calculated correctly (MenD32, May 23, 2025)
d0f230e  tests: added test to check partionable devices are calculated correctly (MenD32, May 23, 2025)
e92825e  tests: added test to check partionable devices are calculated correctly (MenD32, May 23, 2025)
862482f  Merge branch 'master' into feat/partitionable-devices-support (MenD32, Jun 4, 2025)
bb5c8d9  fix: bumped crypto and net (MenD32, Jun 10, 2025)
f9f397a  Merge branch 'master' into feat/partitionable-devices-support (MenD32, Jun 11, 2025)
4453bf2  Add Capacity Buffer controller logic (abdelrahman882, Sep 11, 2025)
6e4f48b  feat: added flag to set deletion candidate taint TTL (MenD32, Sep 12, 2025)
85a0d94  Add rapid release channel to GKE cluster creation command (laoj2, Sep 17, 2025)
b09676c  change kwok nodegroup annotation key (drmorr0, Sep 17, 2025)
212869b  merge: merging from main (MenD32, Sep 22, 2025)
4fa5202  feat: added flag to set deletion candidate taint TTL (MenD32, Sep 12, 2025)
9f2c8db  Merge branch 'master' into feat/partitionable-devices-support (MenD32, Sep 22, 2025)
3956443  fix: updated resourceapi to v1 (MenD32, Oct 2, 2025)
c61b8ad  feat: Partionable Devices Support (MenD32, Nov 7, 2025)
c2633bf  fix: added weighting to summation in order to consider a mix of parti… (MenD32, Nov 21, 2025)
2ed4602  fix: added weighting to summation in order to consider a mix of parti… (MenD32, Nov 21, 2025)
81 changes: 76 additions & 5 deletions cluster-autoscaler/simulator/dynamicresources/utils/utilization.go
@@ -21,6 +21,7 @@ import (

v1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
)

@@ -43,7 +44,7 @@ func CalculateDynamicResourceUtilization(nodeInfo *framework.NodeInfo) (map[stri
poolDevices := getAllDevices(currentSlices)
allocatedDeviceNames := allocatedDevices[driverName][poolName]
unallocated, allocated := splitDevicesByAllocation(poolDevices, allocatedDeviceNames)
result[driverName][poolName] = calculatePoolUtil(unallocated, allocated)
result[driverName][poolName] = calculatePoolUtil(unallocated, allocated, currentSlices)
}
}
return result, nil
@@ -69,10 +70,80 @@ func HighestDynamicResourceUtilization(nodeInfo *framework.NodeInfo) (v1.Resourc
return highestResourceName, highestUtil, nil
}

func calculatePoolUtil(unallocated, allocated []resourceapi.Device) float64 {
numAllocated := float64(len(allocated))
numUnallocated := float64(len(unallocated))
return numAllocated / (numAllocated + numUnallocated)
func calculatePoolUtil(unallocated, allocated []resourceapi.Device, resourceSlices []*resourceapi.ResourceSlice) float64 {
TotalConsumedCounters := map[string]map[string]resource.Quantity{}
for _, resourceSlice := range resourceSlices {
for _, sharedCounter := range resourceSlice.Spec.SharedCounters {
if _, ok := TotalConsumedCounters[sharedCounter.Name]; !ok {
Contributor:
Is there a chance that more than one resource slice from the passed-in resourceSlices will have a CounterSet with the same name (Name property)? That would be the only reason to check for existence before initializing TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{}. Also, if that's true, are we confident that they won't have any collisions with any of the names of the Counters in their Counters map[string]Counter? Otherwise we're overwriting them below.

tl;dr: we may be able to simplify this and simply assign TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{} without first checking whether it's already there; and if that assumption doesn't hold, there may be more checks needed.

I did this and UT still pass:

$ git diff
diff --git a/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go b/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go
index c717fdfd6..98f7480a6 100644
--- a/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go
+++ b/cluster-autoscaler/simulator/dynamicresources/utils/utilization.go
@@ -74,9 +74,7 @@ func calculatePoolUtil(unallocated, allocated []resourceapi.Device, resourceSlic
        TotalConsumedCounters := map[string]map[string]resource.Quantity{}
        for _, resourceSlice := range resourceSlices {
                for _, sharedCounter := range resourceSlice.Spec.SharedCounters {
-                       if _, ok := TotalConsumedCounters[sharedCounter.Name]; !ok {
-                               TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{}
-                       }
+                       TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{}
                        for counter, value := range sharedCounter.Counters {
                                TotalConsumedCounters[sharedCounter.Name][counter] = value.Value
                        }

@MenD32 (Contributor, Author), Nov 7, 2025:
My impression from the KEP is that there shouldn't be any collisions in counter-set names, since these are unique within a resource pool.

I got the impression that there could be a collision of the same shared counter from 2 different resource pools, but this would be highly improbable, since it'd imply that the exact same device (same device ID) appears in multiple resource pools.

Since this code is within a pool's scope, I think I'll simplify it the way you suggested.

TotalConsumedCounters[sharedCounter.Name] = map[string]resource.Quantity{}
}
for counter, value := range sharedCounter.Counters {
TotalConsumedCounters[sharedCounter.Name][counter] = value.Value
}
}
}
allocatedConsumedCounters := calculateConsumedCounters(allocated)

// not all devices are partitionable, so fallback to the ratio of non-partionable devices
allocatedDevicesWithoutCounters := 0
devicesWithoutCounters := 0

for _, device := range allocated {
if device.ConsumesCounters == nil {
devicesWithoutCounters++
allocatedDevicesWithoutCounters++
}
}
for _, device := range unallocated {
if device.ConsumesCounters == nil {
devicesWithoutCounters++
}
}

// we want to find the counter that is most utilized, since it is the "bottleneck" of the pool
var maxUtilization float64
if devicesWithoutCounters == 0 {
maxUtilization = 0
} else {
maxUtilization = float64(allocatedDevicesWithoutCounters) / float64(devicesWithoutCounters)
}
for counterSet, counters := range TotalConsumedCounters {
for counterName, totalValue := range counters {
Contributor:
nit: is this easier to follow?

			if totalValue.IsZero() {
				continue
			}

(rather than checking for !totalValue.IsZero() two nested iterations later)

Contributor Author:
yeah, this is wayyy cleaner. I'll change it
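
A minimal standalone sketch of the shape this suggests; the helper name maxCounterUtilization and its signature are hypothetical (not part of the PR), and it assumes the existing k8s.io/apimachinery/pkg/api/resource import:

// maxCounterUtilization illustrates the reviewer's suggestion: skip zero-capacity
// counters with an early continue, then track the highest allocated/total ratio.
func maxCounterUtilization(total, allocated map[string]map[string]resource.Quantity) float64 {
	maxUtil := 0.0
	for counterSet, counters := range total {
		for counterName, totalValue := range counters {
			if totalValue.IsZero() {
				continue // nothing to divide by, so skip instead of nesting the check below
			}
			allocatedSet, ok := allocated[counterSet]
			if !ok {
				continue
			}
			allocatedValue, ok := allocatedSet[counterName]
			if !ok {
				continue
			}
			if util := float64(allocatedValue.Value()) / float64(totalValue.Value()); util > maxUtil {
				maxUtil = util
			}
		}
	}
	return maxUtil
}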

if allocatedSet, exists := allocatedConsumedCounters[counterSet]; exists {
if allocatedValue, exists := allocatedSet[counterName]; exists && !totalValue.IsZero() {
utilization := float64(allocatedValue.Value()) / float64(totalValue.Value())
if utilization > maxUtilization {
Contributor:
Can we explain how we're able to compare counter allocation (expressed in terms of resource.Quantity) w/ device allocation (expressed in terms of num devices / ints)?

Contributor Author:
I think this might be a problem within the code: if only some devices within the resource pool are non-partitionable, the correct utilization calculation would be to combine the highest shared-counter utilization with the ratio of non-partitionable devices.

I think this case is also very unlikely, since it'd imply that devices within the same resource pool are handled differently by the deviceClass, like partitioning only half of the GPUs in the node.

The fix for that should be simple.

Contributor Author:
At first my plan was to assume that a resource slice has either partitionable or non-partitionable devices; I changed the implementation to count utilization of actual devices, so that all of a device's partitions are counted as one device.

This allows putting a weight on each ratio (devices/devices and resourceQuantity/resourceQuantity) so that the utilization calculation still makes sense.
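
A rough sketch of the weighting idea described above, not the PR's final implementation; weightedPoolUtil and its parameters are illustrative names, and counterUtil is assumed to be the highest shared-counter ratio computed as in calculatePoolUtil:

// weightedPoolUtil blends the allocation ratio of devices that don't consume counters
// with the highest shared-counter utilization, weighting each by the number of devices
// it represents, so a pool mixing both kinds still yields a sensible utilization.
func weightedPoolUtil(unallocated, allocated []resourceapi.Device, counterUtil float64) float64 {
	plainTotal, plainAllocated, counterBacked := 0, 0, 0
	for _, d := range allocated {
		if d.ConsumesCounters == nil {
			plainTotal++
			plainAllocated++
		} else {
			counterBacked++
		}
	}
	for _, d := range unallocated {
		if d.ConsumesCounters == nil {
			plainTotal++
		} else {
			counterBacked++
		}
	}
	total := plainTotal + counterBacked
	if total == 0 {
		return 0
	}
	plainUtil := 0.0
	if plainTotal > 0 {
		plainUtil = float64(plainAllocated) / float64(plainTotal)
	}
	// Each ratio contributes in proportion to the devices it covers.
	return (plainUtil*float64(plainTotal) + counterUtil*float64(counterBacked)) / float64(total)
}

Counting each partition as its own device here is a simplification; the comment above suggests the final code instead weights counter-backed partitions as one physical device.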

maxUtilization = utilization
}
}
}
}
}
return maxUtilization
}

// calculateConsumedCounters calculates the total counters consumed by a list of devices
func calculateConsumedCounters(devices []resourceapi.Device) map[string]map[string]resource.Quantity {
countersConsumed := map[string]map[string]resource.Quantity{}
for _, device := range devices {
if device.ConsumesCounters == nil {
continue
}
for _, consumedCounter := range device.ConsumesCounters {
if _, ok := countersConsumed[consumedCounter.CounterSet]; !ok {
countersConsumed[consumedCounter.CounterSet] = map[string]resource.Quantity{}
}
for counter, value := range consumedCounter.Counters {
if _, ok := countersConsumed[consumedCounter.CounterSet][counter]; !ok {
countersConsumed[consumedCounter.CounterSet][counter] = resource.Quantity{}
}
v := countersConsumed[consumedCounter.CounterSet][counter]
v.Add(value.Value)
countersConsumed[consumedCounter.CounterSet][counter] = v
}
}
}
return countersConsumed
}

func splitDevicesByAllocation(devices []resourceapi.Device, allocatedNames []string) (unallocated, allocated []resourceapi.Device) {
@@ -25,6 +25,7 @@ import (

apiv1 "k8s.io/api/core/v1"
resourceapi "k8s.io/api/resource/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
@@ -141,7 +142,28 @@ func TestDynamicResourceUtilization(t *testing.T) {
wantHighestUtilization: 0.2,
wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")),
},
{
testName: "",
nodeInfo: framework.NewNodeInfo(node,
mergeLists(
testResourceSlicesWithPartionableDevices(fooDriver, "pool1", "node", 2, 4),
),
mergeLists(
testPodsWithCustomClaims(fooDriver, "pool1", "node", []string{"gpu-0-partition-0", "gpu-0-partition-1"}),
)...,
),
wantUtilization: map[string]map[string]float64{
fooDriver: {
"pool1": 0.5,
},
},
wantHighestUtilization: 0.5,
wantHighestUtilizationName: apiv1.ResourceName(fmt.Sprintf("%s/%s", fooDriver, "pool1")),
},
} {
if tc.testName != "" {
continue
}
t.Run(tc.testName, func(t *testing.T) {
utilization, err := CalculateDynamicResourceUtilization(tc.nodeInfo)
if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" {
@@ -190,6 +212,74 @@ func testResourceSlices(driverName, poolName, nodeName string, poolGen, deviceCo
return result
}

func testResourceSlicesWithPartionableDevices(driverName, poolName, nodeName string, poolGen, partitionCount int) []*resourceapi.ResourceSlice {
sliceName := fmt.Sprintf("%s-%s-slice", driverName, poolName)
var devices []resourceapi.Device
for i := 0; i < partitionCount; i++ {
devices = append(
devices,
resourceapi.Device{
Name: fmt.Sprintf("gpu-0-partition-%d", i),
Capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{
"memory": {
Value: resource.MustParse("10Gi"),
},
},
ConsumesCounters: []resourceapi.DeviceCounterConsumption{
{
CounterSet: "gpu-0-counter-set",
Counters: map[string]resourceapi.Counter{
"memory": {
Value: resource.MustParse("10Gi"),
},
},
},
},
},
)
}
devices = append(devices,
resourceapi.Device{
Name: "gpu-0",
Capacity: map[resourceapi.QualifiedName]resourceapi.DeviceCapacity{
"memory": {
Value: resource.MustParse(fmt.Sprintf("%dGi", 10*partitionCount)),
},
},
ConsumesCounters: []resourceapi.DeviceCounterConsumption{
{
CounterSet: "gpu-0-counter-set",
Counters: map[string]resourceapi.Counter{
"memory": {
Value: resource.MustParse(fmt.Sprintf("%dGi", 10*partitionCount)),
},
},
},
},
},
)
resourceSlice := &resourceapi.ResourceSlice{
ObjectMeta: metav1.ObjectMeta{Name: sliceName, UID: types.UID(sliceName)},
Spec: resourceapi.ResourceSliceSpec{
Driver: driverName,
NodeName: &nodeName,
Pool: resourceapi.ResourcePool{Name: poolName, Generation: int64(poolGen), ResourceSliceCount: 1},
Devices: devices,
SharedCounters: []resourceapi.CounterSet{
{
Name: "gpu-0-counter-set",
Counters: map[string]resourceapi.Counter{
"memory": {
Value: resource.MustParse(fmt.Sprintf("%dGi", 10*partitionCount)),
},
},
},
},
},
}
return []*resourceapi.ResourceSlice{resourceSlice}
}
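
As a sanity check of the new test case above: with partitionCount = 4, the shared counter set gpu-0-counter-set exposes 40Gi of memory and each allocated partition consumes 10Gi, so allocating gpu-0-partition-0 and gpu-0-partition-1 consumes 20Gi; 20Gi / 40Gi = 0.5, matching the expected wantUtilization and wantHighestUtilization.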

func testPodsWithClaims(driverName, poolName, nodeName string, deviceCount, devicesPerPod int64) []*framework.PodInfo {
podCount := deviceCount / devicesPerPod

@@ -220,6 +310,39 @@ func testPodsWithClaims(driverName, poolName, nodeName string, deviceCount, devi
return result
}

func testPodsWithCustomClaims(driverName, poolName, nodeName string, devices []string) []*framework.PodInfo {
deviceIndex := 0
var result []*framework.PodInfo
pod := test.BuildTestPod(fmt.Sprintf("%s-%s-pod", driverName, poolName), 1, 1)
var claims []*resourceapi.ResourceClaim
var results []resourceapi.DeviceRequestAllocationResult
for deviceIndex, device := range devices {
results = append(
results,
resourceapi.DeviceRequestAllocationResult{
Request: fmt.Sprintf("request-%d", deviceIndex),
Driver: driverName,
Pool: poolName,
Device: device,
},
)
}
claimName := fmt.Sprintf("%s-claim", pod.Name)
claims = append(claims, &resourceapi.ResourceClaim{
ObjectMeta: metav1.ObjectMeta{Name: claimName, UID: types.UID(claimName)},
Status: resourceapi.ResourceClaimStatus{
Allocation: &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: results,
},
},
},
})
deviceIndex++
result = append(result, framework.NewPodInfo(pod, claims))
return result
}

func mergeLists[T any](sliceLists ...[]T) []T {
var result []T
for _, sliceList := range sliceLists {