
Commit 7ef4b90

Allow reclaiming with lower utilization ratio (#374)
1 parent 509e82f commit 7ef4b90

6 files changed: +604 additions, -44 deletions


docs/fairness/README.md

Lines changed: 16 additions & 3 deletions
@@ -18,12 +18,25 @@ These two steps are repeated across all hierarchy levels until every leaf queue
 ## Fair Share
 Once the fair share for each queue is calculated, it serves two primary purposes:
 1. Queue Order - Queues with a fair share further below their allocation will be prioritized for scheduling.
-2. Reclaim action - If scheduling cannot be performed due to limited resources in the cluster, the scheduler will evict workloads from queues that have exceeded their fair share, giving priority to queues that are below their fair share. For more details, refer to the reclaim strategies.
+2. Reclaim action - When reclamation is required, the scheduler compares the **Saturation Ratio** (`Allocated / FairShare`) of queues that share the same parent. A queue can only reclaim resources if, **after** the transfer, its utilization ratio remains lower than that of every sibling queue. For more details, see the reclaim strategies.
 
 ## Reclaim Strategies
 There are two main reclaim strategies:
 1. Workloads from queues with resources below their fair share can evict workloads from queues that have exceeded their fair share.
 2. Workloads from queues under their quota can evict workloads from queues that have exceeded their quota.
 
-In both strategies, the scheduler ensures that the initial state remains unchanged after resource reclamation. Specifically, a queue below its fair share will not exceed that share after reclamation, and a queue below its quota will not exceed the quota.
-The scheduler will prioritize the first strategy.
+In both strategies, the scheduler ensures that the relative ordering is preserved: a queue that had the lowest utilization ratio in its level before reclamation will still have the lowest ratio afterwards. Likewise, a queue that was below its quota will remain below its quota.
+The scheduler will prioritize the first strategy.
+
+### Reclaim Ratio Adjustment
+The Saturation Ratio comparison can be adjusted using the `reclaimerUtilizationMultiplier` plugin argument. This multiplier is applied to the reclaimer's Saturation Ratio before comparison:
+- Values > 1.0 make it harder for jobs to reclaim resources (more conservative)
+- Minimum value is 1.0 (standard comparison, default)
+- Values < 1.0 are not allowed and will be set to 1.0, since they could cause infinite reclaim cycles.
+
+Example configuration:
+```yaml
+pluginArguments:
+  proportion:
+    reclaimerUtilizationMultiplier: "1.2" # Makes reclamation 20% more conservative
+```
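
To make the documented comparison concrete, here is a minimal, self-contained Go sketch of the saturation-ratio check with the multiplier applied to the reclaimer's side. The helper names (`saturationRatio`, `canReclaim`) and the simplification of comparing the reclaimer against only the single sibling it reclaims from are illustrative assumptions; the actual check lives in the reclaimable sub-plugin, whose internals are not part of this diff.

```go
package main

import "fmt"

// saturationRatio is a hypothetical helper: Allocated / FairShare for a queue.
func saturationRatio(allocated, fairShare float64) float64 {
	if fairShare == 0 {
		return 0 // no fair share: treat as fully unsaturated (assumption)
	}
	return allocated / fairShare
}

// canReclaim sketches the comparison described above: the reclaimer's ratio,
// recomputed as if the requested resource had already moved and scaled by the
// multiplier, must stay below the reclaimee's post-transfer ratio.
func canReclaim(reclaimerAlloc, reclaimerFair, reclaimeeAlloc, reclaimeeFair, request, multiplier float64) bool {
	reclaimerAfter := saturationRatio(reclaimerAlloc+request, reclaimerFair) * multiplier
	reclaimeeAfter := saturationRatio(reclaimeeAlloc-request, reclaimeeFair)
	return reclaimerAfter < reclaimeeAfter
}

func main() {
	// Two sibling queues with a fair share of 4 GPUs each: one holds 1 GPU, the other 7.
	fmt.Println(canReclaim(1, 4, 7, 4, 1, 1.0)) // true: 0.5 < 1.5
	fmt.Println(canReclaim(1, 4, 7, 4, 1, 1.2)) // true, but more conservative: 0.6 < 1.5
	fmt.Println(canReclaim(3, 4, 5, 4, 1, 1.0)) // false: both ratios would be 1.0 after the transfer
}
```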

pkg/scheduler/actions/reclaim/reclaimDepartments_test.go

Lines changed: 296 additions & 0 deletions
@@ -765,5 +765,301 @@ func getTestsDepartmentsMetadata() []integration_tests_utils.TestTopologyMetadat
 				},
 			},
 		},
+		{
+			TestTopologyBasic: test_utils.TestTopologyBasic{
+				Name: "Reclaim from overquota department with multiple departments",
+				Jobs: []*jobs_fake.TestJobBasic{
+					{
+						Name: "d1_p1_pending_job",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityInferenceNumber,
+						QueueName: "d1_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Pending,
+							},
+						},
+					},
+					{
+						Name: "d1_p2_job1",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d1_project2",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d1_p2_job2",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d1_project2",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d1_p2_job3",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d1_project2",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d1_p2_job4",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d1_project2",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d2_job1",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d2_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d2_job2",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d2_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d2_job3",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d2_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d2_job4",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d2_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d2_job5",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d2_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d2_job6",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d2_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d2_job7",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d2_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d2_job8",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d2_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d3_job1",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d3_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d3_job2",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d3_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d3_job3",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d3_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d3_job4",
+						RequiredGPUsPerTask: 1,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d3_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Running,
+								NodeName: "node0",
+							},
+						},
+					},
+					{
+						Name: "d4_job1",
+						RequiredGPUsPerTask: 5,
+						Priority: constants.PriorityTrainNumber,
+						QueueName: "d4_project1",
+						Tasks: []*tasks_fake.TestTaskBasic{
+							{
+								State: pod_status.Pending,
+								NodeName: "node0",
+							},
+						},
+					},
+				},
+				Nodes: map[string]nodes_fake.TestNodeBasic{
+					"node0": {
+						GPUs: 16,
+					},
+				},
+				Queues: []test_utils.TestQueueBasic{
+					{
+						Name: "d1_project1",
+						DeservedGPUs: 3,
+						GPUOverQuotaWeight: 3,
+						ParentQueue: "d1",
+					},
+					{
+						Name: "d1_project2",
+						DeservedGPUs: 1,
+						GPUOverQuotaWeight: 1,
+						ParentQueue: "d1",
+					},
+					{
+						Name: "d2_project1",
+						DeservedGPUs: 4,
+						GPUOverQuotaWeight: 4,
+						ParentQueue: "d2",
+					},
+					{
+						Name: "d3_project1",
+						DeservedGPUs: 4,
+						GPUOverQuotaWeight: 4,
+						ParentQueue: "d3",
+					},
+					{
+						Name: "d4_project1",
+						DeservedGPUs: 4,
+						GPUOverQuotaWeight: 4,
+						ParentQueue: "d4",
+					},
+				},
+				Departments: []test_utils.TestDepartmentBasic{
+					{
+						Name: "d1",
+						DeservedGPUs: 4,
+					},
+					{
+						Name: "d2",
+						DeservedGPUs: 4,
+					},
+					{
+						Name: "d3",
+						DeservedGPUs: 4,
+					},
+					{
+						Name: "d4",
+						DeservedGPUs: 4,
+					},
+				},
+				JobExpectedResults: map[string]test_utils.TestExpectedResultBasic{
+					"d1_p1_pending_job": {
+						NodeName: "node0",
+						GPUsRequired: 1,
+						Status: pod_status.Pipelined,
+					},
+					"d2_job8": {
+						NodeName: "node0",
+						GPUsRequired: 1,
+						Status: pod_status.Releasing,
+					},
+				},
+				Mocks: &test_utils.TestMock{
+					CacheRequirements: &test_utils.CacheMocking{
+						NumberOfCacheBinds: 1,
+						NumberOfCacheEvictions: 1,
+						NumberOfPipelineActions: 1,
+					},
+				},
+			},
+		},
 	}
 }
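
For orientation, here is a rough back-of-the-envelope for this test case, under the simplifying assumption that each department's fair share equals its 4 deserved GPUs (the real fair-share calculation also considers requested quota and over-quota weights, so treat this only as an illustration of why the expected results name `d2_job8`):

```go
package main

import "fmt"

func main() {
	// Running GPUs per department in the test topology above (node0 has 16 GPUs).
	deserved := 4.0
	allocated := map[string]float64{"d1": 4, "d2": 8, "d3": 4, "d4": 0}

	for _, d := range []string{"d1", "d2", "d3", "d4"} {
		fmt.Printf("%s saturation before: %.2f\n", d, allocated[d]/deserved)
	}

	// d2 is the department furthest over its deserved share (8/4 = 2.00), so the
	// 1-GPU reclaim for d1_p1_pending_job is expected to release a d2 task,
	// matching the test's expectation that d2_job8 moves to Releasing.
	fmt.Printf("d1 after: %.2f, d2 after: %.2f\n",
		(allocated["d1"]+1)/deserved, (allocated["d2"]-1)/deserved)
}
```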

pkg/scheduler/plugins/proportion/proportion.go

Lines changed: 26 additions & 10 deletions
@@ -21,6 +21,7 @@ package proportion
 
 import (
 	"math"
+	"strconv"
 
 	commonconstants "github.com/NVIDIA/KAI-scheduler/pkg/common/constants"
 	"github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api"
@@ -54,19 +55,34 @@ type proportionPlugin struct {
 	queues              map[common_info.QueueID]*rs.QueueAttributes
 	jobSimulationQueues map[common_info.QueueID]*rs.QueueAttributes
 	// Arguments given for the plugin
-	pluginArguments           map[string]string
-	subGroupOrderFn           common_info.LessFn
-	taskOrderFunc             common_info.LessFn
-	reclaimablePlugin         *rec.Reclaimable
-	isInferencePreemptible    bool
-	allowConsolidatingReclaim bool
+	pluginArguments               map[string]string
+	subGroupOrderFn               common_info.LessFn
+	taskOrderFunc                 common_info.LessFn
+	reclaimablePlugin             *rec.Reclaimable
+	isInferencePreemptible        bool
+	allowConsolidatingReclaim     bool
+	relcaimerSaturationMultiplier float64
 }
 
 func New(arguments map[string]string) framework.Plugin {
+	multiplier := 1.0
+	if val, exists := arguments["relcaimerSaturationMultiplier"]; exists {
+		if m, err := strconv.ParseFloat(val, 64); err == nil {
+			if m < 1.0 {
+				log.InfraLogger.Warningf("relcaimerSaturationMultiplier must be >= 1.0, got %v. Using default value of 1.0", m)
+			} else {
+				multiplier = m
+			}
+		} else {
+			log.InfraLogger.V(1).Errorf("Failed to parse relcaimerSaturationMultiplier: %s. Using default 1.", val)
+		}
+	}
+
 	return &proportionPlugin{
-		totalResource:   rs.EmptyResourceQuantities(),
-		queues:          map[common_info.QueueID]*rs.QueueAttributes{},
-		pluginArguments: arguments,
+		totalResource:                 rs.EmptyResourceQuantities(),
+		queues:                        map[common_info.QueueID]*rs.QueueAttributes{},
+		pluginArguments:               arguments,
+		relcaimerSaturationMultiplier: multiplier,
 	}
 }
 
@@ -78,7 +94,7 @@ func (pp *proportionPlugin) OnSessionOpen(ssn *framework.Session) {
 	pp.calculateResourcesProportion(ssn)
 	pp.subGroupOrderFn = ssn.SubGroupOrderFn
 	pp.taskOrderFunc = ssn.TaskOrderFn
-	pp.reclaimablePlugin = rec.New(ssn.IsInferencePreemptible())
+	pp.reclaimablePlugin = rec.New(pp.relcaimerSaturationMultiplier)
 	pp.isInferencePreemptible = ssn.IsInferencePreemptible()
 	capacityPolicy := cp.New(pp.queues, ssn.IsInferencePreemptible())
 	ssn.AddQueueOrderFn(pp.queueOrder)
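
The snippet below is a standalone sketch of the same parsing-and-fallback behavior that `New()` applies to the plugin argument, using the argument key exactly as it appears in the code above. `parseMultiplier` is a hypothetical helper written only for illustration, not part of the plugin's exported API, and it omits the logging that the real code performs.

```go
package main

import (
	"fmt"
	"strconv"
)

// parseMultiplier mirrors the fallback behavior sketched in New() above:
// unparsable values and values below 1.0 both fall back to the default of 1.0.
func parseMultiplier(args map[string]string) float64 {
	multiplier := 1.0
	if val, ok := args["relcaimerSaturationMultiplier"]; ok {
		if m, err := strconv.ParseFloat(val, 64); err == nil && m >= 1.0 {
			multiplier = m
		}
	}
	return multiplier
}

func main() {
	fmt.Println(parseMultiplier(map[string]string{"relcaimerSaturationMultiplier": "1.2"}))  // used as-is
	fmt.Println(parseMultiplier(map[string]string{"relcaimerSaturationMultiplier": "0.5"}))  // below 1.0: falls back to the default
	fmt.Println(parseMultiplier(map[string]string{"relcaimerSaturationMultiplier": "oops"})) // parse error: falls back to the default
	fmt.Println(parseMultiplier(map[string]string{}))                                        // not set: stays at the default
}
```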
