
Commit 83a5be7

Added allocation test with subgroups e2e test (#434)
1 parent 589a371 commit 83a5be7
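
The commit adds SubGroups coverage at two levels: a fake-topology allocation test in the scheduler package and a Ginkgo e2e suite. The sketch below condenses the pattern both tests set up. It reuses only helpers and types that appear in the diffs further down (pod_group.Create, schedulingv2alpha2.SubGroup, commonconsts.SubGroupLabelKey) and assumes the same imports as the new e2e file; the wrapper function itself is hypothetical and not part of this commit.

// newSubGroupWorkload is a hypothetical wrapper over the e2e helpers shown below.
// It splits a PodGroup into two subgroups, each with its own gang minimum, and ties
// a pod to one of them via a label.
func newSubGroupWorkload(namespace, pgName, queueName string, pod *v1.Pod) *schedulingv2alpha2.PodGroup {
    // pod_group.Create is assumed to return *schedulingv2alpha2.PodGroup, as its use in the e2e diff suggests.
    podGroup := pod_group.Create(namespace, pgName, queueName)
    podGroup.Spec.MinMember = 6 // gang minimum for the PodGroup as a whole
    podGroup.Spec.SubGroups = []schedulingv2alpha2.SubGroup{
        {Name: "sub-1", MinMember: 3}, // each subgroup carries its own minimum
        {Name: "sub-2", MinMember: 3},
    }
    if pod.Labels == nil {
        pod.Labels = map[string]string{}
    }
    pod.Labels[commonconsts.SubGroupLabelKey] = "sub-1" // subgroup membership is declared per pod
    return podGroup
}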

File tree

3 files changed: +332 -9 lines changed


pkg/scheduler/actions/allocate/allocate_subgroups_test.go

Lines changed: 135 additions & 8 deletions
@@ -4,20 +4,22 @@
 package allocate_test
 
 import (
-    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/pod_status"
-    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/podgroup_info"
-    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/constants"
-    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/test_utils/jobs_fake"
-    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/test_utils/nodes_fake"
-    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/test_utils/tasks_fake"
+    "testing"
+
     "k8s.io/utils/pointer"
     "k8s.io/utils/ptr"
-    "testing"
+
+    . "go.uber.org/mock/gomock"
 
     "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/actions/allocate"
     "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/actions/integration_tests/integration_tests_utils"
+    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/pod_status"
+    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/api/podgroup_info"
+    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/constants"
     "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/test_utils"
-    . "go.uber.org/mock/gomock"
+    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/test_utils/jobs_fake"
+    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/test_utils/nodes_fake"
+    "github.com/NVIDIA/KAI-scheduler/pkg/scheduler/test_utils/tasks_fake"
 )
 
 func TestHandleSubGroupsAllocation(t *testing.T) {
@@ -533,5 +535,130 @@ func getAllocationSubGroupsTestsMetadata() []integration_tests_utils.TestTopolog
                 },
             },
         },
+        {
+            TestTopologyBasic: test_utils.TestTopologyBasic{
+                Name: "Allocate multiple jobs with SubGroups",
+                Jobs: []*jobs_fake.TestJobBasic{
+                    {
+                        Name: "pending_job0",
+                        QueueName: "queue0",
+                        Priority: constants.PriorityTrainNumber,
+                        SubGroups: map[string]*podgroup_info.SubGroupInfo{
+                            "sub0": podgroup_info.NewSubGroupInfo("sub0", 1),
+                            "sub1": podgroup_info.NewSubGroupInfo("sub1", 1),
+                        },
+                        Tasks: []*tasks_fake.TestTaskBasic{
+                            {
+                                State: pod_status.Pending,
+                                SubGroupName: "sub0",
+                                RequiredGPUs: ptr.To(int64(1)),
+                            },
+                            {
+                                State: pod_status.Pending,
+                                SubGroupName: "sub0",
+                                RequiredGPUs: ptr.To(int64(1)),
+                            },
+                            {
+                                State: pod_status.Pending,
+                                SubGroupName: "sub1",
+                                RequiredGPUs: ptr.To(int64(1)),
+                            },
+                            {
+                                State: pod_status.Pending,
+                                SubGroupName: "sub1",
+                                RequiredGPUs: ptr.To(int64(1)),
+                            },
+                        },
+                        MinAvailable: pointer.Int32(2),
+                    },
+                    {
+                        Name: "pending_job1",
+                        QueueName: "queue0",
+                        Priority: constants.PriorityTrainNumber,
+                        SubGroups: map[string]*podgroup_info.SubGroupInfo{
+                            "sub0": podgroup_info.NewSubGroupInfo("sub0", 1),
+                            "sub1": podgroup_info.NewSubGroupInfo("sub1", 1),
+                        },
+                        Tasks: []*tasks_fake.TestTaskBasic{
+                            {
+                                State: pod_status.Pending,
+                                SubGroupName: "sub0",
+                                RequiredGPUs: ptr.To(int64(1)),
+                            },
+                            {
+                                State: pod_status.Pending,
+                                SubGroupName: "sub0",
+                                RequiredGPUs: ptr.To(int64(1)),
+                            },
+                            {
+                                State: pod_status.Pending,
+                                SubGroupName: "sub1",
+                                RequiredGPUs: ptr.To(int64(1)),
+                            },
+                            {
+                                State: pod_status.Pending,
+                                SubGroupName: "sub1",
+                                RequiredGPUs: ptr.To(int64(1)),
+                            },
+                        },
+                        MinAvailable: pointer.Int32(2),
+                    },
+                },
+                Nodes: map[string]nodes_fake.TestNodeBasic{
+                    "node0": {
+                        GPUs: 4,
+                    },
+                },
+                Queues: []test_utils.TestQueueBasic{
+                    {
+                        Name: "queue0",
+                        DeservedGPUs: 1,
+                    },
+                },
+                Mocks: &test_utils.TestMock{
+                    CacheRequirements: &test_utils.CacheMocking{
+                        NumberOfCacheBinds: 4,
+                    },
+                },
+                TaskExpectedResults: map[string]test_utils.TestExpectedResultBasic{
+                    "pending_job0-0": {
+                        GPUsRequired: 1,
+                        Status: pod_status.Binding,
+                        NodeName: "node0",
+                    },
+                    "pending_job0-1": {
+                        GPUsRequired: 1,
+                        Status: pod_status.Pending,
+                    },
+                    "pending_job0-2": {
+                        GPUsRequired: 1,
+                        Status: pod_status.Binding,
+                        NodeName: "node0",
+                    },
+                    "pending_job0-3": {
+                        GPUsRequired: 1,
+                        Status: pod_status.Pending,
+                    },
+                    "pending_job1-0": {
+                        GPUsRequired: 1,
+                        Status: pod_status.Binding,
+                        NodeName: "node0",
+                    },
+                    "pending_job1-1": {
+                        GPUsRequired: 1,
+                        Status: pod_status.Pending,
+                    },
+                    "pending_job1-2": {
+                        GPUsRequired: 1,
+                        Status: pod_status.Binding,
+                        NodeName: "node0",
+                    },
+                    "pending_job1-3": {
+                        GPUsRequired: 1,
+                        Status: pod_status.Pending,
+                    },
+                },
+            },
+        },
     }
 }
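
The new table entry packs two elastic jobs of four 1-GPU tasks each onto a single 4-GPU node. Each job declares MinAvailable: 2 and two subgroups with a minimum of one task each, so the allocator can satisfy both gangs only by binding one task per subgroup per job: four binds in total (hence NumberOfCacheBinds: 4), with the remaining four tasks left Pending, exactly as TaskExpectedResults asserts. The helper below is a condensed sketch of one such job, not code from the commit; it reuses only the fake-topology types imported at the top of this test file.

// newTwoSubGroupJob is a hypothetical helper mirroring one job from the
// "Allocate multiple jobs with SubGroups" case above.
func newTwoSubGroupJob(name string) *jobs_fake.TestJobBasic {
    return &jobs_fake.TestJobBasic{
        Name:      name,
        QueueName: "queue0",
        Priority:  constants.PriorityTrainNumber,
        SubGroups: map[string]*podgroup_info.SubGroupInfo{
            "sub0": podgroup_info.NewSubGroupInfo("sub0", 1), // minimum of one task per subgroup
            "sub1": podgroup_info.NewSubGroupInfo("sub1", 1),
        },
        Tasks: []*tasks_fake.TestTaskBasic{
            {State: pod_status.Pending, SubGroupName: "sub0", RequiredGPUs: ptr.To(int64(1))},
            {State: pod_status.Pending, SubGroupName: "sub0", RequiredGPUs: ptr.To(int64(1))},
            {State: pod_status.Pending, SubGroupName: "sub1", RequiredGPUs: ptr.To(int64(1))},
            {State: pod_status.Pending, SubGroupName: "sub1", RequiredGPUs: ptr.To(int64(1))},
        },
        // With two such jobs sharing a 4-GPU node, one task per subgroup per job is bound
        // and the other task in each subgroup stays Pending.
        MinAvailable: pointer.Int32(2),
    }
}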

test/e2e/suites/allocate/elastic/allocate_suite_test.go

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@ import (
     . "github.com/onsi/gomega"
 )
 
-func TestPriority(t *testing.T) {
+func TestAllocate(t *testing.T) {
     utils.SetLogger()
     RegisterFailHandler(Fail)
     RunSpecs(t, "Elastic allocation Suite")
Lines changed: 196 additions & 0 deletions
@@ -0,0 +1,196 @@
+/*
+Copyright 2025 NVIDIA CORPORATION
+SPDX-License-Identifier: Apache-2.0
+*/
+package subgroups
+
+import (
+    "context"
+    "testing"
+
+    v1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/api/resource"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "k8s.io/client-go/kubernetes"
+
+    v2 "github.com/NVIDIA/KAI-scheduler/pkg/apis/scheduling/v2"
+    schedulingv2alpha2 "github.com/NVIDIA/KAI-scheduler/pkg/apis/scheduling/v2alpha2"
+    commonconsts "github.com/NVIDIA/KAI-scheduler/pkg/common/constants"
+    testcontext "github.com/NVIDIA/KAI-scheduler/test/e2e/modules/context"
+    "github.com/NVIDIA/KAI-scheduler/test/e2e/modules/resources/capacity"
+    "github.com/NVIDIA/KAI-scheduler/test/e2e/modules/resources/rd"
+    "github.com/NVIDIA/KAI-scheduler/test/e2e/modules/resources/rd/pod_group"
+    "github.com/NVIDIA/KAI-scheduler/test/e2e/modules/resources/rd/queue"
+    "github.com/NVIDIA/KAI-scheduler/test/e2e/modules/utils"
+    "github.com/NVIDIA/KAI-scheduler/test/e2e/modules/wait"
+
+    . "github.com/onsi/ginkgo/v2"
+    . "github.com/onsi/gomega"
+)
+
+func TestSubGroups(t *testing.T) {
+    utils.SetLogger()
+    RegisterFailHandler(Fail)
+    RunSpecs(t, "SubGroups Allocation Suite")
+}
+
+var _ = Describe("Allocation scenario with subgroups", Ordered, func() {
+    var (
+        testCtx *testcontext.TestContext
+    )
+
+    BeforeAll(func(ctx context.Context) {
+        testCtx = testcontext.GetConnectivity(ctx, Default)
+
+        parentQueue := queue.CreateQueueObject(utils.GenerateRandomK8sName(10), "")
+        childQueue := queue.CreateQueueObject(utils.GenerateRandomK8sName(10), parentQueue.Name)
+        childQueue.Spec.Resources.CPU.Quota = 600
+        childQueue.Spec.Resources.CPU.Limit = 600
+        testCtx.InitQueues([]*v2.Queue{childQueue, parentQueue})
+
+        capacity.SkipIfInsufficientClusterTopologyResources(testCtx.KubeClientset, []capacity.ResourceList{
+            {
+                Cpu: resource.MustParse("600m"),
+                PodCount: 6,
+            },
+        })
+    })
+
+    AfterAll(func(ctx context.Context) {
+        err := rd.DeleteAllE2EPriorityClasses(ctx, testCtx.ControllerClient)
+        Expect(err).To(Succeed())
+        testCtx.ClusterCleanup(ctx)
+    })
+
+    AfterEach(func(ctx context.Context) {
+        testCtx.TestContextCleanup(ctx)
+    })
+
+    It("Partial allocation", func(ctx context.Context) {
+        pgName := utils.GenerateRandomK8sName(10)
+        subGroup1Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pgName, "sub-1", 5)
+        subGroup2Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pgName, "sub-2", 5)
+
+        namespace := queue.GetConnectedNamespaceToQueue(testCtx.Queues[0])
+        podGroup := pod_group.Create(namespace, pgName, testCtx.Queues[0].Name)
+        podGroup.Spec.MinMember = 6
+        podGroup.Spec.SubGroups = []schedulingv2alpha2.SubGroup{
+            {Name: "sub-1", MinMember: 3},
+            {Name: "sub-2", MinMember: 3},
+        }
+        _, err := testCtx.KubeAiSchedClientset.SchedulingV2alpha2().PodGroups(namespace).Create(ctx,
+            podGroup, metav1.CreateOptions{})
+        Expect(err).To(Succeed())
+
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, podGroup.Namespace, subGroup1Pods, 3)
+        wait.ForAtLeastNPodsUnschedulable(ctx, testCtx.ControllerClient, podGroup.Namespace, subGroup1Pods, 2)
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, podGroup.Namespace, subGroup2Pods, 3)
+        wait.ForAtLeastNPodsUnschedulable(ctx, testCtx.ControllerClient, podGroup.Namespace, subGroup2Pods, 2)
+    })
+
+    It("Balance 2 jobs with subgroups", func(ctx context.Context) {
+        namespace := queue.GetConnectedNamespaceToQueue(testCtx.Queues[0])
+
+        pg1Name := utils.GenerateRandomK8sName(10)
+        pg1SubGroup1Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pg1Name, "sub-1", 3)
+        pg1SubGroup2Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pg1Name, "sub-2", 3)
+        pg1 := pod_group.Create(namespace, pg1Name, testCtx.Queues[0].Name)
+        pg1.Spec.MinMember = 2
+        pg1.Spec.SubGroups = []schedulingv2alpha2.SubGroup{
+            {Name: "sub-1", MinMember: 1},
+            {Name: "sub-2", MinMember: 1},
+        }
+
+        pg2Name := utils.GenerateRandomK8sName(10)
+        pg2SubGroup1Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pg2Name, "sub-1", 3)
+        pg2SubGroup2Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pg2Name, "sub-2", 3)
+        pg2 := pod_group.Create(namespace, pg2Name, testCtx.Queues[0].Name)
+        pg2.Spec.MinMember = 2
+        pg2.Spec.SubGroups = []schedulingv2alpha2.SubGroup{
+            {Name: "sub-1", MinMember: 1},
+            {Name: "sub-2", MinMember: 1},
+        }
+
+        _, err := testCtx.KubeAiSchedClientset.SchedulingV2alpha2().PodGroups(namespace).Create(ctx,
+            pg1, metav1.CreateOptions{})
+        Expect(err).To(Succeed())
+        _, err = testCtx.KubeAiSchedClientset.SchedulingV2alpha2().PodGroups(namespace).Create(ctx,
+            pg2, metav1.CreateOptions{})
+        Expect(err).To(Succeed())
+
+        pg1Pods := append(pg1SubGroup1Pods, pg1SubGroup2Pods...)
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, namespace, pg1SubGroup1Pods, 1)
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, namespace, pg1SubGroup2Pods, 1)
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, namespace, pg1Pods, 3)
+        wait.ForAtLeastNPodsUnschedulable(ctx, testCtx.ControllerClient, namespace, pg1Pods, 3)
+
+        pg2Pods := append(pg2SubGroup1Pods, pg2SubGroup2Pods...)
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, namespace, pg2SubGroup1Pods, 1)
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, namespace, pg2SubGroup2Pods, 1)
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, namespace, pg2Pods, 3)
+        wait.ForAtLeastNPodsUnschedulable(ctx, testCtx.ControllerClient, namespace, pg2Pods, 3)
+    })
+
+    It("Don't schedule job if subgroup gang is not satisfied", func(ctx context.Context) {
+        namespace := queue.GetConnectedNamespaceToQueue(testCtx.Queues[0])
+
+        pg1Name := utils.GenerateRandomK8sName(10)
+        pg1SubGroup1Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pg1Name, "sub-1", 3)
+        pg1SubGroup2Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pg1Name, "sub-2", 3)
+        pg1 := pod_group.Create(namespace, pg1Name, testCtx.Queues[0].Name)
+        pg1.Spec.MinMember = 4
+        pg1.Spec.SubGroups = []schedulingv2alpha2.SubGroup{
+            {Name: "sub-1", MinMember: 2},
+            {Name: "sub-2", MinMember: 2},
+        }
+        _, err := testCtx.KubeAiSchedClientset.SchedulingV2alpha2().PodGroups(namespace).Create(ctx,
+            pg1, metav1.CreateOptions{})
+        Expect(err).To(Succeed())
+
+        // wait until pg1 is scheduled to ensure that it will be the one running at the end of the test
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, namespace, pg1SubGroup1Pods, 1)
+
+        pg2Name := utils.GenerateRandomK8sName(10)
+        pg2SubGroup1Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pg2Name, "sub-1", 3)
+        pg2SubGroup2Pods := createSubGroupPods(ctx, testCtx.KubeClientset, testCtx.Queues[0], pg2Name, "sub-2", 3)
+        pg2 := pod_group.Create(namespace, pg2Name, testCtx.Queues[0].Name)
+        pg2.Spec.MinMember = 4
+        pg2.Spec.SubGroups = []schedulingv2alpha2.SubGroup{
+            {Name: "sub-1", MinMember: 2},
+            {Name: "sub-2", MinMember: 2},
+        }
+
+        _, err = testCtx.KubeAiSchedClientset.SchedulingV2alpha2().PodGroups(namespace).Create(ctx,
+            pg2, metav1.CreateOptions{})
+        Expect(err).To(Succeed())
+
+        pg1Pods := append(pg1SubGroup1Pods, pg1SubGroup2Pods...)
+        wait.ForAtLeastNPodsScheduled(ctx, testCtx.ControllerClient, namespace, pg1Pods, 6)
+
+        pg2Pods := append(pg2SubGroup1Pods, pg2SubGroup2Pods...)
+        wait.ForAtLeastNPodsUnschedulable(ctx, testCtx.ControllerClient, namespace, pg2Pods, 6)
+    })
+})
+
+func createSubGroupPods(ctx context.Context, client *kubernetes.Clientset, queue *v2.Queue,
+    podGroupName string, subGroupName string, numPods int) []*v1.Pod {
+    var pods []*v1.Pod
+    for i := 0; i < numPods; i++ {
+        pod := createPod(ctx, client, queue, podGroupName, subGroupName, "100m")
+        pods = append(pods, pod)
+    }
+    return pods
+}
+
+func createPod(ctx context.Context, client *kubernetes.Clientset, queue *v2.Queue, podGroupName string,
+    subGroupName string, cpuPerPod string) *v1.Pod {
+    pod := rd.CreatePodWithPodGroupReference(queue, podGroupName, v1.ResourceRequirements{
+        Limits: map[v1.ResourceName]resource.Quantity{
+            v1.ResourceCPU: resource.MustParse(cpuPerPod),
+        },
+    })
+    pod.Labels[commonconsts.SubGroupLabelKey] = subGroupName
+    pod, err := rd.CreatePod(ctx, client, pod)
+    Expect(err).To(Succeed())
+    return pod
+}
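
One invariant holds across all three scenarios above: the PodGroup's MinMember equals the sum of its subgroups' MinMember values (6 = 3 + 3, 2 = 1 + 1, 4 = 2 + 2), and the last spec shows the gang is admitted only when every subgroup can reach its own minimum. Below is a hypothetical check of that relationship, assuming both MinMember fields are int32; the field types are not visible in this diff.

// subGroupMinSum sums the per-subgroup gang minimums of a PodGroup; in the
// suites above this sum always equals pg.Spec.MinMember.
func subGroupMinSum(pg *schedulingv2alpha2.PodGroup) int32 {
    var sum int32
    for _, sg := range pg.Spec.SubGroups {
        sum += sg.MinMember // assumed int32, matching Spec.MinMember
    }
    return sum
}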
