@@ -3710,5 +3710,344 @@ func getTestsMetadata() []integration_tests_utils.TestTopologyMetadata {
37103710 },
37113711 },
37123712 },
3713+ {
3714+ // In some cases, when multiple tasks and nodes are involved, tasks get re-written to the podgroupinfo object
3715+ // in the session. This test ensures that those re-writes do not cause any issues.
3716+ TestTopologyBasic : test_utils.TestTopologyBasic {
3717+ Name : "queue0 is in deserved share, queue1 is under fair share - do not reclaim - multiple tasks" ,
3718+ Jobs : []* jobs_fake.TestJobBasic {
3719+ {
3720+ Name : "q0_n0_job0" ,
3721+ RequiredGPUsPerTask : 1 ,
3722+ Priority : constants .PriorityTrainNumber ,
3723+ QueueName : "queue0" ,
3724+ Tasks : []* tasks_fake.TestTaskBasic {
3725+ {
3726+ NodeName : "node0" ,
3727+ State : pod_status .Running ,
3728+ },
3729+ },
3730+ }, {
3731+ Name : "q0_n0_job1" ,
3732+ RequiredGPUsPerTask : 1 ,
3733+ Priority : constants .PriorityTrainNumber ,
3734+ QueueName : "queue0" ,
3735+ Tasks : []* tasks_fake.TestTaskBasic {
3736+ {
3737+ NodeName : "node0" ,
3738+ State : pod_status .Running ,
3739+ },
3740+ },
3741+ },
3742+ {
3743+ Name : "q0_n0_job2" ,
3744+ RequiredGPUsPerTask : 1 ,
3745+ Priority : constants .PriorityTrainNumber ,
3746+ QueueName : "queue0" ,
3747+ Tasks : []* tasks_fake.TestTaskBasic {
3748+ {
3749+ NodeName : "node0" ,
3750+ State : pod_status .Running ,
3751+ },
3752+ },
3753+ },
3754+ {
3755+ Name : "q0_n0_job3" ,
3756+ RequiredGPUsPerTask : 1 ,
3757+ Priority : constants .PriorityTrainNumber ,
3758+ QueueName : "queue0" ,
3759+ Tasks : []* tasks_fake.TestTaskBasic {
3760+ {
3761+ NodeName : "node0" ,
3762+ State : pod_status .Running ,
3763+ },
3764+ },
3765+ },
3766+ {
3767+ Name : "q0_n1_job0" ,
3768+ RequiredGPUsPerTask : 1 ,
3769+ Priority : constants .PriorityTrainNumber ,
3770+ QueueName : "queue0" ,
3771+ Tasks : []* tasks_fake.TestTaskBasic {
3772+ {
3773+ NodeName : "node1" ,
3774+ State : pod_status .Running ,
3775+ },
3776+ },
3777+ }, {
3778+ Name : "q0_n1_job1" ,
3779+ RequiredGPUsPerTask : 1 ,
3780+ Priority : constants .PriorityTrainNumber ,
3781+ QueueName : "queue0" ,
3782+ Tasks : []* tasks_fake.TestTaskBasic {
3783+ {
3784+ NodeName : "node1" ,
3785+ State : pod_status .Running ,
3786+ },
3787+ },
3788+ },
3789+ {
3790+ Name : "q0_n1_job2" ,
3791+ RequiredGPUsPerTask : 1 ,
3792+ Priority : constants .PriorityTrainNumber ,
3793+ QueueName : "queue0" ,
3794+ Tasks : []* tasks_fake.TestTaskBasic {
3795+ {
3796+ NodeName : "node1" ,
3797+ State : pod_status .Running ,
3798+ },
3799+ },
3800+ },
3801+ {
3802+ Name : "q0_n1_job3" ,
3803+ RequiredGPUsPerTask : 1 ,
3804+ Priority : constants .PriorityTrainNumber ,
3805+ QueueName : "queue0" ,
3806+ Tasks : []* tasks_fake.TestTaskBasic {
3807+ {
3808+ NodeName : "node1" ,
3809+ State : pod_status .Running ,
3810+ },
3811+ },
3812+ }, {
3813+ Name : "q0_n2_job0" ,
3814+ RequiredGPUsPerTask : 1 ,
3815+ Priority : constants .PriorityTrainNumber ,
3816+ QueueName : "queue0" ,
3817+ Tasks : []* tasks_fake.TestTaskBasic {
3818+ {
3819+ NodeName : "node2" ,
3820+ State : pod_status .Running ,
3821+ },
3822+ },
3823+ }, {
3824+ Name : "q0_n2_job1" ,
3825+ RequiredGPUsPerTask : 1 ,
3826+ Priority : constants .PriorityTrainNumber ,
3827+ QueueName : "queue0" ,
3828+ Tasks : []* tasks_fake.TestTaskBasic {
3829+ {
3830+ NodeName : "node2" ,
3831+ State : pod_status .Running ,
3832+ },
3833+ },
3834+ },
3835+ {
3836+ Name : "q0_n2_job2" ,
3837+ RequiredGPUsPerTask : 1 ,
3838+ Priority : constants .PriorityTrainNumber ,
3839+ QueueName : "queue0" ,
3840+ Tasks : []* tasks_fake.TestTaskBasic {
3841+ {
3842+ NodeName : "node2" ,
3843+ State : pod_status .Running ,
3844+ },
3845+ },
3846+ },
3847+ {
3848+ Name : "q0_n2_job3" ,
3849+ RequiredGPUsPerTask : 1 ,
3850+ Priority : constants .PriorityTrainNumber ,
3851+ QueueName : "queue0" ,
3852+ Tasks : []* tasks_fake.TestTaskBasic {
3853+ {
3854+ NodeName : "node2" ,
3855+ State : pod_status .Running ,
3856+ },
3857+ },
3858+ },
3859+ {
3860+ Name : "q0_n3_job0" ,
3861+ RequiredGPUsPerTask : 1 ,
3862+ Priority : constants .PriorityTrainNumber ,
3863+ QueueName : "queue0" ,
3864+ Tasks : []* tasks_fake.TestTaskBasic {
3865+ {
3866+ NodeName : "node3" ,
3867+ State : pod_status .Running ,
3868+ },
3869+ },
3870+ }, {
3871+ Name : "q0_n3_job1" ,
3872+ RequiredGPUsPerTask : 1 ,
3873+ Priority : constants .PriorityTrainNumber ,
3874+ QueueName : "queue0" ,
3875+ Tasks : []* tasks_fake.TestTaskBasic {
3876+ {
3877+ NodeName : "node3" ,
3878+ State : pod_status .Running ,
3879+ },
3880+ },
3881+ },
3882+ {
3883+ Name : "q0_n3_job2" ,
3884+ RequiredGPUsPerTask : 1 ,
3885+ Priority : constants .PriorityTrainNumber ,
3886+ QueueName : "queue0" ,
3887+ Tasks : []* tasks_fake.TestTaskBasic {
3888+ {
3889+ NodeName : "node3" ,
3890+ State : pod_status .Running ,
3891+ },
3892+ },
3893+ },
3894+ {
3895+ Name : "q0_n3_job3" ,
3896+ RequiredGPUsPerTask : 1 ,
3897+ Priority : constants .PriorityTrainNumber ,
3898+ QueueName : "queue0" ,
3899+ Tasks : []* tasks_fake.TestTaskBasic {
3900+ {
3901+ NodeName : "node3" ,
3902+ State : pod_status .Running ,
3903+ },
3904+ },
3905+ },
3906+ {
3907+ Name : "q1_job1" ,
3908+ RequiredGPUsPerTask : 1 ,
3909+ Priority : constants .PriorityTrainNumber ,
3910+ QueueName : "queue1" ,
3911+ Tasks : []* tasks_fake.TestTaskBasic {
3912+ {
3913+ State : pod_status .Pending ,
3914+ },
3915+ {
3916+ State : pod_status .Pending ,
3917+ },
3918+ {
3919+ State : pod_status .Pending ,
3920+ },
3921+ {
3922+ State : pod_status .Pending ,
3923+ },
3924+ {
3925+ State : pod_status .Pending ,
3926+ },
3927+ },
3928+ },
3929+ },
3930+ Nodes : map [string ]nodes_fake.TestNodeBasic {
3931+ "node0" : {
3932+ GPUs : 4 ,
3933+ },
3934+ "node1" : {
3935+ GPUs : 4 ,
3936+ },
3937+ "node2" : {
3938+ GPUs : 4 ,
3939+ },
3940+ "node3" : {
3941+ GPUs : 4 ,
3942+ },
3943+ },
3944+ Queues : []test_utils.TestQueueBasic {
3945+ {
3946+ Name : "queue0" ,
3947+ DeservedGPUs : 12 ,
3948+ GPUOverQuotaWeight : 0 ,
3949+ },
3950+ {
3951+ Name : "queue1" ,
3952+ DeservedGPUs : 5 ,
3953+ GPUOverQuotaWeight : 1 ,
3954+ },
3955+ },
3956+ JobExpectedResults : map [string ]test_utils.TestExpectedResultBasic {
3957+ "q0_n0_job0" : {
3958+ GPUsRequired : 1 ,
3959+ Status : pod_status .Running ,
3960+ DontValidateGPUGroup : true ,
3961+ },
3962+ "q0_n0_job1" : {
3963+ GPUsRequired : 1 ,
3964+ Status : pod_status .Running ,
3965+ DontValidateGPUGroup : true ,
3966+ },
3967+ "q0_n0_job2" : {
3968+ GPUsRequired : 1 ,
3969+ Status : pod_status .Running ,
3970+ DontValidateGPUGroup : true ,
3971+ },
3972+ "q0_n0_job3" : {
3973+ GPUsRequired : 1 ,
3974+ Status : pod_status .Running ,
3975+ DontValidateGPUGroup : true ,
3976+ },
3977+ "q0_n1_job0" : {
3978+ GPUsRequired : 1 ,
3979+ Status : pod_status .Running ,
3980+ DontValidateGPUGroup : true ,
3981+ },
3982+ "q0_n1_job1" : {
3983+ GPUsRequired : 1 ,
3984+ Status : pod_status .Running ,
3985+ DontValidateGPUGroup : true ,
3986+ },
3987+ "q0_n1_job2" : {
3988+ GPUsRequired : 1 ,
3989+ Status : pod_status .Running ,
3990+ DontValidateGPUGroup : true ,
3991+ },
3992+ "q0_n1_job3" : {
3993+ GPUsRequired : 1 ,
3994+ Status : pod_status .Running ,
3995+ DontValidateGPUGroup : true ,
3996+ },
3997+ "q0_n2_job0" : {
3998+ GPUsRequired : 1 ,
3999+ Status : pod_status .Running ,
4000+ DontValidateGPUGroup : true ,
4001+ },
4002+ "q0_n2_job1" : {
4003+ GPUsRequired : 1 ,
4004+ Status : pod_status .Running ,
4005+ DontValidateGPUGroup : true ,
4006+ },
4007+ "q0_n2_job2" : {
4008+ GPUsRequired : 1 ,
4009+ Status : pod_status .Running ,
4010+ DontValidateGPUGroup : true ,
4011+ },
4012+ "q0_n2_job3" : {
4013+ GPUsRequired : 1 ,
4014+ Status : pod_status .Running ,
4015+ DontValidateGPUGroup : true ,
4016+ },
4017+ "q0_n3_job0" : {
4018+ GPUsRequired : 1 ,
4019+ Status : pod_status .Running ,
4020+ DontValidateGPUGroup : true ,
4021+ },
4022+ "q0_n3_job1" : {
4023+ GPUsRequired : 1 ,
4024+ Status : pod_status .Running ,
4025+ DontValidateGPUGroup : true ,
4026+ },
4027+ "q0_n3_job2" : {
4028+ GPUsRequired : 1 ,
4029+ Status : pod_status .Running ,
4030+ DontValidateGPUGroup : true ,
4031+ },
4032+ "q0_n3_job3" : {
4033+ GPUsRequired : 1 ,
4034+ Status : pod_status .Running ,
4035+ DontValidateGPUGroup : true ,
4036+ },
4037+ "q1_job1" : {
4038+ GPUsRequired : 5 ,
4039+ Status : pod_status .Pending ,
4040+ DontValidateGPUGroup : true ,
4041+ },
4042+ },
4043+ Mocks : & test_utils.TestMock {
4044+ CacheRequirements : & test_utils.CacheMocking {
4045+ NumberOfCacheBinds : 0 ,
4046+ NumberOfCacheEvictions : 0 ,
4047+ NumberOfPipelineActions : 0 ,
4048+ },
4049+ },
4050+ },
4051+ },
37134052 }
37144053}
0 commit comments