Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pkg/scheduler/api/podgroup_info/job_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ func (pgi *PodGroupInfo) IsStale() bool {
return true
}
for _, subGroup := range pgi.GetActiveSubGroupInfos() {
if !subGroup.IsGangSatisfied() {
if subGroup.IsStale() {
return true
}
}
Expand All @@ -416,8 +416,8 @@ func (pgi *PodGroupInfo) IsStale() bool {
}

func (pgi *PodGroupInfo) IsGangSatisfied() bool {
numActiveTasks := pgi.GetNumActiveUsedTasks()
if numActiveTasks < int(pgi.GetDefaultMinAvailable()) {
numActiveAllocatedTasks := pgi.GetActiveAllocatedTasksCount()
if numActiveAllocatedTasks < int(pgi.GetDefaultMinAvailable()) {
return false
}
for _, subGroup := range pgi.SubGroups {
Expand Down
7 changes: 6 additions & 1 deletion pkg/scheduler/api/podgroup_info/subgroup_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,15 @@ func (sgi *SubGroupInfo) IsReadyForScheduling() bool {
}

func (sgi *SubGroupInfo) IsGangSatisfied() bool {
numActiveTasks := sgi.GetNumActiveUsedTasks()
numActiveTasks := sgi.GetNumActiveAllocatedTasks()
return numActiveTasks >= int(sgi.minAvailable)
}

func (sgi *SubGroupInfo) IsStale() bool {
numActiveUsedTasks := sgi.GetNumActiveUsedTasks()
return numActiveUsedTasks < int(sgi.minAvailable)
}

func (sgi *SubGroupInfo) GetNumActiveAllocatedTasks() int {
return sgi.numActiveAllocatedTasks
}
Expand Down
71 changes: 71 additions & 0 deletions pkg/scheduler/api/podgroup_info/subgroup_info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,77 @@ func TestIsGangSatisfied(t *testing.T) {
}
}

func TestIsStale(t *testing.T) {
tests := []struct {
name string
minAvailable int32
pods []*pod_info.PodInfo
expected bool
}{
{
name: "not stale when used >= minAvailable",
minAvailable: 2,
pods: []*pod_info.PodInfo{
{UID: "1", Status: pod_status.Running},
{UID: "2", Status: pod_status.Pipelined},
},
expected: false,
},
{
name: "not stale with releasing status counted as used",
minAvailable: 2,
pods: []*pod_info.PodInfo{
{UID: "1", Status: pod_status.Running},
{UID: "2", Status: pod_status.Releasing},
},
expected: false,
},
{
name: "stale when used < minAvailable",
minAvailable: 3,
pods: []*pod_info.PodInfo{
{UID: "1", Status: pod_status.Running},
{UID: "2", Status: pod_status.Releasing},
},
expected: true,
},
{
name: "stale when no used pods",
minAvailable: 1,
pods: []*pod_info.PodInfo{},
expected: true,
},
{
name: "not stale when used equals minAvailable",
minAvailable: 1,
pods: []*pod_info.PodInfo{
{UID: "1", Status: pod_status.Releasing},
},
expected: false,
},
{
name: "stale with only non-used statuses",
minAvailable: 1,
pods: []*pod_info.PodInfo{
{UID: "1", Status: pod_status.Gated},
},
expected: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
sgi := NewSubGroupInfo("test", tt.minAvailable)
for _, pod := range tt.pods {
sgi.AssignTask(pod)
}
if got := sgi.IsStale(); got != tt.expected {
t.Errorf("IsStale() = %v, want %v", got, tt.expected)
}
})
}
}

func TestGetNumAliveTasks(t *testing.T) {
sgi := NewSubGroupInfo("test", 2)
pods := []*pod_info.PodInfo{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func (su *defaultStatusUpdater) recordStaleJobEvent(job *podgroup_info.PodGroupI
job.GetNumActiveUsedTasks(), job.GetDefaultMinAvailable())

for _, subGroup := range job.GetActiveSubGroupInfos() {
if !subGroup.IsGangSatisfied() {
if subGroup.IsStale() {
message += fmt.Sprintf(", subGroup %s minMember is %d and %d pods are active",
subGroup.GetName(), subGroup.GetMinAvailable(), subGroup.GetNumActiveUsedTasks())
}
Expand Down
Loading