Skip to content

Commit 7bd6f5c

Browse files
authored
fix: use default maxBytes and ncclBuffSize for p4 running all-to-all (#691)
Signed-off-by: Yutong Sun <[email protected]>
1 parent fbeef8a commit 7bd6f5c

File tree

1 file changed

+7
-2
lines changed

1 file changed

+7
-2
lines changed

test/cases/nvidia/mpi_test.go

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,13 @@ func multiNode(testName string) features.Feature {
70 70
ncclBuffSize := "4194304"
71 71
if slices.Contains(instanceSupportsRdmaRead, *nodeType) {
72 72
t.Log("Instance supports RDMA")
73-
maxBytes = "16G"
74-
ncclBuffSize = "8388608"
73+
// TODO: revisit this with some kind of per-instance optimizer, or maybe use the defaults for all instance types unless specified
74+
if testName == "alltoall_perf" && strings.Contains(*nodeType, "p4") {
75+
// Keep default values for P4 running all-to-all
76+
} else {
77+
maxBytes = "16G"
78+
ncclBuffSize = "8388608"
79+
}
75 80
}
76 81
var err error
77 82
renderedMpiJobNcclTestMultiNodeManifest, err = fwext.RenderManifests(mpiJobNcclTestMultiNodeManifest, ncclTestManifestTplVars{

0 commit comments

Comments
 (0)