@@ -922,10 +922,12 @@ ncclResult_t ret = ncclSuccess;
922922 cudaStream_t hostStream, deviceStream;
923923 NCCLCHECKGOTO (ncclStrongStreamAcquire (ncclCudaGraphNone (), &comm->sharedRes ->hostStream , /* concurrent=*/ false , &hostStream), ret, fail);
924924 NCCLCHECKGOTO (ncclStrongStreamAcquire (ncclCudaGraphNone (), &comm->sharedRes ->deviceStream , /* concurrent=*/ false , &deviceStream), ret, fail);
925- if (regRecord->regIpcAddrs .devPeerRmtAddrs == NULL )
925+ if (regRecord->regIpcAddrs .devPeerRmtAddrs == NULL ) {
926926 NCCLCHECKGOTO (ncclCudaCallocAsync (&regRecord->regIpcAddrs .devPeerRmtAddrs , comm->localRanks , hostStream), ret, fail);
927- if (needUpdate)
928927 NCCLCHECKGOTO (ncclCudaMemcpyAsync (regRecord->regIpcAddrs .devPeerRmtAddrs , regRecord->regIpcAddrs .hostPeerRmtAddrs , comm->localRanks , hostStream), ret, fail);
928+ } else if (needUpdate) {
929+ NCCLCHECKGOTO (ncclCudaMemcpyAsync (regRecord->regIpcAddrs .devPeerRmtAddrs , regRecord->regIpcAddrs .hostPeerRmtAddrs , comm->localRanks , hostStream), ret, fail);
930+ }
929931 NCCLCHECKGOTO (ncclStreamWaitStream (deviceStream, hostStream, comm->sharedRes ->scratchEvent ), ret, fail);
930932 NCCLCHECKGOTO (ncclStrongStreamRelease (ncclCudaGraphNone (), &comm->sharedRes ->hostStream , /* concurrent=*/ false ), ret, fail);
931933 NCCLCHECKGOTO (ncclStrongStreamRelease (ncclCudaGraphNone (), &comm->sharedRes ->deviceStream , /* concurrent=*/ false ), ret, fail);
0 commit comments