diff --git a/src/enqueue.cc b/src/enqueue.cc index 00a0ef8da..23a2961e3 100644 --- a/src/enqueue.cc +++ b/src/enqueue.cc @@ -41,7 +41,10 @@ ncclResult_t ncclInitKernelsForDevice(int cudaArch, int maxSharedMem, size_t* ma if (fn == nullptr) continue; cudaError_t errcode = cudaFuncGetAttributes(&attr, fn); - if (errcode != cudaSuccess) continue; // Silently ignore failures + if (errcode != cudaSuccess) { + cudaGetLastError(); // Drain error code + continue; // Silently ignore failures + } if (maxStackSize) { if (attr.localSizeBytes > *maxStackSize) *maxStackSize = attr.localSizeBytes; }