@@ -502,7 +502,7 @@ CUB_RUNTIME_FUNCTION inline cudaError_t SmVersion(int &sm_version,
502502 */
503503CUB_RUNTIME_FUNCTION inline cudaError_t SyncStream (cudaStream_t stream)
504504{
505- cudaError_t result = cudaErrorUnknown ;
505+ cudaError_t result = cudaErrorNotSupported ;
506506
507507 NV_IF_TARGET (NV_IS_HOST,
508508 (result = CubDebug (cudaStreamSynchronize (stream));),
@@ -532,21 +532,42 @@ namespace detail
532532CUB_RUNTIME_FUNCTION inline cudaError_t DebugSyncStream (cudaStream_t stream)
533533{
534534#ifndef CUB_DETAIL_DEBUG_ENABLE_SYNC
535- (void )stream;
536535
537- return cudaSuccess;
538- #else
539- #if 1 // All valid targets currently support device-side synchronization
540- _CubLog (" %s\n " , " Synchronizing..." );
541- return SyncStream (stream);
542- #else
543536 (void )stream;
544- _CubLog("%s\n",
545- "WARNING: Skipping CUB `debug_synchronous` synchronization "
546- "(unsupported target).");
547537 return cudaSuccess;
548- #endif
549- #endif
538+
539+ #else // CUB_DETAIL_DEBUG_ENABLE_SYNC:
540+
541+ #define CUB_TMP_SYNC_AVAILABLE \
542+ _CubLog (" %s\n " , " Synchronizing..." ); \
543+ return SyncStream (stream)
544+
545+ #define CUB_TMP_DEVICE_SYNC_UNAVAILABLE \
546+ (void )stream; \
547+ _CubLog (" WARNING: Skipping CUB `debug_synchronous` synchronization (%s).\n " , \
548+ " device-side sync requires <sm_90, RDC, and CDPv1" ); \
549+ return cudaSuccess
550+
551+ #ifdef CUB_DETAIL_CDPv1
552+
553+ // Can sync everywhere but SM_90+
554+ NV_IF_TARGET (NV_PROVIDES_SM_90,
555+ (CUB_TMP_DEVICE_SYNC_UNAVAILABLE;),
556+ (CUB_TMP_SYNC_AVAILABLE;));
557+
558+ #else // CDPv2 or no CDP:
559+
560+ // Can only sync on host
561+ NV_IF_TARGET (NV_IS_HOST,
562+ (CUB_TMP_SYNC_AVAILABLE;),
563+ (CUB_TMP_DEVICE_SYNC_UNAVAILABLE;));
564+
565+ #endif // CDP version
566+
567+ #undef CUB_TMP_DEVICE_SYNC_UNAVAILABLE
568+ #undef CUB_TMP_SYNC_AVAILABLE
569+
570+ #endif // CUB_DETAIL_DEBUG_ENABLE_SYNC
550571}
551572
552573/* * \brief Gets whether the current device supports unified addressing */
0 commit comments