diff --git a/libcudacxx/include/cuda/__device/arch_id.h b/libcudacxx/include/cuda/__device/arch_id.h index 07a66af6a01..5f7d4c883aa 100644 --- a/libcudacxx/include/cuda/__device/arch_id.h +++ b/libcudacxx/include/cuda/__device/arch_id.h @@ -37,6 +37,7 @@ enum class arch_id : int { sm_60 = 60, sm_61 = 61, + sm_62 = 62, sm_70 = 70, sm_75 = 75, sm_80 = 80, @@ -64,6 +65,7 @@ enum class arch_id : int { case ::cuda::std::to_underlying(arch_id::sm_60): case ::cuda::std::to_underlying(arch_id::sm_61): + case ::cuda::std::to_underlying(arch_id::sm_62): case ::cuda::std::to_underlying(arch_id::sm_70): case ::cuda::std::to_underlying(arch_id::sm_75): case ::cuda::std::to_underlying(arch_id::sm_80): diff --git a/libcudacxx/include/cuda/__device/arch_traits.h b/libcudacxx/include/cuda/__device/arch_traits.h index 90db61dc86f..94f4d46f7b7 100644 --- a/libcudacxx/include/cuda/__device/arch_traits.h +++ b/libcudacxx/include/cuda/__device/arch_traits.h @@ -227,6 +227,20 @@ template <> return __traits; }; +template <> +[[nodiscard]] _CCCL_API constexpr arch_traits_t arch_traits() noexcept +{ + auto __traits = ::cuda::__common_arch_traits(arch_id::sm_62); + __traits.max_shared_memory_per_multiprocessor = 64 * 1024; + __traits.max_blocks_per_multiprocessor = 32; + __traits.max_threads_per_multiprocessor = 2048; + __traits.max_warps_per_multiprocessor = __traits.max_threads_per_multiprocessor / __traits.warp_size; + __traits.max_shared_memory_per_block_optin = 48 * 1024; + __traits.max_registers_per_block = 32 * 1024; + + return __traits; +}; + template <> [[nodiscard]] _CCCL_API constexpr arch_traits_t arch_traits() noexcept { @@ -452,6 +466,8 @@ template <> return ::cuda::arch_traits(); case arch_id::sm_61: return ::cuda::arch_traits(); + case arch_id::sm_62: + return ::cuda::arch_traits(); case arch_id::sm_70: return ::cuda::arch_traits(); case arch_id::sm_75: diff --git a/libcudacxx/test/libcudacxx/cuda/ccclrt/device/arch_id.c2h.cu b/libcudacxx/test/libcudacxx/cuda/ccclrt/device/arch_id.c2h.cu index bf8a1f91e38..1a37ddb816f 100644 --- a/libcudacxx/test/libcudacxx/cuda/ccclrt/device/arch_id.c2h.cu +++ b/libcudacxx/test/libcudacxx/cuda/ccclrt/device/arch_id.c2h.cu @@ -29,6 +29,7 @@ C2H_CCCLRT_TEST("Architecture id", "[device]") STATIC_REQUIRE(cuda::std::is_same_v, int>); STATIC_REQUIRE(cuda::std::to_underlying(cuda::arch_id::sm_60) == 60); STATIC_REQUIRE(cuda::std::to_underlying(cuda::arch_id::sm_61) == 61); + STATIC_REQUIRE(cuda::std::to_underlying(cuda::arch_id::sm_62) == 62); STATIC_REQUIRE(cuda::std::to_underlying(cuda::arch_id::sm_70) == 70); STATIC_REQUIRE(cuda::std::to_underlying(cuda::arch_id::sm_75) == 75); STATIC_REQUIRE(cuda::std::to_underlying(cuda::arch_id::sm_80) == 80);