-
Notifications
You must be signed in to change notification settings - Fork 294
Implement CUDA backend for parallel cuda::std::for_each
#5610
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9809940
aa7f9b1
9555466
7802f25
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef _CUDA___EXECUTION_POLICY_H | ||
| #define _CUDA___EXECUTION_POLICY_H | ||
|
|
||
| #include <cuda/std/detail/__config> | ||
|
|
||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
| # pragma GCC system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
| # pragma clang system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
| # pragma system_header | ||
| #endif // no system header | ||
|
|
||
| #if _CCCL_HAS_BACKEND_CUDA() | ||
|
|
||
| # include <cuda/__fwd/execution_policy.h> | ||
| # include <cuda/std/__execution/policy.h> | ||
| # include <cuda/std/__type_traits/is_execution_policy.h> | ||
|
|
||
| # include <cuda/std/__cccl/prologue.h> | ||
|
|
||
| _CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION | ||
|
|
||
| //! @brief Specialization of the policy base for policies tagged with the cuda backend | ||
| //! @note Adds no members of its own, it derives from the `__none` backend base of the same `_Policy` | ||
| template <uint32_t _Policy> | ||
| struct _CCCL_DECLSPEC_EMPTY_BASES __execution_policy_base<_Policy, __execution_backend::__cuda> | ||
| : __execution_policy_base<_Policy, __execution_backend::__none> | ||
| { | ||
| // Intentionally empty | ||
| }; | ||
|
|
||
| _CCCL_END_NAMESPACE_CUDA_STD_EXECUTION | ||
|
|
||
| _CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION | ||
|
|
||
| //! @brief Policy type for the parallel unsequenced execution policy with the backend set to cuda | ||
| using __cub_parallel_unsequenced_policy = ::cuda::std::execution::__execution_policy_base< | ||
| ::cuda::std::execution::__with_cuda_backend< | ||
| static_cast<uint32_t>(::cuda::std::execution::__execution_policy::__parallel_unsequenced)>()>; | ||
| //! @brief Global constant object of type `__cub_parallel_unsequenced_policy` | ||
| _CCCL_GLOBAL_CONSTANT __cub_parallel_unsequenced_policy __cub_par_unseq{}; | ||
|
|
||
| _CCCL_END_NAMESPACE_CUDA_EXECUTION | ||
|
|
||
| # include <cuda/std/__cccl/epilogue.h> | ||
|
|
||
| #endif // _CCCL_HAS_BACKEND_CUDA() | ||
|
|
||
| #endif // _CUDA___EXECUTION_POLICY_H | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,47 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef _CUDA___FWD_EXECUTION_POLICY_H | ||
| #define _CUDA___FWD_EXECUTION_POLICY_H | ||
|
|
||
| #include <cuda/std/detail/__config> | ||
|
|
||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
| # pragma GCC system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
| # pragma clang system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
| # pragma system_header | ||
| #endif // no system header | ||
|
|
||
| #if _CCCL_HAS_BACKEND_CUDA() | ||
|
|
||
| # include <cuda/std/__fwd/execution_policy.h> | ||
|
|
||
| # include <cuda/std/__cccl/prologue.h> | ||
|
|
||
| _CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION | ||
|
|
||
| //! @brief Returns `_Policy` with its backend byte (bits 8-15) replaced by the cuda backend | ||
| //! @tparam _Policy The stored policy value, all bits outside of the backend byte are preserved | ||
| template <uint32_t _Policy> | ||
| [[nodiscard]] _CCCL_API constexpr uint32_t __with_cuda_backend() noexcept | ||
| { | ||
| // Bits 8-15 are cleared first so that the previous backend does not leak into the result | ||
| constexpr uint32_t __backend_bits{0x0000FF00}; | ||
| constexpr uint32_t __cuda_bits = static_cast<uint32_t>(__execution_backend::__cuda) << 8; | ||
| return (_Policy & ~__backend_bits) | __cuda_bits; | ||
| } | ||
|
|
||
| _CCCL_END_NAMESPACE_CUDA_STD_EXECUTION | ||
|
|
||
| # include <cuda/std/__cccl/epilogue.h> | ||
|
|
||
| #endif // _CCCL_HAS_BACKEND_CUDA() | ||
|
|
||
| #endif // _CUDA___FWD_EXECUTION_POLICY_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||
| // under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef _CUDA_STD___FWD_EXECUTION_POLICY_H | ||
| #define _CUDA_STD___FWD_EXECUTION_POLICY_H | ||
|
|
||
| #include <cuda/std/detail/__config> | ||
|
|
||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
| # pragma GCC system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
| # pragma clang system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
| # pragma system_header | ||
| #endif // no system header | ||
|
|
||
| #include <cuda/std/cstdint> | ||
|
|
||
| #include <cuda/std/__cccl/prologue.h> | ||
|
|
||
| _CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION | ||
|
|
||
| //! @brief The standard execution policies, each stored as a bit flag within a single byte | ||
| enum class __execution_policy : uint8_t | ||
| { | ||
| __invalid_execution_policy = 0x00, | ||
| __sequenced = 0x01, | ||
| __parallel = 0x02, | ||
| __unsequenced = 0x04, | ||
| // Both the parallel and the unsequenced flag are set | ||
| __parallel_unsequenced = __parallel | __unsequenced, | ||
| }; | ||
|
|
||
| //! @brief Reads the execution policy out of the lowest byte of the stored _Policy | ||
| template <uint32_t _Policy> | ||
| inline constexpr __execution_policy __policy_to_execution_policy = | ||
| static_cast<__execution_policy>(_Policy & 0xFFu); | ||
|
|
||
| //! @brief The backends we support, each backend occupies its own bit | ||
| //! @note Deliberately an unscoped enum because a user might specify multiple backends | ||
| enum __execution_backend : uint8_t | ||
| { | ||
| // No backend bit is set | ||
| __none = 0x00, | ||
| // Each backend enumerator only exists if its _CCCL_HAS_BACKEND_*() macro evaluates to true | ||
| #if _CCCL_HAS_BACKEND_CUDA() | ||
| __cuda = 0x02, | ||
| #endif // _CCCL_HAS_BACKEND_CUDA() | ||
| #if _CCCL_HAS_BACKEND_OMP() | ||
| __omp = 0x04, | ||
| #endif // _CCCL_HAS_BACKEND_OMP() | ||
| #if _CCCL_HAS_BACKEND_TBB() | ||
| __tbb = 0x08, | ||
| #endif // _CCCL_HAS_BACKEND_TBB() | ||
| }; | ||
|
|
||
| //! @brief Extracts the execution backend from the stored _Policy | ||
| template <uint32_t _Policy> | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Suggestion: it feels like we are missing some other concept here. We have a policy: how code should run (sequenced, parallel, unsequenced, etc.) and a backend: where code should run (cuda, omp, tbb, etc.). In this template parameter, we have "something" that's policy + backend, which is also called policy. This makes it hard to understand what exactly we are working with. The only information that disambiguates the two is uint32_t vs. uint8_t, but it's hard to spot. If you could come up with some different term for policy + backend, that'd be great.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought about this a ton and I did not find a truly better name. What I would say is that there is a difference between the execution policy and what I would consider the whole policy. The former is what the standard gives us as a classification of how code should run. The latter is how we implement that for the different backends. So yeah, it is a tad close, but I did not find a better name. |
||
| inline constexpr __execution_backend __policy_to_execution_backend = | ||
| __execution_backend{(_Policy & uint32_t{0x0000FF00}) >> 8}; | ||
|
|
||
| //! @brief Forward declaration of the policy base class template | ||
| //! @note If `_Backend` is not passed explicitly it is the backend extracted from `_Policy` | ||
| template <uint32_t _Policy, | ||
| __execution_backend _Backend = __policy_to_execution_backend<_Policy>> | ||
| struct __execution_policy_base; | ||
|
|
||
| _CCCL_END_NAMESPACE_CUDA_STD_EXECUTION | ||
|
|
||
| #include <cuda/std/__cccl/epilogue.h> | ||
|
|
||
| #endif // _CUDA_STD___FWD_EXECUTION_POLICY_H | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // Part of libcu++, the C++ Standard Library for your entire system, | ||
| // under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef _CUDA_STD___INTERNAL_PSTL_CONFIG_H | ||
| #define _CUDA_STD___INTERNAL_PSTL_CONFIG_H | ||
|
|
||
| #include <cuda/std/detail/__config> | ||
|
|
||
| #if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
| # pragma GCC system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
| # pragma clang system_header | ||
| #elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
| # pragma system_header | ||
| #endif // no system header | ||
|
|
||
| #include <cuda/std/__cccl/prologue.h> | ||
|
|
||
| // The cuda backend requires a CUDA compilation with a compiler other than NVRTC. | ||
| // The expansion is parenthesized so that it stays a single operand when the macro is negated or | ||
| // combined with other conditions, e.g. `#if !_CCCL_HAS_BACKEND_CUDA()`. | ||
| #define _CCCL_HAS_BACKEND_CUDA() (_CCCL_CUDA_COMPILATION() && !_CCCL_COMPILER(NVRTC)) | ||
| // The omp and tbb backends are defined as 0 here, so code guarded by them is never compiled | ||
| #define _CCCL_HAS_BACKEND_OMP() 0 | ||
| #define _CCCL_HAS_BACKEND_TBB() 0 | ||
|
|
||
| #include <cuda/std/__cccl/epilogue.h> | ||
|
|
||
| #endif // _CUDA_STD___INTERNAL_PSTL_CONFIG_H |
Uh oh!
There was an error while loading. Please reload this page.