Skip to content

Commit 2dad112

Browse files
committed
Provide a stub dispatch for for_each
1 parent fc0080c commit 2dad112

File tree

8 files changed

+429
-0
lines changed

8 files changed

+429
-0
lines changed

libcudacxx/include/cuda/std/__execution/policy.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,18 @@ enum class __execution_policy : uint32_t
3434
__parallel = 1 << 1,
3535
__unsequenced = 1 << 2,
3636
__parallel_unsequenced = __execution_policy::__parallel | __execution_policy::__unsequenced,
37+
38+
// The backends we provide
39+
__backend_invalid = 1 << 4,
40+
#if _CCCL_HAS_BACKEND_CUDA()
41+
__backend_cuda = 1 << 5,
42+
#endif // _CCCL_HAS_BACKEND_CUDA()
43+
#if _CCCL_HAS_BACKEND_OMP()
44+
__backend_omp = 1 << 6,
45+
#endif // _CCCL_HAS_BACKEND_OMP()
46+
#if _CCCL_HAS_BACKEND_TBB()
47+
__backend_tbb = 1 << 7,
48+
#endif // _CCCL_HAS_BACKEND_TBB()
3749
};
3850

3951
[[nodiscard]] _CCCL_API constexpr bool
@@ -42,6 +54,24 @@ __satisfies_execution_policy(__execution_policy __lhs, __execution_policy __rhs)
4254
return (static_cast<uint32_t>(__lhs) & static_cast<uint32_t>(__rhs)) != 0;
4355
}
4456

57+
//! @brief Strips the execution-mode flags from @p __policy and keeps only its backend bits
//! @param __policy The execution policy to inspect
//! @return @p __policy with its four lowest bits cleared
[[nodiscard]] _CCCL_API constexpr __execution_policy __extract_backend(__execution_policy __policy) noexcept
{
  // The backend enumerators start at bit 4, everything below describes the execution mode
  constexpr uint32_t __mode_bits = 0xF;
  const auto __raw               = static_cast<uint32_t>(__policy);
  return static_cast<__execution_policy>(__raw & ~__mode_bits);
}
62+
63+
//! @brief Checks whether the backend bits of @p __pol overlap with @p __backend
//! @param __pol The execution policy whose backend bits are inspected
//! @param __backend The backend (or set of backends) to look for
//! @return true if at least one bit of @p __backend is set in the backend bits of @p __pol
[[nodiscard]] _CCCL_API constexpr bool
__requires_matching_backend(__execution_policy __pol, __execution_policy __backend) noexcept
{
  const auto __requested = static_cast<uint32_t>(::cuda::std::execution::__extract_backend(__pol));
  const auto __candidate = static_cast<uint32_t>(__backend);
  return (__requested & __candidate) != 0;
}
68+
69+
//! @brief Checks whether @p __pol requests exactly @p __backend and no other backend
//! @param __pol The execution policy whose backend bits are inspected
//! @param __backend The single backend that must be the only one requested
//! @return true if the backend bits of @p __pol are non-empty and identical to @p __backend
[[nodiscard]] _CCCL_API constexpr bool
__requires_unique_backend(__execution_policy __pol, __execution_policy __backend) noexcept
{
  const auto __requested = static_cast<uint32_t>(::cuda::std::execution::__extract_backend(__pol));
  // A policy without any backend bit never uniquely requests a backend
  return __requested != 0 && __requested == static_cast<uint32_t>(__backend);
}
74+
4575
template <__execution_policy _Policy>
4676
struct __policy
4777
{
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of libcu++, the C++ Standard Library for your entire system,
4+
// under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#ifndef _LIBCUDACXX___INTERNAL_PSTL_CONFIG_H
12+
#define _LIBCUDACXX___INTERNAL_PSTL_CONFIG_H
13+
14+
#include <cuda/std/detail/__config>
15+
16+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
17+
# pragma GCC system_header
18+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
19+
# pragma clang system_header
20+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
21+
# pragma system_header
22+
#endif // no system header
23+
24+
#include <cuda/std/__cccl/prologue.h>
25+
26+
// Availability switches for the parallel algorithm backends, queried as `#if _CCCL_HAS_BACKEND_XXX()`.
// All three currently expand to 0, so every section guarded by one of them is compiled out.
#define _CCCL_HAS_BACKEND_CUDA() 0
27+
#define _CCCL_HAS_BACKEND_OMP() 0
28+
#define _CCCL_HAS_BACKEND_TBB() 0
29+
30+
#include <cuda/std/__cccl/epilogue.h>
31+
32+
#endif // _LIBCUDACXX___INTERNAL_PSTL_CONFIG_H
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of libcu++, the C++ Standard Library for your entire system,
4+
// under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#ifndef _LIBCUDACXX___PSTL_DISPATCH_H
12+
#define _LIBCUDACXX___PSTL_DISPATCH_H
13+
14+
#include <cuda/std/detail/__config>
15+
16+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
17+
# pragma GCC system_header
18+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
19+
# pragma clang system_header
20+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
21+
# pragma system_header
22+
#endif // no system header
23+
24+
#include <cuda/std/__execution/policy.h>
25+
#include <cuda/std/__type_traits/is_base_of.h>
26+
27+
#include <cuda/std/__cccl/prologue.h>
28+
29+
_CCCL_BEGIN_NAMESPACE_EXECUTION
30+
31+
//! @brief Identifies the parallel algorithm a @class __pstl_dispatch specialization implements.
//! The enumerators are grouped by the family of algorithms they belong to.
enum class __pstl_algorithm
{
  // find_if family
  __find,
  __find_if,
  __any_of,
  __all_of,
  __none_of,
  __is_partitioned,

  // merge family: not implemented

  // sort family
  __sort,

  // for_each family
  __for_each_n,
  __fill,
  __fill_n,
  __replace,
  __replace_if,
  __generate,
  __generate_n,

  // transform_reduce / transform_reduce_binary family
  __count_if,
  __count,
  __equal,
  __reduce,

  // transform / transform_binary family
  __replace_copy_if,
  __replace_copy,
  __move,
  __copy,
  __copy_n,
  __rotate_copy,
};
70+
71+
//! @brief Tag base class marking a dispatcher that cannot run a parallel algorithm, so that the algorithm has to be
//! executed serially
struct __pstl_no_dispatch
{};
74+
75+
//! @brief Dispatcher for a given @tparam _Algorith and @tparam _Policy
76+
//! If @class __pstl_dispatch is not specialized by the chosen backend we will fall back to serial execution
77+
template <__pstl_algorithm _Algorithm, __execution_policy _Policy>
78+
struct __pstl_dispatch : public __pstl_no_dispatch
79+
{};
80+
81+
//! @brief Helper variable that detects whether @class __pstl_dispatch has been specialized so that we can
82+
//! dispatch
83+
template <class>
84+
inline constexpr bool __pstl_can_dispatch = false;
85+
86+
template <__pstl_algorithm _Algorithm, __execution_policy _Policy>
87+
inline constexpr bool __pstl_can_dispatch<__pstl_dispatch<_Algorithm, _Policy>> =
88+
!::cuda::std::is_base_of_v<__pstl_no_dispatch, __pstl_dispatch<_Algorithm, _Policy>>;
89+
90+
//! @brief Top layer dispatcher that returns a concrete dispatch if possible
91+
template <__pstl_algorithm _Algorithm, __execution_policy _Policy>
92+
[[nodiscard]] _CCCL_API static constexpr auto __pstl_select_dispatch() noexcept
93+
{
94+
// If the user requests a specific backend, we need to use that if available
95+
#if _CCCL_HAS_BACKEND_CUDA()
96+
if constexpr (::cuda::std::execution::__requires_unique_backend(_Policy, __execution_policy::__backend_cuda))
97+
{
98+
return __pstl_dispatch<_Algorithm, __execution_policy::__backend_cuda>{};
99+
}
100+
#endif // _CCCL_HAS_BACKEND_CUDA()
101+
#if _CCCL_HAS_BACKEND_OMP()
102+
if constexpr (::cuda::std::execution::__requires_unique_backend(_Policy, __execution_policy::__backend_omp))
103+
{
104+
return __pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>{};
105+
}
106+
#endif // _CCCL_HAS_BACKEND_OMP()
107+
#if _CCCL_HAS_BACKEND_TBB()
108+
if constexpr (::cuda::std::execution::__requires_unique_backend(_Policy, __execution_policy::__backend_tbb))
109+
{
110+
return __pstl_dispatch<_Algorithm, __execution_policy::__backend_tbb>{};
111+
}
112+
#endif // _CCCL_HAS_BACKEND_TBB()
113+
114+
// If the user requests multiple backends, we can take the first available one of the selected ones
115+
#if _CCCL_HAS_BACKEND_CUDA()
116+
if constexpr (::cuda::std::execution::__requires_matching_backend(_Policy, __execution_policy::__backend_cuda))
117+
{
118+
using __dispatch = __pstl_dispatch<_Algorithm, __execution_policy::__backend_cuda>;
119+
if constexpr (__pstl_can_dispatch<__dispatch>)
120+
{
121+
return __dispatch{};
122+
}
123+
}
124+
#endif // _CCCL_HAS_BACKEND_CUDA()
125+
#if _CCCL_HAS_BACKEND_OMP()
126+
if constexpr (::cuda::std::execution::__requires_matching_backend(_Policy, __execution_policy::__backend_omp))
127+
{
128+
using __dispatch = __pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>;
129+
if constexpr (__pstl_can_dispatch<__dispatch>)
130+
{
131+
return __dispatch{};
132+
}
133+
}
134+
#endif // _CCCL_HAS_BACKEND_OMP()
135+
#if _CCCL_HAS_BACKEND_TBB()
136+
if constexpr (::cuda::std::execution::__requires_matching_backend(_Policy, __execution_policy::__backend_tbb))
137+
{
138+
using __dispatch = __pstl_dispatch<_Algorithm, __execution_policy::__backend_tbb>;
139+
if constexpr (__pstl_can_dispatch<__dispatch>)
140+
{
141+
return __dispatch{};
142+
}
143+
}
144+
#endif // _CCCL_HAS_BACKEND_TBB()
145+
146+
// If the user requests no backend, we can take the first available one that suites us
147+
#if _CCCL_HAS_BACKEND_CUDA()
148+
if constexpr (__pstl_can_dispatch<__pstl_dispatch<_Algorithm, __execution_policy::__backend_cuda>>)
149+
{
150+
return __pstl_dispatch<_Algorithm, __execution_policy::__backend_cuda>{};
151+
}
152+
#endif // _CCCL_HAS_BACKEND_CUDA()
153+
#if _CCCL_HAS_BACKEND_OMP()
154+
if constexpr (__pstl_can_dispatch<__pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>>)
155+
{
156+
return __pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>{};
157+
}
158+
#endif // _CCCL_HAS_BACKEND_OMP()
159+
#if _CCCL_HAS_BACKEND_TBB()
160+
if constexpr (__pstl_can_dispatch<__pstl_dispatch<_Algorithm, __execution_policy::__backend_tbb>>)
161+
{
162+
return __pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>{};
163+
}
164+
#endif // _CCCL_HAS_BACKEND_TBB()
165+
166+
// No dispatch found, return invalid to signal serial execution
167+
return __pstl_dispatch<_Algorithm, __execution_policy::__backend_invalid>{};
168+
}
169+
170+
_CCCL_END_NAMESPACE_EXECUTION
171+
172+
#include <cuda/std/__cccl/epilogue.h>
173+
174+
#endif // _LIBCUDACXX___PSTL_DISPATCH_H
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of libcu++, the C++ Standard Library for your entire system,
4+
// under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#ifndef _LIBCUDACXX___PSTL_FOR_EACH_H
12+
#define _LIBCUDACXX___PSTL_FOR_EACH_H
13+
14+
#include <cuda/std/detail/__config>
15+
16+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
17+
# pragma GCC system_header
18+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
19+
# pragma clang system_header
20+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
21+
# pragma system_header
22+
#endif // no system header
23+
24+
#include <cuda/std/__algorithm/for_each_n.h>
25+
#include <cuda/std/__execution/policy.h>
26+
#include <cuda/std/__iterator/distance.h>
27+
#include <cuda/std/__pstl/dispatch.h>
28+
#include <cuda/std/__utility/move.h>
29+
30+
#include <cuda/std/__cccl/prologue.h>
31+
32+
_CCCL_BEGIN_NAMESPACE_CUDA_STD
33+
34+
template <::cuda::std::execution::__execution_policy _Policy, class _ForwardIterator, class _Size, class _Function>
35+
_CCCL_API void for_each_n(
36+
const ::cuda::std::execution::__policy<_Policy>& __pol, _ForwardIterator __first, _Size __orig_n, _Function __func)
37+
{
38+
[[maybe_unused]] auto __dispatch =
39+
::cuda::std::execution::__pstl_select_dispatch<::cuda::std::execution::__pstl_algorithm::__for_each_n, _Policy>();
40+
if constexpr (::cuda::std::execution::__pstl_can_dispatch<decltype(__dispatch)>)
41+
{
42+
__dispatch(__pol, ::cuda::std::move(__first), __orig_n, ::cuda::std::move(__func));
43+
}
44+
::cuda::std::for_each_n(::cuda::std::move(__first), __orig_n, ::cuda::std::move(__func));
45+
}
46+
47+
template <::cuda::std::execution::__execution_policy _Policy, class _ForwardIterator, class _Function>
48+
_CCCL_API void for_each(const ::cuda::std::execution::__policy<_Policy>& __pol,
49+
_ForwardIterator __first,
50+
_ForwardIterator __last,
51+
_Function __func)
52+
{
53+
[[maybe_unused]] auto __dispatch =
54+
::cuda::std::execution::__pstl_select_dispatch<::cuda::std::execution::__pstl_algorithm::__for_each_n, _Policy>();
55+
if constexpr (::cuda::std::execution::__pstl_can_dispatch<decltype(__dispatch)>)
56+
{
57+
__dispatch(__pol, ::cuda::std::move(__first), ::cuda::std::distance(__first, __last), ::cuda::std::move(__func));
58+
}
59+
::cuda::std::for_each(::cuda::std::move(__first), ::cuda::std::move(__last), ::cuda::std::move(__func));
60+
}
61+
62+
_CCCL_END_NAMESPACE_CUDA_STD
63+
64+
#include <cuda/std/__cccl/epilogue.h>
65+
66+
#endif // _LIBCUDACXX___PSTL_FOR_EACH_H

libcudacxx/include/cuda/std/detail/libcxx/include/__config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <cuda/std/__internal/cpp_dialect.h>
2525
#include <cuda/std/__internal/features.h>
2626
#include <cuda/std/__internal/namespaces.h>
27+
#include <cuda/std/__internal/pstl_config.h>
2728

2829
#ifdef __cplusplus
2930

libcudacxx/include/cuda/std/detail/libcxx/include/algorithm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122
#include <cuda/std/__iterator/wrap_iter.h>
123123
#include <cuda/std/__memory/destruct_n.h>
124124
#include <cuda/std/__memory/temporary_buffer.h>
125+
#include <cuda/std/__pstl/for_each.h>
125126
#include <cuda/std/__random/linear_congruential_engine.h>
126127
#include <cuda/std/__random/uniform_int_distribution.h>
127128
#include <cuda/std/__type_traits/common_type.h>

0 commit comments

Comments
 (0)