Provide a stub dispatch for for_each

miscco · miscco · commit 2dad1124fc66 · 2025-08-21T14:39:03.000+02:00
diff --git a/libcudacxx/include/cuda/std/__execution/policy.h b/libcudacxx/include/cuda/std/__execution/policy.h
@@ -34,6 +34,18 @@ enum class __execution_policy : uint32_t
   __parallel                 = 1 << 1,
   __unsequenced              = 1 << 2,
   __parallel_unsequenced     = __execution_policy::__parallel | __execution_policy::__unsequenced,
+
+  // The backends we provide
+  __backend_invalid = 1 << 4,
+#if _CCCL_HAS_BACKEND_CUDA()
+  __backend_cuda = 1 << 5,
+#endif // _CCCL_HAS_BACKEND_CUDA()
+#if _CCCL_HAS_BACKEND_OMP()
+  __backend_omp = 1 << 6,
+#endif // _CCCL_HAS_BACKEND_OMP()
+#if _CCCL_HAS_BACKEND_TBB()
+  __backend_tbb = 1 << 7,
+#endif // _CCCL_HAS_BACKEND_TBB()
 };
 
 [[nodiscard]] _CCCL_API constexpr bool
@@ -42,6 +54,24 @@ __satisfies_execution_policy(__execution_policy __lhs, __execution_policy __rhs)
   return (static_cast<uint32_t>(__lhs) & static_cast<uint32_t>(__rhs)) != 0;
 }
 
+//! @brief Returns @p __policy with its four low-order bits cleared, so that only bit 4 and above remain.
+//! That is the bit range in which the `__backend_*` enumerators are defined.
+[[nodiscard]] _CCCL_API constexpr __execution_policy __extract_backend(__execution_policy __policy) noexcept
+{
+  // Every bit except the low four
+  constexpr uint32_t __backend_bits = 0xFFFFFFF0u;
+  const auto __raw                  = static_cast<uint32_t>(__policy);
+  return static_cast<__execution_policy>(__raw & __backend_bits);
+}
+
+//! @brief Checks whether the backend bits of @p __pol share at least one bit with @p __backend.
+//! @return `true` if the bitwise AND of the backend bits of @p __pol and @p __backend is non-zero
+[[nodiscard]] _CCCL_API constexpr bool
+__requires_matching_backend(__execution_policy __pol, __execution_policy __backend) noexcept
+{
+  const auto __requested = static_cast<uint32_t>(::cuda::std::execution::__extract_backend(__pol));
+  const auto __candidate = static_cast<uint32_t>(__backend);
+  return (__requested & __candidate) != 0;
+}
+
+//! @brief Checks whether @p __pol requests @p __backend and no other backend.
+//!
+//! Only the backend bits of @p __pol (as returned by `__extract_backend`) take part in the comparison, so the
+//! remaining low-order bits of the policy do not influence the result.
+//! @return `true` iff the backend bits of @p __pol are exactly @p __backend
+[[nodiscard]] _CCCL_API constexpr bool
+__requires_unique_backend(__execution_policy __pol, __execution_policy __backend) noexcept
+{
+  // Compare for equality. OR-ing the backend bits with `~__backend` is never zero, so negating that expression
+  // would yield `false` for every input.
+  return ::cuda::std::execution::__extract_backend(__pol) == __backend;
+}
+
 template <__execution_policy _Policy>
 struct __policy
 {
diff --git a/libcudacxx/include/cuda/std/__internal/pstl_config.h b/libcudacxx/include/cuda/std/__internal/pstl_config.h
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCUDACXX___INTERNAL_PSTL_CONFIG_H
+#define _LIBCUDACXX___INTERNAL_PSTL_CONFIG_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__cccl/prologue.h>
+
+// Function-like feature macros that are tested with `#if` to enable the code of a parallel backend.
+// All three currently expand to 0, so no backend is enabled.
+#define _CCCL_HAS_BACKEND_CUDA() 0
+#define _CCCL_HAS_BACKEND_OMP()  0
+#define _CCCL_HAS_BACKEND_TBB()  0
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _LIBCUDACXX___INTERNAL_PSTL_CONFIG_H
diff --git a/libcudacxx/include/cuda/std/__pstl/dispatch.h b/libcudacxx/include/cuda/std/__pstl/dispatch.h
@@ -0,0 +1,174 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCUDACXX___PSTL_DISPATCH_H
+#define _LIBCUDACXX___PSTL_DISPATCH_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__execution/policy.h>
+#include <cuda/std/__type_traits/is_base_of.h>
+
+#include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_EXECUTION
+
+//! @brief Identifies the algorithm a dispatch is selected for; used as the first template argument of
+//! `__pstl_dispatch`. The enumerators are grouped by the algorithm family they belong to.
+enum class __pstl_algorithm
+{
+  // The find_if family
+  __find,
+  __find_if,
+  __any_of,
+  __all_of,
+  __none_of,
+  __is_partitioned,
+
+  // merge family
+  // not implemented
+
+  // sort family
+  __sort,
+
+  // for_each family
+  __for_each_n,
+  __fill,
+  __fill_n,
+  __replace,
+  __replace_if,
+  __generate,
+  __generate_n,
+
+  // transform_reduce and transform_reduce_binary family
+  __count_if,
+  __count,
+  __equal,
+  __reduce,
+
+  // transform and transform_binary family
+  __replace_copy_if,
+  __replace_copy,
+  __move,
+  __copy,
+  __copy_n,
+  __rotate_copy,
+};
+
+//! @brief Tag type to indicate that we cannot dispatch to a parallel algorithm and should run the algorithm serially.
+//! The primary template of `__pstl_dispatch` derives from it.
+struct __pstl_no_dispatch
+{};
+
+//! @brief Dispatcher for a given algorithm and execution policy
+//! @tparam _Algorithm The algorithm to dispatch
+//! @tparam _Policy The execution policy the dispatch is looked up for
+//! If `__pstl_dispatch` is not specialized by the chosen backend, this primary template is used. It derives from
+//! `__pstl_no_dispatch`, which makes us fall back to serial execution.
+template <__pstl_algorithm _Algorithm, __execution_policy _Policy>
+struct __pstl_dispatch : public __pstl_no_dispatch
+{};
+
+//! @brief Helper variable that detects whether `__pstl_dispatch` has been specialized so that we can dispatch.
+//! It is `false` for every type that is not an instantiation of `__pstl_dispatch`.
+template <class>
+inline constexpr bool __pstl_can_dispatch = false;
+
+//! For an instantiation of `__pstl_dispatch` the value is `true` iff the type does not derive from
+//! `__pstl_no_dispatch` (which the primary template does).
+template <__pstl_algorithm _Algorithm, __execution_policy _Policy>
+inline constexpr bool __pstl_can_dispatch<__pstl_dispatch<_Algorithm, _Policy>> =
+  !::cuda::std::is_base_of_v<__pstl_no_dispatch, __pstl_dispatch<_Algorithm, _Policy>>;
+
+//! @brief Top layer dispatcher that returns a concrete dispatch if possible
+//!
+//! Backends are considered in the order CUDA, OMP, TBB, and the code for a backend is only compiled when its
+//! `_CCCL_HAS_BACKEND_*()` macro is enabled. The selection proceeds in three steps:
+//!   1. If @p _Policy requests exactly one backend, the dispatch of that backend is returned.
+//!   2. Otherwise the first backend requested by @p _Policy whose dispatch is specialized is returned.
+//!   3. Otherwise the first backend whose dispatch is specialized is returned.
+//! If no step succeeds, the `__backend_invalid` dispatch is returned to signal serial execution.
+//!
+//! @tparam _Algorithm The algorithm to dispatch
+//! @tparam _Policy The execution policy passed by the user
+//! @return A value-initialized object of the selected `__pstl_dispatch` specialization
+//!
+//! @note All steps form one `if constexpr` / `else` chain. The candidate dispatches are different types, so only a
+//! single `return` statement may be instantiated for the deduction of the `auto` return type to be well-formed.
+template <__pstl_algorithm _Algorithm, __execution_policy _Policy>
+[[nodiscard]] _CCCL_API static constexpr auto __pstl_select_dispatch() noexcept
+{
+  // If the user requests a specific backend, we need to use that if available
+#if _CCCL_HAS_BACKEND_CUDA()
+  if constexpr (::cuda::std::execution::__requires_unique_backend(_Policy, __execution_policy::__backend_cuda))
+  {
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_cuda>{};
+  }
+  else
+#endif // _CCCL_HAS_BACKEND_CUDA()
+#if _CCCL_HAS_BACKEND_OMP()
+  if constexpr (::cuda::std::execution::__requires_unique_backend(_Policy, __execution_policy::__backend_omp))
+  {
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>{};
+  }
+  else
+#endif // _CCCL_HAS_BACKEND_OMP()
+#if _CCCL_HAS_BACKEND_TBB()
+  if constexpr (::cuda::std::execution::__requires_unique_backend(_Policy, __execution_policy::__backend_tbb))
+  {
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_tbb>{};
+  }
+  else
+#endif // _CCCL_HAS_BACKEND_TBB()
+
+  // If the user requests multiple backends, we can take the first available one of the selected ones
+#if _CCCL_HAS_BACKEND_CUDA()
+  if constexpr (::cuda::std::execution::__requires_matching_backend(_Policy, __execution_policy::__backend_cuda)
+                && __pstl_can_dispatch<__pstl_dispatch<_Algorithm, __execution_policy::__backend_cuda>>)
+  {
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_cuda>{};
+  }
+  else
+#endif // _CCCL_HAS_BACKEND_CUDA()
+#if _CCCL_HAS_BACKEND_OMP()
+  if constexpr (::cuda::std::execution::__requires_matching_backend(_Policy, __execution_policy::__backend_omp)
+                && __pstl_can_dispatch<__pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>>)
+  {
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>{};
+  }
+  else
+#endif // _CCCL_HAS_BACKEND_OMP()
+#if _CCCL_HAS_BACKEND_TBB()
+  if constexpr (::cuda::std::execution::__requires_matching_backend(_Policy, __execution_policy::__backend_tbb)
+                && __pstl_can_dispatch<__pstl_dispatch<_Algorithm, __execution_policy::__backend_tbb>>)
+  {
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_tbb>{};
+  }
+  else
+#endif // _CCCL_HAS_BACKEND_TBB()
+
+  // If the user requests no backend, we can take the first available one that suits us
+#if _CCCL_HAS_BACKEND_CUDA()
+  if constexpr (__pstl_can_dispatch<__pstl_dispatch<_Algorithm, __execution_policy::__backend_cuda>>)
+  {
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_cuda>{};
+  }
+  else
+#endif // _CCCL_HAS_BACKEND_CUDA()
+#if _CCCL_HAS_BACKEND_OMP()
+  if constexpr (__pstl_can_dispatch<__pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>>)
+  {
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_omp>{};
+  }
+  else
+#endif // _CCCL_HAS_BACKEND_OMP()
+#if _CCCL_HAS_BACKEND_TBB()
+  if constexpr (__pstl_can_dispatch<__pstl_dispatch<_Algorithm, __execution_policy::__backend_tbb>>)
+  {
+    // Return the TBB dispatch that was just checked, not the one of another backend
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_tbb>{};
+  }
+  else
+#endif // _CCCL_HAS_BACKEND_TBB()
+  {
+    // No dispatch found, return invalid to signal serial execution
+    return __pstl_dispatch<_Algorithm, __execution_policy::__backend_invalid>{};
+  }
+}
+
+_CCCL_END_NAMESPACE_EXECUTION
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _LIBCUDACXX___PSTL_DISPATCH_H
diff --git a/libcudacxx/include/cuda/std/__pstl/for_each.h b/libcudacxx/include/cuda/std/__pstl/for_each.h
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCUDACXX___PSTL_FOR_EACH_H
+#define _LIBCUDACXX___PSTL_FOR_EACH_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__algorithm/for_each_n.h>
+#include <cuda/std/__execution/policy.h>
+#include <cuda/std/__iterator/distance.h>
+#include <cuda/std/__pstl/dispatch.h>
+#include <cuda/std/__utility/move.h>
+
+#include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD
+
+//! @brief Applies @p __func to the first @p __orig_n elements of the sequence starting at @p __first.
+//!
+//! If a dispatch for `__pstl_algorithm::__for_each_n` is available for @p _Policy, the work is handed to that
+//! dispatch; otherwise the serial `for_each_n` is used.
+//!
+//! @param __pol The execution policy, forwarded to the dispatch
+//! @param __first Iterator to the first element
+//! @param __orig_n Number of elements to process
+//! @param __func The function object to apply
+template <::cuda::std::execution::__execution_policy _Policy, class _ForwardIterator, class _Size, class _Function>
+_CCCL_API void for_each_n(
+  const ::cuda::std::execution::__policy<_Policy>& __pol, _ForwardIterator __first, _Size __orig_n, _Function __func)
+{
+  [[maybe_unused]] auto __dispatch =
+    ::cuda::std::execution::__pstl_select_dispatch<::cuda::std::execution::__pstl_algorithm::__for_each_n, _Policy>();
+  if constexpr (::cuda::std::execution::__pstl_can_dispatch<decltype(__dispatch)>)
+  {
+    __dispatch(__pol, ::cuda::std::move(__first), __orig_n, ::cuda::std::move(__func));
+  }
+  else
+  {
+    // Only run serially when nothing was dispatched. Otherwise the algorithm would be executed a second time,
+    // using the already moved-from `__first` and `__func`.
+    ::cuda::std::for_each_n(::cuda::std::move(__first), __orig_n, ::cuda::std::move(__func));
+  }
+}
+
+//! @brief Applies @p __func to every element of the range `[__first, __last)`.
+//!
+//! If a dispatch for `__pstl_algorithm::__for_each_n` is available for @p _Policy, the range is handed to that
+//! dispatch as a start iterator plus its length; otherwise the serial `for_each` is used.
+//!
+//! @param __pol The execution policy, forwarded to the dispatch
+//! @param __first Iterator to the first element
+//! @param __last Iterator past the last element
+//! @param __func The function object to apply
+template <::cuda::std::execution::__execution_policy _Policy, class _ForwardIterator, class _Function>
+_CCCL_API void for_each(const ::cuda::std::execution::__policy<_Policy>& __pol,
+                        _ForwardIterator __first,
+                        _ForwardIterator __last,
+                        _Function __func)
+{
+  [[maybe_unused]] auto __dispatch =
+    ::cuda::std::execution::__pstl_select_dispatch<::cuda::std::execution::__pstl_algorithm::__for_each_n, _Policy>();
+  if constexpr (::cuda::std::execution::__pstl_can_dispatch<decltype(__dispatch)>)
+  {
+    // Compute the length before `__first` is moved from. The evaluation order of function arguments is
+    // unspecified, so doing both inside the call could read a moved-from iterator.
+    const auto __count = ::cuda::std::distance(__first, __last);
+    __dispatch(__pol, ::cuda::std::move(__first), __count, ::cuda::std::move(__func));
+  }
+  else
+  {
+    // Only run serially when nothing was dispatched. Otherwise the algorithm would be executed a second time,
+    // using the already moved-from arguments.
+    ::cuda::std::for_each(::cuda::std::move(__first), ::cuda::std::move(__last), ::cuda::std::move(__func));
+  }
+}
+
+_CCCL_END_NAMESPACE_CUDA_STD
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _LIBCUDACXX___PSTL_FOR_EACH_H
diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/__config b/libcudacxx/include/cuda/std/detail/libcxx/include/__config
@@ -24,6 +24,7 @@
 #include <cuda/std/__internal/cpp_dialect.h>
 #include <cuda/std/__internal/features.h>
 #include <cuda/std/__internal/namespaces.h>
+#include <cuda/std/__internal/pstl_config.h>
 
 #ifdef __cplusplus
 
diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/algorithm b/libcudacxx/include/cuda/std/detail/libcxx/include/algorithm
@@ -122,6 +122,7 @@
 #include <cuda/std/__iterator/wrap_iter.h>
 #include <cuda/std/__memory/destruct_n.h>
 #include <cuda/std/__memory/temporary_buffer.h>
+#include <cuda/std/__pstl/for_each.h>
 #include <cuda/std/__random/linear_congruential_engine.h>
 #include <cuda/std/__random/uniform_int_distribution.h>
 #include <cuda/std/__type_traits/common_type.h>
diff --git a/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl_for_each.pass.cpp b/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl_for_each.pass.cpp
diff --git a/libcudacxx/test/support/test_execution_policies.h b/libcudacxx/test/support/test_execution_policies.h