NVIDIA · miscco · Nov 25, 2025 · Nov 10, 2025 · Nov 10, 2025 · Nov 17, 2025
@@ -16,7 +16,6 @@
 #include <cub/device/dispatch/dispatch_reduce.cuh>
 #include <cub/iterator/arg_index_input_iterator.cuh>
 
-#include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/iterator_adaptor.h>
 
 #include <cuda/__iterator/tabulate_output_iterator.h>

@@ -44,48 +44,48 @@ struct any_execution_policy
 
   _CCCL_HIDE_FROM_ABI any_execution_policy() = default;
 
-  template <__execution_policy _Policy>
-  _CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__policy<_Policy>) noexcept
-      : value(_Policy)
+  template <uint32_t _Policy> // _Policy: the packed policy bits carried by the compile-time policy tag
+  _CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__execution_policy_base<_Policy>) noexcept
+      : value(value_type{_Policy}) // NOTE(review): braces reject narrowing; confirm value_type can hold backend bits 8-15 - TODO verify
   {}
 
   _CCCL_HOST_API constexpr operator __execution_policy() const noexcept // implicit conversion to __execution_policy
   {
     return value; // the stored runtime policy value
   }
 
-  _CCCL_HOST_API constexpr auto operator()() const noexcept -> __execution_policy
+  _CCCL_HOST_API constexpr auto operator()() const noexcept -> value_type // call syntax: yields the stored value as value_type
   {
     return value;
   }
 
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator==(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept
+  operator==(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept
   {
-    return pol.value == _Policy;
+    return pol.value == value_type{_Policy}; // compares the stored runtime value with the tag's compile-time _Policy
   }
 
 #if _CCCL_STD_VER <= 2017
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator==(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol) noexcept
+  operator==(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol) noexcept
   {
-    return pol.value == _Policy;
+    return pol.value == value_type{_Policy}; // reversed-operand form; only compiled when _CCCL_STD_VER <= 2017
   }
 
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept
+  operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept
   {
-    return pol.value != _Policy;
+    return pol.value != value_type{_Policy}; // negated comparison; only compiled when _CCCL_STD_VER <= 2017
   }
 
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator!=(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol)
+  operator!=(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol) noexcept
   {
-    return pol.value != _Policy;
+    return pol.value != value_type{_Policy}; // reversed-operand form; only compiled when _CCCL_STD_VER <= 2017
   }
 #endif // _CCCL_STD_VER <= 2017
 

@@ -53,5 +53,7 @@ function(libcudacxx_build_compiler_targets)
       # order matters here, we need the libcudacxx options to override the cccl options.
       cccl.compiler_interface
       libcudacxx.compiler_flags
+      Thrust::Thrust
+      CUB::CUB
   )
 endfunction()
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___EXECUTION_POLICY_H
+#define _CUDA___EXECUTION_POLICY_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_BACKEND_CUDA()
+
+#  include <cuda/__fwd/execution_policy.h>
+#  include <cuda/std/__execution/policy.h>
+#  include <cuda/std/__type_traits/is_execution_policy.h>
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+template <uint32_t _Policy> // _Policy: the packed policy bits
+struct _CCCL_DECLSPEC_EMPTY_BASES __execution_policy_base<_Policy, __execution_backend::__cuda>
+    : __execution_policy_base<_Policy, __execution_backend::__none>
+{}; // CUDA-backend specialization: declares no members of its own and inherits everything from the __none-backend base
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION
+
+using __cub_parallel_unsequenced_policy = // __parallel_unsequenced policy bits with the backend byte set to __cuda
+  ::cuda::std::execution::__execution_policy_base<::cuda::std::execution::__with_cuda_backend<static_cast<uint32_t>(
+    ::cuda::std::execution::__execution_policy::__parallel_unsequenced)>()>;
+_CCCL_GLOBAL_CONSTANT __cub_parallel_unsequenced_policy __cub_par_unseq{}; // tag object of that policy type
+
+_CCCL_END_NAMESPACE_CUDA_EXECUTION
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CCCL_HAS_BACKEND_CUDA()
+
+#endif // _CUDA___EXECUTION_POLICY_H
@@ -0,0 +1,47 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___FWD_EXECUTION_POLICY_H
+#define _CUDA___FWD_EXECUTION_POLICY_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_BACKEND_CUDA()
+
+#  include <cuda/std/__fwd/execution_policy.h>
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+//! @brief Returns @c _Policy with its backend byte (bits 8-15) overwritten by `__execution_backend::__cuda`
+template <uint32_t _Policy>
+[[nodiscard]] _CCCL_API constexpr uint32_t __with_cuda_backend() noexcept
+{
+  // Drop whatever backend byte was set before; every other bit of the policy is kept as is.
+  constexpr uint32_t __without_backend = _Policy & ~uint32_t{0x0000FF00};
+  constexpr uint32_t __cuda_bits       = static_cast<uint32_t>(__execution_backend::__cuda) << 8;
+  return __without_backend | __cuda_bits;
+}
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CCCL_HAS_BACKEND_CUDA()
+
+#endif // _CUDA___FWD_EXECUTION_POLICY_H
@@ -20,65 +20,67 @@
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__type_traits/underlying_type.h>
+#include <cuda/std/__bit/has_single_bit.h>
+#include <cuda/std/__fwd/execution_policy.h>
 #include <cuda/std/cstdint>
 
 #include <cuda/std/__cccl/prologue.h>
 
 _CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
 
-enum class __execution_policy : uint32_t
+[[nodiscard]] _CCCL_API constexpr bool __has_unique_backend(const __execution_backend __backends) noexcept
 {
-  __invalid_execution_policy = 0,
-  __sequenced                = 1 << 0,
-  __parallel                 = 1 << 1,
-  __unsequenced              = 1 << 2,
-  __parallel_unsequenced     = __execution_policy::__parallel | __execution_policy::__unsequenced,
-};
-
-[[nodiscard]] _CCCL_API constexpr bool
-__satisfies_execution_policy(__execution_policy __lhs, __execution_policy __rhs) noexcept
-{
-  return (static_cast<uint32_t>(__lhs) & static_cast<uint32_t>(__rhs)) != 0;
+  return ::cuda::std::has_single_bit(static_cast<uint32_t>(__backends)); // true only when exactly one bit is set, so the value 0 (__none) yields false
 }
 
-template <__execution_policy _Policy>
-struct __policy
+//! @brief Base class for our execution policies; @c _Policy packs the policy (bits 0-7) and the backend (bits 8-15).
+//! It takes an untagged uint32_t because we want to be able to store 3 different enumerations in it. NOTE(review): only two are decoded in the visible code - confirm the third.
+template <uint32_t _Policy, __execution_backend _Backend>
+struct __execution_policy_base
 {
-  template <__execution_policy _OtherPolicy>
-  [[nodiscard]] _CCCL_API friend constexpr bool operator==(const __policy&, const __policy<_OtherPolicy>&) noexcept
+  //! @brief Tag that identifies this and all derived classes as a CCCL execution policy; holds the packed _Policy value
+  static constexpr uint32_t __cccl_policy_ = _Policy;
+
+  template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
+  [[nodiscard]] _CCCL_API friend constexpr bool
+  operator==(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept
   {
-    using __underlying_t = underlying_type_t<__execution_policy>;
-    return (static_cast<__underlying_t>(_Policy) == static_cast<__underlying_t>(_OtherPolicy));
+    return _Policy == _OtherPolicy; // only the packed values are compared; _Backend and _OtherBackend do not take part
   }
 
 #if _CCCL_STD_VER <= 2017
-  template <__execution_policy _OtherPolicy>
-  [[nodiscard]] _CCCL_API friend constexpr bool operator!=(const __policy&, const __policy<_OtherPolicy>&) noexcept
+  template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
+  [[nodiscard]] _CCCL_API friend constexpr bool
+  operator!=(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept
   {
-    using __underlying_t = underlying_type_t<__execution_policy>;
-    return (static_cast<__underlying_t>(_Policy) != static_cast<__underlying_t>(_OtherPolicy));
+    return _Policy != _OtherPolicy; // negation of the comparison above, again on the packed values only
   }
 #endif // _CCCL_STD_VER <= 2017
 
-  static constexpr __execution_policy __policy_ = _Policy;
-};
+  //! @brief Extracts the execution policy from the stored _Policy (forwards to __policy_to_execution_policy)
+  [[nodiscard]] _CCCL_API static constexpr __execution_policy __get_policy() noexcept
+  {
+    return __policy_to_execution_policy<_Policy>;
+  }
 
-struct sequenced_policy : public __policy<__execution_policy::__sequenced>
-{};
+  //! @brief Extracts the execution backend from the stored _Policy (forwards to __policy_to_execution_backend)
+  [[nodiscard]] _CCCL_API static constexpr __execution_backend __get_backend() noexcept
+  {
+    return __policy_to_execution_backend<_Policy>;
+  }
+};
 
+using sequenced_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__sequenced)>; // only the policy value is passed; no backend argument is given
 _CCCL_GLOBAL_CONSTANT sequenced_policy seq{};
 
-struct parallel_policy : public __policy<__execution_policy::__parallel>
-{};
+using parallel_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__parallel)>; // alias of the base template, replacing the former derived struct
 _CCCL_GLOBAL_CONSTANT parallel_policy par{};
 
-struct parallel_unsequenced_policy : public __policy<__execution_policy::__parallel_unsequenced>
-{};
+using parallel_unsequenced_policy = // alias for the combined __parallel_unsequenced value
+  __execution_policy_base<static_cast<uint32_t>(__execution_policy::__parallel_unsequenced)>;
 _CCCL_GLOBAL_CONSTANT parallel_unsequenced_policy par_unseq{};
 
-struct unsequenced_policy : public __policy<__execution_policy::__unsequenced>
-{};
+using unsequenced_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__unsequenced)>; // alias for the __unsequenced value
 _CCCL_GLOBAL_CONSTANT unsequenced_policy unseq{};
 
 _CCCL_END_NAMESPACE_CUDA_STD_EXECUTION

@@ -0,0 +1,73 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___FWD_EXECUTION_POLICY_H
+#define _CUDA_STD___FWD_EXECUTION_POLICY_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/cstdint>
+
+#include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+//! @brief Bit flags for the standard execution policies; the underlying type is a single byte
+enum class __execution_policy : uint8_t
+{
+  __invalid_execution_policy = 0x00, //!< no policy bit set
+  __sequenced                = 0x01, //!< bit 0
+  __parallel                 = 0x02, //!< bit 1
+  __unsequenced              = 0x04, //!< bit 2
+  __parallel_unsequenced     = __parallel | __unsequenced, //!< both the parallel and the unsequenced bit
+};
+
+//! @brief Decodes the execution policy kept in the low byte (bits 0-7) of the packed @c _Policy value
+template <uint32_t _Policy>
+inline constexpr __execution_policy __policy_to_execution_policy = static_cast<__execution_policy>(_Policy & 0xFFu);
+
+//! @brief Enumerates the different backends we support; every backend other than __none has its own bit
+//! @note Not an enum class because a user might specify multiple backends
+enum __execution_backend : uint8_t
+{
+  // The backends we provide; an enumerator exists only when its _CCCL_HAS_BACKEND_*() check evaluates to true
+  __none = 0,
+#if _CCCL_HAS_BACKEND_CUDA()
+  __cuda = 1 << 1, // NOTE(review): bit 0 is not assigned to any backend here - confirm this is intentional
+#endif // _CCCL_HAS_BACKEND_CUDA()
+#if _CCCL_HAS_BACKEND_OMP()
+  __omp = 1 << 2,
+#endif // _CCCL_HAS_BACKEND_OMP()
+#if _CCCL_HAS_BACKEND_TBB()
+  __tbb = 1 << 3,
+#endif // _CCCL_HAS_BACKEND_TBB()
+};
+
+//! @brief Decodes the execution backend kept in the second byte (bits 8-15) of the packed @c _Policy value
+template <uint32_t _Policy>
+inline constexpr __execution_backend __policy_to_execution_backend =
+  static_cast<__execution_backend>((_Policy >> 8) & 0xFFu);
+
+template <uint32_t _Policy, __execution_backend _Backend = __policy_to_execution_backend<_Policy>> // _Backend defaults to the backend decoded from _Policy
+struct __execution_policy_base; // declaration only; this header provides no definition
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA_STD___FWD_EXECUTION_POLICY_H
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___INTERNAL_PSTL_CONFIG_H
+#define _CUDA_STD___INTERNAL_PSTL_CONFIG_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__cccl/prologue.h>
+
+#define _CCCL_HAS_BACKEND_CUDA() (_CCCL_CUDA_COMPILATION() && !_CCCL_COMPILER(NVRTC)) // parenthesized so `!_CCCL_HAS_BACKEND_CUDA()` negates the whole condition
+#define _CCCL_HAS_BACKEND_OMP()  0 // always expands to 0 in this header
+#define _CCCL_HAS_BACKEND_TBB()  0 // always expands to 0 in this header
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA_STD___INTERNAL_PSTL_CONFIG_H