NVIDIA
diff --git a/‎cub/cub/device/dispatch/dispatch_streaming_reduce.cuh‎
Lines changed: 0 additions & 1 deletion b/‎cub/cub/device/dispatch/dispatch_streaming_reduce.cuh‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎cudax/include/cuda/experimental/__execution/policy.cuh‎
Lines changed: 16 additions & 16 deletions b/‎cudax/include/cuda/experimental/__execution/policy.cuh‎
Lines changed: 16 additions & 16 deletions
diff --git a/‎libcudacxx/cmake/LibcudacxxBuildCompilerTargets.cmake‎
Lines changed: 2 additions & 0 deletions b/‎libcudacxx/cmake/LibcudacxxBuildCompilerTargets.cmake‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎libcudacxx/include/cuda/__execution/policy.h‎
Lines changed: 53 additions & 0 deletions b/‎libcudacxx/include/cuda/__execution/policy.h‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎libcudacxx/include/cuda/__fwd/execution_policy.h‎
Lines changed: 47 additions & 0 deletions b/‎libcudacxx/include/cuda/__fwd/execution_policy.h‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎libcudacxx/include/cuda/std/__execution/policy.h‎
Lines changed: 35 additions & 33 deletions b/‎libcudacxx/include/cuda/std/__execution/policy.h‎
Lines changed: 35 additions & 33 deletions
diff --git a/‎libcudacxx/include/cuda/std/__fwd/execution_policy.h‎
Lines changed: 73 additions & 0 deletions b/‎libcudacxx/include/cuda/std/__fwd/execution_policy.h‎
Lines changed: 73 additions & 0 deletions
diff --git a/‎libcudacxx/include/cuda/std/__internal/pstl_config.h‎
Lines changed: 32 additions & 0 deletions b/‎libcudacxx/include/cuda/std/__internal/pstl_config.h‎
Lines changed: 32 additions & 0 deletions
@@ -16,7 +16,6 @@
 #include <cub/device/dispatch/dispatch_reduce.cuh>
 #include <cub/iterator/arg_index_input_iterator.cuh>
 
-#include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/iterator_adaptor.h>
 
 #include <cuda/__iterator/tabulate_output_iterator.h>
 
@@ -44,48 +44,48 @@ struct any_execution_policy
 
   _CCCL_HIDE_FROM_ABI any_execution_policy() = default;
 
-  template <__execution_policy _Policy>
-  _CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__policy<_Policy>) noexcept
-      : value(_Policy)
+  template <uint32_t _Policy>
+  _CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__execution_policy_base<_Policy>) noexcept
+      : value(value_type{_Policy})
   {}
 
   _CCCL_HOST_API constexpr operator __execution_policy() const noexcept
   {
     return value;
   }
 
-  _CCCL_HOST_API constexpr auto operator()() const noexcept -> __execution_policy
+  _CCCL_HOST_API constexpr auto operator()() const noexcept -> value_type
   {
     return value;
   }
 
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator==(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept
+  operator==(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept
   {
-    return pol.value == _Policy;
+    return pol.value == value_type{_Policy};
   }
 
 #if _CCCL_STD_VER <= 2017
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator==(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol) noexcept
+  operator==(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol) noexcept
   {
-    return pol.value == _Policy;
+    return pol.value == value_type{_Policy};
   }
 
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept
+  operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept
   {
-    return pol.value != _Policy;
+    return pol.value != value_type{_Policy};
   }
 
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator!=(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol)
+  operator!=(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol) noexcept
   {
-    return pol.value != _Policy;
+    return pol.value != value_type{_Policy}; // mirrors the other comparison overloads, which are all noexcept
   }
 #endif // _CCCL_STD_VER <= 2017
 
 
@@ -53,5 +53,7 @@ function(libcudacxx_build_compiler_targets)
       # order matters here, we need the libcudacxx options to override the cccl options.
       cccl.compiler_interface
       libcudacxx.compiler_flags
+      Thrust::Thrust
+      CUB::CUB
   )
 endfunction()
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___EXECUTION_POLICY_H
+#define _CUDA___EXECUTION_POLICY_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_BACKEND_CUDA()
+
+#  include <cuda/__fwd/execution_policy.h>
+#  include <cuda/std/__execution/policy.h>
+#  include <cuda/std/__type_traits/is_execution_policy.h>
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+template <uint32_t _Policy>
+struct _CCCL_DECLSPEC_EMPTY_BASES __execution_policy_base<_Policy, __execution_backend::__cuda>
+    : __execution_policy_base<_Policy, __execution_backend::__none>
+{};
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION
+
+using __cub_parallel_unsequenced_policy =
+  ::cuda::std::execution::__execution_policy_base<::cuda::std::execution::__with_cuda_backend<static_cast<uint32_t>(
+    ::cuda::std::execution::__execution_policy::__parallel_unsequenced)>()>;
+_CCCL_GLOBAL_CONSTANT __cub_parallel_unsequenced_policy __cub_par_unseq{};
+
+_CCCL_END_NAMESPACE_CUDA_EXECUTION
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CCCL_HAS_BACKEND_CUDA()
+
+#endif // _CUDA___EXECUTION_POLICY_H
@@ -0,0 +1,47 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___FWD_EXECUTION_POLICY_H
+#define _CUDA___FWD_EXECUTION_POLICY_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_BACKEND_CUDA()
+
+#  include <cuda/std/__fwd/execution_policy.h>
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+//! @brief Returns _Policy with its backend field (bits 8-15) replaced by the CUDA backend
+template <uint32_t _Policy>
+[[nodiscard]] _CCCL_API constexpr uint32_t __with_cuda_backend() noexcept
+{
+  constexpr uint32_t __backend_mask{0xFFFF00FF}; // keeps every bit except bits 8-15
+  constexpr uint32_t __new_policy =
+    (_Policy & __backend_mask) | (static_cast<uint32_t>(__execution_backend::__cuda) << 8);
+  return __new_policy;
+}
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CCCL_HAS_BACKEND_CUDA()
+
+#endif // _CUDA___FWD_EXECUTION_POLICY_H
@@ -20,65 +20,67 @@
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__type_traits/underlying_type.h>
+#include <cuda/std/__bit/has_single_bit.h>
+#include <cuda/std/__fwd/execution_policy.h>
 #include <cuda/std/cstdint>
 
 #include <cuda/std/__cccl/prologue.h>
 
 _CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
 
-enum class __execution_policy : uint32_t
+[[nodiscard]] _CCCL_API constexpr bool __has_unique_backend(const __execution_backend __backends) noexcept
 {
-  __invalid_execution_policy = 0,
-  __sequenced                = 1 << 0,
-  __parallel                 = 1 << 1,
-  __unsequenced              = 1 << 2,
-  __parallel_unsequenced     = __execution_policy::__parallel | __execution_policy::__unsequenced,
-};
-
-[[nodiscard]] _CCCL_API constexpr bool
-__satisfies_execution_policy(__execution_policy __lhs, __execution_policy __rhs) noexcept
-{
-  return (static_cast<uint32_t>(__lhs) & static_cast<uint32_t>(__rhs)) != 0;
+  return ::cuda::std::has_single_bit(static_cast<uint32_t>(__backends));
 }
 
-template <__execution_policy _Policy>
-struct __policy
+//! @brief Base class for our execution policies.
+//! It takes an untagged uint32_t because we want to be able to store 3 different enumerations in it.
+template <uint32_t _Policy, __execution_backend _Backend>
+struct __execution_policy_base
 {
-  template <__execution_policy _OtherPolicy>
-  [[nodiscard]] _CCCL_API friend constexpr bool operator==(const __policy&, const __policy<_OtherPolicy>&) noexcept
+  //! @brief Tag that identifies this and all derived classes as a CCCL execution policy
+  static constexpr uint32_t __cccl_policy_ = _Policy;
+
+  template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
+  [[nodiscard]] _CCCL_API friend constexpr bool
+  operator==(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept
   {
-    using __underlying_t = underlying_type_t<__execution_policy>;
-    return (static_cast<__underlying_t>(_Policy) == static_cast<__underlying_t>(_OtherPolicy));
+    return _Policy == _OtherPolicy;
   }
 
 #if _CCCL_STD_VER <= 2017
-  template <__execution_policy _OtherPolicy>
-  [[nodiscard]] _CCCL_API friend constexpr bool operator!=(const __policy&, const __policy<_OtherPolicy>&) noexcept
+  template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
+  [[nodiscard]] _CCCL_API friend constexpr bool
+  operator!=(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept
   {
-    using __underlying_t = underlying_type_t<__execution_policy>;
-    return (static_cast<__underlying_t>(_Policy) != static_cast<__underlying_t>(_OtherPolicy));
+    return _Policy != _OtherPolicy;
   }
 #endif // _CCCL_STD_VER <= 2017
 
-  static constexpr __execution_policy __policy_ = _Policy;
-};
+  //! @brief Extracts the execution policy from the stored _Policy
+  [[nodiscard]] _CCCL_API static constexpr __execution_policy __get_policy() noexcept
+  {
+    return __policy_to_execution_policy<_Policy>;
+  }
 
-struct sequenced_policy : public __policy<__execution_policy::__sequenced>
-{};
+  //! @brief Extracts the execution backend from the stored _Policy
+  [[nodiscard]] _CCCL_API static constexpr __execution_backend __get_backend() noexcept
+  {
+    return __policy_to_execution_backend<_Policy>;
+  }
+};
 
+using sequenced_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__sequenced)>;
 _CCCL_GLOBAL_CONSTANT sequenced_policy seq{};
 
-struct parallel_policy : public __policy<__execution_policy::__parallel>
-{};
+using parallel_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__parallel)>;
 _CCCL_GLOBAL_CONSTANT parallel_policy par{};
 
-struct parallel_unsequenced_policy : public __policy<__execution_policy::__parallel_unsequenced>
-{};
+using parallel_unsequenced_policy =
+  __execution_policy_base<static_cast<uint32_t>(__execution_policy::__parallel_unsequenced)>;
 _CCCL_GLOBAL_CONSTANT parallel_unsequenced_policy par_unseq{};
 
-struct unsequenced_policy : public __policy<__execution_policy::__unsequenced>
-{};
+using unsequenced_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__unsequenced)>;
 _CCCL_GLOBAL_CONSTANT unsequenced_policy unseq{};
 
 _CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
 
@@ -0,0 +1,73 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___FWD_EXECUTION_POLICY_H
+#define _CUDA_STD___FWD_EXECUTION_POLICY_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/cstdint>
+
+#include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+//! @brief Enumerates the standard execution policies
+enum class __execution_policy : uint8_t
+{
+  __invalid_execution_policy = 0,
+  __sequenced                = 1 << 0,
+  __parallel                 = 1 << 1,
+  __unsequenced              = 1 << 2,
+  __parallel_unsequenced     = __execution_policy::__parallel | __execution_policy::__unsequenced,
+};
+
+//! @brief Extracts the execution policy stored in the low 8 bits of _Policy
+template <uint32_t _Policy>
+inline constexpr __execution_policy __policy_to_execution_policy = __execution_policy{(_Policy & uint32_t{0x000000FF})};
+
+//! @brief Enumerates the different backends we support
+//! @note Not an enum class because a user might specify multiple backends
+enum __execution_backend : uint8_t
+{
+  // The backends we provide
+  __none = 0,
+#if _CCCL_HAS_BACKEND_CUDA()
+  __cuda = 1 << 1,
+#endif // _CCCL_HAS_BACKEND_CUDA()
+#if _CCCL_HAS_BACKEND_OMP()
+  __omp = 1 << 2,
+#endif // _CCCL_HAS_BACKEND_OMP()
+#if _CCCL_HAS_BACKEND_TBB()
+  __tbb = 1 << 3,
+#endif // _CCCL_HAS_BACKEND_TBB()
+};
+
+//! @brief Extracts the execution backend stored in bits 8-15 of _Policy
+template <uint32_t _Policy>
+inline constexpr __execution_backend __policy_to_execution_backend =
+  __execution_backend{(_Policy & uint32_t{0x0000FF00}) >> 8};
+
+template <uint32_t _Policy, __execution_backend _Backend = __policy_to_execution_backend<_Policy>>
+struct __execution_policy_base;
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA_STD___FWD_EXECUTION_POLICY_H
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___INTERNAL_PSTL_CONFIG_H
+#define _CUDA_STD___INTERNAL_PSTL_CONFIG_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__cccl/prologue.h>
+
+#define _CCCL_HAS_BACKEND_CUDA() (_CCCL_CUDA_COMPILATION() && !_CCCL_COMPILER(NVRTC)) // parenthesized so `!X()` and `X() || Y` apply to the whole condition
+#define _CCCL_HAS_BACKEND_OMP()  0
+#define _CCCL_HAS_BACKEND_TBB()  0
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA_STD___INTERNAL_PSTL_CONFIG_H
Original file line number	Diff line number	Diff line change
`@@ -44,48 +44,48 @@ struct any_execution_policy`
`44`	`44`
`45`	`45`	`_CCCL_HIDE_FROM_ABI any_execution_policy() = default;`
`46`	`46`
`47`		`- template <__execution_policy _Policy>`
`48`		`- _CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__policy<_Policy>) noexcept`
`49`		`- : value(_Policy)`
	`47`	`+ template <uint32_t _Policy>`
	`48`	`+ _CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__execution_policy_base<_Policy>) noexcept`
	`49`	`+ : value(value_type{_Policy})`
`50`	`50`	`{}`
`51`	`51`
`52`	`52`	`_CCCL_HOST_API constexpr operator __execution_policy() const noexcept`
`53`	`53`	`{`
`54`	`54`	`return value;`
`55`	`55`	`}`
`56`	`56`
`57`		`- _CCCL_HOST_API constexpr auto operator()() const noexcept -> __execution_policy`
	`57`	`+ _CCCL_HOST_API constexpr auto operator()() const noexcept -> value_type`
`58`	`58`	`{`
`59`	`59`	`return value;`
`60`	`60`	`}`
`61`	`61`
`62`		`- template <__execution_policy _Policy>`
	`62`	`+ template <uint32_t _Policy>`
`63`	`63`	`[[nodiscard]] _CCCL_HOST_API friend constexpr bool`
`64`		`- operator==(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept`
	`64`	`+ operator==(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept`
`65`	`65`	`{`
`66`		`- return pol.value == _Policy;`
	`66`	`+ return pol.value == value_type{_Policy};`
`67`	`67`	`}`
`68`	`68`
`69`	`69`	`#if _CCCL_STD_VER <= 2017`
`70`		`- template <__execution_policy _Policy>`
	`70`	`+ template <uint32_t _Policy>`
`71`	`71`	`[[nodiscard]] _CCCL_HOST_API friend constexpr bool`
`72`		`- operator==(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol) noexcept`
	`72`	`+ operator==(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol) noexcept`
`73`	`73`	`{`
`74`		`- return pol.value == _Policy;`
	`74`	`+ return pol.value == value_type{_Policy};`
`75`	`75`	`}`
`76`	`76`
`77`		`- template <__execution_policy _Policy>`
	`77`	`+ template <uint32_t _Policy>`
`78`	`78`	`[[nodiscard]] _CCCL_HOST_API friend constexpr bool`
`79`		`- operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept`
	`79`	`+ operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept`
`80`	`80`	`{`
`81`		`- return pol.value != _Policy;`
	`81`	`+ return pol.value != value_type{_Policy};`
`82`	`82`	`}`
`83`	`83`
`84`		`- template <__execution_policy _Policy>`
	`84`	`+ template <uint32_t _Policy>`
`85`	`85`	`[[nodiscard]] _CCCL_HOST_API friend constexpr bool`
`86`		`- operator!=(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol)`
	`86`	`+ operator!=(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol)`
`87`	`87`	`{`
`88`		`- return pol.value != _Policy;`
	`88`	`+ return pol.value != value_type{_Policy};`
`89`	`89`	`}`
`90`	`90`	`#endif // _CCCL_STD_VER <= 2017`
`91`	`91`
Original file line number	Diff line number	Diff line change
`@@ -53,5 +53,7 @@ function(libcudacxx_build_compiler_targets)`
`53`	`53`	`# order matters here, we need the libcudacxx options to override the cccl options.`
`54`	`54`	`cccl.compiler_interface`
`55`	`55`	`libcudacxx.compiler_flags`
	`56`	`+ Thrust::Thrust`
	`57`	`+ CUB::CUB`
`56`	`58`	`)`
`57`	`59`	`endfunction()`