diff --git a/cub/cub/device/dispatch/dispatch_streaming_reduce.cuh b/cub/cub/device/dispatch/dispatch_streaming_reduce.cuh
index 533bb5cedaf..933f489be6f 100644
--- a/cub/cub/device/dispatch/dispatch_streaming_reduce.cuh
+++ b/cub/cub/device/dispatch/dispatch_streaming_reduce.cuh
@@ -16,7 +16,6 @@
 #include <cub/device/dispatch/dispatch_reduce.cuh>
 #include <cub/iterator/arg_index_input_iterator.cuh>
 
-#include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/iterator_adaptor.h>
 
 #include <cuda/__iterator/tabulate_output_iterator.h>
diff --git a/cudax/include/cuda/experimental/__execution/policy.cuh b/cudax/include/cuda/experimental/__execution/policy.cuh
index e5f9248aae9..d004dfa06a5 100644
--- a/cudax/include/cuda/experimental/__execution/policy.cuh
+++ b/cudax/include/cuda/experimental/__execution/policy.cuh
@@ -44,9 +44,9 @@ struct any_execution_policy
 
   _CCCL_HIDE_FROM_ABI any_execution_policy() = default;
 
-  template <__execution_policy _Policy>
-  _CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__policy<_Policy>) noexcept
-      : value(_Policy)
+  template <uint32_t _Policy>
+  _CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__execution_policy_base<_Policy>) noexcept
+      : value(value_type{_Policy})
   {}
 
   _CCCL_HOST_API constexpr operator __execution_policy() const noexcept
@@ -54,38 +54,38 @@ struct any_execution_policy
     return value;
   }
 
-  _CCCL_HOST_API constexpr auto operator()() const noexcept -> __execution_policy
+  _CCCL_HOST_API constexpr auto operator()() const noexcept -> value_type
   {
     return value;
   }
 
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator==(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept
+  operator==(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept
   {
-    return pol.value == _Policy;
+    return pol.value == value_type{_Policy};
   }
 
 #if _CCCL_STD_VER <= 2017
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator==(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol) noexcept
+  operator==(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol) noexcept
   {
-    return pol.value == _Policy;
+    return pol.value == value_type{_Policy};
   }
 
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept
+  operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept
   {
-    return pol.value != _Policy;
+    return pol.value != value_type{_Policy};
   }
 
-  template <__execution_policy _Policy>
+  template <uint32_t _Policy>
   [[nodiscard]] _CCCL_HOST_API friend constexpr bool
-  operator!=(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol)
+  operator!=(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol)
   {
-    return pol.value != _Policy;
+    return pol.value != value_type{_Policy};
   }
 #endif // _CCCL_STD_VER <= 2017
 
diff --git a/libcudacxx/cmake/LibcudacxxBuildCompilerTargets.cmake b/libcudacxx/cmake/LibcudacxxBuildCompilerTargets.cmake
index 961721d6d5c..ac97d395c28 100644
--- a/libcudacxx/cmake/LibcudacxxBuildCompilerTargets.cmake
+++ b/libcudacxx/cmake/LibcudacxxBuildCompilerTargets.cmake
@@ -53,5 +53,7 @@ function(libcudacxx_build_compiler_targets)
       # order matters here, we need the libcudacxx options to override the cccl options.
       cccl.compiler_interface
       libcudacxx.compiler_flags
+      Thrust::Thrust
+      CUB::CUB
   )
 endfunction()
diff --git a/libcudacxx/include/cuda/__execution/policy.h b/libcudacxx/include/cuda/__execution/policy.h
new file mode 100644
index 00000000000..3613a4099e9
--- /dev/null
+++ b/libcudacxx/include/cuda/__execution/policy.h
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___EXECUTION_POLICY_H
+#define _CUDA___EXECUTION_POLICY_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_BACKEND_CUDA()
+
+#  include <cuda/__fwd/execution_policy.h>
+#  include <cuda/std/__execution/policy.h>
+#  include <cuda/std/__type_traits/is_execution_policy.h>
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+template <uint32_t _Policy>
+struct _CCCL_DECLSPEC_EMPTY_BASES __execution_policy_base<_Policy, __execution_backend::__cuda>
+    : __execution_policy_base<_Policy, __execution_backend::__none>
+{};
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION
+
+using __cub_parallel_unsequenced_policy =
+  ::cuda::std::execution::__execution_policy_base<::cuda::std::execution::__with_cuda_backend<static_cast<uint32_t>(
+    ::cuda::std::execution::__execution_policy::__parallel_unsequenced)>()>;
+_CCCL_GLOBAL_CONSTANT __cub_parallel_unsequenced_policy __cub_par_unseq{};
+
+_CCCL_END_NAMESPACE_CUDA_EXECUTION
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CCCL_HAS_BACKEND_CUDA()
+
+#endif // _CUDA___EXECUTION_POLICY_H
diff --git a/libcudacxx/include/cuda/__fwd/execution_policy.h b/libcudacxx/include/cuda/__fwd/execution_policy.h
new file mode 100644
index 00000000000..d0928b10291
--- /dev/null
+++ b/libcudacxx/include/cuda/__fwd/execution_policy.h
@@ -0,0 +1,47 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA___FWD_EXECUTION_POLICY_H
+#define _CUDA___FWD_EXECUTION_POLICY_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_BACKEND_CUDA()
+
+#  include <cuda/std/__fwd/execution_policy.h>
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+//! @brief Sets the execution backend to cuda
+template <uint32_t _Policy>
+[[nodiscard]] _CCCL_API constexpr uint32_t __with_cuda_backend() noexcept
+{
+  constexpr uint32_t __backend_mask{0xFFFF00FF};
+  constexpr uint32_t __new_policy =
+    (_Policy & __backend_mask) | (static_cast<uint32_t>(__execution_backend::__cuda) << 8);
+  return __new_policy;
+}
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CCCL_HAS_BACKEND_CUDA()
+
+#endif // _CUDA___FWD_EXECUTION_POLICY_H
diff --git a/libcudacxx/include/cuda/std/__execution/policy.h b/libcudacxx/include/cuda/std/__execution/policy.h
index 4ee74d48fc0..d4bf5e05c4a 100644
--- a/libcudacxx/include/cuda/std/__execution/policy.h
+++ b/libcudacxx/include/cuda/std/__execution/policy.h
@@ -20,65 +20,67 @@
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__type_traits/underlying_type.h>
+#include <cuda/std/__bit/has_single_bit.h>
+#include <cuda/std/__fwd/execution_policy.h>
 #include <cuda/std/cstdint>
 
 #include <cuda/std/__cccl/prologue.h>
 
 _CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
 
-enum class __execution_policy : uint32_t
+[[nodiscard]] _CCCL_API constexpr bool __has_unique_backend(const __execution_backend __backends) noexcept
 {
-  __invalid_execution_policy = 0,
-  __sequenced                = 1 << 0,
-  __parallel                 = 1 << 1,
-  __unsequenced              = 1 << 2,
-  __parallel_unsequenced     = __execution_policy::__parallel | __execution_policy::__unsequenced,
-};
-
-[[nodiscard]] _CCCL_API constexpr bool
-__satisfies_execution_policy(__execution_policy __lhs, __execution_policy __rhs) noexcept
-{
-  return (static_cast<uint32_t>(__lhs) & static_cast<uint32_t>(__rhs)) != 0;
+  return ::cuda::std::has_single_bit(static_cast<uint32_t>(__backends));
 }
 
-template <__execution_policy _Policy>
-struct __policy
+//! @brief Base class for our execution policies.
+//! It takes an untagged uint32_t because we want to be able to store 3 different enumerations in it.
+template <uint32_t _Policy, __execution_backend _Backend>
+struct __execution_policy_base
 {
-  template <__execution_policy _OtherPolicy>
-  [[nodiscard]] _CCCL_API friend constexpr bool operator==(const __policy&, const __policy<_OtherPolicy>&) noexcept
+  //! @brief Tag that identifies this and all derived classes as a CCCL execution policy
+  static constexpr uint32_t __cccl_policy_ = _Policy;
+
+  template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
+  [[nodiscard]] _CCCL_API friend constexpr bool
+  operator==(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept
   {
-    using __underlying_t = underlying_type_t<__execution_policy>;
-    return (static_cast<__underlying_t>(_Policy) == static_cast<__underlying_t>(_OtherPolicy));
+    return _Policy == _OtherPolicy;
   }
 
 #if _CCCL_STD_VER <= 2017
-  template <__execution_policy _OtherPolicy>
-  [[nodiscard]] _CCCL_API friend constexpr bool operator!=(const __policy&, const __policy<_OtherPolicy>&) noexcept
+  template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
+  [[nodiscard]] _CCCL_API friend constexpr bool
+  operator!=(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept
   {
-    using __underlying_t = underlying_type_t<__execution_policy>;
-    return (static_cast<__underlying_t>(_Policy) != static_cast<__underlying_t>(_OtherPolicy));
+    return _Policy != _OtherPolicy;
   }
 #endif // _CCCL_STD_VER <= 2017
 
-  static constexpr __execution_policy __policy_ = _Policy;
-};
+  //! @brief Extracts the execution policy from the stored _Policy
+  [[nodiscard]] _CCCL_API static constexpr __execution_policy __get_policy() noexcept
+  {
+    return __policy_to_execution_policy<_Policy>;
+  }
 
-struct sequenced_policy : public __policy<__execution_policy::__sequenced>
-{};
+  //! @brief Extracts the execution backend from the stored _Policy
+  [[nodiscard]] _CCCL_API static constexpr __execution_backend __get_backend() noexcept
+  {
+    return __policy_to_execution_backend<_Policy>;
+  }
+};
 
+using sequenced_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__sequenced)>;
 _CCCL_GLOBAL_CONSTANT sequenced_policy seq{};
 
-struct parallel_policy : public __policy<__execution_policy::__parallel>
-{};
+using parallel_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__parallel)>;
 _CCCL_GLOBAL_CONSTANT parallel_policy par{};
 
-struct parallel_unsequenced_policy : public __policy<__execution_policy::__parallel_unsequenced>
-{};
+using parallel_unsequenced_policy =
+  __execution_policy_base<static_cast<uint32_t>(__execution_policy::__parallel_unsequenced)>;
 _CCCL_GLOBAL_CONSTANT parallel_unsequenced_policy par_unseq{};
 
-struct unsequenced_policy : public __policy<__execution_policy::__unsequenced>
-{};
+using unsequenced_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__unsequenced)>;
 _CCCL_GLOBAL_CONSTANT unsequenced_policy unseq{};
 
 _CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
diff --git a/libcudacxx/include/cuda/std/__fwd/execution_policy.h b/libcudacxx/include/cuda/std/__fwd/execution_policy.h
new file mode 100644
index 00000000000..61f183e13bf
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__fwd/execution_policy.h
@@ -0,0 +1,73 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___FWD_EXECUTION_POLICY_H
+#define _CUDA_STD___FWD_EXECUTION_POLICY_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/cstdint>
+
+#include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+//! @brief Enumerates the standard execution policies
+enum class __execution_policy : uint8_t
+{
+  __invalid_execution_policy = 0,
+  __sequenced                = 1 << 0,
+  __parallel                 = 1 << 1,
+  __unsequenced              = 1 << 2,
+  __parallel_unsequenced     = __execution_policy::__parallel | __execution_policy::__unsequenced,
+};
+
+//! @brief Extracts the execution policy from the stored _Policy
+template <uint32_t _Policy>
+inline constexpr __execution_policy __policy_to_execution_policy = __execution_policy{(_Policy & uint32_t{0x000000FF})};
+
+//! @brief Enumerates the different backends we support
+//! @note Not an enum class because a user might specify multiple backends
+enum __execution_backend : uint8_t
+{
+  // The backends we provide
+  __none = 0,
+#if _CCCL_HAS_BACKEND_CUDA()
+  __cuda = 1 << 1,
+#endif // _CCCL_HAS_BACKEND_CUDA()
+#if _CCCL_HAS_BACKEND_OMP()
+  __omp = 1 << 2,
+#endif // _CCCL_HAS_BACKEND_OMP()
+#if _CCCL_HAS_BACKEND_TBB()
+  __tbb = 1 << 3,
+#endif // _CCCL_HAS_BACKEND_TBB()
+};
+
+//! @brief Extracts the execution backend from the stored _Policy
+template <uint32_t _Policy>
+inline constexpr __execution_backend __policy_to_execution_backend =
+  __execution_backend{(_Policy & uint32_t{0x0000FF00}) >> 8};
+
+template <uint32_t _Policy, __execution_backend _Backend = __policy_to_execution_backend<_Policy>>
+struct __execution_policy_base;
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA_STD___FWD_EXECUTION_POLICY_H
diff --git a/libcudacxx/include/cuda/std/__internal/pstl_config.h b/libcudacxx/include/cuda/std/__internal/pstl_config.h
new file mode 100644
index 00000000000..9c08c05294d
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__internal/pstl_config.h
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___INTERNAL_PSTL_CONFIG_H
+#define _CUDA_STD___INTERNAL_PSTL_CONFIG_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__cccl/prologue.h>
+
+#define _CCCL_HAS_BACKEND_CUDA() _CCCL_CUDA_COMPILATION() && !_CCCL_COMPILER(NVRTC)
+#define _CCCL_HAS_BACKEND_OMP()  0
+#define _CCCL_HAS_BACKEND_TBB()  0
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA_STD___INTERNAL_PSTL_CONFIG_H
diff --git a/libcudacxx/include/cuda/std/__pstl/cuda/for_each_n.h b/libcudacxx/include/cuda/std/__pstl/cuda/for_each_n.h
new file mode 100644
index 00000000000..78a95b5e221
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__pstl/cuda/for_each_n.h
@@ -0,0 +1,97 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___PSTL_CUDA_FOR_EACH_N_H
+#define _CUDA_STD___PSTL_CUDA_FOR_EACH_N_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if _CCCL_HAS_BACKEND_CUDA()
+
+#  include <cub/device/device_for.cuh>
+
+#  include <cuda/__execution/policy.h>
+#  include <cuda/__runtime/api_wrapper.h>
+#  include <cuda/__stream/stream_ref.h>
+#  include <cuda/std/__algorithm/for_each_n.h>
+#  include <cuda/std/__exception/cuda_error.h>
+#  include <cuda/std/__exception/terminate.h>
+#  include <cuda/std/__execution/policy.h>
+#  include <cuda/std/__iterator/iterator_traits.h>
+#  include <cuda/std/__pstl/dispatch.h>
+#  include <cuda/std/__type_traits/always_false.h>
+#  include <cuda/std/__utility/convert_to_integral.h>
+#  include <cuda/std/__utility/move.h>
+
+#  include <nv/target>
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+_CCCL_BEGIN_NAMESPACE_ARCH_DEPENDENT
+
+template <>
+struct __pstl_dispatch<__pstl_algorithm::__for_each_n, __execution_backend::__cuda>
+{
+  template <class _Policy, class _Iter, class _Size, class _Fn>
+  [[nodiscard]] _CCCL_HOST_API static _Iter
+  __par_impl([[maybe_unused]] _Policy __policy, _Iter __first, _Size __orig_n, _Fn __func) noexcept
+  {
+    const auto __count = ::cuda::std::__convert_to_integral(__orig_n);
+    ::cuda::stream_ref __stream{cudaStreamPerThread};
+
+    _CCCL_TRY_CUDA_API(
+      ::cub::DeviceFor::ForEachN,
+      "__pstl_dispatch: kernel launch failed",
+      __first,
+      __count,
+      ::cuda::std::move(__func),
+      __stream.get());
+
+    __stream.sync();
+
+    return __first + __count;
+  }
+
+  template <class _Policy, class _Iter, class _Size, class _Fn>
+  [[nodiscard]] _CCCL_HOST_API _CCCL_FORCEINLINE _Iter
+  operator()(_Policy __policy, _Iter __first, _Size __orig_n, _Fn __func) const noexcept
+  {
+    if constexpr (::cuda::std::__has_random_access_traversal<_Iter>)
+    {
+      return __par_impl(::cuda::std::move(__policy), ::cuda::std::move(__first), __orig_n, ::cuda::std::move(__func));
+    }
+    else
+    {
+      static_assert(__always_false_v<_Policy>,
+                    "__pstl_dispatch: CUDA backend of cuda::std::for_each_n requires at least random access iterators");
+      return ::cuda::std::for_each_n(::cuda::std::move(__first), __orig_n, ::cuda::std::move(__func));
+    }
+  }
+};
+
+_CCCL_END_NAMESPACE_ARCH_DEPENDENT
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif /// _CCCL_HAS_BACKEND_CUDA()
+
+#endif // _CUDA_STD___PSTL_CUDA_FOR_EACH_N_H
diff --git a/libcudacxx/include/cuda/std/__pstl/dispatch.h b/libcudacxx/include/cuda/std/__pstl/dispatch.h
new file mode 100644
index 00000000000..22b49051f90
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__pstl/dispatch.h
@@ -0,0 +1,123 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___PSTL_DISPATCH_H
+#define _CUDA_STD___PSTL_DISPATCH_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__execution/policy.h>
+#include <cuda/std/__type_traits/always_false.h>
+#include <cuda/std/__type_traits/is_base_of.h>
+#include <cuda/std/cstdint>
+
+#include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION
+
+enum class __pstl_algorithm
+{
+  // The find_if family
+  __find,
+  __find_if,
+  __any_of,
+  __all_of,
+  __none_of,
+  __is_partitioned,
+
+  // merge family
+  // non implemented
+
+  // sort family
+  __sort,
+
+  // for_each family
+  __for_each_n,
+  __fill,
+  __fill_n,
+  __replace,
+  __replace_if,
+  __generate,
+  __generate_n,
+
+  // transform_reduce and transform_reduce_binary family
+  __count_if,
+  __count,
+  __equal,
+  __reduce,
+
+  // transform and transform_binary family
+  __replace_copy_if,
+  __replace_copy,
+  __move,
+  __copy,
+  __copy_n,
+  __rotate_copy,
+};
+
+//! @brief tag type to indicate that we cannot dispatch to a parallel algorithm and should run the algorithm serially
+struct __pstl_no_dispatch
+{};
+
+_CCCL_BEGIN_NAMESPACE_ARCH_DEPENDENT
+
+//! @brief Dispatcher for a given @tparam _Algorith and @tparam _Policy
+//! If @class __pstl_dispatch is not specialized by the chosen backend we will fall back to serial execution
+template <__pstl_algorithm _Algorithm, __execution_backend _Backend>
+struct __pstl_dispatch : public __pstl_no_dispatch
+{};
+
+_CCCL_END_NAMESPACE_ARCH_DEPENDENT
+
+//! @brief Helper variable that detects whether @class __pstl_dispatch has been specialized so that we can
+//! dispatch
+template <class>
+inline constexpr bool __pstl_can_dispatch = false;
+
+template <__pstl_algorithm _Algorithm, __execution_backend _Backend>
+inline constexpr bool __pstl_can_dispatch<__pstl_dispatch<_Algorithm, _Backend>> =
+  !::cuda::std::is_base_of_v<__pstl_no_dispatch, __pstl_dispatch<_Algorithm, _Backend>>;
+
+_CCCL_BEGIN_NAMESPACE_ARCH_DEPENDENT
+
+//! @brief Top layer dispatcher that returns a concrete dispatch if possible
+template <__pstl_algorithm _Algorithm, class _Policy>
+[[nodiscard]] _CCCL_API _CCCL_CONSTEVAL auto __pstl_select_dispatch() noexcept
+{
+  // First extract the desired backend from the policy
+  constexpr __execution_backend __backend = _Policy::__get_backend();
+
+  // If the user requests a unique backends, we must take that
+  if constexpr (::cuda::std::execution::__has_unique_backend(__backend))
+  {
+    return __pstl_dispatch<_Algorithm, __backend>{};
+  }
+  else
+  {
+    // No dispatch found, return invalid to signal serial execution
+    return __pstl_dispatch<_Algorithm, __execution_backend::__none>{};
+  }
+}
+
+_CCCL_END_NAMESPACE_ARCH_DEPENDENT
+
+_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
+
+#include <cuda/std/__cccl/epilogue.h>
+
+#endif // _CUDA_STD___PSTL_DISPATCH_H
diff --git a/libcudacxx/include/cuda/std/__pstl/for_each.h b/libcudacxx/include/cuda/std/__pstl/for_each.h
new file mode 100644
index 00000000000..6bdf0bf9b28
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__pstl/for_each.h
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___PSTL_FOR_EACH_H
+#define _CUDA_STD___PSTL_FOR_EACH_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if !_CCCL_COMPILER(NVRTC)
+
+#  include <cuda/std/__algorithm/for_each_n.h>
+#  include <cuda/std/__execution/policy.h>
+#  include <cuda/std/__iterator/distance.h>
+#  include <cuda/std/__pstl/dispatch.h>
+#  include <cuda/std/__type_traits/always_false.h>
+#  include <cuda/std/__utility/move.h>
+
+#  if _CCCL_HAS_BACKEND_CUDA()
+#    include <cuda/std/__pstl/cuda/for_each_n.h>
+#  endif // _CCCL_HAS_BACKEND_CUDA()
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD
+
+_CCCL_BEGIN_NAMESPACE_ARCH_DEPENDENT
+
+_CCCL_TEMPLATE(class _Policy, class _Iter, class _Fn)
+_CCCL_REQUIRES(__has_forward_traversal<_Iter> _CCCL_AND is_execution_policy_v<_Policy>)
+_CCCL_HOST_API void for_each([[maybe_unused]] _Policy __policy, _Iter __first, _Iter __last, _Fn __func)
+{
+  [[maybe_unused]] auto __dispatch =
+    ::cuda::std::execution::__pstl_select_dispatch<::cuda::std::execution::__pstl_algorithm::__for_each_n, _Policy>();
+  if constexpr (::cuda::std::execution::__pstl_can_dispatch<decltype(__dispatch)>)
+  {
+    (void) __dispatch(::cuda::std::move(__policy),
+                      ::cuda::std::move(__first),
+                      ::cuda::std::distance(__first, __last),
+                      ::cuda::std::move(__func));
+  }
+  else
+  {
+    static_assert(__always_false_v<_Policy>, "Parallel cuda::std::for_each requires at least one selected backend");
+    ::cuda::std::for_each(::cuda::std::move(__first), ::cuda::std::move(__last), ::cuda::std::move(__func));
+  }
+}
+
+_CCCL_END_NAMESPACE_ARCH_DEPENDENT
+
+_CCCL_END_NAMESPACE_CUDA_STD
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif // !_CCCL_COMPILER(NVRTC)
+
+#endif // _CUDA_STD___PSTL_FOR_EACH_H
diff --git a/libcudacxx/include/cuda/std/__pstl/for_each_n.h b/libcudacxx/include/cuda/std/__pstl/for_each_n.h
new file mode 100644
index 00000000000..082e1a0d385
--- /dev/null
+++ b/libcudacxx/include/cuda/std/__pstl/for_each_n.h
@@ -0,0 +1,68 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _CUDA_STD___PSTL_FOR_EACH_N_H
+#define _CUDA_STD___PSTL_FOR_EACH_N_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#if !_CCCL_COMPILER(NVRTC)
+
+#  include <cuda/std/__algorithm/for_each_n.h>
+#  include <cuda/std/__execution/policy.h>
+#  include <cuda/std/__iterator/distance.h>
+#  include <cuda/std/__pstl/dispatch.h>
+#  include <cuda/std/__type_traits/always_false.h>
+#  include <cuda/std/__utility/move.h>
+
+#  if _CCCL_HAS_BACKEND_CUDA()
+#    include <cuda/std/__pstl/cuda/for_each_n.h>
+#  endif // _CCCL_HAS_BACKEND_CUDA()
+
+#  include <cuda/std/__cccl/prologue.h>
+
+_CCCL_BEGIN_NAMESPACE_CUDA_STD
+
+_CCCL_BEGIN_NAMESPACE_ARCH_DEPENDENT
+
+_CCCL_TEMPLATE(class _Policy, class _Iter, class _Size, class _Fn)
+_CCCL_REQUIRES(__has_forward_traversal<_Iter> _CCCL_AND is_execution_policy_v<_Policy>)
+_CCCL_HOST_API _Iter for_each_n([[maybe_unused]] _Policy __policy, _Iter __first, _Size __orig_n, _Fn __func)
+{
+  [[maybe_unused]] auto __dispatch =
+    ::cuda::std::execution::__pstl_select_dispatch<::cuda::std::execution::__pstl_algorithm::__for_each_n, _Policy>();
+  if constexpr (::cuda::std::execution::__pstl_can_dispatch<decltype(__dispatch)>)
+  {
+    return __dispatch(::cuda::std::move(__policy), ::cuda::std::move(__first), __orig_n, ::cuda::std::move(__func));
+  }
+  else
+  {
+    static_assert(__always_false_v<_Policy>, "Parallel cuda::std::for_each_n requires at least one selected backend");
+    return ::cuda::std::for_each_n(::cuda::std::move(__first), __orig_n, ::cuda::std::move(__func));
+  }
+}
+
+_CCCL_END_NAMESPACE_ARCH_DEPENDENT
+
+_CCCL_END_NAMESPACE_CUDA_STD
+
+#  include <cuda/std/__cccl/epilogue.h>
+
+#endif // !_CCCL_COMPILER(NVRTC)
+
+#endif // _CUDA_STD___PSTL_FOR_EACH_N_H
diff --git a/libcudacxx/include/cuda/std/__type_traits/is_execution_policy.h b/libcudacxx/include/cuda/std/__type_traits/is_execution_policy.h
index 7114201e056..338b5aa76fb 100644
--- a/libcudacxx/include/cuda/std/__type_traits/is_execution_policy.h
+++ b/libcudacxx/include/cuda/std/__type_traits/is_execution_policy.h
@@ -20,7 +20,7 @@
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__execution/policy.h>
+#include <cuda/std/__fwd/execution_policy.h>
 #include <cuda/std/__type_traits/integral_constant.h>
 #include <cuda/std/__type_traits/void_t.h>
 
@@ -28,51 +28,33 @@
 
 _CCCL_BEGIN_NAMESPACE_CUDA_STD
 
-template <class>
+template <class, class = void>
 inline constexpr bool is_execution_policy_v = false;
 
-// Ensure we ignore cv qualifiers
-template <class _Tp>
-inline constexpr bool is_execution_policy_v<const _Tp> = is_execution_policy_v<_Tp>;
-
-template <class _Tp>
-inline constexpr bool is_execution_policy_v<volatile _Tp> = is_execution_policy_v<_Tp>;
-
-template <class _Tp>
-inline constexpr bool is_execution_policy_v<const volatile _Tp> = is_execution_policy_v<_Tp>;
-
-// Explicitly mark our execution policies as such
-template <>
-inline constexpr bool is_execution_policy_v<::cuda::std::execution::sequenced_policy> = true;
-
-template <>
-inline constexpr bool is_execution_policy_v<::cuda::std::execution::parallel_policy> = true;
-
-template <>
-inline constexpr bool is_execution_policy_v<::cuda::std::execution::parallel_unsequenced_policy> = true;
-
-template <>
-inline constexpr bool is_execution_policy_v<::cuda::std::execution::unsequenced_policy> = true;
+template <class _Policy>
+inline constexpr bool is_execution_policy_v<_Policy, void_t<decltype(_Policy::__cccl_policy_)>> = true;
 
 template <class _Tp>
 struct _CCCL_NO_SPECIALIZATIONS is_execution_policy : bool_constant<is_execution_policy_v<_Tp>>
 {};
 
 // Detect parallel policies
-template <class, class = void>
+template <class _Policy, bool = is_execution_policy_v<_Policy>>
 inline constexpr bool __is_parallel_execution_policy_v = false;
 
 template <class _Policy>
-inline constexpr bool __is_parallel_execution_policy_v<_Policy, void_t<decltype(_Policy::__policy_)>> =
-  __satisfies_execution_policy(_Policy::__policy_, ::cuda::std::execution::__execution_policy::__parallel);
+inline constexpr bool __is_parallel_execution_policy_v<_Policy, true> =
+  _Policy::__get_policy() == ::cuda::std::execution::__execution_policy::__parallel
+  || _Policy::__get_policy() == ::cuda::std::execution::__execution_policy::__parallel_unsequenced;
 
 // Detect unsequenced policies
-template <class, class = void>
+template <class _Policy, bool = is_execution_policy_v<_Policy>>
 inline constexpr bool __is_unsequenced_execution_policy_v = false;
 
 template <class _Policy>
-inline constexpr bool __is_unsequenced_execution_policy_v<_Policy, void_t<decltype(_Policy::__policy_)>> =
-  __satisfies_execution_policy(_Policy::__policy_, ::cuda::std::execution::__execution_policy::__unsequenced);
+inline constexpr bool __is_unsequenced_execution_policy_v<_Policy, true> =
+  _Policy::__get_policy() == ::cuda::std::execution::__execution_policy::__unsequenced
+  || _Policy::__get_policy() == ::cuda::std::execution::__execution_policy::__parallel_unsequenced;
 
 _CCCL_END_NAMESPACE_CUDA_STD
 
diff --git a/libcudacxx/include/cuda/std/detail/__config b/libcudacxx/include/cuda/std/detail/__config
index 5cf896c89c2..a3ebabc5fe3 100644
--- a/libcudacxx/include/cuda/std/detail/__config
+++ b/libcudacxx/include/cuda/std/detail/__config
@@ -15,6 +15,7 @@
 #include <cuda/std/__internal/cpp_dialect.h>
 #include <cuda/std/__internal/features.h>
 #include <cuda/std/__internal/namespaces.h>
+#include <cuda/std/__internal/pstl_config.h>
 #include <cuda/std/__internal/thread_api.h>
 #include <cuda/std/__internal/version.h>
 
diff --git a/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl.for_each.pass.cpp b/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl.for_each.pass.cpp
new file mode 100644
index 00000000000..6eace2d56b7
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl.for_each.pass.cpp
@@ -0,0 +1,65 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: nvrtc
+// XFAIL: true
+
+// template<class ExecutionPolicy, class ForwardIterator, class Function>
+//   void for_each(ExecutionPolicy&& exec,
+//                 ForwardIterator first, ForwardIterator last,
+//                 Function f);
+
+#include <cuda/std/__algorithm_>
+#include <cuda/std/__pstl/for_each.h>
+#include <cuda/std/cassert>
+
+#include "test_execution_policies.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+
+EXECUTION_POLICY_SFINAE_TEST(for_each);
+
+static_assert(!sfinae_test_for_each<int, int*, int*, bool (*)(int)>);
+static_assert(sfinae_test_for_each<cuda::std::execution::parallel_policy, int*, int*, bool (*)(int)>);
+
+int data[100];
+bool called[100];
+
+template <class Iter>
+struct Test
+{
+  template <class Policy>
+  void operator()(Policy&& policy)
+  {
+    int sizes[] = {0, 1, 2, 100};
+    for (auto size : sizes)
+    {
+      cuda::std::fill(called, called + size, false);
+      cuda::std::for_each(policy, Iter(data), Iter(data + size), [&](int& v) {
+        assert(!called[&v - data]);
+        called[&v - data] = true;
+      });
+      assert(cuda::std::all_of(called, called + size, [](bool b) {
+        return b;
+      }));
+    }
+  }
+};
+
+__host__ void test()
+{
+  types::for_each(types::forward_iterator_list<int*>{}, TestIteratorWithPolicies<Test>{});
+}
+
+int main(int, char**)
+{
+  NV_IF_TARGET(NV_IS_HOST, test();)
+
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl.for_each_n.pass.cpp b/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl.for_each_n.pass.cpp
new file mode 100644
index 00000000000..6a05cabc69f
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl.for_each_n.pass.cpp
@@ -0,0 +1,64 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: nvrtc
+// XFAIL: true
+
+// template<class ExecutionPolicy, class ForwardIterator, class Size, class Function>
+//   ForwardIterator for_each_n(ExecutionPolicy&& exec, ForwardIterator first, Size n,
+//                              Function f);
+
+#include <cuda/std/__algorithm_>
+#include <cuda/std/__pstl/for_each_n.h>
+#include <cuda/std/cassert>
+
+#include "test_execution_policies.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+
+EXECUTION_POLICY_SFINAE_TEST(for_each_n);
+
+static_assert(!sfinae_test_for_each_n<int, int*, int, bool (*)(int)>);
+static_assert(sfinae_test_for_each_n<cuda::std::execution::parallel_policy, int*, int, bool (*)(int)>);
+
+int data[100];
+bool called[100];
+
+template <class Iter>
+struct Test
+{
+  template <class Policy>
+  void operator()(Policy&& policy)
+  {
+    int sizes[] = {0, 1, 2, 100};
+    for (auto size : sizes)
+    {
+      cuda::std::fill(called, called + size, false);
+      cuda::std::for_each_n(policy, Iter(data), size, [&](int& v) {
+        assert(!called[&v - data]);
+        called[&v - data] = true;
+      });
+      assert(cuda::std::all_of(called, called + size, [](bool b) {
+        return b;
+      }));
+    }
+  }
+};
+
+__host__ void test()
+{
+  types::for_each(types::forward_iterator_list<int*>{}, TestIteratorWithPolicies<Test>{});
+}
+
+int main(int, char**)
+{
+  NV_IF_TARGET(NV_IS_HOST, test();)
+
+  return 0;
+}
diff --git a/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl_for_each.cu b/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl_for_each.cu
new file mode 100644
index 00000000000..286324f2ec7
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl_for_each.cu
@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// template<class ExecutionPolicy, class ForwardIterator, class Function>
+//   void for_each(ExecutionPolicy&& exec,
+//                 ForwardIterator first, ForwardIterator last,
+//                 Function f);
+
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/logical.h>
+
+#include <cuda/iterator>
+#include <cuda/std/__pstl/for_each.h>
+#include <cuda/std/execution>
+#include <cuda/std/functional>
+
+#include <testing.cuh>
+#include <utility.cuh>
+
+inline constexpr int size = 1000;
+
+struct mark_present_for_each
+{
+  bool* ptr_;
+
+  template <typename T>
+  __host__ __device__ void operator()(T val) const noexcept
+  {
+    ptr_[val] = true;
+  }
+};
+
+C2H_TEST("cuda::std::for_each", "[parallel algorithm]")
+{
+  thrust::device_vector<bool> res(size, false);
+  mark_present_for_each fn{thrust::raw_pointer_cast(res.data())};
+
+  const auto policy = cuda::execution::__cub_par_unseq;
+  cuda::std::for_each(policy, cuda::counting_iterator{0}, cuda::counting_iterator{size}, fn);
+  CHECK(thrust::all_of(res.begin(), res.end(), cuda::std::identity{}));
+}
diff --git a/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl_for_each_n.cu b/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl_for_each_n.cu
new file mode 100644
index 00000000000..60f17d59eaf
--- /dev/null
+++ b/libcudacxx/test/libcudacxx/std/algorithms/alg.nonmodifying/alg.for_each/pstl_for_each_n.cu
@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of libcu++, the C++ Standard Library for your entire system,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+//
+//===----------------------------------------------------------------------===//
+
+// template<class ExecutionPolicy, class ForwardIterator, class Function>
+//   void for_each(ExecutionPolicy&& exec,
+//                 ForwardIterator first, ForwardIterator last,
+//                 Function f);
+
+#include <thrust/device_vector.h>
+#include <thrust/execution_policy.h>
+#include <thrust/logical.h>
+
+#include <cuda/iterator>
+#include <cuda/std/__pstl/for_each_n.h>
+#include <cuda/std/execution>
+#include <cuda/std/functional>
+
+#include <testing.cuh>
+#include <utility.cuh>
+
+inline constexpr int size = 1000;
+
+struct mark_present_for_each
+{
+  bool* ptr_;
+
+  template <typename T>
+  __host__ __device__ void operator()(T val) const noexcept
+  {
+    ptr_[val] = true;
+  }
+};
+
+C2H_TEST("cuda::std::for_each_n", "[parallel algorithm]")
+{
+  thrust::device_vector<bool> res(size, false);
+  mark_present_for_each fn{thrust::raw_pointer_cast(res.data())};
+
+  const auto policy = cuda::execution::__cub_par_unseq;
+  cuda::std::for_each_n(policy, cuda::counting_iterator{0}, size, fn);
+  CHECK(thrust::all_of(res.begin(), res.end(), cuda::std::identity{}));
+}
diff --git a/libcudacxx/test/support/test_execution_policies.h b/libcudacxx/test/support/test_execution_policies.h
new file mode 100644
index 00000000000..d6adaf64a26
--- /dev/null
+++ b/libcudacxx/test/support/test_execution_policies.h
@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEST_SUPPORT_TEST_EXECUTION_POLICIES
+#define TEST_SUPPORT_TEST_EXECUTION_POLICIES
+
+#include <cuda/std/cstdlib>
+#include <cuda/std/execution>
+#include <cuda/std/type_traits>
+#include <cuda/std/utility>
+
+#include "test_macros.h"
+
+#define EXECUTION_POLICY_SFINAE_TEST(FUNCTION)                                                                          \
+  template <class, class...>                                                                                            \
+  struct sfinae_test_##FUNCTION##_impl : cuda::std::false_type                                                          \
+  {};                                                                                                                   \
+                                                                                                                        \
+  template <class... Args>                                                                                              \
+  struct sfinae_test_##FUNCTION##_impl<cuda::std::void_t<decltype(cuda::std::FUNCTION(cuda::std::declval<Args>()...))>, \
+                                       Args...> : cuda::std::true_type                                                  \
+  {};                                                                                                                   \
+                                                                                                                        \
+  template <class... Args>                                                                                              \
+  inline constexpr bool sfinae_test_##FUNCTION = sfinae_test_##FUNCTION##_impl<void, Args...>::value;
+
+_CCCL_EXEC_CHECK_DISABLE
+template <class Functor>
+__host__ __device__ bool test_execution_policies(Functor func)
+{
+  func(cuda::std::execution::seq);
+  func(cuda::std::execution::unseq);
+  func(cuda::std::execution::par);
+  func(cuda::std::execution::par_unseq);
+
+  return true;
+}
+
+template <template <class Iter> class TestClass>
+struct TestIteratorWithPolicies
+{
+  template <class Iter>
+  __host__ __device__ void operator()() const
+  {
+    test_execution_policies(TestClass<Iter>{});
+  }
+};
+
+#endif // TEST_SUPPORT_TEST_EXECUTION_POLICIES
diff --git a/libcudacxx/test/utils/libcudacxx/test/config.py b/libcudacxx/test/utils/libcudacxx/test/config.py
index eba7b924b12..595619ff51e 100644
--- a/libcudacxx/test/utils/libcudacxx/test/config.py
+++ b/libcudacxx/test/utils/libcudacxx/test/config.py
@@ -1049,9 +1049,13 @@ def configure_compile_flags_header_includes(self):
         #    self.cxx.compile_flags += ['-nostdinc++']
         if cxx_headers is None:
             cxx_headers = os.path.join(self.libcudacxx_src_root, "include")
+            thrust_headers = os.path.join(self.libcudacxx_src_root, "../thrust/")
+            cub_headers = os.path.join(self.libcudacxx_src_root, "../cub/")
         if not os.path.isdir(cxx_headers):
             self.lit_config.fatal("cxx_headers='%s' is not a directory." % cxx_headers)
         self.cxx.compile_flags += ["-I" + cxx_headers]
+        self.cxx.compile_flags += ["-I" + thrust_headers]
+        self.cxx.compile_flags += ["-I" + cub_headers]
         if self.libcudacxx_obj_root is not None:
             cxxabi_headers = os.path.join(
                 self.libcudacxx_obj_root, "include", "c++build"