Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cub/cub/device/dispatch/dispatch_streaming_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include <cub/device/dispatch/dispatch_reduce.cuh>
#include <cub/iterator/arg_index_input_iterator.cuh>

#include <thrust/iterator/constant_iterator.h>
#include <thrust/iterator/iterator_adaptor.h>

#include <cuda/__iterator/tabulate_output_iterator.h>
Expand Down
32 changes: 16 additions & 16 deletions cudax/include/cuda/experimental/__execution/policy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,48 +44,48 @@ struct any_execution_policy

_CCCL_HIDE_FROM_ABI any_execution_policy() = default;

template <__execution_policy _Policy>
_CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__policy<_Policy>) noexcept
: value(_Policy)
template <uint32_t _Policy>
_CCCL_HOST_API constexpr any_execution_policy(::cuda::std::execution::__execution_policy_base<_Policy>) noexcept
: value(value_type{_Policy})
{}

_CCCL_HOST_API constexpr operator __execution_policy() const noexcept
{
return value;
}

_CCCL_HOST_API constexpr auto operator()() const noexcept -> __execution_policy
_CCCL_HOST_API constexpr auto operator()() const noexcept -> value_type
{
return value;
}

template <__execution_policy _Policy>
template <uint32_t _Policy>
[[nodiscard]] _CCCL_HOST_API friend constexpr bool
operator==(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept
operator==(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept
{
return pol.value == _Policy;
return pol.value == value_type{_Policy};
}

#if _CCCL_STD_VER <= 2017
template <__execution_policy _Policy>
template <uint32_t _Policy>
[[nodiscard]] _CCCL_HOST_API friend constexpr bool
operator==(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol) noexcept
operator==(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol) noexcept
{
return pol.value == _Policy;
return pol.value == value_type{_Policy};
}

template <__execution_policy _Policy>
template <uint32_t _Policy>
[[nodiscard]] _CCCL_HOST_API friend constexpr bool
operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__policy<_Policy>&) noexcept
operator!=(const any_execution_policy& pol, const ::cuda::std::execution::__execution_policy_base<_Policy>&) noexcept
{
return pol.value != _Policy;
return pol.value != value_type{_Policy};
}

template <__execution_policy _Policy>
template <uint32_t _Policy>
[[nodiscard]] _CCCL_HOST_API friend constexpr bool
operator!=(const ::cuda::std::execution::__policy<_Policy>&, const any_execution_policy& pol)
operator!=(const ::cuda::std::execution::__execution_policy_base<_Policy>&, const any_execution_policy& pol)
{
return pol.value != _Policy;
return pol.value != value_type{_Policy};
}
#endif // _CCCL_STD_VER <= 2017

Expand Down
2 changes: 2 additions & 0 deletions libcudacxx/cmake/LibcudacxxBuildCompilerTargets.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,5 +53,7 @@ function(libcudacxx_build_compiler_targets)
# order matters here, we need the libcudacxx options to override the cccl options.
cccl.compiler_interface
libcudacxx.compiler_flags
Thrust::Thrust
CUB::CUB
)
endfunction()
53 changes: 53 additions & 0 deletions libcudacxx/include/cuda/__execution/policy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA___EXECUTION_POLICY_H
#define _CUDA___EXECUTION_POLICY_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#if _CCCL_HAS_BACKEND_CUDA()

# include <cuda/__fwd/execution_policy.h>
# include <cuda/std/__execution/policy.h>
# include <cuda/std/__type_traits/is_execution_policy.h>

# include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION

//! @brief Partial specialization of `__execution_policy_base` for the CUDA backend.
//! It declares no members of its own and inherits everything from the instantiation for the `__none` backend with the
//! same `_Policy` value.
template <uint32_t _Policy>
struct _CCCL_DECLSPEC_EMPTY_BASES __execution_policy_base<_Policy, __execution_backend::__cuda>
: __execution_policy_base<_Policy, __execution_backend::__none>
{};

_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION

_CCCL_BEGIN_NAMESPACE_CUDA_EXECUTION

//! @brief Policy type whose encoded value is the parallel-unsequenced execution policy with the backend set to CUDA
//! through `__with_cuda_backend`. The backend template argument of `__execution_policy_base` is left to its default.
using __cub_parallel_unsequenced_policy =
::cuda::std::execution::__execution_policy_base<::cuda::std::execution::__with_cuda_backend<static_cast<uint32_t>(
::cuda::std::execution::__execution_policy::__parallel_unsequenced)>()>;
//! @brief Global constant instance of `__cub_parallel_unsequenced_policy`.
_CCCL_GLOBAL_CONSTANT __cub_parallel_unsequenced_policy __cub_par_unseq{};

_CCCL_END_NAMESPACE_CUDA_EXECUTION

# include <cuda/std/__cccl/epilogue.h>

#endif // _CCCL_HAS_BACKEND_CUDA()

#endif // _CUDA___EXECUTION_POLICY_H
47 changes: 47 additions & 0 deletions libcudacxx/include/cuda/__fwd/execution_policy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA___FWD_EXECUTION_POLICY_H
#define _CUDA___FWD_EXECUTION_POLICY_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#if _CCCL_HAS_BACKEND_CUDA()

# include <cuda/std/__fwd/execution_policy.h>

# include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION

//! @brief Replaces the backend byte of an encoded policy value with the CUDA backend.
//! @tparam _Policy Encoded policy value. Bits 8-15 are treated as the backend field.
//! @return `_Policy` with bits 8-15 cleared and then set to `__execution_backend::__cuda`; all other bits are kept.
template <uint32_t _Policy>
[[nodiscard]] _CCCL_API constexpr uint32_t __with_cuda_backend() noexcept
{
  // The backend lives in the second-lowest byte of the encoded value.
  constexpr uint32_t __backend_shift = 8;
  constexpr uint32_t __backend_bits  = uint32_t{0xFF} << __backend_shift;
  constexpr uint32_t __cuda_bits     = static_cast<uint32_t>(__execution_backend::__cuda) << __backend_shift;
  // Drop whatever backend was encoded before and splice in the CUDA one.
  return (_Policy & ~__backend_bits) | __cuda_bits;
}

_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION

# include <cuda/std/__cccl/epilogue.h>

#endif // _CCCL_HAS_BACKEND_CUDA()

#endif // _CUDA___FWD_EXECUTION_POLICY_H
68 changes: 35 additions & 33 deletions libcudacxx/include/cuda/std/__execution/policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,65 +20,67 @@
# pragma system_header
#endif // no system header

#include <cuda/std/__type_traits/underlying_type.h>
#include <cuda/std/__bit/has_single_bit.h>
#include <cuda/std/__fwd/execution_policy.h>
#include <cuda/std/cstdint>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION

enum class __execution_policy : uint32_t
[[nodiscard]] _CCCL_API constexpr bool __has_unique_backend(const __execution_backend __backends) noexcept
{
__invalid_execution_policy = 0,
__sequenced = 1 << 0,
__parallel = 1 << 1,
__unsequenced = 1 << 2,
__parallel_unsequenced = __execution_policy::__parallel | __execution_policy::__unsequenced,
};

[[nodiscard]] _CCCL_API constexpr bool
__satisfies_execution_policy(__execution_policy __lhs, __execution_policy __rhs) noexcept
{
return (static_cast<uint32_t>(__lhs) & static_cast<uint32_t>(__rhs)) != 0;
return ::cuda::std::has_single_bit(static_cast<uint32_t>(__backends));
}

template <__execution_policy _Policy>
struct __policy
//! @brief Base class for our execution policies.
//! It takes an untagged uint32_t because we want to be able to store 3 different enumerations in it.
template <uint32_t _Policy, __execution_backend _Backend>
struct __execution_policy_base
{
template <__execution_policy _OtherPolicy>
[[nodiscard]] _CCCL_API friend constexpr bool operator==(const __policy&, const __policy<_OtherPolicy>&) noexcept
//! @brief Tag that identifies this and all derived classes as a CCCL execution policy
static constexpr uint32_t __cccl_policy_ = _Policy;

template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
[[nodiscard]] _CCCL_API friend constexpr bool
operator==(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept
{
using __underlying_t = underlying_type_t<__execution_policy>;
return (static_cast<__underlying_t>(_Policy) == static_cast<__underlying_t>(_OtherPolicy));
return _Policy == _OtherPolicy;
}

#if _CCCL_STD_VER <= 2017
template <__execution_policy _OtherPolicy>
[[nodiscard]] _CCCL_API friend constexpr bool operator!=(const __policy&, const __policy<_OtherPolicy>&) noexcept
template <uint32_t _OtherPolicy, __execution_backend _OtherBackend>
[[nodiscard]] _CCCL_API friend constexpr bool
operator!=(const __execution_policy_base&, const __execution_policy_base<_OtherPolicy, _OtherBackend>&) noexcept
{
using __underlying_t = underlying_type_t<__execution_policy>;
return (static_cast<__underlying_t>(_Policy) != static_cast<__underlying_t>(_OtherPolicy));
return _Policy != _OtherPolicy;
}
#endif // _CCCL_STD_VER <= 2017

static constexpr __execution_policy __policy_ = _Policy;
};
//! @brief Extracts the execution policy from the stored _Policy
[[nodiscard]] _CCCL_API static constexpr __execution_policy __get_policy() noexcept
{
return __policy_to_execution_policy<_Policy>;
}

struct sequenced_policy : public __policy<__execution_policy::__sequenced>
{};
//! @brief Extracts the execution backend from the stored _Policy
[[nodiscard]] _CCCL_API static constexpr __execution_backend __get_backend() noexcept
{
return __policy_to_execution_backend<_Policy>;
}
};

using sequenced_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__sequenced)>;
_CCCL_GLOBAL_CONSTANT sequenced_policy seq{};

struct parallel_policy : public __policy<__execution_policy::__parallel>
{};
using parallel_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__parallel)>;
_CCCL_GLOBAL_CONSTANT parallel_policy par{};

struct parallel_unsequenced_policy : public __policy<__execution_policy::__parallel_unsequenced>
{};
using parallel_unsequenced_policy =
__execution_policy_base<static_cast<uint32_t>(__execution_policy::__parallel_unsequenced)>;
_CCCL_GLOBAL_CONSTANT parallel_unsequenced_policy par_unseq{};

struct unsequenced_policy : public __policy<__execution_policy::__unsequenced>
{};
using unsequenced_policy = __execution_policy_base<static_cast<uint32_t>(__execution_policy::__unsequenced)>;
_CCCL_GLOBAL_CONSTANT unsequenced_policy unseq{};

_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION
Expand Down
73 changes: 73 additions & 0 deletions libcudacxx/include/cuda/std/__fwd/execution_policy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_STD___FWD_EXECUTION_POLICY_H
#define _CUDA_STD___FWD_EXECUTION_POLICY_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/cstdint>

#include <cuda/std/__cccl/prologue.h>

_CCCL_BEGIN_NAMESPACE_CUDA_STD_EXECUTION

//! @brief Enumerates the standard execution policies.
//! Every basic policy occupies its own bit, and `__parallel_unsequenced` is the union of the parallel and the
//! unsequenced bit.
enum class __execution_policy : uint8_t
{
  __invalid_execution_policy = 0x00, //!< No policy bit set
  __sequenced                = 0x01, //!< Bit 0
  __parallel                 = 0x02, //!< Bit 1
  __unsequenced              = 0x04, //!< Bit 2
  __parallel_unsequenced     = __parallel | __unsequenced, //!< Bits 1 and 2
};

//! @brief Extracts the execution policy from the stored _Policy
template <uint32_t _Policy>
inline constexpr __execution_policy __policy_to_execution_policy = __execution_policy{(_Policy & uint32_t{0x000000FF})};

//! @brief Enumerates the different backends we support
//! @note Not an enum class because a user might specify multiple backends
//! @note Every backend other than `__none` is a distinct single bit. Bit 0 is not assigned to any backend here.
//! An enumerator is only declared when the matching `_CCCL_HAS_BACKEND_*()` macro evaluates to true.
enum __execution_backend : uint8_t
{
// The backends we provide
__none = 0, // no backend bit set
#if _CCCL_HAS_BACKEND_CUDA()
__cuda = 1 << 1, // bit 1
#endif // _CCCL_HAS_BACKEND_CUDA()
#if _CCCL_HAS_BACKEND_OMP()
__omp = 1 << 2, // bit 2
#endif // _CCCL_HAS_BACKEND_OMP()
#if _CCCL_HAS_BACKEND_TBB()
__tbb = 1 << 3, // bit 3
#endif // _CCCL_HAS_BACKEND_TBB()
};

//! @brief Extracts the execution backend from the stored _Policy
template <uint32_t _Policy>
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: it feels like we are missing some other concept here. We have policy: how code should run (sequenced, parallel, unsequenced, etc.) and backend: where code should run (cuda, omp, tbb, etc.). In this template parameter, we have "something" that's policy + backend, which is also called policy. This makes it hard to understand what exactly we are working with. The only information that disambiguates the two is uint32 vs uint8, but it's hard to spot. If you could come up with some different term for policy + backend, that'd be great.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought about this a ton and I did not find a truly better name.

What I would say is that there is a difference between the execution policy and what I would consider the whole policy. The former is what the standard gives us as a classification of how code should run. The latter is how we implement that for the different backends.

So yeah, it is a tad close, but I did not find a better name.

inline constexpr __execution_backend __policy_to_execution_backend =
__execution_backend{(_Policy & uint32_t{0x0000FF00}) >> 8};

//! @brief Forward declaration of the base class template for execution policies.
//! @tparam _Policy Encoded policy value
//! @tparam _Backend Execution backend; defaults to the backend extracted from `_Policy` through
//! `__policy_to_execution_backend`
template <uint32_t _Policy, __execution_backend _Backend = __policy_to_execution_backend<_Policy>>
struct __execution_policy_base;

_CCCL_END_NAMESPACE_CUDA_STD_EXECUTION

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_STD___FWD_EXECUTION_POLICY_H
32 changes: 32 additions & 0 deletions libcudacxx/include/cuda/std/__internal/pstl_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//===----------------------------------------------------------------------===//
//
// Part of libcu++, the C++ Standard Library for your entire system,
// under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
//
//===----------------------------------------------------------------------===//

#ifndef _CUDA_STD___INTERNAL_PSTL_CONFIG_H
#define _CUDA_STD___INTERNAL_PSTL_CONFIG_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__cccl/prologue.h>

// Backend availability switches for the parallel algorithms.
// Each macro expands to a single, self-contained expression so that it composes safely with surrounding
// operators, e.g. `#if !_CCCL_HAS_BACKEND_CUDA()`. Without the outer parentheses the negation would bind only to the
// first operand of the `&&`.
// The CUDA backend is available when compiling CUDA code with any compiler other than NVRTC.
#define _CCCL_HAS_BACKEND_CUDA() (_CCCL_CUDA_COMPILATION() && !_CCCL_COMPILER(NVRTC))
// The OpenMP and TBB backends are unconditionally disabled here.
#define _CCCL_HAS_BACKEND_OMP() 0
#define _CCCL_HAS_BACKEND_TBB() 0

#include <cuda/std/__cccl/epilogue.h>

#endif // _CUDA_STD___INTERNAL_PSTL_CONFIG_H
Loading
Loading