Skip to content

Commit 419077d

Browse files
committed
Implement cuda::sincos
1 parent 48ff637 commit 419077d

File tree

6 files changed

+302
-12
lines changed

6 files changed

+302
-12
lines changed

docs/libcudacxx/extended_api/math.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,3 +97,8 @@ Math
9797
- Most significant half of the product
9898
- CCCL 3.2.0
9999
- CUDA 13.2
100+
101+
* - :ref:`sincos <libcudacxx-extended-api-math-sincos>`
102+
- Computes sine and cosine of a value at the same time.
103+
- CCCL 3.3.0
104+
- CUDA 13.3
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
.. _libcudacxx-extended-api-math-sincos:
2+
3+
``cuda::sincos``
4+
====================================
5+
6+
Defined in the ``<cuda/cmath>`` header.
7+
8+
.. code:: cuda
9+
10+
namespace cuda {
11+
12+
template <class T>
13+
struct sincos_result
14+
{
15+
T sin;
16+
T cos;
17+
};
18+
19+
template <class T>
20+
[[nodiscard]] __host__ __device__
21+
sincos_result<T> sincos(T value) noexcept;
22+
23+
} // namespace cuda
24+
25+
Computes :math:`\sin value` and :math:`\cos value` at the same time, which can be more efficient than computing the two operations separately.
26+
27+
**Parameters**
28+
29+
- ``value``: The input value.
30+
31+
**Return value**
32+
33+
- ``cuda::sincos_result`` object with both values set to ``NaN`` if the ``value`` is :math:`\pm\infty` or ``NaN`` and to results of :math:`\sin value` and :math:`\cos value` otherwise.
34+
35+
**Constraints**
36+
37+
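- ``T`` is an arithmetic type. If ``T`` is an integral type, ``value`` is converted to ``double`` and a ``cuda::sincos_result<double>`` is returned.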
38+
39+
**Performance considerations**
40+
41+
- If available, the functionality is implemented by compiler builtins; otherwise it falls back to ``cuda::std::sin(value)`` and ``cuda::std::cos(value)``.
42+
43+
Example
44+
-------
45+
46+
.. code:: cuda
47+
48+
#include <cuda/cmath>
49+
#include <cuda/std/cassert>
50+
#include <cuda/std/numbers>
51+
52+
__global__ void sincos_kernel() {
53+
auto [sin_0, cos_0] = cuda::sincos(0.0f);
54+
assert(sin_0 == 0.f);
55+
assert(cos_0 == 1.f);
56+
}
57+
58+
int main() {
59+
sincos_kernel<<<1, 1>>>();
60+
cudaDeviceSynchronize();
61+
return 0;
62+
}
63+
64+
`See it on Godbolt 🔗 <https://godbolt.org/z/WYfEnhGaq>`__
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of libcu++, the C++ Standard Library for your entire system,
4+
// under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#ifndef _CUDA___CMATH_SINCOS_H
12+
#define _CUDA___CMATH_SINCOS_H
13+
14+
#include <cuda/std/detail/__config>
15+
16+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
17+
# pragma GCC system_header
18+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
19+
# pragma clang system_header
20+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
21+
# pragma system_header
22+
#endif // no system header
23+
24+
#include <cuda/std/__cmath/trigonometric_functions.h>
25+
#include <cuda/std/__concepts/concept_macros.h>
26+
#include <cuda/std/__floating_point/traits.h>
27+
#include <cuda/std/__type_traits/is_integral.h>
28+
#include <cuda/std/__type_traits/is_same.h>
29+
30+
#include <cuda/std/__cccl/prologue.h>
31+
32+
#if _CCCL_HAS_BUILTIN(__builtin_sincosf) || _CCCL_COMPILER(GCC)
33+
# define _CCCL_BUILTIN_SINCOSF(...) __builtin_sincosf(__VA_ARGS__)
34+
#endif // _CCCL_HAS_BUILTIN(__builtin_sincosf) || _CCCL_COMPILER(GCC)
35+
36+
#if _CCCL_HAS_BUILTIN(__builtin_sincos) || _CCCL_COMPILER(GCC)
37+
# define _CCCL_BUILTIN_SINCOS(...) __builtin_sincos(__VA_ARGS__)
38+
#endif // _CCCL_HAS_BUILTIN(__builtin_sincos) || _CCCL_COMPILER(GCC)
39+
40+
#if _CCCL_HAS_BUILTIN(__builtin_sincosl) || _CCCL_COMPILER(GCC)
41+
# define _CCCL_BUILTIN_SINCOSL(...) __builtin_sincosl(__VA_ARGS__)
42+
#endif // _CCCL_HAS_BUILTIN(__builtin_sincosl) || _CCCL_COMPILER(GCC)
43+
44+
// clang-cuda crashes if these builtins are used.
45+
#if _CCCL_CUDA_COMPILER(CLANG)
46+
# undef _CCCL_BUILTIN_SINCOSF
47+
# undef _CCCL_BUILTIN_SINCOS
48+
# undef _CCCL_BUILTIN_SINCOSL
49+
#endif // _CCCL_CUDA_COMPILER(CLANG)
50+
51+
_CCCL_BEGIN_NAMESPACE_CUDA
52+
53+
//! @brief Type returned by \c cuda::sincos.
54+
template <class _Tp>
55+
struct _CCCL_TYPE_VISIBILITY_DEFAULT sincos_result
56+
{
57+
_Tp sin; //!< The sin result.
58+
_Tp cos; //!< The cos result.
59+
};
60+
61+
//! @brief Computes sin and cos operation of a value.
62+
//!
63+
//! @param __v The value.
64+
//!
65+
//! @return The \c cuda::sincos_result with the results of sin and cos operations.
66+
_CCCL_TEMPLATE(class _Tp)
67+
_CCCL_REQUIRES(::cuda::std::__is_fp_v<_Tp>)
68+
[[nodiscard]] _CCCL_API sincos_result<_Tp> sincos(_Tp __v) noexcept
69+
{
70+
sincos_result<_Tp> __ret{};
71+
#if defined(_CCCL_BUILTIN_SINCOSF)
72+
if constexpr (::cuda::std::is_same_v<_Tp, float>)
73+
{
74+
_CCCL_BUILTIN_SINCOSF(__v, &__ret.sin, &__ret.cos);
75+
return __ret;
76+
}
77+
#endif // _CCCL_BUILTIN_SINCOSF
78+
#if defined(_CCCL_BUILTIN_SINCOS)
79+
if constexpr (::cuda::std::is_same_v<_Tp, double>)
80+
{
81+
_CCCL_BUILTIN_SINCOS(__v, &__ret.sin, &__ret.cos);
82+
return __ret;
83+
}
84+
#endif // _CCCL_BUILTIN_SINCOS
85+
#if _CCCL_HAS_LONG_DOUBLE() && defined(_CCCL_BUILTIN_SINCOSL)
86+
if constexpr (::cuda::std::is_same_v<_Tp, long double>)
87+
{
88+
_CCCL_BUILTIN_SINCOSL(__v, &__ret.sin, &__ret.cos);
89+
return __ret;
90+
}
91+
#endif // _CCCL_HAS_LONG_DOUBLE() && _CCCL_BUILTIN_SINCOSL
92+
93+
// clang-cuda crashes if these builtins are used.
94+
#if !_CCCL_CUDA_COMPILER(CLANG)
95+
_CCCL_IF_NOT_CONSTEVAL_DEFAULT
96+
{
97+
if constexpr (::cuda::std::is_same_v<_Tp, float>)
98+
{
99+
NV_IF_TARGET(NV_IS_DEVICE, (::sincosf(__v, &__ret.sin, &__ret.cos); return __ret;))
100+
}
101+
if constexpr (::cuda::std::is_same_v<_Tp, double>)
102+
{
103+
NV_IF_TARGET(NV_IS_DEVICE, (::sincos(__v, &__ret.sin, &__ret.cos); return __ret;))
104+
}
105+
}
106+
#endif // !_CCCL_CUDA_COMPILER(CLANG)
107+
__ret.sin = ::cuda::std::sin(__v);
108+
__ret.cos = ::cuda::std::cos(__v);
109+
return __ret;
110+
}
111+
112+
//! @brief Computes sin and cos operation of a value.
113+
//!
114+
//! @param __v The value.
115+
//!
116+
//! @return The \c cuda::sincos_result with the results of sin and cos operations.
117+
_CCCL_TEMPLATE(class _Tp)
118+
_CCCL_REQUIRES(::cuda::std::is_integral_v<_Tp>)
119+
[[nodiscard]] _CCCL_API sincos_result<double> sincos(_Tp __v) noexcept
120+
{
121+
return ::cuda::sincos(static_cast<double>(__v));
122+
}
123+
124+
_CCCL_END_NAMESPACE_CUDA
125+
126+
#include <cuda/std/__cccl/epilogue.h>
127+
128+
#endif // _CUDA___CMATH_SINCOS_H

libcudacxx/include/cuda/cmath

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <cuda/__cmath/pow2.h>
3232
#include <cuda/__cmath/round_down.h>
3333
#include <cuda/__cmath/round_up.h>
34+
#include <cuda/__cmath/sincos.h>
3435
#include <cuda/__cmath/uabs.h>
3536
#include <cuda/std/cmath>
3637

libcudacxx/include/cuda/std/__complex/exponential_functions.h

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# pragma system_header
2222
#endif // no system header
2323

24+
#include <cuda/__cmath/sincos.h>
2425
#include <cuda/std/__cmath/copysign.h>
2526
#include <cuda/std/__cmath/isfinite.h>
2627
#include <cuda/std/__cmath/isinf.h>
@@ -175,12 +176,7 @@ _CCCL_API inline complex<float> exp(const complex<float>& __x)
175176
__exp_r_reduced = (__r < 0.0f) ? 0.0f : 1e3f;
176177
}
177178

178-
// Compile to sincos when possible:
179-
float __sin_i;
180-
float __cos_i;
181-
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
182-
(::sincosf(__i, &__sin_i, &__cos_i);),
183-
(__sin_i = ::cuda::std::sinf(__i); __cos_i = ::cuda::std::cosf(__i);))
179+
const auto [__sin_i, __cos_i] = ::cuda::sincos(__i);
184180

185181
// Our answer now is: (ldexp(__exp_r_reduced * __sin_r, __j_int), ldexp(__exp_r_reduced * __sin_r, __j_int))
186182
// However we don't need a full ldexp here, and if __exp_r_reduced*__sin_r is denormal we can lose bits.
@@ -265,12 +261,7 @@ _CCCL_API inline complex<double> exp<double>(const complex<double>& __x)
265261
__exp_r_reduced = (__r < 0.0) ? 0.0 : 1e10;
266262
}
267263

268-
// Compile to sincos when possible:
269-
double __sin_i;
270-
double __cos_i;
271-
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
272-
(::sincos(__i, &__sin_i, &__cos_i);),
273-
(__sin_i = ::cuda::std::sin(__i); __cos_i = ::cuda::std::cos(__i);))
264+
const auto [__sin_i, __cos_i] = ::cuda::sincos(__i);
274265

275266
// Our answer now is: (ldexp(__exp_mant * __sin_r, __j_int), ldexp(__exp_mant * __sin_r, __j_int))
276267
// However we don't need a full ldexp here, and if __exp_mant*__sin_r is denormal we can lose bits.
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
// <cuda/cmath>
11+
12+
#include <cuda/cmath>
13+
#include <cuda/std/cassert>
14+
#include <cuda/std/limits>
15+
#include <cuda/std/type_traits>
16+
#include <cuda/std/utility>
17+
18+
template <class T>
19+
__host__ __device__ void test_type(float zero)
20+
{
21+
using Result = cuda::std::conditional_t<cuda::std::is_integral_v<T>, double, T>;
22+
23+
// 1. Test signature.
24+
static_assert(cuda::std::is_same_v<cuda::sincos_result<Result>, decltype(cuda::sincos(T{}))>);
25+
static_assert(noexcept(cuda::sincos(cuda::std::declval<T>())));
26+
27+
// 2. Test sincos(0).
28+
{
29+
auto result = cuda::sincos(static_cast<T>(zero));
30+
static_assert(cuda::std::is_same_v<Result, decltype(result.sin)>);
31+
static_assert(cuda::std::is_same_v<Result, decltype(result.cos)>);
32+
assert(result.sin == Result{0});
33+
assert(result.cos == Result{1});
34+
}
35+
36+
// 3. Test sincos(+-inf)
37+
if constexpr (cuda::std::numeric_limits<T>::has_infinity && cuda::std::numeric_limits<T>::has_quiet_NaN)
38+
{
39+
auto pos_result = cuda::sincos(cuda::std::numeric_limits<T>::infinity());
40+
assert(cuda::std::isnan(pos_result.sin));
41+
assert(cuda::std::isnan(pos_result.cos));
42+
43+
auto neg_result = cuda::sincos(-cuda::std::numeric_limits<T>::infinity());
44+
assert(cuda::std::isnan(neg_result.sin));
45+
assert(cuda::std::isnan(neg_result.cos));
46+
}
47+
48+
// 4. Test sincos(+-nan)
49+
if constexpr (cuda::std::numeric_limits<T>::has_quiet_NaN)
50+
{
51+
auto pos_result = cuda::sincos(cuda::std::numeric_limits<T>::quiet_NaN());
52+
assert(cuda::std::isnan(pos_result.sin));
53+
assert(cuda::std::isnan(pos_result.cos));
54+
55+
auto neg_result = cuda::sincos(-cuda::std::numeric_limits<T>::quiet_NaN());
56+
assert(cuda::std::isnan(neg_result.sin));
57+
assert(cuda::std::isnan(neg_result.cos));
58+
}
59+
}
60+
61+
__host__ __device__ void test(float zero)
62+
{
63+
test_type<float>(zero);
64+
test_type<double>(zero);
65+
#if _CCCL_HAS_LONG_DOUBLE()
66+
test_type<long double>(zero);
67+
#endif // _CCCL_HAS_LONG_DOUBLE()
68+
#if _LIBCUDACXX_HAS_NVFP16()
69+
test_type<__half>(zero);
70+
#endif // _LIBCUDACXX_HAS_NVFP16()
71+
#if _LIBCUDACXX_HAS_NVBF16()
72+
test_type<__nv_bfloat16>(zero);
73+
#endif // _LIBCUDACXX_HAS_NVBF16()
74+
75+
// todo: add tests for f128 once supported
76+
77+
test_type<signed char>(zero);
78+
test_type<signed short>(zero);
79+
test_type<signed int>(zero);
80+
test_type<signed long>(zero);
81+
test_type<signed long long>(zero);
82+
#if _CCCL_HAS_INT128()
83+
test_type<__int128_t>(zero);
84+
#endif // _CCCL_HAS_INT128()
85+
86+
test_type<unsigned char>(zero);
87+
test_type<unsigned short>(zero);
88+
test_type<unsigned int>(zero);
89+
test_type<unsigned long>(zero);
90+
test_type<unsigned long long>(zero);
91+
#if _CCCL_HAS_INT128()
92+
test_type<__uint128_t>(zero);
93+
#endif // _CCCL_HAS_INT128()
94+
}
95+
96+
int main(int, char**)
97+
{
98+
volatile float zero = 0.0f;
99+
test(zero);
100+
return 0;
101+
}

0 commit comments

Comments
 (0)