Skip to content

Commit a5b2ae0

Browse files
committed
Implement cuda::sincos
1 parent 48ff637 commit a5b2ae0

File tree

4 files changed

+189
-12
lines changed

4 files changed

+189
-12
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of libcu++, the C++ Standard Library for your entire system,
4+
// under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#ifndef _CUDA___CMATH_SINCOS_H
12+
#define _CUDA___CMATH_SINCOS_H
13+
14+
#include <cuda/std/detail/__config>
15+
16+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
17+
# pragma GCC system_header
18+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
19+
# pragma clang system_header
20+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
21+
# pragma system_header
22+
#endif // no system header
23+
24+
#include <cuda/std/__cmath/trigonometric_functions.h>
25+
#include <cuda/std/__concepts/concept_macros.h>
26+
#include <cuda/std/__floating_point/traits.h>
27+
#include <cuda/std/__type_traits/is_integral.h>
28+
#include <cuda/std/__type_traits/is_same.h>
29+
30+
#include <cuda/std/__cccl/prologue.h>
31+
32+
// Wrappers around the compiler's combined sine/cosine builtins, one per precision.
// Each wrapper is defined when the builtin is reported as available, and unconditionally
// when compiling with GCC.

// float
#if _CCCL_HAS_BUILTIN(__builtin_sincosf) || _CCCL_COMPILER(GCC)
#  define _CCCL_BUILTIN_SINCOSF(...) __builtin_sincosf(__VA_ARGS__)
#endif // __builtin_sincosf available

// double
#if _CCCL_HAS_BUILTIN(__builtin_sincos) || _CCCL_COMPILER(GCC)
#  define _CCCL_BUILTIN_SINCOS(...) __builtin_sincos(__VA_ARGS__)
#endif // __builtin_sincos available

// long double
#if _CCCL_HAS_BUILTIN(__builtin_sincosl) || _CCCL_COMPILER(GCC)
#  define _CCCL_BUILTIN_SINCOSL(...) __builtin_sincosl(__VA_ARGS__)
#endif // __builtin_sincosl available

// clang-cuda crashes when using these builtins in device code, so drop all three wrappers
// for that configuration and let the callers take their non-builtin paths.
#if _CCCL_CUDA_COMPILER(CLANG) && _CCCL_DEVICE_COMPILATION()
#  undef _CCCL_BUILTIN_SINCOSF
#  undef _CCCL_BUILTIN_SINCOS
#  undef _CCCL_BUILTIN_SINCOSL
#endif // clang-cuda device compilation
50+
51+
_CCCL_BEGIN_NAMESPACE_CUDA
52+
53+
template <class _Tp>
54+
struct _CCCL_TYPE_VISIBILITY_DEFAULT sincos_result
55+
{
56+
_Tp sin;
57+
_Tp cos;
58+
};
59+
60+
_CCCL_TEMPLATE(class _Tp)
61+
_CCCL_REQUIRES(::cuda::std::__is_fp_v<_Tp>)
62+
[[nodiscard]] _CCCL_API sincos_result<_Tp> sincos(_Tp __v) noexcept
63+
{
64+
sincos_result<_Tp> __ret{};
65+
#if defined(_CCCL_BUILTIN_SINCOSF)
66+
if constexpr (::cuda::std::is_same_v<_Tp, float>)
67+
{
68+
_CCCL_BUILTIN_SINCOSF(__v, &__ret.sin, &__ret.cos);
69+
return __ret;
70+
}
71+
#endif // _CCCL_BUILTIN_SINCOSF
72+
#if defined(_CCCL_BUILTIN_SINCOS)
73+
if constexpr (::cuda::std::is_same_v<_Tp, double>)
74+
{
75+
_CCCL_BUILTIN_SINCOS(__v, &__ret.sin, &__ret.cos);
76+
return __ret;
77+
}
78+
#endif // _CCCL_BUILTIN_SINCOS
79+
#if _CCCL_HAS_LONG_DOUBLE() && defined(_CCCL_BUILTIN_SINCOSL)
80+
if constexpr (::cuda::std::is_same_v<_Tp, long double>)
81+
{
82+
_CCCL_BUILTIN_SINCOSL(__v, &__ret.sin, &__ret.cos);
83+
return __ret;
84+
}
85+
#endif // _CCCL_HAS_LONG_DOUBLE() && _CCCL_BUILTIN_SINCOSL
86+
_CCCL_IF_NOT_CONSTEVAL_DEFAULT
87+
{
88+
if constexpr (::cuda::std::is_same_v<_Tp, float>)
89+
{
90+
NV_IF_TARGET(NV_IS_DEVICE, (::sincosf(__v, &__ret.sin, &__ret.cos); return __ret;))
91+
}
92+
if constexpr (::cuda::std::is_same_v<_Tp, double>)
93+
{
94+
NV_IF_TARGET(NV_IS_DEVICE, (::sincos(__v, &__ret.sin, &__ret.cos); return __ret;))
95+
}
96+
}
97+
__ret.sin = ::cuda::std::sin(__v);
98+
__ret.cos = ::cuda::std::cos(__v);
99+
return __ret;
100+
}
101+
102+
_CCCL_TEMPLATE(class _Tp)
103+
_CCCL_REQUIRES(::cuda::std::is_integral_v<_Tp>)
104+
[[nodiscard]] _CCCL_API sincos_result<double> sincos(_Tp __v) noexcept
105+
{
106+
return ::cuda::sincos(static_cast<double>(__v));
107+
}
108+
109+
_CCCL_END_NAMESPACE_CUDA
110+
111+
#include <cuda/std/__cccl/epilogue.h>
112+
113+
#endif // _CUDA___CMATH_SINCOS_H

libcudacxx/include/cuda/cmath

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include <cuda/__cmath/pow2.h>
3232
#include <cuda/__cmath/round_down.h>
3333
#include <cuda/__cmath/round_up.h>
34+
#include <cuda/__cmath/sincos.h>
3435
#include <cuda/__cmath/uabs.h>
3536
#include <cuda/std/cmath>
3637

libcudacxx/include/cuda/std/__complex/exponential_functions.h

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# pragma system_header
2222
#endif // no system header
2323

24+
#include <cuda/__cmath/sincos.h>
2425
#include <cuda/std/__cmath/copysign.h>
2526
#include <cuda/std/__cmath/isfinite.h>
2627
#include <cuda/std/__cmath/isinf.h>
@@ -175,12 +176,7 @@ _CCCL_API inline complex<float> exp(const complex<float>& __x)
175176
__exp_r_reduced = (__r < 0.0f) ? 0.0f : 1e3f;
176177
}
177178

178-
// Compile to sincos when possible:
179-
float __sin_i;
180-
float __cos_i;
181-
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
182-
(::sincosf(__i, &__sin_i, &__cos_i);),
183-
(__sin_i = ::cuda::std::sinf(__i); __cos_i = ::cuda::std::cosf(__i);))
179+
const auto [__sin_i, __cos_i] = ::cuda::sincos(__i);
184180

185181
// Our answer now is: (ldexp(__exp_r_reduced * __cos_i, __j_int), ldexp(__exp_r_reduced * __sin_i, __j_int))
186182
// However we don't need a full ldexp here, and if __exp_r_reduced * __cos_i or __exp_r_reduced * __sin_i is denormal we can lose bits.
@@ -265,12 +261,7 @@ _CCCL_API inline complex<double> exp<double>(const complex<double>& __x)
265261
__exp_r_reduced = (__r < 0.0) ? 0.0 : 1e10;
266262
}
267263

268-
// Compile to sincos when possible:
269-
double __sin_i;
270-
double __cos_i;
271-
NV_IF_ELSE_TARGET(NV_IS_DEVICE,
272-
(::sincos(__i, &__sin_i, &__cos_i);),
273-
(__sin_i = ::cuda::std::sin(__i); __cos_i = ::cuda::std::cos(__i);))
264+
const auto [__sin_i, __cos_i] = ::cuda::sincos(__i);
274265

275266
// Our answer now is: (ldexp(__exp_mant * __cos_i, __j_int), ldexp(__exp_mant * __sin_i, __j_int))
276267
// However we don't need a full ldexp here, and if __exp_mant * __cos_i or __exp_mant * __sin_i is denormal we can lose bits.
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
10+
// <cuda/cmath>
11+
12+
#include <cuda/cmath>
13+
#include <cuda/std/cassert>
14+
#include <cuda/std/type_traits>
15+
#include <cuda/std/utility>
16+
17+
template <class T>
18+
__host__ __device__ void test_type(float zero)
19+
{
20+
using Result = cuda::std::conditional_t<cuda::std::is_integral_v<T>, double, T>;
21+
22+
static_assert(cuda::std::is_same_v<cuda::sincos_result<Result>, decltype(cuda::sincos(T{}))>);
23+
static_assert(noexcept(cuda::sincos(cuda::std::declval<T>())));
24+
25+
auto result = cuda::sincos(static_cast<T>(zero));
26+
static_assert(cuda::std::is_same_v<Result, decltype(result.sin)>);
27+
static_assert(cuda::std::is_same_v<Result, decltype(result.cos)>);
28+
assert(result.sin == Result{0});
29+
assert(result.cos == Result{1});
30+
}
31+
32+
// Runs test_type for every floating-point and integer type enabled in this configuration.
//
// zero: forwarded unchanged to each test_type instantiation.
__host__ __device__ void test(float zero)
{
  // Floating-point types.
  test_type<float>(zero);
  test_type<double>(zero);
#if _CCCL_HAS_LONG_DOUBLE()
  test_type<long double>(zero);
#endif // _CCCL_HAS_LONG_DOUBLE()
#if _LIBCUDACXX_HAS_NVFP16()
  test_type<__half>(zero);
#endif // _LIBCUDACXX_HAS_NVFP16()
#if _LIBCUDACXX_HAS_NVBF16()
  test_type<__nv_bfloat16>(zero);
#endif // _LIBCUDACXX_HAS_NVBF16()

  // todo: add tests for f128 once supported

  // Signed integer types.
  test_type<signed char>(zero);
  test_type<signed short>(zero);
  test_type<signed int>(zero);
  test_type<signed long>(zero);
  test_type<signed long long>(zero);
#if _CCCL_HAS_INT128()
  test_type<__int128_t>(zero);
#endif // _CCCL_HAS_INT128()

  // Unsigned integer types.
  test_type<unsigned char>(zero);
  test_type<unsigned short>(zero);
  test_type<unsigned int>(zero);
  test_type<unsigned long>(zero);
  test_type<unsigned long long>(zero);
#if _CCCL_HAS_INT128()
  test_type<__uint128_t>(zero);
#endif // _CCCL_HAS_INT128()
}
66+
67+
int main(int, char**)
68+
{
69+
volatile float zero = 0.0f;
70+
test(zero);
71+
return 0;
72+
}

0 commit comments

Comments
 (0)