2828
2929_CCCL_BEGIN_NAMESPACE_CUDA_STD
3030
31- // Reference is compatible with __atomic_base_tag and uses the default dispatch
// Primary template: declared here, never defined. Storage is provided by the
// partial specializations on the boolean argument.
template <class _Tp, bool _IsSmall>
struct __atomic_ref_storage_impl;
33+
3234template <typename _Tp>
33- struct __atomic_ref_storage
35+ struct __atomic_ref_storage_impl <_Tp, false >
3436{
3537 using __underlying_t = _Tp;
3638 static constexpr __atomic_tag __tag = __atomic_tag::__atomic_base_tag;
@@ -41,9 +43,9 @@ struct __atomic_ref_storage
4143
4244 _Tp* __a_value;
4345
44- __atomic_ref_storage () = delete ;
46+ __atomic_ref_storage_impl () = delete ;
4547
46- _CCCL_HOST_DEVICE constexpr explicit inline __atomic_ref_storage (_Tp* value) noexcept
48+ _CCCL_HOST_DEVICE constexpr explicit inline __atomic_ref_storage_impl (_Tp* value) noexcept
4749 : __a_value(value)
4850 {}
4951
@@ -65,6 +67,45 @@ struct __atomic_ref_storage
6567 }
6668};
6769
70+ template <typename _Tp>
71+ struct __atomic_ref_storage_impl <_Tp, true >
72+ {
73+ using __underlying_t = _Tp;
74+ static constexpr __atomic_tag __tag = __atomic_tag::__atomic_ref_small_tag;
75+
76+ #if !_CCCL_COMPILER(GCC) || _CCCL_COMPILER(GCC, >=, 5)
77+ static_assert (is_trivially_copyable_v<_Tp>, " std::atomic_ref<Tp> requires that 'Tp' be a trivially copyable type" );
78+ #endif
79+
80+ _Tp* __a_value;
81+
82+ __atomic_ref_storage_impl () = delete ;
83+
84+ _CCCL_HOST_DEVICE constexpr explicit inline __atomic_ref_storage_impl (_Tp* value) noexcept
85+ : __a_value(value)
86+ {}
87+
88+ _CCCL_HOST_DEVICE inline auto get () noexcept -> __underlying_t*
89+ {
90+ return __a_value;
91+ }
92+ _CCCL_HOST_DEVICE inline auto get () const noexcept -> __underlying_t*
93+ {
94+ return __a_value;
95+ }
96+ _CCCL_HOST_DEVICE inline auto get () volatile noexcept -> volatile __underlying_t*
97+ {
98+ return __a_value;
99+ }
100+ _CCCL_HOST_DEVICE inline auto get () const volatile noexcept -> volatile __underlying_t*
101+ {
102+ return __a_value;
103+ }
104+ };
105+
106+ template <typename _Tp>
107+ using __atomic_ref_storage = __atomic_ref_storage_impl<_Tp, (sizeof (_Tp) < 4 )>;
108+
68109_CCCL_END_NAMESPACE_CUDA_STD
69110
70111#include < cuda/std/__cccl/epilogue.h>
0 commit comments