@@ -95,7 +95,7 @@ class bloom_filter_impl {
9595 }
9696
9797 template <class CG >
98- __device__ constexpr void clear (CG const & group)
98+ __device__ constexpr void clear (CG group)
9999 {
100100 for (int i = group.thread_rank (); i < num_blocks_ * words_per_block; i += group.size ()) {
101101 words_[i] = 0 ;
@@ -149,7 +149,7 @@ class bloom_filter_impl {
149149 }
150150
151151 template <class CG , class ProbeKey >
152- __device__ void add (CG const & group, ProbeKey const & key)
152+ __device__ void add (CG group, ProbeKey const & key)
153153 {
154154 constexpr auto num_threads = tile_size_v<CG>;
155155 constexpr auto optimal_num_threads = add_optimal_cg_size ();
@@ -166,7 +166,7 @@ class bloom_filter_impl {
166166 }
167167
168168 template <class CG , class InputIt >
169- __device__ void add (CG const & group, InputIt first, InputIt last)
169+ __device__ void add (CG group, InputIt first, InputIt last)
170170 {
171171 namespace cg = cooperative_groups;
172172
@@ -208,7 +208,7 @@ class bloom_filter_impl {
208208 typename policy_type::hash_result_type hash_value;
209209 size_type block_index;
210210
211- auto const worker_group = cg::tiled_partition<worker_num_threads>(group);
211+ auto const worker_group = cg::tiled_partition<worker_num_threads, CG >(group);
212212 auto const worker_offset = worker_num_threads * worker_group.meta_group_rank ();
213213
214214 auto const group_iters = cuco::detail::int_div_ceil (num_keys, num_threads);
@@ -229,7 +229,7 @@ class bloom_filter_impl {
229229 }
230230
231231 template <class CG , class HashValue , class BlockIndex >
232- __device__ void add_impl (CG const & group, HashValue const & hash_value, BlockIndex block_index)
232+ __device__ void add_impl (CG group, HashValue const & hash_value, BlockIndex block_index)
233233 {
234234 constexpr auto num_threads = tile_size_v<CG>;
235235
@@ -327,7 +327,7 @@ class bloom_filter_impl {
327327 }
328328
329329 template <class CG , class ProbeKey >
330- [[nodiscard]] __device__ bool contains (CG const & group, ProbeKey const & key) const
330+ [[nodiscard]] __device__ bool contains (CG group, ProbeKey const & key) const
331331 {
332332 constexpr auto num_threads = tile_size_v<CG>;
333333 constexpr auto optimal_num_threads = contains_optimal_cg_size ();
@@ -359,7 +359,7 @@ class bloom_filter_impl {
359359
360360 // TODO
361361 // template <class CG, class InputIt, class OutputIt>
362- // __device__ void contains(CG const& group, InputIt first, InputIt last, OutputIt output_begin)
362+ // __device__ void contains(CG group, InputIt first, InputIt last, OutputIt output_begin)
363363 // const;
364364
365365 template <class InputIt , class OutputIt >
@@ -432,7 +432,7 @@ class bloom_filter_impl {
432432 // [[nodiscard]] __host__ double expected_false_positive_rate(size_t unique_keys) const
433433 // [[nodiscard]] __host__ __device__ static uint32_t optimal_pattern_bits(size_t num_blocks)
434434 // template <typename CG, cuda::thread_scope NewScope = thread_scope>
435- // [[nodiscard]] __device__ constexpr auto make_copy(CG const& group, word_type* const
435+ // [[nodiscard]] __device__ constexpr auto make_copy(CG group, word_type* const
436436 // memory_to_use, cuda_thread_scope<NewScope> scope = {}) const noexcept;
437437
438438 private:
0 commit comments