Skip to content

Commit 5a48999

Browse files
committed
Apply CCCL version checks to all stream wait calls
1 parent 907df2a commit 5a48999

File tree

10 files changed

+199
-3
lines changed

10 files changed

+199
-3
lines changed

include/cuco/detail/bloom_filter/bloom_filter_impl.cuh

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,11 @@ class bloom_filter_impl {
105105
__host__ constexpr void clear(cuda::stream_ref stream)
106106
{
107107
this->clear_async(stream);
108+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
109+
stream.sync();
110+
#else
108111
stream.wait();
112+
#endif
109113
}
110114

111115
__host__ constexpr void clear_async(cuda::stream_ref stream)
@@ -253,7 +257,11 @@ class bloom_filter_impl {
253257
__host__ constexpr void add(InputIt first, InputIt last, cuda::stream_ref stream)
254258
{
255259
this->add_async(first, last, stream);
260+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
261+
stream.sync();
262+
#else
256263
stream.wait();
264+
#endif
257265
}
258266

259267
template <class InputIt>
@@ -287,7 +295,11 @@ class bloom_filter_impl {
287295
InputIt first, InputIt last, StencilIt stencil, Predicate pred, cuda::stream_ref stream)
288296
{
289297
this->add_if_async(first, last, stencil, pred, stream);
298+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
299+
stream.sync();
300+
#else
290301
stream.wait();
302+
#endif
291303
}
292304

293305
template <class InputIt, class StencilIt, class Predicate>
@@ -369,7 +381,11 @@ class bloom_filter_impl {
369381
cuda::stream_ref stream) const
370382
{
371383
this->contains_async(first, last, output_begin, stream);
384+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
385+
stream.sync();
386+
#else
372387
stream.wait();
388+
#endif
373389
}
374390

375391
template <class InputIt, class OutputIt>
@@ -391,7 +407,11 @@ class bloom_filter_impl {
391407
cuda::stream_ref stream) const
392408
{
393409
this->contains_if_async(first, last, stencil, pred, output_begin, stream);
410+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
411+
stream.sync();
412+
#else
394413
stream.wait();
414+
#endif
395415
}
396416

397417
template <class InputIt, class StencilIt, class Predicate, class OutputIt>

include/cuco/detail/hyperloglog/hyperloglog_impl.cuh

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -124,7 +124,11 @@ class hyperloglog_impl {
124124
__host__ constexpr void clear(cuda::stream_ref stream)
125125
{
126126
this->clear_async(stream);
127+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
128+
stream.sync();
129+
#else
127130
stream.wait();
131+
#endif
128132
}
129133

130134
/**
@@ -265,7 +269,11 @@ class hyperloglog_impl {
265269
__host__ constexpr void add(InputIt first, InputIt last, cuda::stream_ref stream)
266270
{
267271
this->add_async(first, last, stream);
272+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
273+
stream.sync();
274+
#else
268275
stream.wait();
276+
#endif
269277
}
270278

271279
/**
@@ -329,7 +337,11 @@ class hyperloglog_impl {
329337
cuda::stream_ref stream)
330338
{
331339
this->merge_async(other, stream);
340+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
341+
stream.sync();
342+
#else
332343
stream.wait();
344+
#endif
333345
}
334346

335347
/**
@@ -413,7 +425,11 @@ class hyperloglog_impl {
413425
sizeof(register_type) * num_regs,
414426
cudaMemcpyDefault,
415427
stream.get()));
428+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
429+
stream.sync();
430+
#else
416431
stream.wait();
432+
#endif
417433

418434
fp_type sum = 0;
419435
int zeroes = 0;

include/cuco/detail/open_addressing/open_addressing_impl.cuh

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -880,7 +880,11 @@ class open_addressing_impl {
880880
size_type temp_count;
881881
CUCO_CUDA_TRY(cudaMemcpyAsync(
882882
&temp_count, d_num_out, sizeof(size_type), cudaMemcpyDeviceToHost, stream.get()));
883+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
884+
stream.sync();
885+
#else
883886
stream.wait();
887+
#endif
884888
h_num_out += temp_count;
885889
temp_allocator.deallocate(d_temp_storage, temp_storage_bytes);
886890
}
@@ -994,7 +998,11 @@ class open_addressing_impl {
994998
void rehash(Container const& container, cuda::stream_ref stream)
995999
{
9961000
this->rehash_async(container, stream);
1001+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
1002+
stream.sync();
1003+
#else
9971004
stream.wait();
1005+
#endif
9981006
}
9991007

10001008
/**
@@ -1023,7 +1031,11 @@ class open_addressing_impl {
10231031
void rehash(extent_type extent, Container const& container, cuda::stream_ref stream)
10241032
{
10251033
this->rehash_async(extent, container, stream);
1034+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
1035+
stream.sync();
1036+
#else
10261037
stream.wait();
1038+
#endif
10271039
}
10281040

10291041
/**

include/cuco/detail/roaring_bitmap/roaring_bitmap_impl.cuh

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,11 @@ class roaring_bitmap_impl<cuda::std::uint32_t> {
6464
cuda::stream_ref stream = {}) const
6565
{
6666
this->contains_async(first, last, contained, stream);
67+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
68+
stream.sync();
69+
#else
6770
stream.wait();
71+
#endif
6872
}
6973

7074
template <class InputIt, class OutputIt>
@@ -303,7 +307,11 @@ class roaring_bitmap_impl<cuda::std::uint64_t> {
303307
cuda::stream_ref stream = {}) const
304308
{
305309
this->contains_async(first, last, contained, stream);
310+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
311+
stream.sync();
312+
#else
306313
stream.wait();
314+
#endif
307315
}
308316

309317
template <class InputIt, class OutputIt>

include/cuco/detail/static_map/static_map.inl

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -194,7 +194,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
194194
cuda::stream_ref stream)
195195
{
196196
insert_and_find_async(first, last, found_begin, inserted_begin, stream);
197+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
198+
stream.sync();
199+
#else
197200
stream.wait();
201+
#endif
198202
}
199203

200204
template <class Key,
@@ -265,7 +269,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
265269
insert_or_assign(InputIt first, InputIt last, cuda::stream_ref stream)
266270
{
267271
this->insert_or_assign_async(first, last, stream);
272+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
273+
stream.sync();
274+
#else
268275
stream.wait();
276+
#endif
269277
}
270278

271279
template <class Key,
@@ -303,7 +311,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
303311
insert_or_apply(InputIt first, InputIt last, Op op, cuda::stream_ref stream)
304312
{
305313
this->insert_or_apply_async(first, last, op, stream);
314+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
315+
stream.sync();
316+
#else
306317
stream.wait();
318+
#endif
307319
}
308320

309321
template <class Key,
@@ -319,7 +331,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
319331
insert_or_apply(InputIt first, InputIt last, Init init, Op op, cuda::stream_ref stream)
320332
{
321333
this->insert_or_apply_async(first, last, init, op, stream);
334+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
335+
stream.sync();
336+
#else
322337
stream.wait();
338+
#endif
323339
}
324340

325341
template <class Key,
@@ -371,7 +387,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
371387
InputIt first, InputIt last, cuda::stream_ref stream)
372388
{
373389
erase_async(first, last, stream);
390+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
391+
stream.sync();
392+
#else
374393
stream.wait();
394+
#endif
375395
}
376396

377397
template <class Key,
@@ -402,7 +422,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
402422
InputIt first, InputIt last, OutputIt output_begin, cuda::stream_ref stream) const
403423
{
404424
contains_async(first, last, output_begin, stream);
425+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
426+
stream.sync();
427+
#else
405428
stream.wait();
429+
#endif
406430
}
407431

408432
template <class Key,
@@ -438,7 +462,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
438462
cuda::stream_ref stream) const
439463
{
440464
contains_if_async(first, last, stencil, pred, output_begin, stream);
465+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
466+
stream.sync();
467+
#else
441468
stream.wait();
469+
#endif
442470
}
443471

444472
template <class Key,
@@ -474,7 +502,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
474502
InputIt first, InputIt last, OutputIt output_begin, cuda::stream_ref stream) const
475503
{
476504
find_async(first, last, output_begin, stream);
505+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
506+
stream.sync();
507+
#else
477508
stream.wait();
509+
#endif
478510
}
479511

480512
template <class Key,
@@ -534,7 +566,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
534566
cuda::stream_ref stream) const
535567
{
536568
this->find_if_async(first, last, stencil, pred, output_begin, stream);
569+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
570+
stream.sync();
571+
#else
537572
stream.wait();
573+
#endif
538574
}
539575

540576
template <class Key,
@@ -603,7 +639,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
603639
CallbackOp&& callback_op, cuda::stream_ref stream) const
604640
{
605641
impl_->for_each_async(std::forward<CallbackOp>(callback_op), stream);
642+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
643+
stream.sync();
644+
#else
606645
stream.wait();
646+
#endif
607647
}
608648

609649
template <class Key,
@@ -635,7 +675,11 @@ void static_map<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator, Stora
635675
{
636676
impl_->for_each_async(
637677
first, last, std::forward<CallbackOp>(callback_op), ref(op::for_each), stream);
678+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
679+
stream.sync();
680+
#else
638681
stream.wait();
682+
#endif
639683
}
640684

641685
template <class Key,

include/cuco/detail/static_multimap/static_multimap.inl

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,7 +156,11 @@ void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
156156
InputIt first, InputIt last, cuda::stream_ref stream)
157157
{
158158
this->insert_async(first, last, stream);
159+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
160+
stream.sync();
161+
#else
159162
stream.wait();
163+
#endif
160164
}
161165

162166
template <class Key,
@@ -222,7 +226,11 @@ void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
222226
InputIt first, InputIt last, OutputIt output_begin, cuda::stream_ref stream) const
223227
{
224228
this->contains_async(first, last, output_begin, stream);
229+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
230+
stream.sync();
231+
#else
225232
stream.wait();
233+
#endif
226234
}
227235

228236
template <class Key,
@@ -261,7 +269,11 @@ void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
261269
cuda::stream_ref stream) const
262270
{
263271
this->contains_if_async(first, last, stencil, pred, output_begin, stream);
272+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
273+
stream.sync();
274+
#else
264275
stream.wait();
276+
#endif
265277
}
266278

267279
template <class Key,
@@ -297,7 +309,11 @@ void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
297309
InputIt first, InputIt last, OutputIt output_begin, cuda::stream_ref stream) const
298310
{
299311
this->find_async(first, last, output_begin, stream);
312+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
313+
stream.sync();
314+
#else
300315
stream.wait();
316+
#endif
301317
}
302318

303319
template <class Key,
@@ -333,7 +349,11 @@ void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
333349
cuda::stream_ref stream) const
334350
{
335351
this->find_if_async(first, last, stencil, pred, output_begin, stream);
352+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
353+
stream.sync();
354+
#else
336355
stream.wait();
356+
#endif
337357
}
338358

339359
template <class Key,
@@ -369,7 +389,11 @@ void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
369389
CallbackOp&& callback_op, cuda::stream_ref stream) const
370390
{
371391
impl_->for_each_async(std::forward<CallbackOp>(callback_op), stream);
392+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
393+
stream.sync();
394+
#else
372395
stream.wait();
396+
#endif
373397
}
374398

375399
template <class Key,
@@ -401,7 +425,11 @@ void static_multimap<Key, T, Extent, Scope, KeyEqual, ProbingScheme, Allocator,
401425
{
402426
impl_->for_each_async(
403427
first, last, std::forward<CallbackOp>(callback_op), ref(op::for_each), stream);
428+
#if CCCL_MAJOR_VERSION > 3 || (CCCL_MAJOR_VERSION == 3 && CCCL_MINOR_VERSION >= 1)
429+
stream.sync();
430+
#else
404431
stream.wait();
432+
#endif
405433
}
406434

407435
template <class Key,

0 commit comments

Comments
 (0)