Skip to content

Commit 6798552

Browse files
authored
Merge branch 'dev' into add-std-overloads
2 parents bb4f5f6 + de9d8c8 commit 6798552

File tree

5 files changed: 47 additions and 15 deletions.

benchmarks/bloom_filter/add_bench.cu

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ void bloom_filter_add(nvbench::state& state,
4949

5050
auto const num_keys = state.get_int64("NumInputs");
5151
auto const filter_size_mb = state.get_int64("FilterSizeMB");
52-
auto const pattern_bits = state.get_int64("PatternBits");
52+
auto const pattern_bits = state.get_int64_or_default("PatternBits", WordsPerBlock);
5353

5454
try {
5555
auto const policy = policy_type{static_cast<uint32_t>(pattern_bits)};
@@ -70,6 +70,12 @@ void bloom_filter_add(nvbench::state& state,
7070

7171
filter_type filter{num_sub_filters, {}, {static_cast<uint32_t>(pattern_bits)}};
7272

73+
state.collect_dram_throughput();
74+
state.collect_l1_hit_rates();
75+
state.collect_l2_hit_rates();
76+
state.collect_loads_efficiency();
77+
state.collect_stores_efficiency();
78+
7379
add_fpr_summary(state, filter);
7480

7581
state.exec([&](nvbench::launch& launch) {
@@ -87,8 +93,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_add,
8793
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
8894
.set_max_noise(defaults::MAX_NOISE)
8995
.add_int64_axis("NumInputs", {defaults::BF_N})
90-
.add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE)
91-
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
96+
.add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE);
9297

9398
NVBENCH_BENCH_TYPES(bloom_filter_add,
9499
NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
@@ -100,8 +105,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_add,
100105
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
101106
.set_max_noise(defaults::MAX_NOISE)
102107
.add_int64_axis("NumInputs", {defaults::BF_N})
103-
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB})
104-
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
108+
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});
105109

106110
NVBENCH_BENCH_TYPES(bloom_filter_add,
107111
NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
@@ -113,5 +117,4 @@ NVBENCH_BENCH_TYPES(bloom_filter_add,
113117
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
114118
.set_max_noise(defaults::MAX_NOISE)
115119
.add_int64_axis("NumInputs", {defaults::BF_N})
116-
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB})
117-
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
120+
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});

benchmarks/bloom_filter/contains_bench.cu

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ void bloom_filter_contains(
5151

5252
auto const num_keys = state.get_int64("NumInputs");
5353
auto const filter_size_mb = state.get_int64("FilterSizeMB");
54-
auto const pattern_bits = state.get_int64("PatternBits");
54+
auto const pattern_bits = state.get_int64_or_default("PatternBits", WordsPerBlock);
5555

5656
try {
5757
auto const policy = policy_type{static_cast<uint32_t>(pattern_bits)};
@@ -73,6 +73,12 @@ void bloom_filter_contains(
7373

7474
filter_type filter{num_sub_filters, {}, {static_cast<uint32_t>(pattern_bits)}};
7575

76+
state.collect_dram_throughput();
77+
state.collect_l1_hit_rates();
78+
state.collect_l2_hit_rates();
79+
state.collect_loads_efficiency();
80+
state.collect_stores_efficiency();
81+
7682
add_fpr_summary(state, filter);
7783

7884
filter.add(keys.begin(), keys.end());
@@ -92,8 +98,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains,
9298
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
9399
.set_max_noise(defaults::MAX_NOISE)
94100
.add_int64_axis("NumInputs", {defaults::BF_N})
95-
.add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE)
96-
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
101+
.add_int64_axis("FilterSizeMB", defaults::BF_SIZE_MB_RANGE_CACHE);
97102

98103
NVBENCH_BENCH_TYPES(bloom_filter_contains,
99104
NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
@@ -105,8 +110,7 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains,
105110
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
106111
.set_max_noise(defaults::MAX_NOISE)
107112
.add_int64_axis("NumInputs", {defaults::BF_N})
108-
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB})
109-
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
113+
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});
110114

111115
NVBENCH_BENCH_TYPES(bloom_filter_contains,
112116
NVBENCH_TYPE_AXES(nvbench::type_list<defaults::BF_KEY>,
@@ -118,5 +122,4 @@ NVBENCH_BENCH_TYPES(bloom_filter_contains,
118122
.set_type_axes_names({"Key", "Hash", "Word", "WordsPerBlock", "Distribution"})
119123
.set_max_noise(defaults::MAX_NOISE)
120124
.add_int64_axis("NumInputs", {defaults::BF_N})
121-
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB})
122-
.add_int64_axis("PatternBits", {defaults::BF_PATTERN_BITS});
125+
.add_int64_axis("FilterSizeMB", {defaults::BF_SIZE_MB});

benchmarks/bloom_filter/defaults.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ using BF_WORD = nvbench::uint32_t;
3333
static constexpr auto BF_N = 400'000'000;
3434
static constexpr auto BF_SIZE_MB = 2'000;
3535
static constexpr auto BF_WORDS_PER_BLOCK = 8;
36-
static constexpr auto BF_PATTERN_BITS = BF_WORDS_PER_BLOCK;
3736

3837
auto const BF_SIZE_MB_RANGE_CACHE =
3938
std::vector<nvbench::int64_t>{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048};

include/cuco/detail/static_set/static_set_ref.inl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,20 @@ static_set_ref<Key, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::r
298298
this->storage_ref()};
299299
}
300300

301+
template <typename Key,
302+
cuda::thread_scope Scope,
303+
typename KeyEqual,
304+
typename ProbingScheme,
305+
typename StorageRef,
306+
typename... Operators>
307+
template <typename NewKeyEqual>
308+
__host__ __device__ constexpr auto
309+
static_set_ref<Key, Scope, KeyEqual, ProbingScheme, StorageRef, Operators...>::with_key_eq(
310+
NewKeyEqual const& key_equal) const noexcept
311+
{
312+
return this->rebind_key_eq(key_equal);
313+
}
314+
301315
template <typename Key,
302316
cuda::thread_scope Scope,
303317
typename KeyEqual,

include/cuco/static_set_ref.cuh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,19 @@ class static_set_ref
245245
[[nodiscard]] __host__ __device__ constexpr auto rebind_key_eq(
246246
NewKeyEqual const& key_equal) const noexcept;
247247

248+
/**
249+
* @brief Makes a copy of the current device reference with the given key comparator
250+
*
251+
* @tparam NewKeyEqual The new key equal type
252+
*
253+
* @param key_equal New key comparator
254+
*
255+
* @return Copy of the current device ref
256+
*/
257+
template <typename NewKeyEqual>
258+
[[nodiscard]] __host__ __device__ constexpr auto with_key_eq(
259+
NewKeyEqual const& key_equal) const noexcept;
260+
248261
/**
249262
* @brief Makes a copy of the current device reference with the given hasher
250263
*

0 commit comments

Comments
 (0)