Skip to content

Commit 9115ffa

Browse files
authored
Use sequence data in arrow policy tests for consistent bitsets (#740)
Closes #735. This PR inserts deterministic sequential keys into a `cuco::bloom_filter` configured with `cuco::arrow_filter_policy` and validates the resulting bitsets against the bitsets generated by inserting the same keys into the implementation in Arrow. The reference bitsets were generated with Arrow here: https://godbolt.org/z/eEWMaWa3P
1 parent 541bdc0 commit 9115ffa

File tree

1 file changed

+40
-45
lines changed

1 file changed

+40
-45
lines changed

tests/bloom_filter/arrow_policy_test.cu

Lines changed: 40 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -33,36 +33,43 @@ template <typename Key>
3333
thrust::device_vector<uint32_t> get_arrow_filter_reference_bitset()
3434
{
3535
static std::vector<thrust::device_vector<uint32_t>> const reference_bitsets{
36-
{4294752255,
37-
928963967,
38-
4227333887,
39-
3183462382,
40-
3892030683,
41-
3481206270,
42-
3513757613,
43-
3220961761,
44-
3186616955,
45-
4026531705,
46-
4110408887,
47-
804913147,
48-
1039007726,
49-
4286569403,
50-
2675948542,
51-
3688689479}, // type = int32, blocks = 2, num_keys = 100
52-
{2290897413, 3368027184, 2432735301, 2013315170, 610406792, 35787348, 43061541,
53-
1145143906, 238486532, 2840527950, 241188878, 624061504, 759830680, 184694210,
54-
2282459916, 3232258264, 285316692, 3284142851, 2760958614, 2974341265, 38749317,
55-
2655160577, 2193666087, 261196816, 411328595, 5391621, 2308014147, 2550892738,
56-
1224755395, 1396835974, 3227911200, 307324929}, // type = int64, blocks = 4, num_keys = 50
57-
{3037098621, 1001208422, 3070541682, 3611620780, 372254302, 2869772027, 2629135999, 3332804862,
58-
2832966981, 1225184253, 1315442262, 211922492, 1020510327, 2725704195, 2909038118, 2783622989,
59-
4214109798, 535934391, 2385459605, 4109595381, 3219664733, 3164400602, 1995984498, 2917029602,
60-
3047576211, 2212973933, 1672737343, 300902378, 3000318461, 1561320274, 2710202091, 3067275349,
61-
2734901244, 2638172076, 3669981206, 3719000395, 793729452, 2258222966, 4111863618, 2391109497,
62-
240119500, 855317864, 2893522276, 1103034386, 738173080, 4098968587, 1271241025, 499361504,
63-
4174530401, 3259956170, 3823469907, 578271374, 3168397042, 3890816473, 431898609, 1583427570,
64-
1835797371, 2078281027, 2741410265, 2639785266, 3422606831, 1589476610, 3972396492, 3611525326}
65-
// type = float, blocks = 8, num_keys = 200
36+
{
37+
3017764846,
38+
4219371383,
39+
4160077310,
40+
3214786543,
41+
4020088765,
42+
4294437885,
43+
2013200345,
44+
2550116063,
45+
855631359,
46+
4290436829,
47+
2884632042,
48+
1592483646,
49+
4281695998,
50+
2080111551,
51+
3220060030,
52+
4021279731,
53+
}, // type = int32, blocks = 2, num_keys = 100
54+
{
55+
860053560, 186397876, 1518788617, 2013987426, 545522943, 79856155, 103371656,
56+
20265733, 2168586373, 1210138712, 2437452036, 1342183988, 1107366672, 3560981000,
57+
2184221186, 1661010032, 2317009736, 1442875878, 1116227467, 3458613792, 114398528,
58+
679658134, 206734656, 340863450, 2220104352, 141846788, 948331524, 2344943952,
59+
4030989912, 3239203139, 2941256193, 4035057968,
60+
}, // type = int64, blocks = 4, num_keys = 50
61+
{
62+
3807057303, 3207519405, 2508188120, 1491024175, 2073585514, 2094743110, 2533287591,
63+
691662424, 1498889215, 2069126314, 2270481639, 796401059, 1961968732, 3512881027,
64+
3162306144, 2277085974, 3477648628, 1090385857, 4035761415, 1165385841, 4047856262,
65+
2297893848, 902599838, 418175153, 1437192944, 3673877288, 1536198910, 98677451,
66+
3620189521, 3794688342, 3625373537, 3550967313, 2119503598, 1805574667, 4076413870,
67+
2999897588, 3050286944, 4146882307, 3459690182, 167235913, 2078961096, 1863964920,
68+
1408130860, 4190644775, 532451008, 1563872186, 2529714129, 465761275, 3161649891,
69+
4204002248, 3931628891, 3251515903, 1421507581, 3849056446, 1748476671, 4223388125,
70+
1627644727, 2717076288, 2992639576, 3864567831, 190096788, 1885360347, 724608293,
71+
2768994330,
72+
} // type = float, blocks = 8, num_keys = 200
6673
};
6774

6875
if constexpr (std::is_same_v<Key, int32_t>) {
@@ -97,22 +104,10 @@ std::pair<size_t, size_t> get_arrow_filter_test_settings()
97104
}
98105

99106
template <typename Key>
100-
std::vector<Key> random_values(size_t size)
107+
std::vector<Key> sequence_values(size_t size)
101108
{
102109
std::vector<Key> values(size);
103-
104-
using uniform_distribution =
105-
typename std::conditional_t<std::is_same_v<Key, bool>,
106-
std::bernoulli_distribution,
107-
std::conditional_t<std::is_floating_point_v<Key>,
108-
std::uniform_real_distribution<Key>,
109-
std::uniform_int_distribution<Key>>>;
110-
111-
static constexpr auto seed = 0xf00d;
112-
static std::mt19937 engine{seed};
113-
static uniform_distribution dist{};
114-
std::generate_n(values.begin(), size, [&]() { return Key{dist(engine)}; });
115-
110+
std::iota(values.begin(), values.end(), Key{1});
116111
return values;
117112
}
118113

@@ -125,7 +120,7 @@ void test_filter_bitset(Filter& filter, size_t num_keys)
125120
using word_type = typename Filter::word_type;
126121

127122
// Generate keys
128-
auto const h_keys = random_values<key_type>(num_keys);
123+
auto const h_keys = sequence_values<key_type>(num_keys);
129124
thrust::device_vector<key_type> d_keys(h_keys.begin(), h_keys.end());
130125

131126
// Insert to the bloom filter

0 commit comments

Comments
 (0)