Skip to content

Commit 48ae8be

Browse files
committed
Use proper namespace for int32_t
1 parent afe3456 commit 48ae8be

21 files changed

+217
-213
lines changed

include/cuco/detail/bloom_filter/arrow_filter_policy.cuh

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ namespace cuco::detail {
3636
*
3737
* Example:
3838
* @code{.cpp}
39-
* template <typename KeyType, std::uint32_t NUM_FILTER_BLOCKS>
39+
* template <typename KeyType, ::int NUM_FILTER_BLOCKS>
4040
* void bulk_insert_and_eval_arrow_policy_bloom_filter(device_vector<KeyType> const& positive_keys,
4141
* device_vector<KeyType> const& negative_keys)
4242
* {
@@ -84,18 +84,20 @@ template <class Key, template <typename> class XXHash64>
8484
class arrow_filter_policy {
8585
public:
8686
using hasher = XXHash64<Key>; ///< 64-bit XXHash hasher for Arrow bloom filter policy
87-
using word_type = std::uint32_t; ///< uint32_t for Arrow bloom filter policy
88-
using key_type = Key; ///< Hash function input type
89-
using hash_result_type = std::uint64_t; ///< hash function output type
87+
using word_type = cuda::std::uint32_t; ///< uint32_t for Arrow bloom filter policy
88+
using key_type = Key; ///< Hash function input type
89+
using hash_result_type = cuda::std::uint64_t; ///< hash function output type
9090

91-
static constexpr uint32_t bits_set_per_block = 8; ///< hardcoded bits set per Arrow filter block
92-
static constexpr uint32_t words_per_block = 8; ///< hardcoded words per Arrow filter block
91+
static constexpr cuda::std::int32_t bits_set_per_block =
92+
8; ///< hardcoded bits set per Arrow filter block
93+
static constexpr cuda::std::int32_t words_per_block =
94+
8; ///< hardcoded words per Arrow filter block
9395

94-
static constexpr std::uint32_t bytes_per_filter_block =
96+
static constexpr cuda::std::int32_t bytes_per_filter_block =
9597
32; ///< Number of bytes in one Arrow filter block
96-
static constexpr std::uint32_t max_arrow_filter_bytes =
98+
static constexpr cuda::std::int32_t max_arrow_filter_bytes =
9799
128 * 1024 * 1024; ///< Max bytes in Arrow bloom filter
98-
static constexpr std::uint32_t max_filter_blocks =
100+
static constexpr cuda::std::int32_t max_filter_blocks =
99101
(max_arrow_filter_bytes /
100102
bytes_per_filter_block); ///< Max sub-filter blocks allowed in Arrow bloom filter
101103

@@ -153,10 +155,11 @@ class arrow_filter_policy {
153155
*
154156
* @return The bit pattern for the word/segment in the filter block
155157
*/
156-
__device__ constexpr word_type word_pattern(hash_result_type hash, std::uint32_t word_index) const
158+
__device__ constexpr word_type word_pattern(hash_result_type hash,
159+
cuda::std::int32_t word_index) const
157160
{
158161
word_type const key = static_cast<word_type>(hash);
159-
std::uint32_t salt;
162+
cuda::std::int32_t salt;
160163

161164
// Basically a switch (word_index) { case 0-7 ... }
162165
// First split: 0..3 versus 4..7.
@@ -186,4 +189,4 @@ class arrow_filter_policy {
186189
hasher hash_;
187190
};
188191

189-
} // namespace cuco::detail
192+
} // namespace cuco::detail

include/cuco/detail/bloom_filter/bloom_filter_impl.cuh

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <cuda/std/__algorithm/min.h> // TODO #include <cuda/std/algorithm> once available
3030
#include <cuda/std/array>
3131
#include <cuda/std/bit>
32+
#include <cuda/std/cstdint>
3233
#include <cuda/std/functional>
3334
#include <cuda/std/tuple>
3435
#include <cuda/std/type_traits>
@@ -37,8 +38,6 @@
3738

3839
#include <cooperative_groups.h>
3940

40-
#include <cstdint>
41-
4241
namespace cuco::detail {
4342

4443
template <class Key, class Extent, cuda::thread_scope Scope, class Policy>
@@ -138,7 +137,7 @@ class bloom_filter_impl {
138137
__device__ void add_impl(HashValue const& hash_value, BlockIndex block_index)
139138
{
140139
#pragma unroll words_per_block
141-
for (uint32_t i = 0; i < words_per_block; ++i) {
140+
for (cuda::std::int32_t i = 0; i < words_per_block; ++i) {
142141
auto const word = policy_.word_pattern(hash_value, i);
143142
if (word != 0) {
144143
auto atom_word = cuda::atomic_ref<word_type, thread_scope>{
@@ -200,7 +199,7 @@ class bloom_filter_impl {
200199
block_index = policy_.block_index(hash_value, num_blocks_);
201200
}
202201

203-
for (uint32_t j = 0; (j < num_threads) and (i + j < num_keys); ++j) {
202+
for (cuda::std::int32_t j = 0; (j < num_threads) and (i + j < num_keys); ++j) {
204203
this->add_impl(group, group.shfl(hash_value, j), group.shfl(block_index, j));
205204
}
206205
}
@@ -220,7 +219,9 @@ class bloom_filter_impl {
220219
block_index = policy_.block_index(hash_value, num_blocks_);
221220
}
222221

223-
for (uint32_t j = 0; (j < worker_num_threads) and (i + worker_offset + j < num_keys); ++j) {
222+
for (cuda::std::int32_t j = 0;
223+
(j < worker_num_threads) and (i + worker_offset + j < num_keys);
224+
++j) {
224225
this->add_impl(
225226
worker_group, worker_group.shfl(hash_value, j), worker_group.shfl(block_index, j));
226227
}
@@ -318,7 +319,7 @@ class bloom_filter_impl {
318319
policy_.block_index(hash_value, num_blocks_) * words_per_block);
319320

320321
#pragma unroll words_per_block
321-
for (uint32_t i = 0; i < words_per_block; ++i) {
322+
for (cuda::std::int32_t i = 0; i < words_per_block; ++i) {
322323
auto const expected_pattern = policy_.word_pattern(hash_value, i);
323324
if ((stored_pattern[i] & expected_pattern) != expected_pattern) { return false; }
324325
}
@@ -342,12 +343,12 @@ class bloom_filter_impl {
342343
bool success = true;
343344

344345
#pragma unroll
345-
for (uint32_t i = rank; i < optimal_num_threads; i += num_threads) {
346+
for (cuda::std::int32_t i = rank; i < optimal_num_threads; i += num_threads) {
346347
auto const thread_offset = i * words_per_thread;
347348
auto const stored_pattern = this->vec_load_words<words_per_thread>(
348349
policy_.block_index(hash_value, num_blocks_) * words_per_block + thread_offset);
349350
#pragma unroll words_per_thread
350-
for (uint32_t j = 0; j < words_per_thread; ++j) {
351+
for (cuda::std::int32_t j = 0; j < words_per_thread; ++j) {
351352
auto const expected_pattern = policy_.word_pattern(hash_value, thread_offset + j);
352353
if ((stored_pattern[j] & expected_pattern) != expected_pattern) { success = false; }
353354
}
@@ -430,25 +431,25 @@ class bloom_filter_impl {
430431
// TODO
431432
// [[nodiscard]] __host__ double occupancy() const;
432433
// [[nodiscard]] __host__ double expected_false_positive_rate(size_t unique_keys) const
433-
// [[nodiscard]] __host__ __device__ static uint32_t optimal_pattern_bits(size_t num_blocks)
434+
// [[nodiscard]] __host__ __device__ static int32_t optimal_pattern_bits(size_t num_blocks)
434435
// template <typename CG, cuda::thread_scope NewScope = thread_scope>
435436
// [[nodiscard]] __device__ constexpr auto make_copy(CG const& group, word_type* const
436437
// memory_to_use, cuda_thread_scope<NewScope> scope = {}) const noexcept;
437438

438439
private:
439-
template <uint32_t NumWords>
440+
template <::int NumWords>
440441
__device__ constexpr cuda::std::array<word_type, NumWords> vec_load_words(size_type index) const
441442
{
442443
return *reinterpret_cast<cuda::std::array<word_type, NumWords>*>(__builtin_assume_aligned(
443444
words_ + index, cuda::std::min(sizeof(word_type) * NumWords, max_vec_bytes())));
444445
}
445446

446-
[[nodiscard]] __host__ __device__ static constexpr int32_t add_optimal_cg_size()
447+
[[nodiscard]] __host__ __device__ static constexpr cuda::std::int32_t add_optimal_cg_size()
447448
{
448449
return words_per_block; // one thread per word so atomic updates can be coalesced
449450
}
450451

451-
[[nodiscard]] __host__ __device__ static constexpr int32_t contains_optimal_cg_size()
452+
[[nodiscard]] __host__ __device__ static constexpr cuda::std::int32_t contains_optimal_cg_size()
452453
{
453454
constexpr auto word_bytes = sizeof(word_type);
454455
constexpr auto block_bytes = word_bytes * words_per_block;

include/cuco/detail/bloom_filter/default_filter_policy.inl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -16,27 +16,27 @@
1616

1717
#pragma once
1818

19-
#include <cstdint>
19+
#include <cuda/std/cstdint>
2020

2121
namespace cuco {
2222

23-
template <class Hash, class Word, uint32_t WordsPerBlock>
23+
template <class Hash, class Word, ::int WordsPerBlock>
2424
__host__
2525
__device__ constexpr default_filter_policy<Hash, Word, WordsPerBlock>::default_filter_policy(
26-
uint32_t pattern_bits, Hash hash)
26+
cuda::std::int32_t pattern_bits, Hash hash)
2727
: impl_{pattern_bits, hash}
2828
{
2929
}
3030

31-
template <class Hash, class Word, uint32_t WordsPerBlock>
31+
template <class Hash, class Word, ::int WordsPerBlock>
3232
__device__ constexpr typename default_filter_policy<Hash, Word, WordsPerBlock>::hash_result_type
3333
default_filter_policy<Hash, Word, WordsPerBlock>::hash(
3434
typename default_filter_policy<Hash, Word, WordsPerBlock>::hash_argument_type const& key) const
3535
{
3636
return impl_.hash(key);
3737
}
3838

39-
template <class Hash, class Word, uint32_t WordsPerBlock>
39+
template <class Hash, class Word, ::int WordsPerBlock>
4040
template <class Extent>
4141
__device__ constexpr auto default_filter_policy<Hash, Word, WordsPerBlock>::block_index(
4242
typename default_filter_policy<Hash, Word, WordsPerBlock>::hash_result_type hash,
@@ -45,13 +45,13 @@ __device__ constexpr auto default_filter_policy<Hash, Word, WordsPerBlock>::bloc
4545
return impl_.block_index(hash, num_blocks);
4646
}
4747

48-
template <class Hash, class Word, uint32_t WordsPerBlock>
48+
template <class Hash, class Word, ::int WordsPerBlock>
4949
__device__ constexpr typename default_filter_policy<Hash, Word, WordsPerBlock>::word_type
5050
default_filter_policy<Hash, Word, WordsPerBlock>::word_pattern(
5151
default_filter_policy<Hash, Word, WordsPerBlock>::hash_result_type hash,
52-
std::uint32_t word_index) const
52+
cuda::std::int32_t word_index) const
5353
{
5454
return impl_.word_pattern(hash, word_index);
5555
}
5656

57-
} // namespace cuco
57+
} // namespace cuco

include/cuco/detail/bloom_filter/default_filter_policy_impl.cuh

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -19,31 +19,31 @@
1919
#include <cuco/detail/error.hpp>
2020

2121
#include <cuda/std/bit>
22+
#include <cuda/std/cstdint>
2223
#include <cuda/std/limits>
2324
#include <cuda/std/tuple>
2425
#include <cuda/std/type_traits>
2526

26-
#include <cstdint>
2727
#include <nv/target>
2828

2929
namespace cuco::detail {
3030

31-
template <class Hash, class Word, uint32_t WordsPerBlock>
31+
template <class Hash, class Word, ::int WordsPerBlock>
3232
class default_filter_policy_impl {
3333
public:
3434
using hasher = Hash;
3535
using word_type = Word;
3636
using hash_argument_type = typename hasher::argument_type;
3737
using hash_result_type = decltype(std::declval<hasher>()(std::declval<hash_argument_type>()));
3838

39-
static constexpr std::uint32_t words_per_block = WordsPerBlock;
39+
static constexpr cuda::std::int32_t words_per_block = WordsPerBlock;
4040

4141
private:
42-
static constexpr std::uint32_t word_bits = cuda::std::numeric_limits<word_type>::digits;
43-
static constexpr std::uint32_t bit_index_width = cuda::std::bit_width(word_bits - 1);
42+
static constexpr cuda::std::int32_t word_bits = cuda::std::numeric_limits<word_type>::digits;
43+
static constexpr cuda::std::int32_t bit_index_width = cuda::std::bit_width(word_bits - 1);
4444

4545
public:
46-
__host__ __device__ explicit constexpr default_filter_policy_impl(uint32_t pattern_bits,
46+
__host__ __device__ explicit constexpr default_filter_policy_impl(cuda::std::int32_t pattern_bits,
4747
Hash hash)
4848
: pattern_bits_{pattern_bits},
4949
min_bits_per_word_{pattern_bits_ / words_per_block},
@@ -54,14 +54,14 @@ class default_filter_policy_impl {
5454
NV_IS_HOST,
5555
( // This ensures each word in the block has at least one bit set; otherwise we would never
5656
// use some of the words
57-
constexpr uint32_t min_pattern_bits = words_per_block;
57+
constexpr cuda::int32_t min_pattern_bits = words_per_block;
5858

5959
// The maximum number of bits to be set for a key is capped by the total number of bits in
6060
// the filter block
61-
constexpr uint32_t max_pattern_bits = word_bits * words_per_block;
61+
constexpr cuda::int32_t max_pattern_bits = word_bits * words_per_block;
6262

63-
constexpr uint32_t hash_bits = cuda::std::numeric_limits<hash_result_type>::digits;
64-
constexpr uint32_t max_pattern_bits_from_hash = hash_bits / bit_index_width;
63+
constexpr cuda::int32_t hash_bits = cuda::std::numeric_limits<hash_result_type>::digits;
64+
constexpr cuda::int32_t max_pattern_bits_from_hash = hash_bits / bit_index_width;
6565
CUCO_EXPECTS(
6666
pattern_bits <= max_pattern_bits_from_hash,
6767
"`hash_result_type` too narrow to generate the requested number of `pattern_bits`");
@@ -85,7 +85,8 @@ class default_filter_policy_impl {
8585
return hash % num_blocks;
8686
}
8787

88-
__device__ constexpr word_type word_pattern(hash_result_type hash, std::uint32_t word_index) const
88+
__device__ constexpr word_type word_pattern(hash_result_type hash,
89+
cuda::std::int32_t word_index) const
8990
{
9091
word_type constexpr bit_index_mask = (word_type{1} << bit_index_width) - 1;
9192

@@ -94,10 +95,11 @@ class default_filter_policy_impl {
9495

9596
hash >>= bits_so_far * bit_index_width;
9697

97-
word_type word = 0;
98-
int32_t const bits_per_word = min_bits_per_word_ + (word_index < remainder_bits_ ? 1 : 0);
98+
word_type word = 0;
99+
cuda::std::int32_t const bits_per_word =
100+
min_bits_per_word_ + (word_index < remainder_bits_ ? 1 : 0);
99101

100-
for (int32_t bit = 0; bit < bits_per_word; ++bit) {
102+
for (cuda::std::int32_t bit = 0; bit < bits_per_word; ++bit) {
101103
word |= word_type{1} << (hash & bit_index_mask);
102104
hash >>= bit_index_width;
103105
}
@@ -106,10 +108,10 @@ class default_filter_policy_impl {
106108
}
107109

108110
private:
109-
uint32_t pattern_bits_;
110-
uint32_t min_bits_per_word_;
111-
uint32_t remainder_bits_;
111+
cuda::std::int32_t pattern_bits_;
112+
cuda::std::int32_t min_bits_per_word_;
113+
cuda::std::int32_t remainder_bits_;
112114
hasher hash_;
113115
};
114116

115-
} // namespace cuco::detail
117+
} // namespace cuco::detail

include/cuco/detail/bloom_filter/kernels.cuh

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,11 @@
2121

2222
#include <cooperative_groups.h>
2323

24-
#include <cstdint>
25-
2624
namespace cuco::detail::bloom_filter_ns {
2725

2826
CUCO_SUPPRESS_KERNEL_WARNINGS
2927

30-
template <int32_t BlockSize, class InputIt, class Ref>
28+
template <::int BlockSize, class InputIt, class Ref>
3129
CUCO_KERNEL __launch_bounds__(BlockSize) void add(InputIt first,
3230
cuco::detail::index_type n,
3331
Ref ref)
@@ -49,12 +47,7 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void add(InputIt first,
4947
ref.add(tile, first + tile_start, first + tile_stop);
5048
}
5149

52-
template <int32_t CGSize,
53-
int32_t BlockSize,
54-
class InputIt,
55-
class StencilIt,
56-
class Predicate,
57-
class Ref>
50+
template <::int CGSize, ::int BlockSize, class InputIt, class StencilIt, class Predicate, class Ref>
5851
CUCO_KERNEL __launch_bounds__(BlockSize) void add_if_n(
5952
InputIt first, cuco::detail::index_type n, StencilIt stencil, Predicate pred, Ref ref)
6053
{
@@ -75,8 +68,8 @@ CUCO_KERNEL __launch_bounds__(BlockSize) void add_if_n(
7568
}
7669
}
7770

78-
template <int32_t CGSize,
79-
int32_t BlockSize,
71+
template <::int CGSize,
72+
::int BlockSize,
8073
class InputIt,
8174
class StencilIt,
8275
class Predicate,

0 commit comments

Comments
 (0)