Skip to content

Commit e7283ed

Browse files
authored
Bypass key equal check for multiset and multimap insertions (#769)
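Closes #767. This PR skips key equality checks during multiset and multimap insertions, since containers that allow duplicates always insert regardless of whether the key already exists. It also narrows the underlying type of the affected enums to `int8_t` (from `int32_t` for `equal_result` and `insert_result`, and from `bool` for `is_insert`) to reduce register usage, which may help address issue #761.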
1 parent 3b9873a commit e7283ed

File tree

2 files changed

+27
-19
lines changed

2 files changed

+27
-19
lines changed

include/cuco/detail/equal_wrapper.cuh

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -17,18 +17,17 @@
1717

1818
#include <cuco/detail/bitwise_compare.cuh>
1919

20-
#include <cstddef>
20+
#include <cuda/std/cstdint>
2121

22-
namespace cuco {
23-
namespace detail {
22+
namespace cuco::detail {
2423

2524
/**
2625
* @brief Enum of equality comparison results
2726
*/
2827
// ENUM VALUE MATTERS, DO NOT CHANGE
29-
enum class equal_result : int32_t { UNEQUAL = 0, EQUAL = 1, EMPTY = 2, AVAILABLE = 3 };
28+
enum class equal_result : cuda::std::int8_t { UNEQUAL = 0, EQUAL = 1, EMPTY = 2, AVAILABLE = 3 };
3029

31-
enum class is_insert : bool { YES, NO };
30+
enum class is_insert : cuda::std::int8_t { YES, NO };
3231

3332
/**
3433
* @brief Key equality wrapper.
@@ -37,8 +36,9 @@ enum class is_insert : bool { YES, NO };
3736
*
3837
* @tparam T Right-hand side Element type
3938
* @tparam Equal Type of user-provided equality binary callable
39+
* @tparam AllowsDuplicates Flag indicating whether duplicate keys are allowed
4040
*/
41-
template <typename T, typename Equal>
41+
template <typename T, typename Equal, bool AllowsDuplicates>
4242
struct equal_wrapper {
4343
// TODO: Clean up the sentinel handling since it's duplicated in ref and equal wrapper
4444
T empty_sentinel_; ///< Empty sentinel value
@@ -97,16 +97,22 @@ struct equal_wrapper {
9797
__device__ constexpr equal_result operator()(LHS const& lhs, RHS const& rhs) const noexcept
9898
{
9999
if constexpr (IsInsert == is_insert::YES) {
100-
return (cuco::detail::bitwise_compare(rhs, empty_sentinel_) or
101-
cuco::detail::bitwise_compare(rhs, erased_sentinel_))
102-
? equal_result::AVAILABLE
103-
: this->equal_to(lhs, rhs);
100+
if (cuco::detail::bitwise_compare(rhs, empty_sentinel_) or
101+
cuco::detail::bitwise_compare(rhs, erased_sentinel_)) {
102+
return equal_result::AVAILABLE;
103+
}
104+
// Optimization: For containers that allow duplicates, skip expensive key equality check
105+
// during insertion since we always insert regardless of whether the key already exists
106+
if constexpr (AllowsDuplicates) {
107+
return equal_result::UNEQUAL;
108+
} else {
109+
return this->equal_to(lhs, rhs);
110+
}
104111
} else {
105112
return cuco::detail::bitwise_compare(rhs, empty_sentinel_) ? equal_result::EMPTY
106113
: this->equal_to(lhs, rhs);
107114
}
108115
}
109116
};
110117

111-
} // namespace detail
112-
} // namespace cuco
118+
} // namespace cuco::detail

include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ namespace cuco {
4242
namespace detail {
4343

4444
/// Three-way insert result enum
45-
enum class insert_result : cuda::std::int32_t { CONTINUE = 0, SUCCESS = 1, DUPLICATE = 2 };
45+
enum class insert_result : cuda::std::int8_t { CONTINUE = 0, SUCCESS = 1, DUPLICATE = 2 };
4646

4747
/**
4848
* @brief Helper struct to store intermediate bucket probing results.
@@ -217,8 +217,9 @@ class open_addressing_ref_impl {
217217
*
218218
* @return The key equality predicate
219219
*/
220-
[[nodiscard]] __host__ __device__ constexpr detail::equal_wrapper<key_type, key_equal> predicate()
221-
const noexcept
220+
[[nodiscard]] __host__
221+
__device__ constexpr detail::equal_wrapper<key_type, key_equal, allows_duplicates>
222+
predicate() const noexcept
222223
{
223224
return this->predicate_;
224225
}
@@ -1927,9 +1928,10 @@ class open_addressing_ref_impl {
19271928

19281929
// TODO: Clean up the sentinel handling since it's duplicated in ref and equal wrapper
19291930
value_type empty_slot_sentinel_; ///< Sentinel value indicating an empty slot
1930-
detail::equal_wrapper<key_type, key_equal> predicate_; ///< Key equality binary callable
1931-
probing_scheme_type probing_scheme_; ///< Probing scheme
1932-
storage_ref_type storage_ref_; ///< Slot storage ref
1931+
detail::equal_wrapper<key_type, key_equal, allows_duplicates>
1932+
predicate_; ///< Key equality binary callable
1933+
probing_scheme_type probing_scheme_; ///< Probing scheme
1934+
storage_ref_type storage_ref_; ///< Slot storage ref
19331935
};
19341936

19351937
} // namespace detail

0 commit comments

Comments
 (0)