Skip to content

Commit 0d95f4b

Browse files
committed
fix infinite find/contains/erase when all keys in a submap has been erased
1 parent 6d59add commit 0d95f4b

File tree

3 files changed

+226
-0
lines changed

3 files changed

+226
-0
lines changed

include/cuco/detail/static_map.inl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -589,6 +589,7 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_mutable_view::e
589589
key_type const& k, Hash hash, KeyEqual key_equal) noexcept
590590
{
591591
auto current_slot{this->initial_slot(k, hash)};
592+
auto const init_slot = current_slot;
592593

593594
value_type const insert_pair =
594595
make_pair<Key, Value>(this->get_erased_key_sentinel(), this->get_empty_value_sentinel());
@@ -628,6 +629,8 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_mutable_view::e
628629
}
629630

630631
current_slot = this->next_slot(current_slot);
632+
// if all keys in this map has been erased, return false
633+
if (current_slot == init_slot) { return false; }
631634
}
632635
}
633636

@@ -637,6 +640,7 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_mutable_view::e
637640
CG g, key_type const& k, Hash hash, KeyEqual key_equal) noexcept
638641
{
639642
auto current_slot = this->initial_slot(g, k, hash);
643+
auto const init_slot = current_slot;
640644
value_type const insert_pair =
641645
make_pair<Key, Value>(this->get_erased_key_sentinel(), this->get_empty_value_sentinel());
642646

@@ -686,6 +690,7 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_mutable_view::e
686690
if (g.ballot(slot_is_empty)) { return false; }
687691

688692
current_slot = this->next_slot(g, current_slot);
693+
if (current_slot == init_slot) { return false; }
689694
}
690695
}
691696

@@ -697,6 +702,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(Key const& k,
697702
KeyEqual key_equal) noexcept
698703
{
699704
auto current_slot = this->initial_slot(k, hash);
705+
auto const init_slot = current_slot;
700706

701707
while (true) {
702708
auto const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed);
@@ -709,6 +715,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(Key const& k,
709715
if (key_equal(existing_key, k)) { return current_slot; }
710716

711717
current_slot = this->next_slot(current_slot);
718+
if (current_slot == init_slot) { return this->end(); }
712719
}
713720
}
714721

@@ -720,6 +727,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(Key const& k,
720727
KeyEqual key_equal) const noexcept
721728
{
722729
auto current_slot = this->initial_slot(k, hash);
730+
auto const init_slot = current_slot;
723731

724732
while (true) {
725733
auto const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed);
@@ -732,6 +740,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(Key const& k,
732740
if (key_equal(existing_key, k)) { return current_slot; }
733741

734742
current_slot = this->next_slot(current_slot);
743+
if (current_slot == init_slot) { return this->end(); }
735744
}
736745
}
737746

@@ -744,6 +753,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(CG g,
744753
KeyEqual key_equal) noexcept
745754
{
746755
auto current_slot = this->initial_slot(g, k, hash);
756+
auto const init_slot = current_slot;
747757

748758
while (true) {
749759
auto const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed);
@@ -770,6 +780,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(CG g,
770780
// otherwise, all slots in the current bucket are full with other keys, so we move onto the
771781
// next bucket
772782
current_slot = this->next_slot(g, current_slot);
783+
if (current_slot == init_slot) { return this->end(); }
773784
}
774785
}
775786

@@ -782,6 +793,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(CG g,
782793
KeyEqual key_equal) const noexcept
783794
{
784795
auto current_slot = this->initial_slot(g, k, hash);
796+
auto const init_slot = current_slot;
785797

786798
while (true) {
787799
auto const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed);
@@ -810,6 +822,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::find(CG g,
810822
// so we move onto the next bucket in the current submap
811823

812824
current_slot = this->next_slot(g, current_slot);
825+
if (current_slot == init_slot) { return this->end(); }
813826
}
814827
}
815828

@@ -819,6 +832,7 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
819832
ProbeKey const& k, Hash hash, KeyEqual key_equal) const noexcept
820833
{
821834
auto current_slot = this->initial_slot(k, hash);
835+
auto const init_slot = current_slot;
822836

823837
while (true) {
824838
auto const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed);
@@ -828,6 +842,7 @@ __device__ bool static_map<Key, Value, Scope, Allocator>::device_view::contains(
828842
if (key_equal(existing_key, k)) { return true; }
829843

830844
current_slot = this->next_slot(current_slot);
845+
if (current_slot == init_slot) { return false; }
831846
}
832847
}
833848

@@ -840,6 +855,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::contains(CG g,
840855
KeyEqual key_equal) const noexcept
841856
{
842857
auto current_slot = this->initial_slot(g, k, hash);
858+
auto const init_slot = current_slot;
843859

844860
while (true) {
845861
key_type const existing_key = current_slot->first.load(cuda::std::memory_order_relaxed);
@@ -859,6 +875,7 @@ static_map<Key, Value, Scope, Allocator>::device_view::contains(CG g,
859875
// otherwise, all slots in the current bucket are full with other keys, so we move onto the
860876
// next bucket
861877
current_slot = this->next_slot(g, current_slot);
878+
if (current_slot == init_slot) { return false; }
862879
}
863880
}
864881
} // namespace cuco::legacy

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ ConfigureTest(DYNAMIC_MAP_TEST
105105
dynamic_map/unique_sequence_test.cu
106106
dynamic_map/unique_sequence_test_experimental.cu
107107
dynamic_map/erase_test.cu
108+
dynamic_map/find_test.cu
108109
dynamic_map/retrieve_all_test.cu)
109110

110111
###################################################################################################

tests/dynamic_map/find_test.cu

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
/*
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <test_utils.hpp>
18+
19+
#include <cuco/dynamic_map.cuh>
20+
21+
#include <cuda/functional>
22+
#include <cuda/std/functional>
23+
#include <cuda/std/tuple>
24+
#include <thrust/device_vector.h>
25+
#include <thrust/execution_policy.h>
26+
#include <thrust/sequence.h>
27+
#include <thrust/iterator/zip_iterator.h>
28+
#include <thrust/iterator/constant_iterator.h>
29+
30+
#include <catch2/catch_template_test_macros.hpp>
31+
32+
TEMPLATE_TEST_CASE_SIG("dynamic_map find tests",
33+
"",
34+
((typename Key, typename Value), Key, Value),
35+
(int32_t, int32_t),
36+
(int32_t, int64_t),
37+
(int64_t, int32_t),
38+
(int64_t, int64_t))
39+
{
40+
constexpr std::size_t num_keys = 1'000'000;
41+
cuco::dynamic_map<Key, Value> map{num_keys * 2,
42+
cuco::empty_key<Key>{-1},
43+
cuco::empty_value<Value>{-1},
44+
cuco::erased_key<Key>{-2}};
45+
46+
SECTION("Check single submap insert/find")
47+
{
48+
thrust::device_vector<Key> d_keys(num_keys);
49+
thrust::device_vector<Value> d_values(num_keys);
50+
thrust::device_vector<Value> d_found_values(num_keys);
51+
52+
thrust::sequence(thrust::device, d_keys.begin(), d_keys.end(), 1);
53+
thrust::sequence(thrust::device, d_values.begin(), d_values.end(), 1);
54+
55+
auto pairs_begin =
56+
thrust::make_zip_iterator(cuda::std::tuple{d_keys.begin(), d_values.begin()});
57+
58+
map.insert(pairs_begin, pairs_begin + num_keys);
59+
60+
REQUIRE(map.get_size() == num_keys);
61+
62+
// Find all inserted keys
63+
map.find(d_keys.begin(), d_keys.end(), d_found_values.begin());
64+
65+
// Verify that all keys were found with correct values
66+
auto zip_equal = cuda::proclaim_return_type<bool>(
67+
[] __device__(auto const& p) { return cuda::std::get<0>(p) == cuda::std::get<1>(p); });
68+
auto zip =
69+
thrust::make_zip_iterator(cuda::std::tuple{d_values.begin(), d_found_values.begin()});
70+
REQUIRE(cuco::test::all_of(zip, zip + num_keys, zip_equal));
71+
72+
// Test finding non-existent keys
73+
thrust::device_vector<Key> d_nonexistent_keys(100);
74+
thrust::device_vector<Value> d_nonexistent_values(100);
75+
76+
thrust::sequence(thrust::device,
77+
d_nonexistent_keys.begin(),
78+
d_nonexistent_keys.end(),
79+
static_cast<Key>(num_keys + 1));
80+
81+
map.find(d_nonexistent_keys.begin(), d_nonexistent_keys.end(), d_nonexistent_values.begin());
82+
83+
// Verify that non-existent keys return empty value sentinel
84+
auto empty_zip = thrust::make_zip_iterator(
85+
cuda::std::tuple{d_nonexistent_values.begin(),
86+
thrust::constant_iterator<Value>{cuco::empty_value<Value>{-1}.value}});
87+
REQUIRE(cuco::test::all_of(empty_zip, empty_zip + 100, zip_equal));
88+
89+
// Test finding a mix of existing and non-existing keys
90+
thrust::device_vector<Key> d_mixed_keys(200);
91+
thrust::device_vector<Value> d_mixed_values(200);
92+
93+
// First half: existing keys
94+
thrust::copy(d_keys.begin(), d_keys.begin() + 100, d_mixed_keys.begin());
95+
// Second half: non-existing keys
96+
thrust::sequence(thrust::device,
97+
d_mixed_keys.begin() + 100,
98+
d_mixed_keys.end(),
99+
static_cast<Key>(num_keys + 1));
100+
101+
map.find(d_mixed_keys.begin(), d_mixed_keys.end(), d_mixed_values.begin());
102+
103+
// Verify first half found correct values
104+
auto first_half_zip =
105+
thrust::make_zip_iterator(cuda::std::tuple{d_values.begin(), d_mixed_values.begin()});
106+
REQUIRE(cuco::test::all_of(first_half_zip, first_half_zip + 100, zip_equal));
107+
108+
// Verify second half returned empty value sentinel
109+
auto second_half_empty_zip = thrust::make_zip_iterator(
110+
cuda::std::tuple{d_mixed_values.begin() + 100,
111+
thrust::constant_iterator<Value>{cuco::empty_value<Value>{-1}.value}});
112+
REQUIRE(cuco::test::all_of(second_half_empty_zip, second_half_empty_zip + 100, zip_equal));
113+
}
114+
115+
SECTION("Check find after erase")
116+
{
117+
thrust::device_vector<Key> d_keys(num_keys);
118+
thrust::device_vector<Value> d_values(num_keys);
119+
thrust::device_vector<Value> d_found_values(num_keys);
120+
121+
thrust::sequence(thrust::device, d_keys.begin(), d_keys.end(), 1);
122+
thrust::sequence(thrust::device, d_values.begin(), d_values.end(), 1);
123+
124+
auto pairs_begin =
125+
thrust::make_zip_iterator(cuda::std::tuple{d_keys.begin(), d_values.begin()});
126+
127+
map.insert(pairs_begin, pairs_begin + num_keys);
128+
129+
REQUIRE(map.get_size() == num_keys);
130+
131+
// Find all keys before erase
132+
map.find(d_keys.begin(), d_keys.end(), d_found_values.begin());
133+
134+
auto zip_equal = cuda::proclaim_return_type<bool>(
135+
[] __device__(auto const& p) { return cuda::std::get<0>(p) == cuda::std::get<1>(p); });
136+
auto zip =
137+
thrust::make_zip_iterator(cuda::std::tuple{d_values.begin(), d_found_values.begin()});
138+
REQUIRE(cuco::test::all_of(zip, zip + num_keys, zip_equal));
139+
140+
// Erase first half of keys
141+
map.erase(d_keys.begin(), d_keys.begin() + num_keys / 2);
142+
143+
REQUIRE(map.get_size() == num_keys / 2);
144+
145+
// Find all keys after erase
146+
map.find(d_keys.begin(), d_keys.end(), d_found_values.begin());
147+
148+
// First half should return empty value sentinel (erased)
149+
auto first_half_empty_zip = thrust::make_zip_iterator(
150+
cuda::std::tuple{d_found_values.begin(),
151+
thrust::constant_iterator<Value>{cuco::empty_value<Value>{-1}.value}});
152+
REQUIRE(
153+
cuco::test::all_of(first_half_empty_zip, first_half_empty_zip + num_keys / 2, zip_equal));
154+
155+
// Second half should return correct values (not erased)
156+
auto second_half_zip = thrust::make_zip_iterator(cuda::std::tuple{
157+
d_values.begin() + num_keys / 2, d_found_values.begin() + num_keys / 2});
158+
REQUIRE(cuco::test::all_of(second_half_zip, second_half_zip + num_keys / 2, zip_equal));
159+
}
160+
161+
cuco::dynamic_map<Key, Value> indentity_hash_map{
162+
num_keys, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}, cuco::erased_key<Key>{-2}};
163+
164+
SECTION("Check find in a all erased submap")
165+
{
166+
constexpr float default_load_factor = 0.60;
167+
constexpr std::size_t first_insert_size = num_keys * default_load_factor;
168+
169+
thrust::device_vector<Key> d_keys(num_keys);
170+
thrust::device_vector<Value> d_values(num_keys);
171+
thrust::device_vector<Value> d_found_values(num_keys);
172+
173+
thrust::sequence(thrust::device, d_keys.begin(), d_keys.end(), 1);
174+
thrust::sequence(thrust::device, d_values.begin(), d_values.end(), 1);
175+
176+
auto pairs_begin =
177+
thrust::make_zip_iterator(cuda::std::tuple{d_keys.begin(), d_values.begin()});
178+
179+
// To construct a map with all erased keys, we can't insert all at once
180+
indentity_hash_map.insert(
181+
pairs_begin, pairs_begin + first_insert_size, cuco::identity_hash<Key>());
182+
REQUIRE(indentity_hash_map.get_size() == first_insert_size);
183+
184+
indentity_hash_map.erase(
185+
d_keys.begin(), d_keys.begin() + first_insert_size, cuco::identity_hash<Key>());
186+
REQUIRE(indentity_hash_map.get_size() == 0);
187+
188+
indentity_hash_map.insert(
189+
pairs_begin + first_insert_size, pairs_begin + num_keys, cuco::identity_hash<Key>());
190+
REQUIRE(indentity_hash_map.get_size() == num_keys - first_insert_size);
191+
192+
indentity_hash_map.erase(
193+
d_keys.begin() + first_insert_size, d_keys.end(), cuco::identity_hash<Key>());
194+
REQUIRE(indentity_hash_map.get_size() == 0);
195+
196+
// we've construct a dynamic_map with one submap whose keys are all erased keys (-2 in this case)
197+
// this find would run forever if we don't check whether we have iterated all keys in a submap
198+
indentity_hash_map.find(
199+
d_keys.begin(), d_keys.end(), d_found_values.begin(), cuco::identity_hash<Key>());
200+
// all d_found_values should be empty value sentinel (-1 in this case)
201+
auto empty_zip = thrust::make_zip_iterator(
202+
cuda::std::tuple{d_found_values.begin(),
203+
thrust::constant_iterator<Value>{cuco::empty_value<Value>{-1}.value}});
204+
auto zip_equal = cuda::proclaim_return_type<bool>(
205+
[] __device__(auto const& p) { return cuda::std::get<0>(p) == cuda::std::get<1>(p); });
206+
REQUIRE(cuco::test::all_of(empty_zip, empty_zip + num_keys, zip_equal));
207+
}
208+
}

0 commit comments

Comments
 (0)