Skip to content

Commit f841fce

Browse files
committed
Merge remote-tracking branch 'upstream/dev' into retrieve
2 parents ae8e396 + ae3ac6e commit f841fce

File tree

139 files changed

+7294
-2559
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

139 files changed

+7294
-2559
lines changed

.devcontainer/cuda11.8-gcc11/devcontainer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"shutdownAction": "stopContainer",
3-
"image": "rapidsai/devcontainers:24.06-cpp-gcc11-cuda11.8-ubuntu22.04",
3+
"image": "rapidsai/devcontainers:24.10-cpp-gcc11-cuda11.8-ubuntu22.04",
44
"hostRequirements": {
55
"gpu": true
66
},
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"shutdownAction": "stopContainer",
3+
"image": "rapidsai/devcontainers:24.10-cpp-gcc12-cuda12.5-ubuntu22.04",
4+
"hostRequirements": {
5+
"gpu": true
6+
},
7+
"initializeCommand": [
8+
"/bin/bash",
9+
"-c",
10+
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
11+
],
12+
"containerEnv": {
13+
"SCCACHE_REGION": "us-east-2",
14+
"SCCACHE_BUCKET": "rapids-sccache-devs",
15+
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
16+
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17+
"DEVCONTAINER_NAME": "cuda12.5-gcc12",
18+
"CUCO_CUDA_VERSION": "12.5",
19+
"CUCO_HOST_COMPILER": "gcc",
20+
"CUCO_HOST_COMPILER_VERSION": "12"
21+
},
22+
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
23+
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
24+
"mounts": [
25+
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
26+
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
27+
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
28+
],
29+
"customizations": {
30+
"vscode": {
31+
"extensions": [
32+
"llvm-vs-code-extensions.vscode-clangd"
33+
],
34+
"settings": {
35+
"clangd.arguments": [
36+
"--compile-commands-dir=${workspaceFolder}/build/latest"
37+
]
38+
}
39+
}
40+
},
41+
"name": "cuda12.5-gcc12"
42+
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
{
2+
"shutdownAction": "stopContainer",
3+
"image": "rapidsai/devcontainers:24.10-cpp-gcc13-cuda12.5-ubuntu22.04",
4+
"hostRequirements": {
5+
"gpu": true
6+
},
7+
"initializeCommand": [
8+
"/bin/bash",
9+
"-c",
10+
"mkdir -m 0755 -p ${localWorkspaceFolder}/.{aws,cache,config}"
11+
],
12+
"containerEnv": {
13+
"SCCACHE_REGION": "us-east-2",
14+
"SCCACHE_BUCKET": "rapids-sccache-devs",
15+
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
16+
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17+
"DEVCONTAINER_NAME": "cuda12.5-gcc13",
18+
"CUCO_CUDA_VERSION": "12.5",
19+
"CUCO_HOST_COMPILER": "gcc",
20+
"CUCO_HOST_COMPILER_VERSION": "13"
21+
},
22+
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
23+
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
24+
"mounts": [
25+
"source=${localWorkspaceFolder}/.aws,target=/home/coder/.aws,type=bind,consistency=consistent",
26+
"source=${localWorkspaceFolder}/.cache,target=/home/coder/.cache,type=bind,consistency=consistent",
27+
"source=${localWorkspaceFolder}/.config,target=/home/coder/.config,type=bind,consistency=consistent"
28+
],
29+
"customizations": {
30+
"vscode": {
31+
"extensions": [
32+
"llvm-vs-code-extensions.vscode-clangd"
33+
],
34+
"settings": {
35+
"clangd.arguments": [
36+
"--compile-commands-dir=${workspaceFolder}/build/latest"
37+
]
38+
}
39+
}
40+
},
41+
"name": "cuda12.5-gcc13"
42+
}

.devcontainer/devcontainer.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"shutdownAction": "stopContainer",
3-
"image": "rapidsai/devcontainers:24.06-cpp-gcc12-cuda12.4-ubuntu22.04",
3+
"image": "rapidsai/devcontainers:24.10-cpp-gcc13-cuda12.5-ubuntu22.04",
44
"hostRequirements": {
55
"gpu": true
66
},
@@ -14,10 +14,10 @@
1414
"SCCACHE_BUCKET": "rapids-sccache-devs",
1515
"VAULT_HOST": "https://vault.ops.k8s.rapids.ai",
1616
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history",
17-
"DEVCONTAINER_NAME": "cuda12.4-gcc12",
18-
"CUCO_CUDA_VERSION": "12.4",
17+
"DEVCONTAINER_NAME": "cuda12.5-gcc13",
18+
"CUCO_CUDA_VERSION": "12.5",
1919
"CUCO_HOST_COMPILER": "gcc",
20-
"CUCO_HOST_COMPILER_VERSION": "12"
20+
"CUCO_HOST_COMPILER_VERSION": "13"
2121
},
2222
"workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
2323
"workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
@@ -38,5 +38,5 @@
3838
}
3939
}
4040
},
41-
"name": "cuda12.4-gcc12"
41+
"name": "cuda12.5-gcc13"
4242
}

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
1717

1818
if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
19-
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.08/RAPIDS.cmake
19+
file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-24.10/RAPIDS.cmake
2020
${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)
2121
endif()
2222
include(${CMAKE_CURRENT_BINARY_DIR}/CUCO_RAPIDS.cmake)

README.md

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,8 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection
206206
#### Examples:
207207
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/96re4zhjo))
208208
- [Device-ref APIs for individual operations](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/7aKWdGTfx))
209-
- [One single storage for multiple sets](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_subsets_example.cu) (see [live example in godbolt](https://godbolt.org/z/sMfqGxdha))
210-
- [Using shared memory as storage](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/shared_memory_example.cu) (see [live example in godbolt](https://godbolt.org/z/zdTnbE1q5))
209+
- [One single storage for multiple sets](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/device_subsets_example.cu) (see [live example in godbolt](https://godbolt.org/z/7f9KW44P4))
210+
- [Using shared memory as storage](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/shared_memory_example.cu) (see [live example in godbolt](https://godbolt.org/z/Ws5c71T4z))
211211
- [Using set as mapping table to handle large keys or non-deterministic sentinels](https://github.com/NVIDIA/cuCollections/blob/dev/examples/static_set/mapping_table_example.cu) (see [live example in godbolt](https://godbolt.org/z/KfYo4nMss))
212212

213213
### `static_map`
@@ -234,12 +234,19 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection
234234
#### Examples:
235235
- [Host-bulk APIs (TODO)]()
236236

237-
### `distinct_count_estimator`
237+
### `hyperloglog`
238238

239-
`cuco::distinct_count_estimator` implements the well-established [HyperLogLog++ algorithm](https://static.googleusercontent.com/media/research.google.com/de//pubs/archive/40671.pdf) for approximating the count of distinct items in a multiset/stream.
239+
`cuco::hyperloglog` implements the well-established [HyperLogLog++ algorithm](https://static.googleusercontent.com/media/research.google.com/de//pubs/archive/40671.pdf) for approximating the count of distinct items in a multiset/stream.
240240

241241
#### Examples:
242-
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/distinct_count_estimator/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/sMfofM6qd))
243-
- [Device-ref APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/distinct_count_estimator/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/156T9ox7h))
242+
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/hyperloglog/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/G4qdcTezE))
243+
- [Device-ref APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/hyperloglog/device_ref_example.cu) (see [live example in godbolt](https://godbolt.org/z/n88713o4n))
244+
245+
### `bloom_filter`
246+
247+
`cuco::bloom_filter` implements a Blocked Bloom Filter for approximate set membership queries.
248+
249+
#### Examples:
250+
- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/z/EY7T5v5aE))
244251

245252

benchmarks/CMakeLists.txt

Lines changed: 36 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ function(ConfigureBench BENCH_NAME)
3535
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/benchmarks")
3636
target_include_directories(${BENCH_NAME} PRIVATE
3737
"${CMAKE_CURRENT_SOURCE_DIR}")
38-
target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda --expt-relaxed-constexpr -lineinfo)
38+
target_compile_options(${BENCH_NAME} PRIVATE --expt-extended-lambda -lineinfo)
3939
target_link_libraries(${BENCH_NAME} PRIVATE
4040
nvbench::main
4141
pthread
@@ -49,53 +49,59 @@ endfunction(ConfigureBench)
4949
###################################################################################################
5050
# - static_set benchmarks -------------------------------------------------------------------------
5151
ConfigureBench(STATIC_SET_BENCH
52-
hash_table/static_set/contains_bench.cu
53-
hash_table/static_set/find_bench.cu
54-
hash_table/static_set/insert_bench.cu
55-
hash_table/static_set/retrieve_all_bench.cu
56-
hash_table/static_set/size_bench.cu
57-
hash_table/static_set/rehash_bench.cu)
52+
static_set/contains_bench.cu
53+
static_set/find_bench.cu
54+
static_set/insert_bench.cu
55+
static_set/retrieve_all_bench.cu
56+
static_set/size_bench.cu
57+
static_set/rehash_bench.cu)
5858

5959
###################################################################################################
6060
# - static_map benchmarks -------------------------------------------------------------------------
6161
ConfigureBench(STATIC_MAP_BENCH
62-
hash_table/static_map/insert_bench.cu
63-
hash_table/static_map/find_bench.cu
64-
hash_table/static_map/contains_bench.cu
65-
hash_table/static_map/erase_bench.cu
66-
hash_table/static_map/insert_or_apply_bench.cu)
62+
static_map/insert_bench.cu
63+
static_map/find_bench.cu
64+
static_map/contains_bench.cu
65+
static_map/erase_bench.cu
66+
static_map/insert_or_apply_bench.cu)
6767

6868
###################################################################################################
6969
# - static_multiset benchmarks --------------------------------------------------------------------
7070
ConfigureBench(STATIC_MULTISET_BENCH
71-
hash_table/static_multiset/contains_bench.cu
72-
hash_table/static_multiset/retrieve_bench.cu
73-
hash_table/static_multiset/count_bench.cu
74-
hash_table/static_multiset/find_bench.cu
75-
hash_table/static_multiset/insert_bench.cu)
71+
static_multiset/contains_bench.cu
72+
static_multiset/retrieve_bench.cu
73+
static_multiset/count_bench.cu
74+
static_multiset/find_bench.cu
75+
static_multiset/insert_bench.cu)
7676

7777
###################################################################################################
7878
# - static_multimap benchmarks --------------------------------------------------------------------
7979
ConfigureBench(STATIC_MULTIMAP_BENCH
80-
hash_table/static_multimap/insert_bench.cu
81-
hash_table/static_multimap/retrieve_bench.cu
82-
hash_table/static_multimap/query_bench.cu
83-
hash_table/static_multimap/count_bench.cu)
80+
static_multimap/insert_bench.cu
81+
static_multimap/retrieve_bench.cu
82+
static_multimap/query_bench.cu
83+
static_multimap/count_bench.cu)
8484

8585
###################################################################################################
8686
# - dynamic_map benchmarks ------------------------------------------------------------------------
8787
ConfigureBench(DYNAMIC_MAP_BENCH
88-
hash_table/dynamic_map/insert_bench.cu
89-
hash_table/dynamic_map/find_bench.cu
90-
hash_table/dynamic_map/contains_bench.cu
91-
hash_table/dynamic_map/erase_bench.cu)
88+
dynamic_map/insert_bench.cu
89+
dynamic_map/find_bench.cu
90+
dynamic_map/contains_bench.cu
91+
dynamic_map/erase_bench.cu)
9292

9393
###################################################################################################
9494
# - hash function benchmarks ----------------------------------------------------------------------
95-
ConfigureBench(HASH_BENCH
96-
hash_bench.cu)
95+
ConfigureBench(HASH_FUNCTION_BENCH
96+
hash_function/hash_function_bench.cu)
9797

9898
###################################################################################################
99-
# - distinct_count_estimator benchmarks -----------------------------------------------------------
100-
ConfigureBench(DISTINCT_COUNT_ESTIMATOR_BENCH
101-
distinct_count_estimator_bench.cu)
99+
# - hyperloglog benchmarks ------------------------------------------------------------------------
100+
ConfigureBench(HYPERLOGLOG_BENCH
101+
hyperloglog/hyperloglog_bench.cu)
102+
103+
###################################################################################################
104+
# - bloom_filter benchmarks -----------------------------------------------------------------------
105+
ConfigureBench(BLOOM_FILTER_BENCH
106+
bloom_filter/add_bench.cu
107+
bloom_filter/contains_bench.cu)

0 commit comments

Comments
 (0)