Skip to content

Commit b5bce87

Browse files
committed
External storage interface + rocksdb implementation.
1 parent 460db25 commit b5bce87

File tree

6 files changed

+630
-3
lines changed

6 files changed

+630
-3
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,6 @@
22
path = tests/googletest
33
url = https://github.com/google/googletest.git
44
ignore = dirty
5+
[submodule "third_party/rocksdb"]
6+
path = third_party/rocksdb
7+
url = https://github.com/facebook/rocksdb.git

CMakeLists.txt

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,25 @@ endforeach()
7373

7474
message(CMAKE_CUDA_FLAGS="${CMAKE_CUDA_FLAGS}")
7575

76+
# Sub projects.
77+
add_subdirectory(tests/googletest)
78+
79+
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
80+
set(ROCKSDB_BUILD_SHARED OFF)
81+
set(WITH_JNI OFF)
82+
set(WITH_ALL_TESTS OFF)
83+
set(WITH_TESTS OFF)
84+
set(WITH_BENCHMARK_TOOLS OFF)
85+
set(WITH_CORE_TOOLS OFF)
86+
set(WITH_TOOLS OFF)
87+
add_subdirectory(third_party/rocksdb)
88+
7689
include_directories(
7790
${PROJECT_SOURCE_DIR}/include
91+
${PROJECT_SOURCE_DIR}/third_party/rocksdb/include
7892
${PROJECT_SOURCE_DIR}/tests/googletest/googletest/include
7993
)
8094

81-
ADD_SUBDIRECTORY(tests/googletest)
82-
8395
link_directories(
8496
)
8597

@@ -134,4 +146,9 @@ TARGET_LINK_LIBRARIES(group_lock_test gtest_main)
134146
add_executable(find_or_insert_ptr_test tests/find_or_insert_ptr_test.cc.cu)
135147
target_compile_features(find_or_insert_ptr_test PUBLIC cxx_std_14)
136148
set_target_properties(find_or_insert_ptr_test PROPERTIES CUDA_ARCHITECTURES OFF)
137-
TARGET_LINK_LIBRARIES(find_or_insert_ptr_test gtest_main)
149+
TARGET_LINK_LIBRARIES(find_or_insert_ptr_test gtest_main)
150+
151+
add_executable(ext_storage_rocksdb_test tests/ext_storage_rocksdb_test.cc.cu)
152+
target_compile_features(ext_storage_rocksdb_test PUBLIC cxx_std_14)
153+
set_target_properties(ext_storage_rocksdb_test PROPERTIES CUDA_ARCHITECTURES OFF)
154+
TARGET_LINK_LIBRARIES(ext_storage_rocksdb_test gtest_main rocksdb)
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Copyright (c) 2022, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <cstdint>
19+
#include <type_traits>
20+
21+
namespace nv {
22+
namespace merlin {
23+
24+
template <class Key, class Value>
25+
class ExternalStorage {
26+
public:
27+
using size_type = size_t;
28+
using key_type = Key;
29+
using value_type = Value;
30+
31+
/**
32+
* @brief Inserts key/value pairs into the external storage. If a key/value
33+
* pair already exists, overwrites the current value.
34+
*
35+
* @param n Number of key/value slots provided in other arguments.
36+
* @param d_masked_keys Device pointer to an (n)-sized array of keys.
37+
* Key-Value slots that should be ignored have the key set to `EMPTY_KEY`.
38+
* @param d_values Device pointer to an (n)-sized array containing pointers to
39+
* respectively a memory location where the current values for a key are
40+
* stored. Each pointer points to a vector of length `value_dim`. Pointers
41+
* *can* be set to `nullptr` for slots where the corresponding key equated to
42+
* the `EMPTY_KEY`. The memory locations can be device or host memory (see
43+
* also `hkvs_is_pure_hbm`).
44+
* @param stream Stream that MUST be used for queuing asynchronous CUDA
45+
* operations. If only the input arguments or resources obtained from
46+
* respectively `dev_mem_pool` and `host_mem_pool` are used for such
47+
* operations, it is not necessary to synchronize the stream prior to
48+
* returning from the function.
49+
*/
50+
virtual void insert_or_assign(size_type n,
51+
const key_type* d_masked_keys, // (n)
52+
const value_type* d_values, // (n)
53+
size_type value_dims, cudaStream_t stream) = 0;
54+
55+
/**
56+
* @brief Attempts to find the supplied `d_keys` if the corresponding
57+
* `d_founds`-flag is `false` and fills the stored into the supplied memory
58+
* locations (i.e. in `d_values`).
59+
*
60+
* @param n Number of key/value slots provided in other arguments.
61+
* @param d_keys Device pointer to an (n)-sized array of keys.
62+
* @param d_values Device pointer to an (n * value_dim)-sized array to store
63+
* the retrieved `d_values`. For slots where the corresponding `d_founds`-flag
64+
* is not `false`, the value may already have been assigned and, thus, MUST
65+
* not be altered.
66+
* @param d_founds Device pointer to an (n)-sized array which indicates
67+
* whether the corresponding `d_values` slot is already filled or not. So, if
68+
* and only if `d_founds` is still false, the implementation shall attempt to
69+
* retrieve and fill in the value for the corresponding key. If a key/value
70+
* was retrieved successfully from external storage, the implementation MUST
71+
* also set `d_founds` to `true`.
72+
* @param stream Stream that MUST be used for queuing asynchronous CUDA
73+
* operations. If only the input arguments or resources obtained from
74+
* respectively `dev_mem_pool` and `host_mem_pool` are used for such
75+
* operations, it is not necessary to synchronize the stream prior to
76+
* returning from the function.
77+
*/
78+
virtual size_type find(size_type n,
79+
const key_type* d_keys, // (n)
80+
value_type* d_values, // (n * value_dim)
81+
size_type value_dims,
82+
bool* d_founds, // (n)
83+
cudaStream_t stream) const = 0;
84+
85+
/**
86+
* @brief Attempts to erase the entries associated with the supplied `d_keys`.
87+
* For keys do not exist nothing happens. It is permissible for this function
88+
* to be implemented asynchronously (i.e., to return before the actual
89+
* deletion has happened).
90+
*
91+
* @param n Number of keys provided in `d_keys` arguments.
92+
* @param d_keys Device pointer to an (n)-sized array of keys. This pointer is
93+
* only guarnteed to be valid for the duration of the call. If easure is
94+
* implemented asynchronously, you must make a copy and manage its lifetime
95+
* yourself.
96+
*/
97+
virtual void erase(size_type n, const key_type* d_keys,
98+
cudaStream_t stream) = 0;
99+
};
100+
101+
} // namespace merlin
102+
} // namespace nv

0 commit comments

Comments
 (0)