Skip to content

Commit a79389c

Browse files
Provide API to detect C2C platform and add NVML shim (#721)
Closes #720. Partially addresses rapidsai/cudf#18272. This PR moves the work-in-progress C2C detection feature from cuDF (rapidsai/cudf#18447) to KvikIO. A few additional changes are made: - Handle the potential problem in a multi-GPU system where CUDA and NVML enumerate devices differently. - Update the conda yaml files to specify the NVML dependency only for CUDA >=12, i.e. let the NVML shim only work for CUDA >=12. - Use of the CUDA runtime API is replaced by KvikIO's CUDA driver shim in the new PR. - Use of `std::function` as shim function wrappers is replaced by plain function pointers following the pattern of KvikIO's CUDA and cuFile shims. Authors: - Tianyu Liu (https://github.com/kingcrimsontianyu) Approvers: - Vukasin Milovanovic (https://github.com/vuule) - Ray Douglass (https://github.com/raydouglass) - Robert Maynard (https://github.com/robertmaynard) - Bradley Dice (https://github.com/bdice) - Vyas Ramasubramani (https://github.com/vyasr) URL: #721
1 parent 3ba8782 commit a79389c

File tree

18 files changed

+420
-21
lines changed

18 files changed

+420
-21
lines changed

conda/environments/all_cuda-128_arch-aarch64.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies:
1010
- c-compiler
1111
- cmake>=3.30.4
1212
- cuda-nvcc
13+
- cuda-nvml-dev
1314
- cuda-python>=12.6.2,<13.0a0
1415
- cuda-version=12.8
1516
- cupy>=12.0.0

conda/environments/all_cuda-128_arch-x86_64.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies:
1010
- c-compiler
1111
- cmake>=3.30.4
1212
- cuda-nvcc
13+
- cuda-nvml-dev
1314
- cuda-python>=12.6.2,<13.0a0
1415
- cuda-version=12.8
1516
- cupy>=12.0.0

conda/recipes/kvikio/recipe.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ requirements:
9898
then: libcufile ${{ cuda11_libcufile_run_version }}
9999
else:
100100
- cuda-cudart
101+
- cuda-nvml-dev
101102
- if: linux
102103
then: libcufile
103104
ignore_run_exports:

conda/recipes/libkvikio/recipe.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ cache:
6666
then:
6767
- libcufile =${{ cuda11_libcufile_host_version }}
6868
- libcufile-dev =${{ cuda11_libcufile_host_version }}
69-
else: libcufile-dev
69+
else:
70+
- libcufile-dev
71+
- cuda-nvml-dev
7072

7173
outputs:
7274
- package:
@@ -99,6 +101,7 @@ outputs:
99101
- libcufile ${{ cuda11_libcufile_run_version }}
100102
- libcufile-dev ${{ cuda11_libcufile_run_version }}
101103
else:
104+
- cuda-nvml-dev
102105
- if: linux
103106
then: libcufile-dev
104107
ignore_run_exports:

cpp/CMakeLists.txt

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ if(KvikIO_REMOTE_SUPPORT)
6969
endif()
7070

7171
set(cuFile_FOUND 0)
72+
# TODO: Let nvml_FOUND be 0 if the CUDA version is less than 12. This is to circumvent a potential CUDA
73+
# 11 build issue. Remove nvml_FOUND and KVIKIO_NVML_FOUND once CUDA 11 support is dropped.
74+
set(nvml_FOUND 0)
7275
if(KvikIO_CUDA_SUPPORT)
7376
rapids_find_package(
7477
CUDAToolkit REQUIRED
@@ -77,6 +80,14 @@ if(KvikIO_CUDA_SUPPORT)
7780
)
7881
include(cmake/thirdparty/get_nvtx.cmake)
7982

83+
if(CUDAToolkit_VERSION_MAJOR VERSION_GREATER_EQUAL 12)
84+
if(NOT TARGET CUDA::nvml)
85+
message(FATAL_ERROR "Cannot find NVML")
86+
else()
87+
set(nvml_FOUND 1)
88+
endif()
89+
endif()
90+
8091
if(NOT TARGET CUDA::cuFile)
8192
message(
8293
WARNING "Cannot find cuFile - KvikIO will still work but won't use GPUDirect Storage (GDS)"
@@ -151,6 +162,7 @@ set(SOURCES
151162
"src/posix_io.cpp"
152163
"src/shim/cuda.cpp"
153164
"src/shim/cufile.cpp"
165+
"src/shim/nvml.cpp"
154166
"src/shim/utils.cpp"
155167
"src/stream.cpp"
156168
"src/utils.cpp"
@@ -177,17 +189,21 @@ target_include_directories(
177189
INTERFACE "$<INSTALL_INTERFACE:include>"
178190
)
179191

180-
# Notice, we do not link to cuda or cufile since KvikIO opens them manually using `dlopen()`.
192+
# Notice, we do not link to cuda or cufile since KvikIO opens them manually using `dlopen()`. Also
193+
# note that KvikIO uses an NVML shim that dynamically loads the NVML shared library at runtime. At
194+
# build time only the header location is needed. KvikIO is not linked against the shared library.
195+
# Adding the target `CUDA::toolkit` here serves this purpose.
181196
target_link_libraries(
182197
kvikio
183198
PUBLIC Threads::Threads BS::thread_pool ${CMAKE_DL_LIBS} $<TARGET_NAME_IF_EXISTS:nvtx3::nvtx3-cpp>
184-
PRIVATE $<TARGET_NAME_IF_EXISTS:CURL::libcurl>
199+
PRIVATE $<TARGET_NAME_IF_EXISTS:CURL::libcurl> CUDA::toolkit
185200
)
186201

187202
target_compile_definitions(
188203
kvikio
189204
PUBLIC $<$<BOOL:${KvikIO_REMOTE_SUPPORT}>:KVIKIO_LIBCURL_FOUND>
190205
$<$<BOOL:${KvikIO_CUDA_SUPPORT}>:KVIKIO_CUDA_FOUND>
206+
$<$<BOOL:${nvml_FOUND}>:KVIKIO_NVML_FOUND>
191207
$<$<BOOL:${cuFile_FOUND}>:KVIKIO_CUFILE_FOUND>
192208
$<$<BOOL:${cuFile_BATCH_API_FOUND}>:KVIKIO_CUFILE_BATCH_API_FOUND>
193209
$<$<BOOL:${cuFile_STREAM_API_FOUND}>:KVIKIO_CUFILE_STREAM_API_FOUND>

cpp/include/kvikio/shim/cuda.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ class cudaAPI {
3939
decltype(cuCtxPushCurrent)* CtxPushCurrent{nullptr};
4040
decltype(cuCtxPopCurrent)* CtxPopCurrent{nullptr};
4141
decltype(cuCtxGetCurrent)* CtxGetCurrent{nullptr};
42+
decltype(cuCtxGetDevice)* CtxGetDevice{nullptr};
43+
decltype(cuDeviceGetUuid_v2)* DeviceGetUuid{nullptr};
44+
decltype(cuDriverGetVersion)* DriverGetVersion{nullptr};
4245
decltype(cuMemGetAddressRange)* MemGetAddressRange{nullptr};
4346
decltype(cuGetErrorName)* GetErrorName{nullptr};
4447
decltype(cuGetErrorString)* GetErrorString{nullptr};

cpp/include/kvikio/shim/cuda_h_wrapper.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ CUresult cuPointerGetAttributes(...);
6565
CUresult cuCtxPushCurrent(...);
6666
CUresult cuCtxPopCurrent(...);
6767
CUresult cuCtxGetCurrent(...);
68+
CUresult cuCtxGetDevice(...);
69+
CUresult cuDeviceGetUuid_v2(...);
70+
CUresult cuDriverGetVersion(...);
6871
CUresult cuMemGetAddressRange(...);
6972
CUresult cuGetErrorName(...);
7073
CUresult cuGetErrorString(...);

cpp/include/kvikio/utils.hpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,4 +191,46 @@ bool is_future_done(T const& future)
191191
return future.wait_for(std::chrono::seconds(0)) != std::future_status::timeout;
192192
}
193193

194+
/**
195+
* @brief Check whether the NVML shared library exists.
196+
*
197+
* @return Boolean answer.
198+
*/
199+
#ifdef KVIKIO_NVML_FOUND
200+
bool is_nvml_available();
201+
#else
202+
constexpr bool is_nvml_available() { return false; }
203+
#endif
204+
205+
/**
206+
* @brief Type of the device ID. On a multi-GPU system, CUDA and NVML enumerate devices in different
207+
* ways.
208+
*/
209+
enum class DeviceIdType : uint8_t {
210+
CUDA, ///< CUDA device ID.
211+
NVML, ///< NVML device ID.
212+
};
213+
214+
/**
215+
* @brief Check if the device identified by `device_idx` has at least one active NVLink-C2C interconnect.
216+
*
217+
* @return Boolean answer.
218+
*/
219+
#ifdef KVIKIO_NVML_FOUND
220+
bool is_c2c_available(int device_idx, DeviceIdType device_id_type = DeviceIdType::CUDA);
221+
#else
222+
constexpr bool is_c2c_available(int device_idx, DeviceIdType device_id_type = DeviceIdType::CUDA)
223+
{
224+
return false;
225+
}
226+
#endif
227+
228+
/**
229+
* @brief Shut down NVML
230+
*
231+
* The NVML shim singleton does not perform shutdown in the destructor. If a cleanup is desired,
232+
* call this function before the return of `main()`.
233+
*/
234+
void nvml_shutdown();
235+
194236
} // namespace kvikio

cpp/src/shim/cuda.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
* limitations under the License.
1515
*/
1616

17+
#include "utils.hpp"
18+
1719
#include <stdexcept>
1820

1921
#include <kvikio/error.hpp>
@@ -38,6 +40,9 @@ cudaAPI::cudaAPI()
3840
get_symbol(CtxPushCurrent, lib, KVIKIO_STRINGIFY(cuCtxPushCurrent));
3941
get_symbol(CtxPopCurrent, lib, KVIKIO_STRINGIFY(cuCtxPopCurrent));
4042
get_symbol(CtxGetCurrent, lib, KVIKIO_STRINGIFY(cuCtxGetCurrent));
43+
get_symbol(CtxGetDevice, lib, KVIKIO_STRINGIFY(cuCtxGetDevice));
44+
get_symbol(DeviceGetUuid, lib, KVIKIO_STRINGIFY(cuDeviceGetUuid_v2));
45+
get_symbol(DriverGetVersion, lib, KVIKIO_STRINGIFY(cuDriverGetVersion));
4146
get_symbol(MemGetAddressRange, lib, KVIKIO_STRINGIFY(cuMemGetAddressRange));
4247
get_symbol(GetErrorName, lib, KVIKIO_STRINGIFY(cuGetErrorName));
4348
get_symbol(GetErrorString, lib, KVIKIO_STRINGIFY(cuGetErrorString));
@@ -59,15 +64,7 @@ cudaAPI& cudaAPI::instance()
5964
}
6065

6166
#ifdef KVIKIO_CUDA_FOUND
62-
bool is_cuda_available()
63-
{
64-
try {
65-
cudaAPI::instance();
66-
} catch (std::runtime_error const&) {
67-
return false;
68-
}
69-
return true;
70-
}
67+
bool is_cuda_available() { return detail::is_available<cudaAPI>(); }
7168
#endif
7269

7370
} // namespace kvikio

cpp/src/shim/cufile.cpp

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
* limitations under the License.
1515
*/
1616

17+
#include "utils.hpp"
18+
1719
#include <stdexcept>
1820
#include <string>
1921

@@ -122,15 +124,7 @@ void cuFileAPI::driver_close()
122124
}
123125

124126
#ifdef KVIKIO_CUFILE_FOUND
125-
bool is_cufile_library_available() noexcept
126-
{
127-
try {
128-
cuFileAPI::instance();
129-
} catch (...) {
130-
return false;
131-
}
132-
return true;
133-
}
127+
bool is_cufile_library_available() noexcept { return detail::is_available<cuFileAPI>(); }
134128
#endif
135129

136130
bool is_cufile_available() noexcept

0 commit comments

Comments
 (0)