Skip to content

Commit f5268f9

Browse files
Add C++ benchmarks (part 1/n) (#659)
Partially addresses #606 This PR kick-starts the process of adding benchmarks to KvikIO. The following tasks are done: - Add CMake scripts for the benchmark. - Add a simple benchmark for the threadpool. To build and run the benchmark programs in the dev container: ``` # Build the benchmarks build-kvikio-cpp -DBUILD_TESTS=ON -DBUILD_BENCHMARKS=ON -j 16 # Run specific benchmark ~/kvikio/cpp/build/latest/benchmarks/<benchmark-name> ``` Sample output: ``` 2025-03-14T04:38:46+00:00 Running ./THREADPOOL_BENCHMARK Run on (16 X 5050 MHz CPU s) CPU Caches: L1 Data 32 KiB (x8) L1 Instruction 32 KiB (x8) L2 Unified 1024 KiB (x8) L3 Unified 98304 KiB (x1) Load Average: 0.34, 1.07, 1.22 ***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. --------------------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations UserCounters... --------------------------------------------------------------------------------------------------------------------------- BM_threadpool_compute:strong_scaling/1/min_time:2.000/real_time 162 ms 0.728 ms 17 threads=1 BM_threadpool_compute:strong_scaling/2/min_time:2.000/real_time 81.6 ms 0.885 ms 34 threads=2 BM_threadpool_compute:strong_scaling/4/min_time:2.000/real_time 41.1 ms 0.972 ms 68 threads=4 BM_threadpool_compute:strong_scaling/8/min_time:2.000/real_time 21.3 ms 1.40 ms 127 threads=8 BM_threadpool_compute:strong_scaling/16/min_time:2.000/real_time 18.7 ms 1.84 ms 150 threads=16 BM_threadpool_compute:strong_scaling/32/min_time:2.000/real_time 19.2 ms 2.76 ms 145 threads=32 BM_threadpool_compute:strong_scaling/64/min_time:2.000/real_time 20.8 ms 5.46 ms 139 threads=64 BM_threadpool_compute:weak_scaling/1/min_time:2.000/real_time 16.2 ms 0.135 ms 172 threads=1 BM_threadpool_compute:weak_scaling/2/min_time:2.000/real_time 16.3 ms 0.260 ms 171 threads=2 
BM_threadpool_compute:weak_scaling/4/min_time:2.000/real_time 16.6 ms 0.527 ms 168 threads=4 BM_threadpool_compute:weak_scaling/8/min_time:2.000/real_time 17.2 ms 1.05 ms 164 threads=8 BM_threadpool_compute:weak_scaling/16/min_time:2.000/real_time 29.7 ms 2.66 ms 94 threads=16 BM_threadpool_compute:weak_scaling/32/min_time:2.000/real_time 60.9 ms 8.04 ms 46 threads=32 BM_threadpool_compute:weak_scaling/64/min_time:2.000/real_time 133 ms 36.0 ms 21 threads=64 ``` Authors: - Tianyu Liu (https://github.com/kingcrimsontianyu) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: #659
1 parent 3adbe7e commit f5268f9

File tree

5 files changed

+172
-6
lines changed

5 files changed

+172
-6
lines changed

cpp/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ rapids_cmake_build_type(Release)
3939
# * build options ----------------------------------------------------------------------------------
4040

4141
option(BUILD_SHARED_LIBS "Build KvikIO shared library" ON)
42+
option(KvikIO_BUILD_BENCHMARKS "Configure CMake to build benchmarks" ON)
4243
option(KvikIO_BUILD_EXAMPLES "Configure CMake to build examples" ON)
4344
option(KvikIO_BUILD_TESTS "Configure CMake to build tests" ON)
4445
option(KvikIO_REMOTE_SUPPORT "Configure CMake to build with remote IO support" ON)
@@ -203,6 +204,17 @@ set_target_properties(
203204
INTERFACE_POSITION_INDEPENDENT_CODE ON
204205
)
205206

207+
# ##################################################################################################
208+
# * add benchmarks --------------------------------------------------------------------------------
209+
210+
if(KvikIO_BUILD_BENCHMARKS)
  # Make Google Benchmark available through rapids-cmake's CPM wrapper, requesting a static build,
  # before descending into the directory that defines the benchmark executables.
  include("${rapids-cmake-dir}/cpm/gbench.cmake")
  rapids_cpm_gbench(BUILD_STATIC)

  # The benchmark targets themselves are declared in benchmarks/CMakeLists.txt.
  add_subdirectory(benchmarks)
endif()
217+
206218
# ##################################################################################################
207219
# * add examples -----------------------------------------------------------------------------------
208220

cpp/benchmarks/CMakeLists.txt

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
# =============================================================================
2+
# Copyright (c) 2025, NVIDIA CORPORATION.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5+
# in compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under the License
10+
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11+
# or implied. See the License for the specific language governing permissions and limitations under
12+
# the License.
13+
# =============================================================================
14+
15+
#[=======================================================================[.rst:
16+
kvikio_add_benchmark
17+
--------------------
18+
19+
Create a KvikIO benchmark.
20+
21+
.. code-block:: cmake
22+
23+
kvikio_add_benchmark(NAME <name> SOURCES <sources>)
24+
25+
``NAME``
26+
Benchmark name. Single-value argument.
27+
28+
``SOURCES``
29+
List of source files for the benchmark. Multi-value argument.
30+
#]=======================================================================]
31+
function(kvikio_add_benchmark)
  # Parse the keyword arguments straight from the function's argument list. Unlike expanding
  # ${ARGN}, PARSE_ARGV keeps empty values and values containing semicolons intact.
  cmake_parse_arguments(
    PARSE_ARGV 0
    _KVIKIO # prefix
    "" # optional
    "NAME" # single value
    "SOURCES" # multi-value
  )

  if(DEFINED _KVIKIO_UNPARSED_ARGUMENTS)
    message(FATAL_ERROR "Unknown argument: ${_KVIKIO_UNPARSED_ARGUMENTS}")
  endif()

  # Fail early with a targeted message instead of an obscure add_executable() error.
  if("${_KVIKIO_NAME}" STREQUAL "")
    message(FATAL_ERROR "kvikio_add_benchmark: NAME must be provided")
  endif()
  if("${_KVIKIO_SOURCES}" STREQUAL "")
    message(FATAL_ERROR "kvikio_add_benchmark: SOURCES must list at least one source file")
  endif()

  add_executable(${_KVIKIO_NAME} ${_KVIKIO_SOURCES})

  # Runtime search path applied to the installed binary, relative to the binary's own location.
  set_target_properties(${_KVIKIO_NAME} PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")

  # An executable has no consumers to propagate usage requirements to, so link PRIVATE (this also
  # matches how the example executables link against kvikio::kvikio).
  target_link_libraries(${_KVIKIO_NAME} PRIVATE benchmark::benchmark kvikio::kvikio)

  # Strict warnings are only enabled for GCC, and only for C++ translation units.
  if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas")
    target_compile_options(${_KVIKIO_NAME} PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>")
  endif()

  # Installed only when the "testing" component is requested explicitly (EXCLUDE_FROM_ALL).
  install(
    TARGETS ${_KVIKIO_NAME}
    COMPONENT testing
    DESTINATION bin/benchmarks/libkvikio
    EXCLUDE_FROM_ALL
  )
endfunction()
61+
62+
# Thread pool scalability benchmark.
kvikio_add_benchmark(
  NAME THREADPOOL_BENCHMARK
  SOURCES "threadpool/threadpool_benchmark.cpp"
)
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
* Copyright (c) 2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
// This benchmark assesses the scalability of the thread pool.
18+
//
19+
// In the "strong scaling" study, the total amount of tasks is fixed, and the time to complete
20+
// these tasks is evaluated as a function of thread count.
21+
//
22+
// In the "weak scaling" study, the expected amount of tasks per thread is fixed, and the total
23+
// amount of tasks is then proportional to the thread count. Again, the time is evaluated as a
24+
// function of thread count.
25+
26+
#include <cmath>
27+
#include <cstdint>
28+
29+
#include <benchmark/benchmark.h>
30+
#include <kvikio/defaults.hpp>
31+
32+
namespace kvikio {
33+
/**
 * @brief Selects which scaling study a benchmark instantiation performs.
 */
enum class ScalingType : uint8_t {
  STRONG_SCALING,  ///< Total number of tasks is fixed regardless of the thread count.
  WEAK_SCALING,    ///< Total number of tasks grows in proportion to the thread count.
};
37+
38+
void task_compute(std::size_t num_compute_iterations)
39+
{
40+
[[maybe_unused]] double res{0.0};
41+
for (std::size_t i = 0u; i < num_compute_iterations; ++i) {
42+
auto x{static_cast<double>(i)};
43+
benchmark::DoNotOptimize(res += std::sqrt(x) + std::cbrt(x) + std::sin(x));
44+
}
45+
}
46+
47+
template <ScalingType scaling_type>
48+
void BM_threadpool_compute(benchmark::State& state)
49+
{
50+
auto const num_threads = state.range(0);
51+
52+
std::size_t const num_compute_tasks =
53+
(scaling_type == ScalingType::STRONG_SCALING) ? 10'000 : (1'000 * num_threads);
54+
55+
std::size_t constexpr num_compute_iterations{1'000};
56+
kvikio::defaults::set_thread_pool_nthreads(num_threads);
57+
58+
for (auto _ : state) {
59+
// Submit a total of "num_compute_tasks" tasks to the thread pool.
60+
for (auto i = std::size_t{0}; i < num_compute_tasks; ++i) {
61+
[[maybe_unused]] auto fut =
62+
kvikio::defaults::thread_pool().submit_task([] { task_compute(num_compute_iterations); });
63+
}
64+
kvikio::defaults::thread_pool().wait();
65+
}
66+
67+
state.counters["threads"] = num_threads;
68+
}
69+
} // namespace kvikio
70+
71+
int main(int argc, char** argv)
72+
{
73+
benchmark::Initialize(&argc, argv);
74+
75+
benchmark::RegisterBenchmark("BM_threadpool_compute:strong_scaling",
76+
kvikio::BM_threadpool_compute<kvikio::ScalingType::STRONG_SCALING>)
77+
->RangeMultiplier(2)
78+
->Range(1, 64) // Increase from 1 to 64 (inclusive of both endpoints) with x2 stepping.
79+
->UseRealTime() // Use the wall clock to determine the number of benchmark iterations.
80+
->Unit(benchmark::kMillisecond)
81+
->MinTime(2); // Minimum of 2 seconds.
82+
83+
benchmark::RegisterBenchmark("BM_threadpool_compute:weak_scaling",
84+
kvikio::BM_threadpool_compute<kvikio::ScalingType::WEAK_SCALING>)
85+
->RangeMultiplier(2)
86+
->Range(1, 64)
87+
->UseRealTime()
88+
->Unit(benchmark::kMillisecond)
89+
->MinTime(2);
90+
91+
benchmark::RunSpecifiedBenchmarks();
92+
benchmark::Shutdown();
93+
}

cpp/examples/CMakeLists.txt

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,9 @@ set(TEST_INSTALL_PATH bin/tests/libkvikio)
1919
if(CUDAToolkit_FOUND)
2020
add_executable(BASIC_IO_EXAMPLE basic_io.cpp)
2121
set_target_properties(BASIC_IO_EXAMPLE PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
22-
target_include_directories(BASIC_IO_EXAMPLE PRIVATE ../include ${cuFile_INCLUDE_DIRS})
23-
target_link_libraries(BASIC_IO_EXAMPLE PRIVATE kvikio CUDA::cudart)
22+
target_link_libraries(BASIC_IO_EXAMPLE PRIVATE kvikio::kvikio CUDA::cudart)
2423

25-
if(CMAKE_COMPILER_IS_GNUCXX)
24+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
2625
set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas")
2726
target_compile_options(
2827
BASIC_IO_EXAMPLE PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>"
@@ -43,10 +42,9 @@ endif()
4342

4443
add_executable(BASIC_NO_CUDA_EXAMPLE basic_no_cuda.cpp)
4544
set_target_properties(BASIC_NO_CUDA_EXAMPLE PROPERTIES INSTALL_RPATH "\$ORIGIN/../../../lib")
46-
target_include_directories(BASIC_NO_CUDA_EXAMPLE PRIVATE ../include)
47-
target_link_libraries(BASIC_NO_CUDA_EXAMPLE PRIVATE kvikio)
45+
target_link_libraries(BASIC_NO_CUDA_EXAMPLE PRIVATE kvikio::kvikio)
4846

49-
if(CMAKE_COMPILER_IS_GNUCXX)
47+
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
5048
set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas")
5149
target_compile_options(
5250
BASIC_NO_CUDA_EXAMPLE PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>"

python/libkvikio/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ endif()
3636

3737
unset(kvikio_FOUND)
3838

39+
set(KvikIO_BUILD_BENCHMARKS OFF)
3940
set(KvikIO_BUILD_EXAMPLES OFF)
4041
set(KvikIO_BUILD_TESTS OFF)
4142
if(USE_NVCOMP_RUNTIME_WHEEL)

0 commit comments

Comments
 (0)