Skip to content

Commit 6f14844

Browse files
Improve NVTX annotation for functions (#671)
## Background

Previously, to add an NVTX function annotation for profiling, one of the following methods was used:

```cpp
namespace kvikio::detail {
// Method 1
void f() { KVIKIO_NVTX_FUNC_RANGE(); }

// Method 2
void f() { KVIKIO_NVTX_SCOPED_RANGE("detail::f()", 0); }
}  // end namespace kvikio::detail
```

Method 1 has the following limitations:
- `__func__` is used as the function name, which is unqualified and contains no information on namespace. Function overloads cannot be well handled.
- Users cannot specify NVTX payload or color.
- There is no way to annotate a lambda. Any lambda is simply named `operator()`.

Method 2 has the following limitations:
- Users have to manually enter the function name. Naming inconsistency has been observed across different source files.
- Users have to specify an NVTX payload value.

## This PR

This PR eliminates the limitations above to improve the annotation experience. More concretely:
- `KVIKIO_NVTX_FUNC_RANGE()` now uses the fully qualified name with namespace, return type and parameter types. Function overloads can be properly handled.
- `KVIKIO_NVTX_FUNC_RANGE()` now works for lambdas. For example, for a lambda inside a function `kvikio::sample::foo()`, the name of the lambda will be: `kvikio::sample::foo::<lambda()>`
- `KVIKIO_NVTX_FUNC_RANGE()` now accepts two optional parameters: payload and color.
- `KVIKIO_NVTX_SCOPED_RANGE()` now only has one mandatory parameter, which is the message; both the payload and color have become optional.

With the flexible `KVIKIO_NVTX_FUNC_RANGE()` available, **this PR adds additional traces to many functions**.

Authors:
- Tianyu Liu (https://github.com/kingcrimsontianyu)

Approvers:
- Mads R. B. Kristensen (https://github.com/madsbk)

URL: #671
1 parent 968d97e commit 6f14844

File tree

12 files changed

+163
-31
lines changed

12 files changed

+163
-31
lines changed

cpp/include/kvikio/nvtx.hpp

Lines changed: 54 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,33 @@ using nvtx_registered_string_type = nvtx3::registered_string_in<libkvikio_domain
5050
}(message)
5151

5252
// Implementation of KVIKIO_NVTX_FUNC_RANGE()
53-
#define KVIKIO_NVTX_FUNC_RANGE_IMPL() NVTX3_FUNC_RANGE_IN(kvikio::libkvikio_domain)
53+
// todo: Although supported by many compilers, __PRETTY_FUNCTION__ is non-standard. Replacement may
54+
// be considered once reflection is standardized.
55+
#define KVIKIO_NVTX_FUNC_RANGE_IMPL_0() KVIKIO_NVTX_SCOPED_RANGE_IMPL_1(__PRETTY_FUNCTION__)
56+
#define KVIKIO_NVTX_FUNC_RANGE_IMPL_1(payload) \
57+
KVIKIO_NVTX_SCOPED_RANGE_IMPL_2(__PRETTY_FUNCTION__, payload)
58+
#define KVIKIO_NVTX_FUNC_RANGE_IMPL_2(payload, color) \
59+
KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(__PRETTY_FUNCTION__, payload, color)
60+
#define KVIKIO_NVTX_FUNC_RANGE_SELECTOR(_0, _1, _2, NAME, ...) NAME
61+
// todo: Although supported by gcc and clang, ##__VA_ARGS__ is non-standard, and should be replaced
62+
// by __VA_OPT__ (since C++20) in the future.
63+
#define KVIKIO_NVTX_FUNC_RANGE_IMPL(...) \
64+
KVIKIO_NVTX_FUNC_RANGE_SELECTOR(_0, \
65+
##__VA_ARGS__, \
66+
KVIKIO_NVTX_FUNC_RANGE_IMPL_2, \
67+
KVIKIO_NVTX_FUNC_RANGE_IMPL_1, \
68+
KVIKIO_NVTX_FUNC_RANGE_IMPL_0) \
69+
(__VA_ARGS__)
5470

5571
// Implementation of KVIKIO_NVTX_SCOPED_RANGE(...)
72+
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_1(message) \
73+
kvikio::nvtx_scoped_range_type KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \
74+
{ \
75+
nvtx3::event_attributes \
76+
{ \
77+
KVIKIO_REGISTER_STRING(message), kvikio::NvtxManager::default_color() \
78+
} \
79+
}
5680
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload_v, color) \
5781
kvikio::nvtx_scoped_range_type KVIKIO_CONCAT(_kvikio_nvtx_range, __LINE__) \
5882
{ \
@@ -64,9 +88,11 @@ using nvtx_registered_string_type = nvtx3::registered_string_in<libkvikio_domain
6488
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL_2(message, payload) \
6589
KVIKIO_NVTX_SCOPED_RANGE_IMPL_3(message, payload, kvikio::NvtxManager::default_color())
6690
#define KVIKIO_NVTX_SCOPED_RANGE_SELECTOR(_1, _2, _3, NAME, ...) NAME
67-
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL(...) \
68-
KVIKIO_NVTX_SCOPED_RANGE_SELECTOR( \
69-
__VA_ARGS__, KVIKIO_NVTX_SCOPED_RANGE_IMPL_3, KVIKIO_NVTX_SCOPED_RANGE_IMPL_2) \
91+
#define KVIKIO_NVTX_SCOPED_RANGE_IMPL(...) \
92+
KVIKIO_NVTX_SCOPED_RANGE_SELECTOR(__VA_ARGS__, \
93+
KVIKIO_NVTX_SCOPED_RANGE_IMPL_3, \
94+
KVIKIO_NVTX_SCOPED_RANGE_IMPL_2, \
95+
KVIKIO_NVTX_SCOPED_RANGE_IMPL_1) \
7096
(__VA_ARGS__)
7197

7298
// Implementation of KVIKIO_NVTX_MARKER(message, payload)
@@ -124,22 +150,39 @@ class NvtxManager {
124150
};
125151

126152
/**
127-
* @brief Convenience macro for generating an NVTX range in the `libkvikio` domain
128-
* from the lifetime of a function.
153+
* @brief Convenience macro for generating an NVTX range in the `libkvikio` domain from the lifetime
154+
* of a function. Can be used inside a regular function or a lambda expression.
129155
*
130-
* Takes no argument. The name of the immediately enclosing function returned by `__func__` is used
131-
* as the message.
156+
* The message is the function signature given by `__PRETTY_FUNCTION__`, which contains detailed
157+
* information such as the namespace, return type and parameter types.
158+
*
159+
* @param payload (Optional) NVTX payload.
160+
* @param color (Optional) NVTX color. If unspecified, a default NVTX color is used.
132161
*
133162
* Example:
134163
* ```
135164
* void some_function(){
136-
* KVIKIO_NVTX_FUNC_RANGE(); // The name `some_function` is used as the message
165+
* // No argument
166+
* KVIKIO_NVTX_FUNC_RANGE();
167+
* ...
168+
* }
169+
*
170+
* void some_function(){
171+
* // Specify payload
172+
* KVIKIO_NVTX_FUNC_RANGE(4096);
173+
* ...
174+
* }
175+
*
176+
* void some_function(){
177+
* // Specify payload and color
178+
* nvtx3::rgb const color{0, 255, 0};
179+
* KVIKIO_NVTX_FUNC_RANGE(4096, color);
137180
* ...
138181
* }
139182
* ```
140183
*/
141184
#ifdef KVIKIO_CUDA_FOUND
142-
#define KVIKIO_NVTX_FUNC_RANGE() KVIKIO_NVTX_FUNC_RANGE_IMPL()
185+
#define KVIKIO_NVTX_FUNC_RANGE(...) KVIKIO_NVTX_FUNC_RANGE_IMPL(__VA_ARGS__)
143186
#else
144187
#define KVIKIO_NVTX_FUNC_RANGE(...) \
145188
do { \
@@ -152,7 +195,7 @@ class NvtxManager {
152195
*
153196
* @param message String literal for NVTX annotation. To improve profile-time performance, the
154197
* string literal is registered in NVTX.
155-
* @param payload NVTX payload.
198+
* @param payload (Optional) NVTX payload.
156199
* @param color (Optional) NVTX color. If unspecified, a default NVTX color is used.
157200
*
158201
* Example:

cpp/include/kvikio/posix_io.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ std::size_t posix_device_io(int fd,
183183
template <PartialIO PartialIOStatus>
184184
std::size_t posix_host_read(int fd, void* buf, std::size_t size, std::size_t file_offset)
185185
{
186-
KVIKIO_NVTX_SCOPED_RANGE("posix_host_read()", size);
186+
KVIKIO_NVTX_FUNC_RANGE(size);
187187
return detail::posix_host_io<IOOperationType::READ, PartialIOStatus>(
188188
fd, buf, size, convert_size2off(file_offset));
189189
}
@@ -205,7 +205,7 @@ std::size_t posix_host_read(int fd, void* buf, std::size_t size, std::size_t fil
205205
template <PartialIO PartialIOStatus>
206206
std::size_t posix_host_write(int fd, void const* buf, std::size_t size, std::size_t file_offset)
207207
{
208-
KVIKIO_NVTX_SCOPED_RANGE("posix_host_write()", size);
208+
KVIKIO_NVTX_FUNC_RANGE(size);
209209
return detail::posix_host_io<IOOperationType::WRITE, PartialIOStatus>(
210210
fd, buf, size, convert_size2off(file_offset));
211211
}

cpp/include/kvikio/threadpool_wrapper.hpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,26 @@ class thread_pool_wrapper : public pool_type {
3333
*
3434
* @param nthreads The number of threads to use.
3535
*/
36-
thread_pool_wrapper(unsigned int nthreads) : pool_type{nthreads, worker_thread_init_func} {}
36+
thread_pool_wrapper(unsigned int nthreads) : pool_type{nthreads, worker_thread_init_func}
37+
{
38+
KVIKIO_NVTX_FUNC_RANGE();
39+
}
3740

3841
/**
3942
* @brief Reset the number of threads in the thread pool, and invoke a pre-defined initialization
4043
* function in each worker thread.
4144
*
4245
* @param nthreads The number of threads to use.
4346
*/
44-
void reset(unsigned int nthreads) { pool_type::reset(nthreads, worker_thread_init_func); }
47+
void reset(unsigned int nthreads)
48+
{
49+
KVIKIO_NVTX_FUNC_RANGE();
50+
pool_type::reset(nthreads, worker_thread_init_func);
51+
}
4552

4653
private:
4754
inline static std::function<void()> worker_thread_init_func{[] {
48-
KVIKIO_NVTX_SCOPED_RANGE("worker thread init", 0, NvtxManager::default_color());
55+
KVIKIO_NVTX_FUNC_RANGE();
4956
// Rename the worker thread in the thread pool to improve clarity in nsys-ui.
5057
// Note: This NVTX feature is currently not supported by nsys-ui.
5158
NvtxManager::rename_current_thread("thread pool");

cpp/src/bounce_buffer.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,28 +20,40 @@
2020
#include <kvikio/bounce_buffer.hpp>
2121
#include <kvikio/defaults.hpp>
2222
#include <kvikio/error.hpp>
23+
#include <kvikio/nvtx.hpp>
2324
#include <kvikio/shim/cuda.hpp>
2425

2526
namespace kvikio {
2627

2728
AllocRetain::Alloc::Alloc(AllocRetain* manager, void* alloc, std::size_t size)
2829
: _manager(manager), _alloc{alloc}, _size{size}
2930
{
31+
KVIKIO_NVTX_FUNC_RANGE();
3032
}
3133

32-
AllocRetain::Alloc::~Alloc() noexcept { _manager->put(_alloc, _size); }
34+
AllocRetain::Alloc::~Alloc() noexcept
35+
{
36+
KVIKIO_NVTX_FUNC_RANGE();
37+
_manager->put(_alloc, _size);
38+
}
3339

34-
void* AllocRetain::Alloc::get() noexcept { return _alloc; }
40+
void* AllocRetain::Alloc::get() noexcept
41+
{
42+
KVIKIO_NVTX_FUNC_RANGE();
43+
return _alloc;
44+
}
3545

3646
void* AllocRetain::Alloc::get(std::ptrdiff_t offset) noexcept
3747
{
48+
KVIKIO_NVTX_FUNC_RANGE();
3849
return static_cast<char*>(_alloc) + offset;
3950
}
4051

4152
std::size_t AllocRetain::Alloc::size() noexcept { return _size; }
4253

4354
std::size_t AllocRetain::_clear()
4455
{
56+
KVIKIO_NVTX_FUNC_RANGE();
4557
std::size_t ret = _free_allocs.size() * _size;
4658
while (!_free_allocs.empty()) {
4759
CUDA_DRIVER_TRY(cudaAPI::instance().MemFreeHost(_free_allocs.top()));
@@ -52,6 +64,7 @@ std::size_t AllocRetain::_clear()
5264

5365
void AllocRetain::_ensure_alloc_size()
5466
{
67+
KVIKIO_NVTX_FUNC_RANGE();
5568
auto const bounce_buffer_size = defaults::bounce_buffer_size();
5669
if (_size != bounce_buffer_size) {
5770
_clear();
@@ -61,6 +74,7 @@ void AllocRetain::_ensure_alloc_size()
6174

6275
AllocRetain::Alloc AllocRetain::get()
6376
{
77+
KVIKIO_NVTX_FUNC_RANGE();
6478
std::lock_guard const lock(_mutex);
6579
_ensure_alloc_size();
6680

@@ -81,6 +95,7 @@ AllocRetain::Alloc AllocRetain::get()
8195

8296
void AllocRetain::put(void* alloc, std::size_t size)
8397
{
98+
KVIKIO_NVTX_FUNC_RANGE();
8499
std::lock_guard const lock(_mutex);
85100
_ensure_alloc_size();
86101

@@ -95,12 +110,14 @@ void AllocRetain::put(void* alloc, std::size_t size)
95110

96111
std::size_t AllocRetain::clear()
97112
{
113+
KVIKIO_NVTX_FUNC_RANGE();
98114
std::lock_guard const lock(_mutex);
99115
return _clear();
100116
}
101117

102118
AllocRetain& AllocRetain::instance()
103119
{
120+
KVIKIO_NVTX_FUNC_RANGE();
104121
static AllocRetain _instance;
105122
return _instance;
106123
}

cpp/src/buffer.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <kvikio/buffer.hpp>
2222
#include <kvikio/defaults.hpp>
2323
#include <kvikio/error.hpp>
24+
#include <kvikio/nvtx.hpp>
2425
#include <kvikio/shim/cufile.hpp>
2526
#include <kvikio/shim/cufile_h_wrapper.hpp>
2627
#include <kvikio/utils.hpp>
@@ -32,6 +33,7 @@ void buffer_register(void const* devPtr_base,
3233
int flags,
3334
std::vector<int> const& errors_to_ignore)
3435
{
36+
KVIKIO_NVTX_FUNC_RANGE();
3537
if (defaults::is_compat_mode_preferred()) { return; }
3638
CUfileError_t status = cuFileAPI::instance().BufRegister(devPtr_base, size, flags);
3739
if (status.err != CU_FILE_SUCCESS) {
@@ -45,18 +47,21 @@ void buffer_register(void const* devPtr_base,
4547

4648
void buffer_deregister(void const* devPtr_base)
4749
{
50+
KVIKIO_NVTX_FUNC_RANGE();
4851
if (defaults::is_compat_mode_preferred()) { return; }
4952
CUFILE_TRY(cuFileAPI::instance().BufDeregister(devPtr_base));
5053
}
5154

5255
void memory_register(void const* devPtr, int flags, std::vector<int> const& errors_to_ignore)
5356
{
57+
KVIKIO_NVTX_FUNC_RANGE();
5458
auto [base, nbytes, offset] = get_alloc_info(devPtr);
5559
buffer_register(base, nbytes, flags, errors_to_ignore);
5660
}
5761

5862
void memory_deregister(void const* devPtr)
5963
{
64+
KVIKIO_NVTX_FUNC_RANGE();
6065
auto [base, nbytes, offset] = get_alloc_info(devPtr);
6166
buffer_deregister(base);
6267
}

cpp/src/compat_mode.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,15 @@
2222
#include <kvikio/cufile/config.hpp>
2323
#include <kvikio/error.hpp>
2424
#include <kvikio/file_handle.hpp>
25+
#include <kvikio/nvtx.hpp>
2526
#include <kvikio/shim/cufile.hpp>
2627

2728
namespace kvikio {
2829

2930
namespace detail {
3031
CompatMode parse_compat_mode_str(std::string_view compat_mode_str)
3132
{
33+
KVIKIO_NVTX_FUNC_RANGE();
3234
// Convert to lowercase
3335
std::string tmp{compat_mode_str};
3436
std::transform(
@@ -50,6 +52,7 @@ CompatMode parse_compat_mode_str(std::string_view compat_mode_str)
5052

5153
CompatMode CompatModeManager::infer_compat_mode_if_auto(CompatMode compat_mode) noexcept
5254
{
55+
KVIKIO_NVTX_FUNC_RANGE();
5356
if (compat_mode == CompatMode::AUTO) {
5457
return is_cufile_available() ? CompatMode::OFF : CompatMode::ON;
5558
}
@@ -84,6 +87,7 @@ CompatModeManager::CompatModeManager(std::string const& file_path,
8487
CompatMode compat_mode_requested_v,
8588
FileHandle* file_handle)
8689
{
90+
KVIKIO_NVTX_FUNC_RANGE();
8791
KVIKIO_EXPECT(file_handle != nullptr,
8892
"The compatibility mode manager does not have a proper owning file handle.",
8993
std::invalid_argument);
@@ -127,6 +131,7 @@ CompatModeManager::CompatModeManager(std::string const& file_path,
127131

128132
void CompatModeManager::validate_compat_mode_for_async() const
129133
{
134+
KVIKIO_NVTX_FUNC_RANGE();
130135
if (!_is_compat_mode_preferred && _is_compat_mode_preferred_for_async &&
131136
_compat_mode_requested == CompatMode::OFF) {
132137
std::string err_msg;

cpp/src/defaults.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ namespace kvikio {
3333
template <>
3434
bool getenv_or(std::string_view env_var_name, bool default_val)
3535
{
36+
KVIKIO_NVTX_FUNC_RANGE();
3637
auto const* env_val = std::getenv(env_var_name.data());
3738
if (env_val == nullptr) { return default_val; }
3839
try {
@@ -66,6 +67,7 @@ bool getenv_or(std::string_view env_var_name, bool default_val)
6667
template <>
6768
CompatMode getenv_or(std::string_view env_var_name, CompatMode default_val)
6869
{
70+
KVIKIO_NVTX_FUNC_RANGE();
6971
auto* env_val = std::getenv(env_var_name.data());
7072
if (env_val == nullptr) { return default_val; }
7173
return detail::parse_compat_mode_str(env_val);
@@ -74,6 +76,7 @@ CompatMode getenv_or(std::string_view env_var_name, CompatMode default_val)
7476
template <>
7577
std::vector<int> getenv_or(std::string_view env_var_name, std::vector<int> default_val)
7678
{
79+
KVIKIO_NVTX_FUNC_RANGE();
7780
auto* const env_val = std::getenv(env_var_name.data());
7881
if (env_val == nullptr) { return std::move(default_val); }
7982
std::string const int_str(env_val);
@@ -84,13 +87,15 @@ std::vector<int> getenv_or(std::string_view env_var_name, std::vector<int> defau
8487

8588
unsigned int defaults::get_num_threads_from_env()
8689
{
90+
KVIKIO_NVTX_FUNC_RANGE();
8791
int const ret = getenv_or("KVIKIO_NTHREADS", 1);
8892
KVIKIO_EXPECT(ret > 0, "KVIKIO_NTHREADS has to be a positive integer", std::invalid_argument);
8993
return ret;
9094
}
9195

9296
defaults::defaults()
9397
{
98+
KVIKIO_NVTX_FUNC_RANGE();
9499
// Determine the default value of `compat_mode`
95100
{
96101
_compat_mode = getenv_or("KVIKIO_COMPAT_MODE", CompatMode::AUTO);

0 commit comments

Comments
 (0)