1818#include < atomic>
1919#include < cassert>
2020#include < future>
21+ #include < memory>
2122#include < numeric>
2223#include < system_error>
24+ #include < type_traits>
2325#include < utility>
2426#include < vector>
2527
@@ -32,6 +34,29 @@ namespace kvikio {
3234
3335namespace detail {
3436
/**
 * @brief Utility function to create a copyable callable from a move-only callable.
 *
 * The underlying thread pool uses `std::function` (until C++23) or `std::move_only_function`
 * (since C++23) as the element type of the task queue. For the former case that currently applies,
 * `std::function` requires its "target" (the associated callable) to be copy-constructible. This
 * utility function is a workaround for move-only callables.
 *
 * The callable is moved into a heap allocation owned by a `std::shared_ptr`, and the returned
 * closure captures that pointer by value. Consequently, copying the closure never copies the
 * callable itself: every copy refers to the same callable object, and any state the callable
 * mutates when invoked is shared between the copies.
 *
 * @tparam F Callable type. Typically move-only; only move-constructibility is needed here.
 * @param op Callable. It is moved from.
 * @return A copy-constructible closure that perfectly forwards its arguments to the stored
 * callable and returns whatever that callable returns (reference-ness preserved).
 */
template <typename F>
auto make_copyable_lambda(F op)
{
  // Create the callable on the heap by moving from op. Use a shared pointer to manage its lifetime.
  auto sp = std::make_shared<F>(std::move(op));

  // Use the copyable closure as the proxy of the move-only callable. `*sp` is a non-const lvalue
  // even though the closure's call operator is const, so mutable callables remain invocable.
  return [sp](auto&&... args) -> decltype(auto) {
    return (*sp)(std::forward<decltype(args)>(args)...);
  };
}
59+
3560/* *
3661 * @brief Determine the NVTX color and call index. They are used to identify tasks from different
3762 * pread/pwrite calls. Tasks from the same pread/pwrite call are given the same color and call
@@ -50,6 +75,11 @@ inline const std::pair<const nvtx_color_type&, std::uint64_t> get_next_color_and
5075 return {nvtx_color, call_idx};
5176}
5277
78+ /* *
79+ * @brief Submit the task callable to the underlying thread pool.
80+ *
81+ * Both the callable and its arguments shall be copy-constructible.
82+ */
5383template <typename F, typename T>
5484std::future<std::size_t > submit_task (F op,
5585 T buf,
@@ -59,12 +89,40 @@ std::future<std::size_t> submit_task(F op,
5989 std::uint64_t nvtx_payload = 0ull ,
6090 nvtx_color_type nvtx_color = NvtxManager::default_color())
6191{
92+ static_assert (std::is_invocable_r_v<std::size_t ,
93+ decltype (op),
94+ decltype (buf),
95+ decltype (size),
96+ decltype (file_offset),
97+ decltype (devPtr_offset)>);
98+
6299 return defaults::thread_pool ().submit_task ([=] {
63100 KVIKIO_NVTX_SCOPED_RANGE (" task" , nvtx_payload, nvtx_color);
64101 return op (buf, size, file_offset, devPtr_offset);
65102 });
66103}
67104
105+ /* *
106+ * @brief Submit the move-only task callable to the underlying thread pool.
107+ *
108+ * @tparam F Callable type. F shall be move-only and have no argument.
109+ * @param op Callable.
110+ * @return A future to be used later to check if the operation has finished its execution.
111+ */
112+ template <typename F>
113+ std::future<std::size_t > submit_move_only_task (
114+ F op_move_only,
115+ std::uint64_t nvtx_payload = 0ull ,
116+ nvtx_color_type nvtx_color = NvtxManager::default_color())
117+ {
118+ static_assert (std::is_invocable_r_v<std::size_t , F>);
119+ auto op_copyable = make_copyable_lambda (std::move (op_move_only));
120+ return defaults::thread_pool ().submit_task ([=] {
121+ KVIKIO_NVTX_SCOPED_RANGE (" task" , nvtx_payload, nvtx_color);
122+ return op_copyable ();
123+ });
124+ }
125+
68126} // namespace detail
69127
70128/* *
@@ -90,40 +148,40 @@ std::future<std::size_t> parallel_io(F op,
90148 nvtx_color_type nvtx_color = NvtxManager::default_color())
91149{
92150 KVIKIO_EXPECT (task_size > 0 , " `task_size` must be positive" , std::invalid_argument);
151+ static_assert (std::is_invocable_r_v<std::size_t ,
152+ decltype (op),
153+ decltype (buf),
154+ decltype (size),
155+ decltype (file_offset),
156+ decltype (devPtr_offset)>);
93157
94158 // Single-task guard
95159 if (task_size >= size || page_size >= size) {
96160 return detail::submit_task (op, buf, size, file_offset, devPtr_offset, call_idx, nvtx_color);
97161 }
98162
99- // We know an upper bound of the total number of tasks
100163 std::vector<std::future<std::size_t >> tasks;
101- tasks.reserve (size / task_size + 2 );
164+ tasks.reserve (size / task_size);
102165
103- // 1) Submit `task_size` sized tasks
104- while (size >= task_size) {
166+ // 1) Submit all tasks but the last one. These are all `task_size` sized tasks.
167+ while (size > task_size) {
105168 tasks.push_back (
106169 detail::submit_task (op, buf, task_size, file_offset, devPtr_offset, call_idx, nvtx_color));
107170 file_offset += task_size;
108171 devPtr_offset += task_size;
109172 size -= task_size;
110173 }
111174
112- // 2) Submit a task for the remainder
113- if (size > 0 ) {
114- tasks.push_back (
115- detail::submit_task (op, buf, size, file_offset, devPtr_offset, call_idx, nvtx_color));
116- }
117-
118- // Finally, we sum the result of all tasks.
119- auto gather_tasks = [](std::vector<std::future<std::size_t >>&& tasks) -> std::size_t {
120- std::size_t ret = 0 ;
175+ // 2) Submit the last task, which performs the last I/O and then waits for the previous
176+ // tasks to finish.
177+ auto last_task = [=, tasks = std::move (tasks)]() mutable -> std::size_t {
178+ auto ret = op (buf, size, file_offset, devPtr_offset);
121179 for (auto & task : tasks) {
122180 ret += task.get ();
123181 }
124182 return ret;
125183 };
126- return std::async (std::launch::deferred, gather_tasks, std::move (tasks) );
184+ return detail::submit_move_only_task (std::move (last_task), call_idx, nvtx_color );
127185}
128186
129187} // namespace kvikio
0 commit comments