|
| 1 | +/** |
| 2 | + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. |
| 3 | + * SPDX-License-Identifier: Apache-2.0 |
| 4 | + */ |
| 5 | + |
| 6 | +#include "concatenate.hpp" |
| 7 | + |
| 8 | +#include <memory> |
| 9 | +#include <ranges> |
| 10 | + |
| 11 | +#include <cudf/concatenate.hpp> |
| 12 | +#include <cudf/table/table.hpp> |
| 13 | +#include <cudf/table/table_view.hpp> |
| 14 | + |
| 15 | +#include <rapidsmpf/cuda_event.hpp> |
| 16 | +#include <rapidsmpf/cuda_stream.hpp> |
| 17 | +#include <rapidsmpf/streaming/core/channel.hpp> |
| 18 | +#include <rapidsmpf/streaming/core/context.hpp> |
| 19 | +#include <rapidsmpf/streaming/core/message.hpp> |
| 20 | +#include <rapidsmpf/streaming/cudf/table_chunk.hpp> |
| 21 | + |
| 22 | +#include "utilities.hpp" |
| 23 | + |
| 24 | +namespace rapidsmpf::ndsh { |
| 25 | + |
| 26 | + |
| 27 | +streaming::Node concatenate( |
| 28 | + std::shared_ptr<streaming::Context> ctx, |
| 29 | + std::shared_ptr<streaming::Channel> ch_in, |
| 30 | + std::shared_ptr<streaming::Channel> ch_out, |
| 31 | + ConcatOrder order |
| 32 | +) { |
| 33 | + streaming::ShutdownAtExit c{ch_in, ch_out}; |
| 34 | + CudaEvent event; |
| 35 | + std::vector<streaming::Message> messages; |
| 36 | + ctx->comm()->logger().print("Concatenate"); |
| 37 | + auto concat_stream = ctx->br()->stream_pool().get_stream(); |
| 38 | + while (true) { |
| 39 | + co_await ctx->executor()->schedule(); |
| 40 | + auto msg = co_await ch_in->receive(); |
| 41 | + if (msg.empty()) { |
| 42 | + break; |
| 43 | + } |
| 44 | + messages.push_back(std::move(msg)); |
| 45 | + } |
| 46 | + if (messages.size() == 0) { |
| 47 | + co_await ch_out->send( |
| 48 | + streaming::to_message( |
| 49 | + 0, |
| 50 | + std::make_unique<streaming::TableChunk>( |
| 51 | + std::make_unique<cudf::table>(), concat_stream |
| 52 | + ) |
| 53 | + ) |
| 54 | + ); |
| 55 | + } else if (messages.size() == 1) { |
| 56 | + co_await ch_out->send(std::move(messages[0])); |
| 57 | + } else { |
| 58 | + std::vector<streaming::TableChunk> chunks; |
| 59 | + std::vector<cudf::table_view> views; |
| 60 | + if (order == ConcatOrder::LINEARIZE) { |
| 61 | + std::ranges::sort(messages, std::less{}, [](auto&& msg) { |
| 62 | + return msg.sequence_number(); |
| 63 | + }); |
| 64 | + } |
| 65 | + chunks.reserve(messages.size()); |
| 66 | + views.reserve(messages.size()); |
| 67 | + for (auto&& msg : messages) { |
| 68 | + auto chunk = msg.release<streaming::TableChunk>(); |
| 69 | + chunk = to_device(ctx, std::move(chunk)); |
| 70 | + cuda_stream_join(concat_stream, chunk.stream(), &event); |
| 71 | + views.push_back(chunk.table_view()); |
| 72 | + chunks.push_back(std::move(chunk)); |
| 73 | + } |
| 74 | + auto result = std::make_unique<streaming::TableChunk>( |
| 75 | + cudf::concatenate(views, concat_stream, ctx->br()->device_mr()), concat_stream |
| 76 | + ); |
| 77 | + cuda_stream_join( |
| 78 | + chunks | std::views::transform([](auto&& chunk) { return chunk.stream(); }), |
| 79 | + std::ranges::single_view(concat_stream), |
| 80 | + &event |
| 81 | + ); |
| 82 | + chunks.clear(); |
| 83 | + co_await ch_out->send(streaming::to_message(0, std::move(result))); |
| 84 | + } |
| 85 | + co_await ch_out->drain(ctx->executor()); |
| 86 | +} |
| 87 | + |
| 88 | +} // namespace rapidsmpf::ndsh |
0 commit comments