rapidsai
diff --git a/‎cpp/benchmarks/streaming/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions b/‎cpp/benchmarks/streaming/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎cpp/benchmarks/streaming/ndsh/CMakeLists.txt‎
Lines changed: 62 additions & 0 deletions b/‎cpp/benchmarks/streaming/ndsh/CMakeLists.txt‎
Lines changed: 62 additions & 0 deletions
diff --git a/‎cpp/benchmarks/streaming/ndsh/concatenate.cpp‎
Lines changed: 88 additions & 0 deletions b/‎cpp/benchmarks/streaming/ndsh/concatenate.cpp‎
Lines changed: 88 additions & 0 deletions
diff --git a/‎cpp/benchmarks/streaming/ndsh/concatenate.hpp‎
Lines changed: 27 additions & 0 deletions b/‎cpp/benchmarks/streaming/ndsh/concatenate.hpp‎
Lines changed: 27 additions & 0 deletions
@@ -30,3 +30,5 @@ install(
   DESTINATION bin/benchmarks/librapidsmpf
   EXCLUDE_FROM_ALL
 )
+
+add_subdirectory(ndsh)  # NDSH streaming benchmarks; NOTE(review): added unconditionally, and ndsh/CMakeLists.txt raises FATAL_ERROR without MPI or streaming support
@@ -0,0 +1,62 @@
+# =================================================================================
+# cmake-format: off
+# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+# cmake-format: on
+# =================================================================================
+
+if(NOT RAPIDSMPF_HAVE_MPI)  # FATAL_ERROR below stops configuration when MPI support is off
+  message(FATAL_ERROR "Streaming NDSH benchmarks require MPI support")
+endif()
+
+if(NOT RAPIDSMPF_HAVE_STREAMING)  # same hard stop when streaming support is off
+  message(FATAL_ERROR "Streaming NDSH benchmarks require streaming support")
+endif()
+
+add_library(rapidsmpfndsh concatenate.cpp join.cpp utilities.cpp)  # helper library linked into q09 below; no STATIC/SHARED keyword, so BUILD_SHARED_LIBS picks the type
+
+set_target_properties(
+  rapidsmpfndsh
+  PROPERTIES BUILD_RPATH "\$ORIGIN"  # \$ keeps a literal $ORIGIN in the RPATH instead of a CMake variable reference
+             INSTALL_RPATH "\$ORIGIN"
+             CXX_STANDARD 20  # C++20 is mandatory: concatenate.cpp uses <ranges>
+             CXX_STANDARD_REQUIRED ON
+             CUDA_STANDARD 20
+             CUDA_STANDARD_REQUIRED ON
+             POSITION_INDEPENDENT_CODE ON
+             INTERFACE_POSITION_INDEPENDENT_CODE ON
+)
+
+target_compile_options(
+  rapidsmpfndsh PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${RAPIDSMPF_CXX_FLAGS}>"  # flag lists are defined outside this file and applied per source language
+                        "$<$<COMPILE_LANGUAGE:CUDA>:${RAPIDSMPF_CUDA_FLAGS}>"
+)
+target_link_libraries(
+  rapidsmpfndsh
+  PRIVATE rapidsmpf::rapidsmpf rmm::rmm cudf::cudf libcoro $<TARGET_NAME_IF_EXISTS:ucxx::ucxx>  # TARGET_NAME_IF_EXISTS expands to an empty string when the target is absent
+          $<TARGET_NAME_IF_EXISTS:MPI::MPI_C> $<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan
+)  # NOTE(review): no install() rule for rapidsmpfndsh in this file - confirm an installed q09 can still locate it when it is built shared
+
+add_executable(q09 "q09.cpp")  # benchmark executable built from q09.cpp
+set_target_properties(
+  q09
+  PROPERTIES RUNTIME_OUTPUT_DIRECTORY "$<BUILD_INTERFACE:${RAPIDSMPF_BINARY_DIR}/benchmarks/ndsh>"  # build-tree location of the binary
+             CXX_STANDARD 20
+             CXX_STANDARD_REQUIRED ON
+             CUDA_STANDARD 20
+             CUDA_STANDARD_REQUIRED ON
+)
+target_compile_options(
+  q09 PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${RAPIDSMPF_CXX_FLAGS}>"  # same per-language flag lists as rapidsmpfndsh
+              "$<$<COMPILE_LANGUAGE:CUDA>:${RAPIDSMPF_CUDA_FLAGS}>"
+)
+target_link_libraries(
+  q09 PRIVATE rapidsmpfndsh rapidsmpf::rapidsmpf ucxx::ucxx ucx::ucp  # NOTE(review): ucxx::ucxx and ucx::ucp are unconditional here but ucxx::ucxx is optional for rapidsmpfndsh - confirm UCXX is always available
+              $<TARGET_NAME_IF_EXISTS:MPI::MPI_C> $<TARGET_NAME_IF_EXISTS:conda_env> maybe_asan
+)
+install(
+  TARGETS q09
+  COMPONENT benchmarking
+  DESTINATION bin/benchmarks/librapidsmpf
+  EXCLUDE_FROM_ALL  # left out of a full install; installed only via the benchmarking component
+)
@@ -0,0 +1,88 @@
+/**
+ * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "concatenate.hpp"
+
+#include <memory>
+#include <ranges>
+
+#include <cudf/concatenate.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+
+#include <rapidsmpf/cuda_event.hpp>
+#include <rapidsmpf/cuda_stream.hpp>
+#include <rapidsmpf/streaming/core/channel.hpp>
+#include <rapidsmpf/streaming/core/context.hpp>
+#include <rapidsmpf/streaming/core/message.hpp>
+#include <rapidsmpf/streaming/cudf/table_chunk.hpp>
+
+#include "utilities.hpp"
+
+namespace rapidsmpf::ndsh {
+
+
+streaming::Node concatenate(
+    std::shared_ptr<streaming::Context> ctx,
+    std::shared_ptr<streaming::Channel> ch_in,
+    std::shared_ptr<streaming::Channel> ch_out,
+    ConcatOrder order
+) {
+    // Streaming node that buffers every message arriving on `ch_in` and then
+    // sends exactly one table-chunk message on `ch_out`:
+    //   - no input      -> an empty table, sequence number 0
+    //   - one input     -> that message, forwarded untouched
+    //   - several input -> one concatenated device table, sequence number 0
+    // With `ConcatOrder::LINEARIZE` the inputs are sorted by sequence number
+    // before being concatenated; otherwise arrival order is used.
+
+    // Guard built over both channels; by its name it shuts them down when this
+    // coroutine exits (NOTE(review): confirm against streaming::ShutdownAtExit).
+    streaming::ShutdownAtExit shutdown_guard{ch_in, ch_out};
+    CudaEvent sync_event;
+    std::vector<streaming::Message> inputs;
+    ctx->comm()->logger().print("Concatenate");
+    auto out_stream = ctx->br()->stream_pool().get_stream();
+
+    // Phase 1: collect the whole input. An empty message marks the end.
+    for (;;) {
+        co_await ctx->executor()->schedule();
+        auto incoming = co_await ch_in->receive();
+        if (incoming.empty()) {
+            break;
+        }
+        inputs.push_back(std::move(incoming));
+    }
+
+    // Phase 2: produce the single output message.
+    if (inputs.size() > 1) {
+        std::vector<streaming::TableChunk> device_chunks;
+        std::vector<cudf::table_view> table_views;
+        if (order == ConcatOrder::LINEARIZE) {
+            auto by_sequence = [](auto&& m) { return m.sequence_number(); };
+            std::ranges::sort(inputs, std::less{}, by_sequence);
+        }
+        device_chunks.reserve(inputs.size());
+        table_views.reserve(inputs.size());
+        for (auto& input : inputs) {
+            auto device_chunk = input.release<streaming::TableChunk>();
+            device_chunk = to_device(ctx, std::move(device_chunk));
+            // Join the output stream with the chunk's stream before the chunk's
+            // view is consumed on the output stream (presumably: out_stream
+            // waits on the chunk's stream - confirm cuda_stream_join's contract).
+            cuda_stream_join(out_stream, device_chunk.stream(), &sync_event);
+            table_views.push_back(device_chunk.table_view());
+            // The chunk must stay alive: `table_views` only refers to its data.
+            device_chunks.push_back(std::move(device_chunk));
+        }
+        auto concatenated =
+            cudf::concatenate(table_views, out_stream, ctx->br()->device_mr());
+        auto merged = std::make_unique<streaming::TableChunk>(
+            std::move(concatenated), out_stream
+        );
+        // Join every input chunk's stream with the output stream before the
+        // inputs are dropped (presumably so their release is ordered after the
+        // concatenation - confirm).
+        cuda_stream_join(
+            device_chunks
+                | std::views::transform([](auto&& c) { return c.stream(); }),
+            std::ranges::single_view(out_stream),
+            &sync_event
+        );
+        device_chunks.clear();
+        co_await ch_out->send(streaming::to_message(0, std::move(merged)));
+    } else if (inputs.size() == 1) {
+        // A lone message is passed through as-is, keeping its sequence number.
+        co_await ch_out->send(std::move(inputs.front()));
+    } else {
+        // Nothing was received: still emit one (empty) table.
+        auto empty_chunk = std::make_unique<streaming::TableChunk>(
+            std::make_unique<cudf::table>(), out_stream
+        );
+        co_await ch_out->send(streaming::to_message(0, std::move(empty_chunk)));
+    }
+    co_await ch_out->drain(ctx->executor());
+}
+
+}  // namespace rapidsmpf::ndsh
@@ -0,0 +1,27 @@
+/**
+ * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+#include <memory>
+
+#include <rapidsmpf/streaming/core/channel.hpp>
+#include <rapidsmpf/streaming/core/context.hpp>
+#include <rapidsmpf/streaming/core/node.hpp>
+
+namespace rapidsmpf::ndsh {
+
+enum class ConcatOrder : bool {  // Input-ordering policy for concatenate().
+    DONT_CARE,  ///< Concatenate the chunks in the order their messages were received.
+    LINEARIZE,  ///< Sort the received messages by sequence number before concatenating.
+};
+
+streaming::Node concatenate(  // Buffers every message from ch_in, then sends exactly one table-chunk message on ch_out.
+    std::shared_ptr<streaming::Context> ctx,  ///< Provides the executor, logger, stream pool and device memory resource.
+    std::shared_ptr<streaming::Channel> ch_in,  ///< Input channel; read until an empty message is received.
+    std::shared_ptr<streaming::Channel> ch_out,  ///< Output channel; gets the single result and is then drained.
+    ConcatOrder order = ConcatOrder::DONT_CARE  ///< LINEARIZE sorts the inputs by sequence number first.
+);  // No input yields an empty table; one input is forwarded unchanged; several are concatenated (sequence number 0).
+
+}  // namespace rapidsmpf::ndsh
Original file line number	Diff line number	Diff line change
`@@ -30,3 +30,5 @@ install(`
`30`	`30`	`DESTINATION bin/benchmarks/librapidsmpf`
`31`	`31`	`EXCLUDE_FROM_ALL`
`32`	`32`	`)`
	`33`	`+`
	`34`	`+add_subdirectory(ndsh)`