Channel benchmarks (meta-pytorch#627)

eliothedeman · facebook-github-bot · commit 2ab02d8dafb0 · 2025-07-23T11:43:37.000-07:00
Summary: Pull Request resolved: meta-pytorch#627 Differential Revision: D78755894
diff --git a/hyperactor/Cargo.toml b/hyperactor/Cargo.toml
@@ -1,4 +1,4 @@
-# @generated by autocargo from //monarch/hyperactor:[hyperactor,hyperactor-example-derive]
+# @generated by autocargo from //monarch/hyperactor:[channel_benchmarks,hyperactor,hyperactor-example-derive]
 
 [package]
 name = "hyperactor"
@@ -9,6 +9,10 @@ description = "a high-performance, scalable actor framework for cluster computin
 repository = "https://github.com/pytorch-labs/monarch/"
 license = "BSD-3-Clause"
 
+[[bin]]
+name = "channel_benchmarks"
+path = "benches/channel_benchmarks.rs"
+
 [[bin]]
 name = "hyperactor_example_derive"
 path = "example/derive.rs"
@@ -21,6 +25,7 @@ bincode = "1.3.3"
 bytes = { version = "1.9.0", features = ["serde"] }
 cityhasher = "0.1.0"
 crc32fast = "1.4"
+criterion = { version = "0.5.1", features = ["async_tokio", "csv_output"] }
 dashmap = { version = "5.5.3", features = ["rayon", "serde"] }
 derivative = "2.2"
 dns-lookup = "1.0"
diff --git a/hyperactor/benches/README.md b/hyperactor/benches/README.md
@@ -0,0 +1,139 @@
+# Hyperactor Channel Benchmarks
+
+This directory contains comprehensive benchmarks for the Hyperactor channel system using the Criterion benchmarking framework.
+
+## Overview
+
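+The benchmark harness is designed to cover the following aspects of channel performance (at present only the `message_sizes` group is implemented in `channel_benchmarks.rs`):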
+
+1. **Channel Creation** - Benchmarks the cost of creating channels (dial/serve operations)
+2. **Message Sending** - Tests different sending methods (`try_post`, `post`, `send`)
+3. **Message Sizes** - Compares performance with different message sizes (small, medium, large)
+4. **Throughput** - Measures messages per second for different transports
+5. **Latency** - Round-trip latency measurements
+6. **Transport Comparison** - Performance comparison between different transport types
+
+## Supported Transports
+
+The benchmarks test the following channel transports (the code additionally runs against `ChannelTransport::MetaTls`):
+
+- **Local** - In-process channels using mpsc
+- **TCP** - Network channels over TCP
+- **Unix** - Unix domain socket channels
+
+## Message Types
+
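+Three message sizes are described below; note that `channel_benchmarks.rs` currently uses a single `Message` type (an `id` plus a byte payload) with payloads from 10 B to 1 GB in powers of ten: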
+
+- **SmallMessage** - ~16 bytes (id + value)
+- **MediumMessage** - ~1KB (id + 1KB data)
+- **LargeMessage** - ~64KB (id + 64KB payload)
+
+## Running Benchmarks
+
+### Run All Benchmarks
+```bash
+cargo bench
+```
+
+### Run Specific Benchmark Groups
+```bash
+# Channel creation benchmarks
+cargo bench channel_creation
+
+# Message sending benchmarks
+cargo bench message_sending
+
+# Message size comparison
+cargo bench message_sizes
+
+# Throughput benchmarks
+cargo bench throughput
+
+# Latency benchmarks
+cargo bench latency
+
+# Transport comparison
+cargo bench transport_comparison
+```
+
+### Run with Specific Transport
+```bash
+# Run only local transport benchmarks
+cargo bench -- local
+
+# Run only TCP transport benchmarks
+cargo bench -- tcp
+```
+
+## Output
+
+Benchmarks generate:
+
+1. **Console Output** - Real-time results with timing statistics
+2. **HTML Reports** - Detailed reports in `target/criterion/` directory
+3. **Baseline Comparisons** - Performance regression detection
+
+## Benchmark Details
+
+### Channel Creation
+- Measures time to create server (`serve`) and client (`dial`) endpoints
+- Tests all supported transport types
+- Useful for understanding connection establishment overhead
+
+### Message Sending
+- Compares `try_post` (non-blocking), `post` (fire-and-forget), and `send` (synchronous)
+- Tests local and TCP transports
+- Measures raw sending performance
+
+### Message Sizes
+- Tests impact of payload size on performance
+- Runs each payload size against every configured transport (local, TCP, MetaTLS, Unix)
+- Reports throughput in bytes per second
+
+### Throughput
+- Measures sustained message rate
+- Batches messages to reduce measurement overhead
+- 10-second measurement window for stability
+
+### Latency
+- Round-trip latency measurement
+- Uses echo server pattern
+- Tests both local and network transports
+
+### Transport Comparison
+- Direct comparison of transport performance
+- Sends 1000 messages per iteration
+- Helps choose optimal transport for use case
+
+## Interpreting Results
+
+- **Lower is better** for latency measurements
+- **Higher is better** for throughput measurements
+- **Confidence intervals** indicate measurement reliability
+- **Slope** indicates performance scaling characteristics
+
+## Configuration
+
+Benchmarks can be customized by modifying:
+
+- Message sizes in the message type definitions
+- Number of messages in throughput tests
+- Measurement duration in benchmark groups
+- Transport types tested
+
+## Dependencies
+
+The benchmark harness requires:
+
+- `criterion` - Benchmarking framework
+- `tokio` - Async runtime
+- `serde` - Message serialization
+- `hyperactor` - The library being benchmarked
+
+## Notes
+
+- Benchmarks automatically handle async operations using Tokio runtime
+- Receiver tasks are spawned to consume messages and prevent backpressure
+- Results may vary based on system load and hardware
+- Network benchmarks may be affected by local network configuration
diff --git a/hyperactor/benches/channel_benchmarks.rs b/hyperactor/benches/channel_benchmarks.rs
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+use std::future::IntoFuture;
+use std::iter::zip;
+use std::time::Duration;
+use std::time::Instant;
+
+use criterion::BenchmarkId;
+use criterion::Criterion;
+use criterion::Throughput;
+use criterion::black_box;
+use criterion::criterion_group;
+use criterion::criterion_main;
+use hyperactor::Named;
+use hyperactor::RemoteMessage;
+use hyperactor::channel::ChannelAddr;
+use hyperactor::channel::ChannelRx;
+use hyperactor::channel::ChannelTransport;
+use hyperactor::channel::ChannelTx;
+use hyperactor::channel::Rx;
+use hyperactor::channel::Tx;
+use hyperactor::channel::dial;
+use hyperactor::channel::serve;
+use serde::Deserialize;
+use serde::Serialize;
+use tokio::runtime::Runtime;
+use tokio::sync::oneshot;
+
+#[derive(Debug, Clone, Serialize, Deserialize, Named, PartialEq)]
+struct Message { // Message type sent through the channels under benchmark.
+    id: u64, // Identifier supplied by the caller of `Message::new`.
+    payload: Vec<u8>, // Message body; `Message::new` fills it with `size` zero bytes.
+}
+
+impl Message {
+    /// Builds a message tagged with `id` whose payload is `size` zero bytes.
+    /// The payload length is what the benchmark varies to change the message size.
+    fn new(id: u64, size: usize) -> Self {
+        let payload = vec![0; size];
+        Self { id, payload }
+    }
+}
+
+/// Benchmarks posting one `Message` and awaiting its receipt, per transport and payload size.
+/// Payload sizes are the powers of ten from 10 B to 1 GB; only the post/recv loop is timed.
+fn bench_message_sizes(c: &mut Criterion) {
+    let mut group = c.benchmark_group("message_sizes");
+
+    let transports = [
+        ("local", ChannelTransport::Local),
+        ("tcp", ChannelTransport::Tcp),
+        ("metatls", ChannelTransport::MetaTls),
+        ("unix", ChannelTransport::Unix),
+    ];
+
+    for size_exp in 1..10 {
+        let size = 10_usize.pow(size_exp);
+        let fsize = size as f64;
+        let (nice_size, postfix) = match size {
+            1_000..=999_999 => (fsize / 1000f64, "kb"),
+            1_000_000..=999_999_999 => (fsize / 1_000_000f64, "mb"),
+            1_000_000_000..=999_999_999_999 => (fsize / 1_000_000_000f64, "gb"),
+            _ => (fsize, "b"),
+        };
+        // The payload size is the same for every transport below, so set it once per size.
+        group.throughput(Throughput::Bytes(size as u64));
+        for (transport_name, transport) in &transports {
+            let transport = transport.clone();
+            let bench_name = format!("message_{}_{}{}", transport_name, nice_size, postfix);
+            group.bench_function(bench_name, move |b| {
+                let mut b = b.to_async(Runtime::new().unwrap());
+                let tt = &transport;
+                b.iter_custom(|iters| async move {
+                    // `serve`/`dial` run before the timer starts; each `msg.clone()` below is timed.
+                    let addr = ChannelAddr::any(tt.clone());
+                    let (listen_addr, mut rx) = serve::<Message>(addr).await.unwrap();
+                    let tx = dial::<Message>(listen_addr).unwrap();
+                    let msg = Message::new(0, size);
+                    let start = Instant::now();
+                    for _ in 0..iters {
+                        tx.post(msg.clone());
+                        rx.recv().await.unwrap();
+                    }
+                    start.elapsed()
+                });
+            });
+        }
+    }
+
+    group.finish();
+}
+
+// TODO: benchmark throughput.
+
+criterion_group!(benches, bench_message_sizes); // Registers `bench_message_sizes` under the group name `benches`.
+
+criterion_main!(benches); // Expands to the program entry point that runs the `benches` group.