diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 000000000..2d56d71c0
Binary files /dev/null and b/.DS_Store differ
diff --git a/.github/.DS_Store b/.github/.DS_Store
new file mode 100644
index 000000000..71a545d9d
Binary files /dev/null and b/.github/.DS_Store differ
diff --git a/.github/workflows/doc_build.yml b/.github/workflows/doc_build.yml
new file mode 100644
index 000000000..d96f5995b
--- /dev/null
+++ b/.github/workflows/doc_build.yml
@@ -0,0 +1,87 @@
+name: Docs  # Builds Rust API docs and the Sphinx site; deploys to GitHub Pages from main
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+concurrency:  # main: unique group per run (never cancelled); other refs: a newer run cancels the older one
+  group: docs-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    name: Build Documentation
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          submodules: recursive
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+          architecture: x64
+
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@nightly  # nightly toolchain: the -Zthreads flag below is unstable
+        with:
+          components: rustfmt, clippy
+
+      - name: Install system dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libunwind-dev
+
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r build-requirements.txt
+          pip install -r docs/requirements.txt
+
+      - name: Generate Rust API Documentation
+        run: |
+          # Set environment variables for CPU-only build
+          export USE_CUDA=0
+          export USE_TENSOR_ENGINE=0
+          export RUSTFLAGS="-Zthreads=16"
+          export _GLIBCXX_USE_CXX11_ABI=1
+
+          # Generate documentation for specific crates that don't depend on CUDA
+          cargo doc --no-deps -p hyperactor -p hyperactor_macros -p hyperactor_multiprocess -p hyperactor_mesh -p hyperactor_mesh_macros
+
+          # Main entry point for target/doc: redirect to the hyperactor crate docs (an empty file would render a blank page)
+          echo '<meta http-equiv="refresh" content="0; url=hyperactor/index.html">' > target/doc/index.html
+
+          # Ensure the target/doc directory exists in the expected location for Sphinx
+          mkdir -p docs/source/target
+          cp -r target/doc docs/source/target/
+
+      - name: Build Sphinx Docs
+        working-directory: docs
+        run: |
+          make html
+
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: docs/build/html
+
+  deploy:
+    needs: build
+    if: github.ref == 'refs/heads/main'
+    permissions:
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
index fab347461..33252d739 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,4 +21,14 @@ Cargo.lock
# mdbook output
books/hyperactor-book/book/**
+# macOS files
+.DS_Store
+docs/source/.DS_Store
+
+# Sphinx build files
+build/**
+docs/_build/**
+docs/build/**
+docs/generated/**
+
CLAUDE.md
diff --git a/books/hyperactor-book/README.md b/books/hyperactor-book/README.md
index 45c2046a7..517e94f14 100644
--- a/books/hyperactor-book/README.md
+++ b/books/hyperactor-book/README.md
@@ -1,7 +1,14 @@
-# hyperactor Documentation Book
+# Hyperactor Documentation Book
This is the development documentation for the hyperactor system, built using [`mdBook`](https://rust-lang.github.io/mdBook/).
+```{toctree}
+:maxdepth: 2
+:caption: Contents
+
+./src/introduction
+```
+
## Running the Book
### On the **Server**
diff --git a/books/hyperactor-book/src/introduction.md b/books/hyperactor-book/src/introduction.md
index 7f712e7de..3fd4df2ee 100644
--- a/books/hyperactor-book/src/introduction.md
+++ b/books/hyperactor-book/src/introduction.md
@@ -5,3 +5,14 @@ This book describes the design and implementation of the hyperactor runtime.
The goal is to provide a clear, structured explanation of how actors communicate safely and efficiently across distributed systems using hyperactor’s abstractions.
We hope this becomes the book we wish we had when we started working with Monarch. Work in progress.
+
+```{toctree}
+:maxdepth: 2
+:caption: Contents
+
+references/index
+mailboxes/index
+macros/index
+actors/index
+SUMMARY
+```
diff --git a/docs/.DS_Store b/docs/.DS_Store
new file mode 100644
index 000000000..5008ddfcf
Binary files /dev/null and b/docs/.DS_Store differ
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 000000000..f7f9d66b6
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,35 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+# Generate example documentation from Python files (run explicitly; html does not depend on it)
+generate-examples:
+	@echo "Generating example documentation..."
+	@cd "$(SOURCEDIR)" && python GenerateExamples.py
+
+# Explicit html target: prints a banner, then runs the Sphinx html builder over $(SOURCEDIR)
+html:
+	@echo "Building HTML documentation with books directory..."
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile html generate-examples clean
+
+# Catch-all target: route all unknown targets to Sphinx using the "make mode" option; $(O) is
+# a shortcut for $(SPHINXOPTS). Explicit rules (help, html, generate-examples, clean) win over it.
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+clean:
+	@echo "Cleaning up build directory..."
+	@rm -rf "$(BUILDDIR)"
+	@rm -rf "$(SOURCEDIR)/generated"
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 000000000..747ffb7b3
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation: passes the first argument to sphinx-build -M as the make-mode target, or shows the Sphinx help when no argument is given
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.https://www.sphinx-doc.org/
+ exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 000000000..5f0e15d33
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,14 @@
+sphinx==7.2.6
+-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2
+sphinxcontrib.katex==0.9.10
+#breathe==4.34.0 # only if generating C++
+exhale==0.2.3 # only if generating C++ docs
+docutils>=0.18.1,<0.21
+sphinx-design==0.6.1
+sphinxcontrib-mermaid==1.0.0
+myst-parser #==0.18.1 # if want to contribute in markdown
+sphinx-gallery==0.14.0 # only if hosting interactive tutorials
+sphinx-sitemap==2.7.1
+sphinxext-opengraph
+nbsphinx
+pillow # Required for image processing in documentation
diff --git a/docs/source/.DS_Store b/docs/source/.DS_Store
new file mode 100644
index 000000000..5008ddfcf
Binary files /dev/null and b/docs/source/.DS_Store differ
diff --git a/docs/source/books/books.md b/docs/source/books/books.md
new file mode 100644
index 000000000..db97a6575
--- /dev/null
+++ b/docs/source/books/books.md
@@ -0,0 +1,16 @@
+# Books
+
+This section contains books and comprehensive guides for Monarch.
+
+```{toctree}
+:maxdepth: 2
+:caption: Available Books
+:hidden:
+
+./hyperactor-book/src/introduction
+```
+
+
+## [HyperActor Book](./hyperactor-book/src/introduction)
+
+The HyperActor Book provides a comprehensive guide to using the actor system in Monarch.
diff --git a/docs/source/books/hyperactor-book/.gitignore b/docs/source/books/hyperactor-book/.gitignore
new file mode 100644
index 000000000..3006b271d
--- /dev/null
+++ b/docs/source/books/hyperactor-book/.gitignore
@@ -0,0 +1 @@
+book/
diff --git a/docs/source/books/hyperactor-book/README.md b/docs/source/books/hyperactor-book/README.md
new file mode 100644
index 000000000..01be21768
--- /dev/null
+++ b/docs/source/books/hyperactor-book/README.md
@@ -0,0 +1,75 @@
+# Hyperactor Documentation Book
+
+This is the development documentation for the hyperactor system, built using [`mdBook`](https://rust-lang.github.io/mdBook/).
+
+```{toctree}
+:maxdepth: 2
+:caption: Contents
+
+./src/introduction
+./src/references/index
+./src/mailboxes/index
+./src/macros/index
+./src/actors/index
+./src/SUMMARY
+```
+
+## Running the Book
+
+### On the **Server**
+
+To run the book on a remote server (e.g., `devgpu004`):
+
+```bash
+x2ssh devgpu004.rva5.facebook.com
+tmux new -s mdbook
+cd ~/fbsource/fbcode/monarch/books/hyperactor-book
+mdbook serve
+```
+Then detach with Ctrl+b, then d.
+
+### On the **Client**
+
+To access the remote book from your local browser:
+```bash
+autossh -M 0 -N -L 3000:localhost:3000 devgpu004.rva5.facebook.com
+```
+Then open http://localhost:3000 in your browser.
+
+**Note**: If you don’t have autossh installed, you can install it with:
+```bash
+brew install autossh
+```
+
+### Notes
+
+- The source is located in src/, with structure defined in SUMMARY.md.
+- The book will auto-reload in the browser on edits.
+
+## Cleaning Up
+
+To shut down the book server:
+
+### Option 1: Reattach and stop
+
+```bash
+x2ssh devgpu004.rva5.facebook.com
+tmux attach -t mdbook
+```
+Inside the session:
+- Press Ctrl+C to stop mdbook serve
+- Then type exit to close the shell and terminate the tmux session
+
+### Option 2: Kill the session directly
+
+If you don’t want to reattach, you can kill the session from a new shell:
+```bash
+x2ssh devgpu004.rva5.facebook.com
+tmux kill-session -t mdbook
+```
+
+### Optional: View active tmux sessions
+```bash
+tmux ls
+```
+Use this to check whether the mdbook session is still running.
diff --git a/docs/source/books/hyperactor-book/book.toml b/docs/source/books/hyperactor-book/book.toml
new file mode 100644
index 000000000..cea7f9d89
--- /dev/null
+++ b/docs/source/books/hyperactor-book/book.toml
@@ -0,0 +1,9 @@
+[book]
+authors = ["Shayne Fletcher"]
+language = "en"
+src = "src"
+title = "Hyperactor Book"
+
+[output.html]
+git-repository-url = "https://github.com/pytorch-labs/monarch"
+edit-url-template = "https://github.com/pytorch-labs/monarch/edit/main/books/hyperactor-book/src/{path}"
diff --git a/docs/source/books/hyperactor-book/src/SUMMARY.md b/docs/source/books/hyperactor-book/src/SUMMARY.md
new file mode 100644
index 000000000..555653504
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/SUMMARY.md
@@ -0,0 +1,39 @@
+# Summary
+
+- [Introduction](./introduction.md)
+- [References](references/index.md)
+ - [Syntax](references/syntax.md)
+ - [WorldId](references/world_id.md)
+ - [ProcId](references/proc_id.md)
+ - [ActorId](references/actor_id.md)
+ - [PortId](references/port_id.md)
+ - [GangId](references/gang_id.md)
+ - [Reference](references/reference.md)
+ - [Typed References](references/typed_refs.md)
+- [Mailboxes and Routers](mailboxes/index.md)
+ - [Ports](mailboxes/ports.md)
+ - [MailboxSender](mailboxes/mailbox_sender.md)
+ - [Reconfigurable Senders](mailboxes/reconfigurable_sender.md)
+ - [MailboxServer](mailboxes/mailbox_server.md)
+ - [MailboxClient](mailboxes/mailbox_client.md)
+ - [Mailbox](mailboxes/mailbox.md)
+ - [Delivery Semantics](mailboxes/delivery.md)
+ - [Multiplexers](mailboxes/multiplexer.md)
+ - [Routers](mailboxes/routers.md)
+- [Actors](actors/index.md)
+ - [Actor](actors/actor.md)
+ - [Handler](actors/handler.md)
+  - [RemotableActor](actors/remotable_actor.md)
+ - [Checkpointable](actors/checkpointable.md)
+ - [RemoteActor](actors/remote_actor.md)
+ - [Binds](actors/binds.md)
+ - [RemoteHandles](actors/remote_handles.md)
+ - [ActorHandle](actors/actor_handle.md)
+ - [Actor Lifecycle](actors/actor_lifecycle.md)
+- [Macros](macros/index.md)
+ - [`#[derive(Handler)]`](macros/handler.md)
+ - [`#[derive(HandleClient)]`](macros/handle_client.md)
+ - [`#[derive(RefClient)]`](macros/ref_client.md)
+ - [`#[derive(Named)]`](macros/named.md)
+ - [`#[export]`](macros/export.md)
+ - [`#[forward]`](macros/forward.md)
diff --git a/docs/source/books/hyperactor-book/src/actors/actor.md b/docs/source/books/hyperactor-book/src/actors/actor.md
new file mode 100644
index 000000000..9d7ef93c2
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/actor.md
@@ -0,0 +1,169 @@
+# The `Actor` Trait
+
+The `Actor` trait defines the core behavior of all actors in the hyperactor runtime.
+
+Every actor type must implement this trait to participate in the system. It defines how an actor is constructed, initialized, and supervised.
+
+```rust
+#[async_trait]
+pub trait Actor: Sized + Send + Debug + 'static {
+    type Params: Send + 'static;
+
+    async fn new(params: Self::Params) -> Result<Self, anyhow::Error>;
+
+    async fn init(&mut self, _this: &Instance<Self>) -> Result<(), anyhow::Error> {
+        Ok(())
+    }
+
+    async fn spawn(
+        cap: &impl cap::CanSpawn,
+        params: Self::Params,
+    ) -> anyhow::Result<ActorHandle<Self>> {
+        cap.spawn(params).await
+    }
+
+    async fn spawn_detached(params: Self::Params) -> Result<ActorHandle<Self>, anyhow::Error> {
+        Proc::local().spawn("anon", params).await
+    }
+
+    fn spawn_server_task<F>(future: F) -> JoinHandle<F::Output>
+    where
+        F: Future + Send + 'static,
+        F::Output: Send + 'static,
+    {
+        tokio::spawn(future)
+    }
+
+    async fn handle_supervision_event(
+        &mut self,
+        _this: &Instance<Self>,
+        _event: &ActorSupervisionEvent,
+    ) -> Result<bool, anyhow::Error> {
+        Ok(false)
+    }
+
+    async fn handle_undeliverable_message(
+        &mut self,
+        this: &Instance<Self>,
+        Undeliverable(envelope): Undeliverable<MessageEnvelope>,
+    ) -> Result<(), anyhow::Error> {
+        assert_eq!(envelope.sender(), this.self_id());
+
+        anyhow::bail!(UndeliverableMessageError::delivery_failure(&envelope));
+    }
+}
+```
+
+## Construction: `Params` and `new`
+
+Each actor must define a `Params` type:
+
+```rust
+type Params: Send + 'static;
+```
+
+This associated type defines the data required to instantiate the actor.
+
+The actor is constructed by the runtime using:
+```rust
+async fn new(params: Self::Params) -> Result<Self, anyhow::Error>;
+```
+
+This method returns the actor's internal state. At this point, the actor has not yet been connected to the runtime; it has no mailbox and cannot yet send or receive messages. `new` is typically used to construct the actor's fields from its input parameters.
+
+## Initialization: `init`
+
+```rust
+async fn init(&mut self, this: &Instance<Self>) -> Result<(), anyhow::Error>
+```
+
+The `init` method is called after the actor has been constructed with `new` and registered with the runtime. It is passed a reference to the actor's `Instance`, allowing access to runtime services such as:
+- The actor’s ID and status
+- The mailbox and port system
+- Capabilities for spawning or sending messages
+
+The default implementation does nothing and returns `Ok(())`.
+
+If `init` returns an error, the actor is considered failed and will not proceed to handle any messages.
+
+Use `init` to perform startup logic that depends on the actor being fully integrated into the system.
+
+## Spawning: `spawn`
+
+The `spawn` method provides a default implementation for creating a new actor from an existing one:
+
+```rust
+async fn spawn(
+    cap: &impl cap::CanSpawn,
+    params: Self::Params,
+) -> anyhow::Result<ActorHandle<Self>> {
+    cap.spawn(params).await
+}
+```
+
+In practice, `CanSpawn` is only implemented for `Instance`, which represents a running actor. As a result, `Actor::spawn(...)` always constructs a child actor: the new actor receives a child ID and is linked to its parent through the runtime.
+
+## Detached Spawning: `spawn_detached`
+
+```rust
+async fn spawn_detached(params: Self::Params) -> Result<ActorHandle<Self>, anyhow::Error> {
+ Proc::local().spawn("anon", params).await
+}
+```
+This method creates a root actor on a fresh, isolated proc.
+- The proc is local-only and cannot forward messages externally.
+- The actor receives a unique root `ActorId` with no parent.
+- No supervision or linkage is established.
+- The actor is named `"anon"`.
+
+## Background Tasks: `spawn_server_task`
+
+```rust
+fn spawn_server_task<F>(future: F) -> JoinHandle<F::Output>
+where
+ F: Future + Send + 'static,
+ F::Output: Send + 'static,
+{
+ tokio::spawn(future)
+}
+```
+
+This method provides a hook point for customizing how the runtime spawns background tasks.
+
+By default, it simply calls `tokio::spawn(...)` to run the given future on the Tokio executor.
+
+## Supervision Events: `handle_supervision_event`
+
+```rust
+async fn handle_supervision_event(
+    &mut self,
+    _this: &Instance<Self>,
+    _event: &ActorSupervisionEvent,
+) -> Result<bool, anyhow::Error> {
+    Ok(false)
+}
+```
+This method is invoked when the runtime delivers an `ActorSupervisionEvent` to the actor — for example, when a child crashes or exits.
+
+By default, it returns `Ok(false)`, which indicates that the event was not handled by the actor. This allows the runtime to fall back on default behavior (e.g., escalation).
+
+Actors may override this to implement custom supervision logic.
+
+## Undeliverables: `handle_undeliverable_message`
+
+```rust
+async fn handle_undeliverable_message(
+    &mut self,
+    this: &Instance<Self>,
+    Undeliverable(envelope): Undeliverable<MessageEnvelope>,
+) -> Result<(), anyhow::Error> {
+    assert_eq!(envelope.sender(), this.self_id());
+
+    anyhow::bail!(UndeliverableMessageError::delivery_failure(&envelope));
+}
+```
+This method is called when a message sent by this actor fails to be delivered.
+- It asserts that the message was indeed sent by this actor.
+- Then it returns an error: `Err(UndeliverableMessageError::DeliveryFailure(...))`
+
+This signals that the actor considers this delivery failure to be a fatal error. You may override this method to suppress the failure or to implement custom fallback behavior.
diff --git a/docs/source/books/hyperactor-book/src/actors/actor_handle.md b/docs/source/books/hyperactor-book/src/actors/actor_handle.md
new file mode 100644
index 000000000..3befb189c
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/actor_handle.md
@@ -0,0 +1,174 @@
+# `ActorHandle`
+
+An `ActorHandle<A>` is a reference to a **local, running actor** of type `A`. It provides access to the actor's messaging ports, lifecycle status, and control methods (such as stop signals).
+
+Unlike remote references (e.g. `ActorRef<A>`), which may refer to actors on other `Proc`s, an `ActorHandle<A>` only exists within the same `Proc` and can be sent messages without requiring serialization.
+
+## Definition
+
+```rust
+pub struct ActorHandle<A: Actor> {
+    cell: InstanceCell,
+    ports: Arc<Ports<A>>,
+}
+```
+An `ActorHandle` contains:
+- `cell` is the actor’s internal runtime state, including identity and lifecycle metadata.
+- `ports` is a shared dictionary of all typed message ports available to the actor.
+
+This handle is cloneable, sendable across tasks, and allows interaction with the actor via messaging, status observation, and controlled shutdown.
+
+## Methods
+
+### `new` (internal)
+
+Constructs a new `ActorHandle` from its backing `InstanceCell` and `Ports`. This is called by the runtime when spawning a new actor.
+```rust
+pub(crate) fn new(cell: InstanceCell, ports: Arc<Ports<A>>) -> Self {
+    Self { cell, ports }
+}
+```
+
+### `cell` (internal)
+
+Returns the underlying `InstanceCell` backing the actor.
+```rust
+pub(crate) fn cell(&self) -> &InstanceCell {
+ &self.cell
+}
+```
+
+### `actor_id`
+
+Returns the `ActorId` of the actor represented by this handle.
+```rust
+pub fn actor_id(&self) -> &ActorId {
+ self.cell.actor_id()
+}
+```
+
+### `drain_and_stop`
+
+Signals the actor to drain any pending messages and then stop. This enables a graceful shutdown procedure.
+```rust
+pub fn drain_and_stop(&self) -> Result<(), ActorError> {
+ self.cell.signal(Signal::DrainAndStop)
+}
+```
+
+### `status`
+
+Returns a watch channel that can be used to observe the actor's lifecycle status (e.g., running, stopped, crashed).
+```rust
+pub fn status(&self) -> watch::Receiver<ActorStatus> {
+ self.cell.status().clone()
+}
+```
+
+### `send`
+
+Sends a message of type `M` to the actor. The actor must implement `Handler<M>` for this to compile.
+
+Messages sent via an `ActorHandle` are always delivered in-process and do not require serialization.
+```rust
+pub fn send<M: Message>(&self, message: M) -> Result<(), MailboxSenderError>
+where
+    A: Handler<M>,
+{
+    self.ports.get().send(message)
+}
+```
+
+### `port`
+
+Returns a reusable port handle for the given message type.
+```rust
+pub fn port<M: Message>(&self) -> PortHandle<M>
+where
+    A: Handler<M>,
+{
+    self.ports.get()
+}
+```
+
+### `bind`
+
+Creates a remote reference (`ActorRef<R>`) by applying a `Binds<A>` implementation.
+```rust
+pub fn bind<R: Binds<A>>(&self) -> ActorRef<R> {
+    self.cell.bind(self.ports.as_ref())
+}
+```
+
+### Binding and ActorRefs
+
+The `bind()` method on `ActorHandle<A>` creates an `ActorRef<R>` for a given remote-facing reference type `R`. This is the bridge between a local actor instance and its externally visible interface.
+```rust
+pub fn bind<R: Binds<A>>(&self) -> ActorRef<R>
+```
+This method requires that `R` implements the `Binds<A>` trait. The `Binds<A>` trait specifies how to associate a remote-facing reference type with the concrete ports handled by the actor:
+```rust
+pub trait Binds<A: Actor>: RemoteActor {
+    fn bind(ports: &Ports<A>);
+}
+```
+In practice, `A` and `R` are usually the same type; this is the pattern produced by the `#[export]` macro. But `R` can also be a trait object or wrapper that abstracts over multiple implementations.
+
+### Binding internals
+
+Calling `bind()` on the `ActorHandle`:
+1. Invokes the `Binds::bind()` implementation for `R`, registering the actor's message handlers into the `Ports` dictionary.
+2. Always binds the `Signal` type (used for draining, stopping, and supervision).
+3. Records the bound message types into `InstanceState::exported_named_ports`, enabling routing and diagnostics.
+4. Constructs the final `ActorRef` using `ActorRef::attest(...)`, which assumes the type-level correspondence between `R` and the bound ports.
+
+The result is a typed, routable reference that can be shared across `Proc`s.
+
+## `IntoFuture for ActorHandle`
+
+### Overview
+
+An `ActorHandle` can be awaited directly thanks to its `IntoFuture` implementation. Awaiting the `handle` waits for the actor to shut down.
+
+### Purpose
+
+This allows you to write:
+```rust
+let status = actor_handle.await;
+```
+Instead of:
+```rust
+let mut status = actor_handle.status();
+status.wait_for(ActorStatus::is_terminal).await;
+```
+
+### Behavior
+
+When awaited, the handle:
+- Subscribes to the actor’s status channel,
+- Waits for a terminal status (`Stopped` or `Failed`),
+- Returns the final status,
+- Returns `ActorStatus::Unknown` if the channel closes unexpectedly.
+
+### Implementation
+```rust
+impl<A: Actor> IntoFuture for ActorHandle<A> {
+ type Output = ActorStatus;
+ type IntoFuture = BoxFuture<'static, Self::Output>;
+
+ fn into_future(self) -> Self::IntoFuture {
+ let future = async move {
+ let mut status_receiver = self.cell.status().clone();
+ let result = status_receiver.wait_for(ActorStatus::is_terminal).await;
+ match result {
+ Err(_) => ActorStatus::Unknown,
+ Ok(status) => status.passthrough(),
+ }
+ };
+ future.boxed()
+ }
+}
+```
+### Summary
+
+This feature is primarily ergonomic. It provides a natural way to synchronize with the termination of an actor by simply awaiting its handle.
diff --git a/docs/source/books/hyperactor-book/src/actors/actor_lifecycle.md b/docs/source/books/hyperactor-book/src/actors/actor_lifecycle.md
new file mode 100644
index 000000000..0746b5d36
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/actor_lifecycle.md
@@ -0,0 +1,112 @@
+# Actor Lifecycle Types
+
+This page documents auxiliary types used in actor startup, shutdown, and supervision logic.
+
+## `ActorStatus`
+
+`ActorStatus` describes the current runtime state of an actor. It is used to monitor progress, coordinate shutdown, and detect failure conditions.
+```rust
+pub enum ActorStatus {
+ Unknown,
+ Created,
+ Initializing,
+ Client,
+ Idle,
+    Processing(SystemTime, Option<(String, Option<String>)>),
+ Saving(SystemTime),
+ Loading(SystemTime),
+ Stopping,
+ Stopped,
+ Failed(String),
+}
+```
+
+### States
+- `Unknown`: The status is unknown (e.g. not yet initialized).
+- `Created`: The actor has been constructed but not yet started.
+- `Initializing`: The actor is running its init lifecycle hook and is not yet receiving messages.
+- `Client`: The actor is operating in “client” mode; its ports are being managed manually.
+- `Idle`: The actor is ready to process messages but is currently idle.
+- `Processing`: The actor is handling a message. Contains a timestamp and optionally the handler/arm label.
+- `Saving`: The actor is saving its state as part of a checkpoint. Includes the time the operation began.
+- `Loading`: The actor is loading a previously saved state.
+- `Stopping`: The actor is in shutdown mode and draining its mailbox.
+- `Stopped`: The actor has exited and will no longer process messages.
+- `Failed`: The actor terminated abnormally. Contains an error description.
+
+### Methods
+- `is_terminal(&self) -> bool`: Returns true if the actor has either stopped or failed.
+- `is_failed(&self) -> bool`: Returns true if the actor is in the Failed state.
+- `passthrough(&self) -> ActorStatus`: Returns a clone of the status. Used internally during joins.
+- `span_string(&self) -> &'static str`: Returns the active handler/arm name if available. Used for tracing.
+
+## `Signal`
+
+`Signal` is used to control actor lifecycle transitions externally. These messages are sent internally by the runtime (or explicitly by users) to initiate operations like shutdown.
+```rust
+pub enum Signal {
+ Stop,
+ DrainAndStop,
+ Save,
+ Load,
+}
+```
+### Variants
+- `Stop`: Immediately halts the actor, even if messages remain in its mailbox.
+- `DrainAndStop`: Gracefully stops the actor by first draining all queued messages.
+- `Save`: Triggers a state snapshot using the actor’s Checkpointable::save method.
+- `Load`: Requests state restoration via Checkpointable::load.
+
+These signals are routed like any other message, typically sent using `ActorHandle::send` or by the runtime during supervision and recovery procedures.
+
+## `ActorError`
+
+`ActorError` represents a failure encountered while serving an actor. It includes the actor's identity and the underlying cause.
+```rust
+pub struct ActorError {
+ actor_id: ActorId,
+ kind: ActorErrorKind,
+}
+```
+This error type is returned in various actor lifecycle operations such as initialization, message handling, checkpointing, and shutdown. It is structured and extensible, allowing the runtime to distinguish between different classes of failure.
+
+### Associated Methods
+```rust
+impl ActorError {
+ /// Constructs a new `ActorError` with the given ID and kind.
+ pub(crate) fn new(actor_id: ActorId, kind: ActorErrorKind) -> Self
+
+ /// Returns a cloneable version of this error, discarding error structure
+ /// and retaining only the formatted string.
+ fn passthrough(&self) -> Self
+}
+```
+
+## `ActorErrorKind`
+
+```rust
+pub enum ActorErrorKind {
+ Processing(anyhow::Error),
+ Panic(anyhow::Error),
+ Init(anyhow::Error),
+ Mailbox(MailboxError),
+ MailboxSender(MailboxSenderError),
+ Checkpoint(CheckpointError),
+ MessageLog(MessageLogError),
+ IndeterminateState,
+ Passthrough(anyhow::Error),
+}
+```
+### Variants
+
+- `Processing`: The actor's `handle()` method returned an error.
+- `Panic`: A panic occurred during message handling or actor logic.
+- `Init`: Actor initialization failed.
+- `Mailbox`: A lower-level mailbox error occurred.
+- `MailboxSender`: A lower-level sender error occurred.
+- `Checkpoint`: Error during save/load of actor state.
+- `MessageLog`: Failure in the underlying message log.
+- `IndeterminateState`: The actor reached an invalid or unknown internal state.
+- `Passthrough`: A generic error, preserving only the error message.
+
+`Passthrough` is used when a structured error needs to be simplified for cloning or propagation across boundaries.
diff --git a/docs/source/books/hyperactor-book/src/actors/binds.md b/docs/source/books/hyperactor-book/src/actors/binds.md
new file mode 100644
index 000000000..2409308af
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/binds.md
@@ -0,0 +1,31 @@
+# Binds
+
+The `Binds` trait defines how an actor's ports are associated with the message types it can receive remotely.
+```rust
+pub trait Binds<A: Actor>: RemoteActor {
+    fn bind(ports: &Ports<A>);
+}
+```
+Implementing `Binds` allows the system to determine which messages can be routed to an actor instance of type `A`.
+
+## Code Generation
+
+In most cases, you do not implement this trait manually. Instead, the `#[export]` macro generates the appropriate `Binds` implementation by registering the actor's supported message types.
+
+For example:
+```rust
+#[hyperactor::export(
+ spawn = true,
+ handlers = [ShoppingList],
+)]
+struct ShoppingListActor;
+```
+Expands to:
+```rust
+impl Binds<ShoppingListActor> for ShoppingListActor {
+    fn bind(ports: &Ports<Self>) {
+        ports.bind::<ShoppingList>();
+    }
+}
+```
+This ensures that the actor is correctly wired to handle messages of type `ShoppingList` when used in a remote messaging context.
diff --git a/docs/source/books/hyperactor-book/src/actors/checkpointable.md b/docs/source/books/hyperactor-book/src/actors/checkpointable.md
new file mode 100644
index 000000000..dcc5ca55b
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/checkpointable.md
@@ -0,0 +1,58 @@
+# Checkpointable
+
+The `Checkpointable` trait enables an actor to define how its internal state can be saved and restored. This allows actors to participate in checkpointing and recovery mechanisms when supported by the surrounding system.
+
+## Trait definition
+```rust
+#[async_trait]
+pub trait Checkpointable: Send + Sync + Sized {
+ type State: RemoteMessage;
+
+    async fn save(&self) -> Result<Self::State, CheckpointError>;
+    async fn load(state: Self::State) -> Result<Self, CheckpointError>;
+}
+```
+
+## Associated Type
+
+- `type State`: A serializable type representing the object's saved state. This must implement `RemoteMessage` so it can be serialized and transmitted.
+
+## `save`
+
+Persists the current state of the component and returns it as a `Self::State` value. If the operation fails, returns `CheckpointError::Save`.
+
+## `load`
+
+Reconstructs a new instance from a previously saved `Self::State`. If deserialization or reconstruction fails, returns `CheckpointError::Load`.
+
+## `CheckpointError`
+
+Errors returned by save and load operations:
+```rust
+pub enum CheckpointError {
+ Save(anyhow::Error),
+ Load(SeqId, anyhow::Error),
+}
+```
+
+## Blanket Implementation
+
+Any type `T` that implements `RemoteMessage` and `Clone` automatically satisfies `Checkpointable`:
+```rust
+#[async_trait]
+impl<T> Checkpointable for T
+where
+ T: RemoteMessage + Clone,
+{
+ type State = T;
+
+    async fn save(&self) -> Result<Self::State, CheckpointError> {
+        Ok(self.clone())
+    }
+
+    async fn load(state: Self::State) -> Result<Self, CheckpointError> {
+        Ok(state)
+    }
+}
+```
+This implementation uses `clone()` to produce a checkpoint and simply returns the cloned state in load.
diff --git a/docs/source/books/hyperactor-book/src/actors/handler.md b/docs/source/books/hyperactor-book/src/actors/handler.md
new file mode 100644
index 000000000..39cd49c52
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/handler.md
@@ -0,0 +1,70 @@
+# The `Handler` Trait
+
+The `Handler` trait defines how an actor receives and responds to messages of a specific type.
+
+Each message type that an actor can handle must be declared by implementing this trait. The runtime invokes the `handle` method when such a message is delivered.
+
+```rust
+#[async_trait]
+pub trait Handler: Actor {
+ async fn handle(&mut self, cx: &Context, message: M) -> Result<(), anyhow::Error>;
+}
+```
+
+## Message Dispatch: `handle`
+
+The `handle` method is invoked by the runtime whenever a message of type `M` arrives at a matching port on the actor.
+- `message` is the received payload.
+- `cx` gives access to the actor's runtime context, including its identity, mailbox, and any capabilities exposed by the `Instance` type (such as spawning or reference resolution).
+- The return value indicates whether the message was handled successfully.
+
+An actor may implement `Handler` multiple times — once for each message type `M` it supports.
+
+## Built-in Handlers
+
+The runtime provides implementations of `Handler` for a few internal message types:
+
+### `Handler<Signal>`
+
+This is a marker implementation indicating that all actors can receive `Signal`. The handler is not expected to be invoked directly — its real behavior is implemented inside the runtime.
+```rust
+#[async_trait]
+impl Handler for A {
+ async fn handle(
+ &mut self,
+ _cx: &Context,
+ _message: Signal,
+ ) -> Result<(), anyhow::Error> {
+ unimplemented!("signal handler should not be called directly")
+ }
+}
+```
+
+### `Handler<IndexedErasedUnbound<M>>`
+
+```rust
+#[async_trait]
+impl Handler> for A
+where
+ A: Handler,
+ M: Castable,
+{
+ async fn handle(
+ &mut self,
+ cx: &Context,
+ msg: IndexedErasedUnbound,
+ ) -> anyhow::Result<()> {
+ let message = msg.downcast()?.bind()?;
+ Handler::handle(self, cx, message).await
+ }
+}
+```
+This implementation allows an actor to transparently handle erased, rebound messages of type `M`, provided it already implements `Handler`.
+
+This construct is used in the implementation of **accumulation**, a communication pattern where a message is multicast to multiple recipients and their replies are gathered—possibly through intermediate actors—before being sent back to the original sender.
+
+To enable this, messages are unbound at the sender: reply ports (`PortRef`s) are extracted into a `Bindings` object, allowing intermediate nodes to rewrite those ports to point back to themselves. This ensures that replies from downstream actors are routed through the intermediate, enabling reply collection and reduction.
+
+Once a message reaches its destination, it is rebound by merging the updated bindings back into the message. The `Handler<IndexedErasedUnbound<M>>` implementation automates this by recovering the typed message `M` and dispatching it to the actor's existing `Handler<M>` implementation.
+
+This allows actors to remain unaware of accumulation mechanics—they can just implement `Handler` as usual.
diff --git a/docs/source/books/hyperactor-book/src/actors/index.md b/docs/source/books/hyperactor-book/src/actors/index.md
new file mode 100644
index 000000000..676645622
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/index.md
@@ -0,0 +1,19 @@
+# Actors
+
+Hyperactor programs are structured around actors: isolated state machines that process messages asynchronously.
+
+Each actor runs in isolation, and maintains private internal state. Actors interact with the outside world through typed message ports and follow strict lifecycle semantics managed by the runtime.
+
+This chapter introduces the actor system in hyperactor. We'll cover:
+
+- The [`Actor`](./actor.md) trait and its lifecycle hooks
+- The [`Handler`](./handler.md) trait for defining message-handling behavior
+- The [`RemotableActor`](./remotable_actor.md) trait for enabling remote spawning
+- The [`Checkpointable`](./checkpointable.md) trait for supporting actor persistence and recovery
+- The [`RemoteActor`](./remote_actor.md) marker trait for remotely referencable types
+- The [`Binds`](./binds.md) trait for wiring exported ports to reference types
+- The [`RemoteHandles`](./remote_handles.md) trait for associating message types with a reference
+- The [`ActorHandle`](./actor_handle.md) type for referencing and communicating with running actors
+- [Actor Lifecycle](./lifecycle.md), including `Signal` and `ActorStatus`
+
+Actors are instantiated with parameters and bound to mailboxes, enabling reliable message-passing. The runtime builds upon this foundation to support supervision, checkpointing, and remote interaction via typed references.
diff --git a/docs/source/books/hyperactor-book/src/actors/remotable_actor.md b/docs/source/books/hyperactor-book/src/actors/remotable_actor.md
new file mode 100644
index 000000000..02fde16ea
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/remotable_actor.md
@@ -0,0 +1,79 @@
+# The `RemotableActor` Trait
+
+```rust
+pub trait RemotableActor: Actor
+where
+ Self::Params: RemoteMessage,
+{
+ fn gspawn(
+ proc: &Proc,
+ name: &str,
+ serialized_params: Data,
+ ) -> Pin> + Send>>;
+
+ fn get_type_id() -> TypeId {
+ TypeId::of::()
+ }
+}
+```
+The `RemotableActor` trait marks an actor type as spawnable across process boundaries. It enables hyperactor's remote spawning and registration system, allowing actors to be created from serialized parameters in a different `Proc`.
+
+## Requirements
+- The actor type must also implement `Actor`.
+- Its `Params` type (used in `Actor::new`) must implement `RemoteMessage`, so it can be serialized and transmitted over the network.
+
+## `gspawn`
+```rust
+fn gspawn(
+ proc: &Proc,
+ name: &str,
+ serialized_params: Data,
+) -> Pin> + Send>>;
+```
+This is the core entry point for remote actor spawning. It takes:
+- a target `Proc` where the actor should be created,
+- a string name to assign to the actor,
+- and a `Data` payload representing serialized parameters.
+
+The method deserializes the parameters, creates the actor, and returns its `ActorId`.
+
+This is used internally by hyperactor's remote actor registry and `spawn` services. Ordinary users generally don't call this directly.
+
+> **Note:** This is not an `async fn` because `RemotableActor` must be object-safe.
+
+## `get_type_id`
+
+Returns a stable `TypeId` for the actor type. Used to identify actor types at runtime—e.g., in registration tables or type-based routing logic.
+
+## Blanket Implementation
+
+The RemotableActor trait is automatically implemented for any actor type `A` that:
+- implements `Actor` and `RemoteActor`,
+- and whose `Params` type implements `RemoteMessage`.
+
+This allows `A` to be remotely registered and instantiated from serialized data, typically via the runtime's registration mechanism.
+
+```rust
+impl RemotableActor for A
+where
+ A: Actor + RemoteActor,
+ A: Binds,
+ A::Params: RemoteMessage,
+{
+ fn gspawn(
+ proc: &Proc,
+ name: &str,
+ serialized_params: Data,
+ ) -> Pin> + Send>> {
+ let proc = proc.clone();
+ let name = name.to_string();
+ Box::pin(async move {
+ let handle = proc
+ .spawn::(&name, bincode::deserialize(&serialized_params)?)
+ .await?;
+ Ok(handle.bind::().actor_id)
+ })
+ }
+}
+```
+Note the `Binds` bound: this trait specifies how an actor's ports are wired, determining which message types the actor can receive remotely. The resulting `ActorId` corresponds to a port-bound, remotely callable version of the actor.
diff --git a/docs/source/books/hyperactor-book/src/actors/remote_actor.md b/docs/source/books/hyperactor-book/src/actors/remote_actor.md
new file mode 100644
index 000000000..9b7555b36
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/remote_actor.md
@@ -0,0 +1,10 @@
+# RemoteActor
+
+```rust
+pub trait RemoteActor: Named + Send + Sync {}
+```
+This is a marker trait indicating that a type is eligible to serve as a reference to a remote actor (i.e., an actor that may reside on a different proc).
+
+It requires:
+- `Named`: the type must provide a static name.
+- `Send + Sync`: the type must be safely transferable and shareable across threads.
diff --git a/docs/source/books/hyperactor-book/src/actors/remote_handles.md b/docs/source/books/hyperactor-book/src/actors/remote_handles.md
new file mode 100644
index 000000000..a141fea9b
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/actors/remote_handles.md
@@ -0,0 +1,14 @@
+# RemoteHandles
+
+The `RemoteHandles` trait is a marker used to declare that a given `RemoteActor` type can handle messages of type `M`.
+```rust
+pub trait RemoteHandles: RemoteActor {}
+```
+
+An implementation like:
+```rust
+impl RemoteHandles for ShoppingListActor {}
+```
+means that `ShoppingListActor` is known to handle the `ShoppingList` message type.
+
+These implementations are typically generated by the `#[export(handlers = [...])]` macro, and are not written by hand.
diff --git a/docs/source/books/hyperactor-book/src/introduction.md b/docs/source/books/hyperactor-book/src/introduction.md
new file mode 100644
index 000000000..0730903f3
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/introduction.md
@@ -0,0 +1,19 @@
+# Hyperactor
+
+This book describes the design and implementation of the hyperactor runtime.
+
+The goal is to provide a clear, structured explanation of how actors communicate safely and efficiently across distributed systems using hyperactor’s abstractions.
+
+We hope this becomes the book we wish we had when we started working with Monarch. Work in progress.
+
+```{toctree}
+:maxdepth: 2
+:caption: Chapters
+
+./actors/index
+./macros/index
+./mailboxes/index
+./references/index
+./SUMMARY.md
+
+```
diff --git a/docs/source/books/hyperactor-book/src/macros/export.md b/docs/source/books/hyperactor-book/src/macros/export.md
new file mode 100644
index 000000000..04c1abb65
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/macros/export.md
@@ -0,0 +1,70 @@
+# `#[export]`
+
+The `#[hyperactor::export]` macro turns a regular `Actor` implementation into a remotely spawnable actor, registering its type information, `spawn` function, and supported message handlers for discovery and use across processes or runtimes.
+
+## What It Adds
+
+When applied to an actor type like this:
+
+```rust
+#[hyperactor::export(
+ spawn = true,
+ handlers = [ShoppingList],
+)]
+struct ShoppingListActor(HashSet);
+```
+The macro expands to include:
+ - A `Named` implementation for the actor
+ - A `Binds` implementation that registers supported message types
+ - Implementations of `RemoteHandles` for each type in the `handlers = [...]` list
+ - A `RemoteActor` marker implementation
+ - If `spawn = true`, a `RemotableActor` implementation and an inventory registration of the `spawn` function.
+
+This enables the actor to be:
+ - Spawned dynamically by name
+ - Routed to via typed messages
+ - Reflected on at runtime (for diagnostics, tools, and orchestration)
+
+## Generated Implementations (simplified)
+```rust
+impl RemoteActor for ShoppingListActor {}
+
+impl RemoteHandles for ShoppingListActor {}
+impl RemoteHandles for ShoppingListActor {}
+
+impl Binds for ShoppingListActor {
+ fn bind(ports: &Ports) {
+ ports.bind::();
+ }
+}
+
+impl Named for ShoppingListActor {
+ fn typename() -> &'static str {
+ "my_crate::ShoppingListActor"
+ }
+}
+```
+If `spawn = true`, the macro also emits:
+```rust
+impl RemotableActor for ShoppingListActor {}
+```
+This enables remote spawning via the default `gspawn` provided by a blanket implementation.
+
+It also registers the actor into inventory:
+```
+inventory::submit!(SpawnableActor {
+ name: ...,
+ gspawn: ...,
+ get_type_id: ...,
+});
+```
+This allows the actor to be discovered and spawned by name at runtime.
+
+## Summary
+
+The `#[export]` macro makes an actor remotely visible, spawnable, and routable by declaring:
+ - What messages it handles
+ - How to bind those messages
+ - What its globally unique name is
+ - (Optionally) how to spawn it dynamically
diff --git a/docs/source/books/hyperactor-book/src/macros/forward.md b/docs/source/books/hyperactor-book/src/macros/forward.md
new file mode 100644
index 000000000..a342fc3a9
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/macros/forward.md
@@ -0,0 +1,28 @@
+# `#[forward]`
+
+The `#[hyperactor::forward]` macro connects a user-defined handler trait implementation (like `ShoppingListHandler`) to the core `Handler` trait required by the runtime.
+
+In short, it generates the boilerplate needed to route incoming messages of type `T` to your high-level trait implementation.
+
+## What it generates
+
+The macro expands to:
+```rust
+#[async_trait]
+impl Handler for ShoppingListActor {
+ async fn handle(&mut self, ctx: &Context, message: ShoppingList) -> Result<(), Error> {
+ ::handle(self, ctx, message).await
+ }
+}
+```
+This avoids having to manually match on enum variants or duplicate message logic.
+
+## When to use it
+
+Use `#[forward(MessageType)]` when:
+
+- You’ve defined a custom trait (e.g., `ShoppingListHandler`)
+- You’re handling a message enum (like `ShoppingList`)
+- You want the runtime to route messages to your trait automatically.
+
+This is most often used alongside `#[derive(Handler)]`, which generates the corresponding handler and client traits for a user-defined message enum.
diff --git a/docs/source/books/hyperactor-book/src/macros/handle_client.md b/docs/source/books/hyperactor-book/src/macros/handle_client.md
new file mode 100644
index 000000000..3070b317f
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/macros/handle_client.md
@@ -0,0 +1,65 @@
+# `#[derive(HandleClient)]`
+
+`#[derive(Handler)]` generates both the server-side handler trait (`ShoppingListHandler`) and the client-side trait definition (`ShoppingListClient`). However, it does not implement the client trait for any specific type.
+
+This is where `#[derive(HandleClient)]` comes in.
+
+## What It Adds
+
+`#[derive(HandleClient)]` generates the following implementation:
+
+```rust
+impl ShoppingListClient for ActorHandle
+where
+ T: ShoppingListHandler + Send + Sync + 'static
+```
+
+This means you can call methods like `.add(...)` or `.list(...)` directly on an `ActorHandle` without needing to manually implement the `ShoppingListClient` trait.
+
+In other words, `HandleClient` connects the generated `ShoppingListClient` interface (from `Handler`) to the concrete type `ActorHandle`.
+
+## Generated Implementation (simplified)
+```rust
+use async_trait::async_trait;
+use hyperactor::{
+ ActorHandle,
+ anyhow::Error,
+ cap::{CanSend, CanOpenPort},
+ mailbox::open_once_port,
+ metrics,
+ Message,
+};
+
+#[async_trait]
+impl ShoppingListClient for ActorHandle
+where
+ T: ShoppingListHandler + Send + Sync + 'static,
+{
+ async fn add(&self, caps: &impl CanSend, item: String) -> Result<(), Error> {
+ self.send(caps, ShoppingList::Add(item)).await
+ }
+
+ async fn remove(&self, caps: &impl CanSend, item: String) -> Result<(), Error> {
+ self.send(caps, ShoppingList::Remove(item)).await
+ }
+
+ async fn exists(
+ &self,
+ caps: &impl CanSend + CanOpenPort,
+ item: String,
+ ) -> Result {
+ let (reply_to, recv) = open_once_port(caps)?;
+ self.send(caps, ShoppingList::Exists(item, reply_to)).await?;
+ Ok(recv.await?)
+ }
+
+ async fn list(
+ &self,
+ caps: &impl CanSend + CanOpenPort,
+ ) -> Result, Error> {
+ let (reply_to, recv) = open_once_port(caps)?;
+ self.send(caps, ShoppingList::List(reply_to)).await?;
+ Ok(recv.await?)
+ }
+}
+```
diff --git a/docs/source/books/hyperactor-book/src/macros/handler.md b/docs/source/books/hyperactor-book/src/macros/handler.md
new file mode 100644
index 000000000..300130cef
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/macros/handler.md
@@ -0,0 +1,96 @@
+# `#[derive(Handler)]`
+
+The `#[derive(Handler)]` macro generates the infrastructure for sending and receiving typed messages in hyperactor. When applied to an enum like this:
+```rust
+#[derive(Handler)]
+enum ShoppingList {
+ // Fire-and-forget messages
+ Add(String),
+ Remove(String),
+
+ // Request-response messages
+ Exists(String, #[reply] OncePortRef),
+ List(#[reply] OncePortRef>),
+}
+```
+... it generates **two key things**:
+
+### 1. `ShoppingListHandler` trait
+This trait defines a method for each variant, and a `handle` method to route incoming messages:
+```rust
+use async_trait::async_trait;
+use hyperactor::anyhow::Error;
+
+#[async_trait]
+pub trait ShoppingListHandler: hyperactor::Actor + Send + Sync {
+ async fn add(&mut self, ctx: &Context, item: String) -> Result<(), Error>;
+ async fn remove(&mut self, ctx: &Context, item: String) -> Result<(), Error>;
+ async fn exists(&mut self, ctx: &Context, item: String) -> Result;
+ async fn list(&mut self, ctx: &Context) -> Result, Error>;
+
+ async fn handle(&mut self, ctx: &Context, msg: ShoppingList) -> Result<(), Error> {
+ match msg {
+ ShoppingList::Add(item) => {
+ self.add(ctx, item).await
+ }
+ ShoppingList::Remove(item) => {
+ self.remove(ctx, item).await
+ }
+ ShoppingList::Exists(item, reply_to) => {
+ let result = self.exists(ctx, item).await?;
+ reply_to.send(ctx, result)?;
+ Ok(())
+ }
+ ShoppingList::List(reply_to) => {
+ let result = self.list(ctx).await?;
+ reply_to.send(ctx, result)?;
+ Ok(())
+ }
+ }
+ }
+}
+```
+Note:
+ - `Add` and `Remove` are **oneway**: no reply port
+ - `Exists` and `List` are **call-style**: they take a `#[reply] OncePortRef` and expect a response to be sent back.
+
+### 2. `ShoppingListClient` trait
+
+Alongside the handler, the `#[derive(Handler)]` macro also generates a client-side trait named `ShoppingListClient`. This trait provides a convenient and type-safe interface for sending messages to an actor.
+
+Each method in the trait corresponds to a variant of the message enum. For example:
+```rust
+use async_trait::async_trait;
+use hyperactor::anyhow::Error;
+use hyperactor::cap::{CanSend, CanOpenPort};
+
+#[async_trait]
+pub trait ShoppingListClient: Send + Sync {
+ async fn add(&self, caps: &impl CanSend, item: String) -> Result<(), Error>;
+ async fn remove(&self, caps: &impl CanSend, item: String) -> Result<(), Error>;
+ async fn exists(&self, caps: &impl CanSend + CanOpenPort, item: String) -> Result;
+ async fn list(&self, caps: &impl CanSend + CanOpenPort) -> Result, Error>;
+}
+```
+
+#### Capability Parameter
+Each method takes a caps argument that provides the runtime capabilities required to send the message:
+- All methods require `CanSend`.
+- Methods with `#[reply]` arguments additionally require `CanOpenPort`.
+
+In typical usage, `caps` is a `Mailbox`.
+
+#### Example Usage
+```rust
+let mut proc = Proc::local();
+let actor = proc.spawn::("shopping", ()).await?;
+let client = proc.attach("client").unwrap();
+
+// Fire-and-forget
+actor.add(&client, "milk".into()).await?;
+
+// With reply
+let found = actor.exists(&client, "milk".into()).await?;
+println!("got milk? {found}");
+```
+Here, actor is an `ActorHandle` that implements `ShoppingListClient`, and `client` is a `Mailbox` that provides the necessary capabilities.
diff --git a/docs/source/books/hyperactor-book/src/macros/index.md b/docs/source/books/hyperactor-book/src/macros/index.md
new file mode 100644
index 000000000..44a1479e9
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/macros/index.md
@@ -0,0 +1,32 @@
+# Macros
+
+This section documents the macros provided by hyperactor for actor and message integration.
+
+These macros support a complete message-passing workflow: from defining message enums and generating client APIs, to routing messages and exporting actors for dynamic or remote use.
+
+- [`#[derive(Handler)]`](handler.md) — generate message handling and client traits for actor enums
+- [`#[derive(HandleClient)]`](handle_client.md) — implement the generated client trait for `ActorHandle`
+- [`#[derive(RefClient)]`](ref_client.md) — implement the generated client trait for `ActorRef`
+- [`#[derive(Named)]`](named.md) — give a type a globally unique name and port for routing and reflection
+- [`#[export]`](export.md) — register an actor's type and handlers so it is remotely routable, and optionally make it spawnable from outside the current runtime
+- [`#[forward]`](forward.md) — route messages to a user-defined handler trait implementation
+
+## Macro Summary
+
+- **`#[derive(Handler)]`**
+ Generates handler and client traits for a message enum.
+
+- **`#[derive(HandleClient)]`**
+ Implements the client trait for `ActorHandle`.
+
+- **`#[derive(RefClient)]`**
+ Implements the client trait for `ActorRef`.
+
+- **`#[derive(Named)]`**
+ Registers the type with a globally unique name and port.
+
+- **`#[export]`**
+ Makes an actor spawnable and routable via inventory.
+
+- **`#[forward]`**
+ Forwards messages to a user-defined handler trait implementation.
diff --git a/docs/source/books/hyperactor-book/src/macros/named.md b/docs/source/books/hyperactor-book/src/macros/named.md
new file mode 100644
index 000000000..7ca9f30fe
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/macros/named.md
@@ -0,0 +1,77 @@
+# `#[derive(Named)]`
+
+The `#[derive(Named)]` macro implements the `hyperactor::Named` trait for a type, making it identifiable at runtime through a globally unique string and stable hash.
+
+## The `Named` trait
+
+The `hyperactor::data::Named` trait is the foundation of type identification in hyperactor. It gives each type a globally unique identity based on its name, which is used in routing.
+```rust
+pub trait Named: Sized + 'static {
+ fn typename() -> &'static str;
+ fn typehash() -> u64 { ... }
+ fn typeid() -> TypeId { ... }
+ fn port() -> u64 { ... }
+ fn arm(&self) -> Option<&'static str> { ... }
+ unsafe fn arm_unchecked(self_: *const ()) -> Option<&'static str> { ... }
+}
+```
+
+### Trait Methods
+
+#### `typename() -> &'static str`
+
+Returns the globally unique, fully-qualified type name for the type. This should typically look like:
+```rust
+"foo::bar::Corge"
+```
+
+#### `typehash() -> u64`
+
+Returns a stable hash derived from `typename()`. This value is used for message port derivation.
+```rust
+cityhasher::hash(Self::typename())
+```
+
+#### `typeid() -> TypeId`
+
+Returns the Rust `TypeId` for the type (which is only unique within a single binary).
+
+#### `port() -> u64`
+
+Returns a globally unique port number for the type:
+```rust
+Self::typehash() | (1 << 63)
+```
+Typed ports are reserved in the range 2^63 .. 2^64 - 1.
+
+#### `arm(&self) -> Option<&'static str>`
+
+For enum types, this returns the name of the current variant, e.g., "Add" or "Remove".
+
+#### `unsafe fn arm_unchecked(ptr: *const ()) -> Option<&'static str>`
+
+The type-erased version of `arm()`. Casts ptr back to `&Self` and calls `arm()`.
+
+Useful for dynamic reflection when the concrete type isn’t statically known.
+
+### Runtime Registration
+
+In addition to implementing the `Named` trait, the macro registers the type’s metadata at startup using the `inventory` crate:
+```rust
+const _: () = {
+ static __INVENTORY: ::inventory::Node = ::inventory::Node {
+ value: &TypeInfo { ... },
+ ...
+ };
+ // Registers the type info before main() runs
+ #[link_section = ".init_array"]
+ static __CTOR: unsafe extern "C" fn() = __ctor;
+};
+```
+This allows the type to be discovered at runtime, enabling:
+- Message dispatch from erased or serialized inputs
+- Introspection and diagnostics
+- Dynamic spawning or reflection
+- Tooling support
+
+Types registered this way appear in the global `inventory::iter` set, which is how the hyperactor runtime locates known message types.
diff --git a/docs/source/books/hyperactor-book/src/macros/ref_client.md b/docs/source/books/hyperactor-book/src/macros/ref_client.md
new file mode 100644
index 000000000..7ccf746b4
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/macros/ref_client.md
@@ -0,0 +1,62 @@
+# `#[derive(RefClient)]`
+
+While `#[derive(HandleClient)]` enables calling the generated client trait on `ActorHandle`, there are cases where you don’t have a handle, only a reference to an actor (`ActorRef`). This is where `#[derive(RefClient)]` comes in.
+
+## What It Adds
+
+`#[derive(RefClient)]` generates the following implementation:
+```rust
+impl ShoppingListClient for ActorRef
+where
+ T: ShoppingListHandler + Send + Sync + 'static
+```
+This allows you to invoke methods like `.add(...)` or `.list(...)` directly on an `ActorRef`.
+
+In other words, `RefClient` connects the generated `ShoppingListClient` interface (from `Handler`) to the `ActorRef` type, which refers to a remote actor.
+
+## Generated Implementation (simplified)
+
+```rust
+use async_trait::async_trait;
+use hyperactor::{
+ ActorRef,
+ anyhow::Error,
+ cap::{CanSend, CanOpenPort},
+ mailbox::open_once_port,
+ metrics,
+ Message,
+};
+
+#[async_trait]
+impl ShoppingListClient for ActorRef
+where
+ T: ShoppingListHandler + Send + Sync + 'static,
+{
+ async fn add(&self, caps: &impl CanSend, item: String) -> Result<(), Error> {
+ self.send(caps, ShoppingList::Add(item)).await
+ }
+
+ async fn remove(&self, caps: &impl CanSend, item: String) -> Result<(), Error> {
+ self.send(caps, ShoppingList::Remove(item)).await
+ }
+
+ async fn exists(
+ &self,
+ caps: &impl CanSend + CanOpenPort,
+ item: String,
+ ) -> Result {
+ let (reply_to, recv) = open_once_port(caps)?;
+ self.send(caps, ShoppingList::Exists(item, reply_to)).await?;
+ Ok(recv.await?)
+ }
+
+ async fn list(
+ &self,
+ caps: &impl CanSend + CanOpenPort,
+ ) -> Result, Error> {
+ let (reply_to, recv) = open_once_port(caps)?;
+ self.send(caps, ShoppingList::List(reply_to)).await?;
+ Ok(recv.await?)
+ }
+}
+```
diff --git a/docs/source/books/hyperactor-book/src/mailboxes/delivery.md b/docs/source/books/hyperactor-book/src/mailboxes/delivery.md
new file mode 100644
index 000000000..eef1117ee
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/mailboxes/delivery.md
@@ -0,0 +1,226 @@
+# Delivery Semantics
+
+This section defines the mechanics of message delivery and failure in the mailbox system.
+
+Key components:
+
+- `MessageEnvelope`: encapsulates a message, sender, and destination
+- `DeliveryError`: enumerates failure modes (unroutable, broken link, etc.)
+- Undeliverable handling: how messages are returned on failure
+- Serialization and deserialization support
+
+These types form the foundation for how messages are transmitted, routed, and failed in a structured way.
+
+An envelope carries a message destined to a remote actor. The envelope contains a serialized message along with its destination and sender:
+```rust
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Named)]
+pub struct MessageEnvelope {
+ /// The sender of this message.
+ sender: ActorId,
+
+ /// The destination of the message.
+ dest: PortId,
+
+ /// The serialized message.
+ data: Serialized,
+
+ /// Error contains a delivery error when message delivery failed.
+ error: Option,
+
+ /// Additional context for this message.
+ headers: Attrs,
+}
+```
+
+`MessageEnvelope::new` creates a message envelope:
+```rust
+impl MessageEnvelope {
+ fn new(sender: ActorId, dest: PortId, data: Serialized, headers: Attrs) -> Self { ... }
+}
+```
+`MessageEnvelope::new_unknown` creates a new envelope when we don't know who the sender is:
+```rust
+impl MessageEnvelope {
+ fn new_unknown(dest: PortId, data: Serialized) -> Self {
+ Self::new(id!(unknown[0].unknown), dest, data)
+ }
+}
+```
+If a type `T` implements `Serialize` and `Named`, an envelope can be constructed while serializing the message data:
+```rust
+impl MessageEnvelope {
+ fn serialize(
+ source: ActorId, dest: PortId, value: &T, headers: Attrs) -> Result {
+ Ok(Self {
+ data: Serialized::serialize(value)?,
+ sender: source,
+ dest,
+ error: None,
+ })
+ }
+}
+```
+We can use the fact that `T` implements `DeserializeOwned` to provide a function to deserialize the message data in an envelope:
+```rust
+impl MessageEnvelope {
+ fn deserialized(&self) -> Result {
+ self.data.deserialized()
+ }
+}
+```
+This function stamps an envelope with a delivery error:
+```rust
+impl MessageEnvelope {
+ fn error(&mut self, error: DeliveryError) {
+ self.error = Some(error);
+ }
+}
+```
+The `undeliverable` function on a `MessageEnvelope` can be called when a message has been determined to be undeliverable due to the provided error. It marks the envelope with the error and attempts to return it to the sender.
+```rust
+impl MessageEnvelope {
+ pub fn undeliverable(
+ mut self,
+ error: DeliveryError,
+ return_handle: PortHandle>,
+ ) {
+ self.try_set_error(error);
+ undeliverable::return_undeliverable(return_handle, self);
+ }
+}
+```
+
+### Delivery errors
+
+Delivery errors can occur during message posting:
+```rust
+#[derive(thiserror::Error, ...)]
+pub enum DeliveryError {
+ /// The destination address is not reachable.
+ #[error("address not routable: {0}")]
+ Unroutable(String),
+
+ /// A broken link indicates that a link in the message
+ /// delivery path has failed.
+ #[error("broken link: {0}")]
+ BrokenLink(String),
+
+ /// A (local) mailbox delivery error.
+ #[error("mailbox error: {0}")]
+ Mailbox(String),
+}
+```
+
+### Mailbox Errors
+
+Errors can occur during mailbox operations. Each error is associated with the mailbox's actor ID:
+```rust
+pub struct MailboxError {
+ actor_id: ActorId,
+ kind: MailboxErrorKind,
+}
+
+#[non_exhaustive]
+pub enum MailboxErrorKind {
+ /// An operation was attempted on a closed mailbox.
+ #[error("mailbox closed")]
+ Closed,
+
+ /// The port associated with an operation was invalid.
+ #[error("invalid port: {0}")]
+ InvalidPort(PortId),
+
+ /// There was no sender associated with the port.
+ #[error("no sender for port: {0}")]
+ NoSenderForPort(PortId),
+
+ /// There was no local sender associated with the port.
+ /// Returned by operations that require a local port.
+ #[error("no local sender for port: {0}")]
+ NoLocalSenderForPort(PortId),
+
+ /// The port was closed.
+ #[error("{0}: port closed")]
+ PortClosed(PortId),
+
+ /// An error occurred during a send operation.
+ #[error("send {0}: {1}")]
+ Send(PortId, #[source] anyhow::Error),
+
+ /// An error occurred during a receive operation.
+ #[error("recv {0}: {1}")]
+ Recv(PortId, #[source] anyhow::Error),
+
+ /// There was a serialization failure.
+ #[error("serialize: {0}")]
+ Serialize(#[source] anyhow::Error),
+
+ /// There was a deserialization failure.
+ #[error("deserialize {0}: {1}")]
+ Deserialize(&'static str, anyhow::Error),
+
+ #[error(transparent)]
+ Channel(#[from] ChannelError),
+}
+```
+
+`PortLocation` describes the location of a port. It provides a uniform data type for ports that may or may not be bound.
+```rust
+#[derive(Debug, Clone)]
+pub enum PortLocation {
+ /// The port was bound: the location is its underlying bound ID.
+ Bound(PortId),
+ /// The port was not bound: we provide the actor ID and the message type.
+ Unbound(ActorId, &'static str),
+}
+```
+
+One place `PortLocation` is used is in the type `MailboxSenderError`, which is specifically for errors that occur during mailbox send operations. Each error is associated with the location of the port involved in the operation:
+```rust
+#[derive(Debug)]
+pub struct MailboxSenderError {
+ location: PortLocation,
+ kind: MailboxSenderErrorKind,
+}
+
+/// The kind of mailbox sending errors.
+#[derive(thiserror::Error, Debug)]
+pub enum MailboxSenderErrorKind {
+ /// Error during serialization.
+ #[error("serialization error: {0}")]
+ Serialize(anyhow::Error),
+
+ /// Error during deserialization.
+ #[error("deserialization error for type {0}: {1}")]
+ Deserialize(&'static str, anyhow::Error),
+
+ /// A send to an invalid port.
+ #[error("invalid port")]
+ Invalid,
+
+ /// A send to a closed port.
+ #[error("port closed")]
+ Closed,
+
+ // The following pass through underlying errors:
+ /// An underlying mailbox error.
+ #[error(transparent)]
+ Mailbox(#[from] MailboxError),
+
+ /// An underlying channel error.
+ #[error(transparent)]
+ Channel(#[from] ChannelError),
+
+ /// An underlying message log error.
+ #[error(transparent)]
+ MessageLog(#[from] MessageLogError),
+
+ /// Another, uncategorized error.
+ #[error("send error: {0}")]
+ Other(#[from] anyhow::Error),
+
+ /// The destination was unreachable.
+ #[error("unreachable: {0}")]
+ Unreachable(anyhow::Error),
+}
+```
diff --git a/docs/source/books/hyperactor-book/src/mailboxes/index.md b/docs/source/books/hyperactor-book/src/mailboxes/index.md
new file mode 100644
index 000000000..8897f6c60
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/mailboxes/index.md
@@ -0,0 +1,15 @@
+# Mailboxes and Routers
+
+Mailboxes are the foundation of message delivery in hyperactor. They coordinate typed ports, routing logic, forwarding, and delivery infrastructure for distributed actors.
+
+This chapter introduces the components of the mailbox subsystem:
+
+- [Ports](ports.md): typed channels for local message delivery
+- [MailboxSender](mailbox_sender.md): trait-based abstraction for message posting
+- [Reconfigurable Senders](reconfigurable_sender.md): deferred wiring and dynamic configuration
+- [MailboxServer](mailbox_server.md): bridging incoming message streams into mailboxes
+- [MailboxClient](mailbox_client.md): buffering, forwarding, and failure reporting
+- [Mailbox](mailbox.md): port registration, binding, and routing
+- [Delivery Semantics](delivery.md): envelopes, delivery errors, and failure handling
+- [Multiplexers](multiplexer.md): port-level dispatch to local mailboxes
+- [Routers](routers.md): prefix-based routing to local or remote destinations
diff --git a/docs/source/books/hyperactor-book/src/mailboxes/mailbox.md b/docs/source/books/hyperactor-book/src/mailboxes/mailbox.md
new file mode 100644
index 000000000..9f0f07410
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/mailboxes/mailbox.md
@@ -0,0 +1,354 @@
+# Mailbox
+
+A `Mailbox` represents an actor's in-process inbox. It owns and manages all of the actor's ports, provides APIs to open and bind them, and routes messages based on their destination `PortId`.
+
+A mailbox routes local messages directly to its ports. If a message is addressed to a different actor, the mailbox uses its configured forwarder to relay the message. If the message cannot be delivered—for instance, if the destination port is unbound—the mailbox wraps it as undeliverable and returns it via the supplied handle.
+
+This section covers:
+
+- Opening ports of various kinds
+- Port binding and registration
+- Internal mailbox state and delivery logic
+
+The `State` holds all delivery infrastructure: active ports, the actor's ID, a port allocator, and a forwarding mechanism. Multiple clones of a `Mailbox` share access to the same state:
+```rust
+pub struct Mailbox {
+ state: Arc<State>,
+}
+```
+The `new` function creates a mailbox with the provided actor ID and forwarder for external destinations:
+```rust
+impl Mailbox {
+ pub fn new(actor_id: ActorId, forwarder: BoxedMailboxSender) -> Self {
+ Self {
+ state: Arc::new(State::new(actor_id, forwarder)),
+ }
+ }
+}
+```
+Mailboxes created with `new_detached` are not connected to an external message forwarder and can only deliver to their own ports:
+```rust
+impl Mailbox {
+ pub fn new_detached(actor_id: ActorId) -> Self {
+ Self {
+ state: Arc::new(State::new(actor_id, BOXED_PANICKING_MAILBOX_SENDER.clone())),
+ }
+ }
+```
+
+A mailbox can open ports, each identified by a unique `PortId` within the owning actor. The most common form is `open_port`, which creates a fresh, unbounded channel for message delivery:
+```rust
+impl Mailbox {
+ pub fn open_port<M: Message>(&self) -> (PortHandle<M>, PortReceiver<M>) {
+ let port_index = self.state.allocate_port();
+ let (sender, receiver) = mpsc::unbounded_channel::<M>();
+ let port_id = PortId(self.state.actor_id.clone(), port_index);
+ (
+ PortHandle::new(self.clone(), port_index, UnboundedPortSender::Mpsc(sender)),
+ PortReceiver::new(
+ receiver,
+ port_id,
+ /*coalesce=*/ false,
+ self.state.clone(),
+ ),
+ )
+ }
+}
+```
+This allocates a new port index and sets up a pair of endpoints: a `PortHandle` for sending messages into the port, and a `PortReceiver` for asynchronously consuming them. Internally, these are two ends of an `mpsc::unbounded_channel`, meaning messages are buffered in memory and processed in order without backpressure.
+
+In contrast to `open_port`, which uses a channel-backed buffer, `open_enqueue_port` constructs a port backed directly by a user-supplied enqueue function:
+```rust
+impl Mailbox {
+ pub(crate) fn open_enqueue_port(
+ &self,
+ enqueue: impl Fn(M) -> Result<(), anyhow::Error> + Send + Sync + 'static,
+ ) -> PortHandle {
+ PortHandle {
+ mailbox: self.clone(),
+ port_index: self.state.allocate_port(),
+ sender: UnboundedPortSender::Func(Arc::new(enqueue)),
+ bound: Arc::new(OnceLock::new()),
+ reducer_typehash: None,
+ }
+ }
+}
+```
+Unlike `open_port`, which yields both sender and receiver ends of an internal channel, `open_enqueue_port` returns only a `PortHandle`. Instead of buffering messages, this port invokes a user-supplied function directly on each message it receives.
+
+Another variant, `open_accum_port`, builds on the same principle as `open_enqueue_port` but pairs the port with an accumulator that maintains state across messages. We'll return to this specialized port type later. We'll also encounter `open_once_port`, which is analogous to `open_port` but sets up a one-shot message channel—useful for rendezvous-style communication—using the associated `OncePortHandle` and `OncePort` types.
+
+## Port Binding
+
+Binding is only required when a port must be referred to externally—for example, when sending it across the network or including it in a message. Binding a port produces a `PortRef`, which globally names the port and requires that the associated message type implements `RemoteMessage` (i.e., is serializable). All messages sent via a `PortRef` are serialized.
+
+By contrast, `PortHandle` can be used locally to send any type implementing `Message`, including non-serializable types, and behaves like a typed in-memory queue.
+
+Once a port is opened with `open_port`, it must be bound before it can receive messages routed through the mailbox. Binding installs the port into the mailbox's internal routing table and produces a `PortRef`—a lightweight, serializable reference that remote actors can use to send messages to the port.
+
+Port binding is performed by calling `.bind()` on a `PortHandle`:
+```rust
+impl PortHandle {
+ pub fn bind(&self) -> PortRef {
+ PortRef::attest_reducible(
+ self.bound
+ .get_or_init(|| self.mailbox.bind(self).port_id().clone())
+ .clone(),
+ self.reducer_typehash.clone(),
+ )
+ }
+}
+```
+This delegates to `Mailbox::bind(&self, handle)`, which performs the actual installation into the mailbox's internal `State`. If the port is already bound, this is a no-op.
+
+The mailbox checks that the port handle belongs to it, computes the `PortId`, and then inserts the sender into the internal ports map if it hasn't been bound already:
+```rust
+impl Mailbox {
+ fn bind(&self, handle: &PortHandle) -> PortRef {
+ assert_eq!(
+ handle.mailbox.actor_id(),
+ self.actor_id(),
+ "port does not belong to mailbox"
+ );
+ let port_id = self.actor_id().port_id(handle.port_index);
+ match self.state.ports.entry(handle.port_index) {
+ Entry::Vacant(entry) => {
+ entry.insert(Box::new(UnboundedSender::new(
+ handle.sender.clone(),
+ port_id.clone(),
+ )));
+ }
+ Entry::Occupied(_entry) => {}
+ }
+
+ PortRef::attest(port_id)
+ }
+}
+```
+The result is a `PortRef` that can be sent across the network to deliver messages to this bound port.
+
+## Binding to a Specific Index
+
+There is also a lower-level variant, `bind_to`, used internally by actor binding mechanisms (e.g., when installing well-known ports at known indices):
+```rust
+impl Mailbox {
+ fn bind_to(&self, handle: &PortHandle, port_index: u64) {
+ assert_eq!(
+ handle.mailbox.actor_id(),
+ self.actor_id(),
+ "port does not belong to mailbox"
+ );
+
+ let port_id = self.actor_id().port_id(port_index);
+ match self.state.ports.entry(port_index) {
+ Entry::Vacant(entry) => {
+ entry.insert(Box::new(UnboundedSender::new(
+ handle.sender.clone(),
+ port_id,
+ )));
+ }
+ Entry::Occupied(_) => panic!("port {} already bound", port_id),
+ }
+ }
+}
+```
+
+## Message Delivery via MailboxSender
+
+The mailbox also handles message delivery. It does this by implementing the `MailboxSender` trait, which defines how messages—wrapped in `MessageEnvelope`—are routed, deserialized, and delivered to bound ports or forwarded to remote destinations.
+```rust
+impl MailboxSender for Mailbox {
+ fn post(
+ &self,
+ envelope: MessageEnvelope,
+ return_handle: PortHandle<Undeliverable<MessageEnvelope>>,
+ ) {
+ if envelope.dest().actor_id() != &self.state.actor_id {
+ return self.state.forwarder.post(envelope, return_handle);
+ }
+
+ match self.state.ports.entry(envelope.dest().index()) {
+ Entry::Vacant(_) => envelope.undeliverable(
+ DeliveryError::Unroutable("port not bound in mailbox".to_string()),
+ return_handle,
+ ),
+ Entry::Occupied(entry) => {
+ let (metadata, data) = envelope.open();
+ let MessageMetadata {headers, sender, dest, error: metadata_error } = metadata;
+ match entry.get().send_serialized(headers, data) {
+ Ok(false) => {
+ entry.remove();
+ }
+ Ok(true) => (),
+ Err(SerializedSenderError {
+ data,
+ error,
+ headers,
+ }) => MessageEnvelope::seal(
+ MessageMetadata { headers, sender, dest, error: metadata_error },
+ data,
+ )
+ .undeliverable(DeliveryError::Mailbox(format!("{}", error)), return_handle),
+ }
+ }
+ }
+ }
+}
+```
+
+### Breakdown of Delivery Logic
+
+This implementation of `MailboxSender::post` defines how a mailbox handles message delivery:
+1. Actor ID Routing
+```rust
+if envelope.dest().actor_id() != &self.state.actor_id
+```
+If the message is not addressed to this actor, it's forwarded using the forwarder defined in the mailbox's state. This allows for transparent routing across process or network boundaries.
+
+2. Port Lookup and Binding Check
+```rust
+match self.state.ports.entry(envelope.dest().index())
+```
+The mailbox uses the destination `PortId` to locate the bound port in its internal routing table. If the port hasn't been bound, the message is returned to the sender as undeliverable.
+
+3. Deserialization and Delivery Attempt
+```rust
+match entry.get().send_serialized(headers, data)
+```
+If the port is found, the message is unsealed and passed to the corresponding `SerializedSender` (e.g., the `UnboundedSender` inserted during binding). This may succeed or fail:
+ - `Ok(true)`: Message was delivered.
+ - `Ok(false)`: Port is closed; remove it from the routing table.
+ - `Err(...)`: Deserialization failed or other error; wrap the message and return it to the sender as undeliverable.
+
+### Relationship to Bound Ports
+
+Only ports that have been bound via `PortHandle::bind()` appear in the ports map and are eligible to receive messages via this `post` path. The entry in this map is a type-erased boxed `SerializedSender`, which, when invoked, attempts to deserialize the raw message payload into the expected concrete type and forward it to the associated `PortReceiver` or handler.
+
+The mailbox's routing and delivery logic ultimately relies on the internal `State`, which stores port mappings, forwarding configuration, and allocation state.
+
+## State
+Each `Mailbox` instance wraps an internal `State` struct that contains all shared delivery infrastructure:
+```rust
+struct State {
+ actor_id: ActorId,
+ ports: DashMap<u64, Box<dyn SerializedSender>>,
+ next_port: AtomicU64,
+ forwarder: BoxedMailboxSender,
+}
+```
+This structure is reference-counted via `Arc` and is cloned across all components that need access to the mailbox's internal state. Each field plays a central role:
+- **`actor_id`**: Identifies the actor that owns this mailbox. All ports in the mailbox are scoped under this actor ID and used to construct `PortId`s during binding and routing.
+- **`ports`**: A concurrent map from port indices (`u64`) to type-erased `SerializedSender`s. Each entry corresponds to a bound port and provides the ability to deserialize and deliver raw messages to the correct `PortReceiver`. Only serializable ports are registered here.
+- **`next_port`**: Tracks the next available user port index. Actor-assigned ports occupy indices 0..1024, and user-allocated ports begin from a constant offset (`USER_PORT_OFFSET`).
+- **`forwarder`**: A boxed `MailboxSender` used for forwarding messages to other actors. If a message's destination is not owned by this mailbox, it will be passed to this sender.
+
+### State: Internal Structure of a Mailbox
+
+The `State` struct holds all the internal data needed for a functioning `Mailbox`. It's not exposed directly—rather, it's wrapped in `Arc` and shared between `Mailbox`, `PortHandle`, and `PortReceiver`:
+```rust
+impl State {
+ fn new(actor_id: ActorId, forwarder: BoxedMailboxSender) -> Self {
+ Self {
+ actor_id,
+ ports: DashMap::new(),
+ next_port: AtomicU64::new(USER_PORT_OFFSET),
+ forwarder,
+ }
+ }
+
+ fn allocate_port(&self) -> u64 {
+ self.next_port.fetch_add(1, Ordering::SeqCst)
+ }
+}
+```
+**Notes**:
+- The `actor_id` allows every `Mailbox` to know which actor it belongs to, which is essential for routing decisions (`post` checks this).
+- The `ports` field holds the routing table: it maps each port index to a type-erased sink (`SerializedSender`) capable of deserializing and dispatching messages to the right receiver.
+- `next_port` enables safe concurrent dynamic port allocation by atomically assigning unique port indices.
+- The `forwarder` is used to send messages not destined for this actor—e.g., for remote delivery.
+
+## Sending and Receiving Messages
+
+There are two distinct pathways by which a message can arrive at a `PortReceiver`. Both ultimately push a message into an `mpsc` channel (or functionally equivalent handler), but they differ in intent and routing mechanism.
+
+### Local Sends via PortHandle
+
+When you call `.send(msg)` on a `PortHandle`, the message bypasses the `Mailbox` entirely and goes directly into the associated channel:
+```text
+PortHandle::send(msg)
+→ UnboundedPortSender::send(Attrs::new(), msg)
+→ underlying channel (mpsc::UnboundedSender)
+→ PortReceiver::recv().await
+```
+
+### Routed Sends via Mailbox
+
+When a message is wrapped in a `MessageEnvelope` and posted via `Mailbox::post`, routing logic takes over:
+```text
+Mailbox::post(envelope, return_handle)
+→ lookup State::ports[port_index]
+→ SerializedSender::send_serialized(headers, bytes)
+→ UnboundedSender::send(headers, M) // after deserialization
+→ mpsc channel
+→ PortReceiver::recv().await
+```
+This is the delivery path for remote messages or any message routed by a `PortRef`. A `PortHandle` must first be **bound** to participate in this.
+
+## Capabilities
+
+Capabilities are lightweight traits that control access to mailbox-related operations. They act as permissions: a type that implements a capability trait is allowed to perform the corresponding action, such as sending messages or opening ports.
+
+These traits are sealed, meaning they can only be implemented inside the crate. This ensures that capability boundaries are enforced and cannot be circumvented by downstream code.
+
+### Overview
+
+| Capability | Description |
+|----------------|-----------------------------------------------------|
+| `CanSend` | Allows sending messages to ports |
+| `CanOpenPort` | Allows creating new ports for receiving messages |
+| `CanSplitPort` | Allows splitting existing ports with reducers |
+| `CanSpawn` | Allows spawning new child actors |
+
+Each public trait (e.g., `CanSend`) is implemented for any type that implements the corresponding private `sealed::CanSend` trait. This gives the crate full control over capability delegation and encapsulation.
+
+### Example: CanSend
+```rust
+pub trait CanSend: sealed::CanSend {}
+impl<T: sealed::CanSend> CanSend for T {}
+```
+
+The sealed version defines the core method:
+```rust
+pub trait sealed::CanSend: Send + Sync {
+ fn post(&self, dest: PortId, headers: Attrs, data: Serialized);
+}
+```
+Only internal types (e.g., `Mailbox`) implement this sealed trait, meaning only trusted components can obtain `CanSend`:
+```rust
+impl cap::sealed::CanSend for Mailbox {
+ fn post(&self, dest: PortId, headers: Attrs, data: Serialized) {
+ let return_handle = self
+ .lookup_sender::<Undeliverable<MessageEnvelope>>()
+ .map_or_else(
+ || {
+ let actor_id = self.actor_id();
+ if CAN_SEND_WARNED_MAILBOXES
+ .get_or_init(DashSet::new)
+ .insert(actor_id.clone()) {
+ let bt = std::backtrace::Backtrace::capture();
+ tracing::warn!(
+ actor_id = ?actor_id,
+ backtrace = ?bt,
+ "mailbox attempted to post a message without binding Undeliverable"
+ );
+ }
+ monitored_return_handle()
+ },
+ |sender| PortHandle::new(self.clone(), self.state.allocate_port(), sender),
+ );
+ let envelope = MessageEnvelope::new(self.actor_id().clone(), dest, data, headers);
+ MailboxSender::post(self, envelope, return_handle);
+ }
+}
+```
+This implementation prefers that the mailbox has already bound a port capable of receiving undeliverable messages (of type `Undeliverable<MessageEnvelope>`). This port acts as a return address for failed message deliveries. If the port is not bound, message sending will warn with a backtrace indicating a logic error in system setup and fall back on a `monitored_return_handle` (ideally we'd `panic!`, but backwards compatibility prevents this). This ensures that all messages have a well-defined failure path and avoids silent message loss.
diff --git a/docs/source/books/hyperactor-book/src/mailboxes/mailbox_client.md b/docs/source/books/hyperactor-book/src/mailboxes/mailbox_client.md
new file mode 100644
index 000000000..b2021fb3d
--- /dev/null
+++ b/docs/source/books/hyperactor-book/src/mailboxes/mailbox_client.md
@@ -0,0 +1,201 @@
+# MailboxClient
+
+A `MailboxClient` is the sending counterpart to a `MailboxServer`. It owns a buffer of outgoing messages and transmits them over a `channel::Tx` interface to a remote server.
+
+The client handles undeliverable returns, maintains a background task for monitoring channel health, and implements `MailboxSender` for compatibility.
+
+Topics in this section:
+
+- The `MailboxClient` struct and its `new` constructor
+- The use of `Buffer` for decoupled delivery
+- Delivery error handling and monitoring
+
+## Internal Buffering
+
+`MailboxClient` uses a `Buffer` internally to decouple message submission from actual transmission. This buffer ensures ordered, asynchronous delivery while preserving undeliverable routing guarantees.
+
+This is a foundational buffer abstraction used in several types in the remainder of the program. It's a concurrency-safe buffered message processor, parameterized on the message type `T`.
+
+The buffer:
+- accepts messages of type `T`
+- spawns an internal background task to process messages asynchronously
+- tracks how many messages have been processed via a `watch` channel + `AtomicUsize`:
+```rust
+struct Buffer<T: Message> {
+ queue: mpsc::UnboundedSender<(T, PortHandle<Undeliverable<T>>)>,
+ processed: watch::Receiver<usize>,
+ seq: AtomicUsize,
+}
+```
+For functions of type `Fn(T) -> impl Future