diff --git a/rholang/src/rust/interpreter/spaces/adapter.rs b/rholang/src/rust/interpreter/spaces/adapter.rs
new file mode 100644
index 000000000..613f63a92
--- /dev/null
+++ b/rholang/src/rust/interpreter/spaces/adapter.rs
@@ -0,0 +1,376 @@
+//! ISpace to SpaceAgent Adapter
+//!
+//! This module provides an adapter that bridges the existing `ISpace` trait
+//! from rspace++ with the new `SpaceAgent` trait hierarchy. This allows
+//! the current RSpace implementation to be used within the multi-space registry.
+
+use std::collections::BTreeSet;
+use std::sync::Arc;
+use tokio::sync::RwLock;
+
+use async_trait::async_trait;
+
+use rspace_plus_plus::rspace::{
+    checkpoint::{Checkpoint, SoftCheckpoint},
+    errors::RSpaceError,
+    hashing::blake2b256_hash::Blake2b256Hash,
+    internal::{Datum, WaitingContinuation},
+    rspace_interface::{ContResult, ISpace, RSpaceResult},
+    trace::{Log, event::Produce},
+};
+
+use super::async_agent::{AsyncSpaceAgent, AsyncCheckpointableSpace, AsyncReplayableSpace};
+use super::errors::SpaceError;
+use super::types::{ChannelBound, ContinuationBound, DataBound, PatternBound, SpaceId, SpaceQualifier};
+
+/// Adapter that wraps an `ISpace` to implement `SpaceAgent`.
+///
+/// This allows the existing RSpace implementation to be used with the
+/// new multi-space registry and trait hierarchy.
+pub struct ISpaceAdapter<C, P, A, K>
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    /// The underlying ISpace implementation - uses RwLock for better read concurrency
+    inner: Arc<RwLock<Box<dyn ISpace<C, P, A, K> + Send + Sync>>>,
+
+    /// The space ID for this adapter
+    space_id: SpaceId,
+
+    /// The space qualifier
+    qualifier: SpaceQualifier,
+
+    /// Counter for gensym
+    gensym_counter: std::sync::atomic::AtomicUsize,
+}
+
+impl<C, P, A, K> ISpaceAdapter<C, P, A, K>
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    /// Create a new adapter wrapping an ISpace.
+    pub fn new(
+        inner: Arc<RwLock<Box<dyn ISpace<C, P, A, K> + Send + Sync>>>,
+        space_id: SpaceId,
+        qualifier: SpaceQualifier,
+    ) -> Self {
+        ISpaceAdapter {
+            inner,
+            space_id,
+            qualifier,
+            gensym_counter: std::sync::atomic::AtomicUsize::new(0),
+        }
+    }
+
+    /// Get the inner ISpace (for direct access when needed).
+    pub fn inner(&self) -> &Arc<RwLock<Box<dyn ISpace<C, P, A, K> + Send + Sync>>> {
+        &self.inner
+    }
+}
+
+// Note: The synchronous SpaceAgent trait is intentionally NOT implemented for ISpaceAdapter
+// because ISpace requires async access. Use AsyncSpaceAgent instead.
+// This design enforces async-only usage at compile time, preventing runtime errors.
+
+impl<C, P, A, K> ISpaceAdapter<C, P, A, K>
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    /// Async version of create_checkpoint (requires write lock - modifies state)
+    pub async fn create_checkpoint_async(&mut self) -> Result<Checkpoint, SpaceError> {
+        let mut guard = self.inner.write().await;
+        guard.create_checkpoint().map_err(|e| SpaceError::CheckpointError { description: e.to_string() })
+    }
+
+    /// Async version of create_soft_checkpoint (requires write lock - captures state)
+    pub async fn create_soft_checkpoint_async(&mut self) -> SoftCheckpoint<C, P, A, K> {
+        let mut guard = self.inner.write().await;
+        guard.create_soft_checkpoint()
+    }
+
+    /// Async version of revert_to_soft_checkpoint (requires write lock - modifies state)
+    pub async fn revert_to_soft_checkpoint_async(
+        &mut self,
+        checkpoint: SoftCheckpoint<C, P, A, K>,
+    ) -> Result<(), SpaceError> {
+        let mut guard = self.inner.write().await;
+        guard.revert_to_soft_checkpoint(checkpoint)
+            .map_err(|e| SpaceError::CheckpointError { description: e.to_string() })
+    }
+
+    /// Async version of reset (requires write lock - modifies state)
+    pub async fn reset_async(&mut self, root: &Blake2b256Hash) -> Result<(), SpaceError> {
+        let mut guard = self.inner.write().await;
+        guard.reset(root).map_err(|e| SpaceError::CheckpointError { description: e.to_string() })
+    }
+
+    /// Async version of clear (requires write lock - modifies state)
+    pub async fn clear_async(&mut self) -> Result<(), SpaceError> {
+        let mut guard = self.inner.write().await;
+        guard.clear().map_err(|e| SpaceError::CheckpointError { description: e.to_string() })
+    }
+
+    /// Async version of get_data (uses read lock - no mutation)
+    pub async fn get_data_async(&self, channel: &C) -> Vec<Datum<A>> {
+        let guard = self.inner.read().await;
+        guard.get_data(channel)
+    }
+
+    /// Async version of get_waiting_continuations (uses read lock - no mutation)
+    pub async fn get_waiting_continuations_async(&self, channels: Vec<C>) -> Vec<WaitingContinuation<P, K>> {
+        let guard = self.inner.read().await;
+        guard.get_waiting_continuations(channels)
+    }
+
+    /// Async version of get_joins (uses read lock - no mutation)
+    pub async fn get_joins_async(&self, channel: C) -> Vec<Vec<C>> {
+        let guard = self.inner.read().await;
+        guard.get_joins(channel)
+    }
+
+    /// Async version of produce (requires write lock - modifies state)
+    pub async fn produce_async(
+        &mut self,
+        channel: C,
+        data: A,
+        persist: bool,
+        priority: Option<usize>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>, Produce)>, SpaceError> {
+        let mut guard = self.inner.write().await;
+        guard.produce(channel, data, persist, priority)
+            .map_err(|e| SpaceError::InternalError { description: e.to_string() })
+    }
+
+    /// Async version of consume (requires write lock - modifies state)
+    pub async fn consume_async(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+        persist: bool,
+        peeks: BTreeSet<i32>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>)>, SpaceError> {
+        let mut guard = self.inner.write().await;
+        guard.consume(channels, patterns, continuation, persist, peeks)
+            .map_err(|e| SpaceError::InternalError { description: e.to_string() })
+    }
+
+    /// Async version of install (requires write lock - modifies state)
+    pub async fn install_async(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+    ) -> Result<Option<(K, Vec<A>)>, SpaceError> {
+        let mut guard = self.inner.write().await;
+        guard.install(channels, patterns, continuation)
+            .map_err(|e| SpaceError::InternalError { description: e.to_string() })
+    }
+
+    /// Async version of rig_and_reset (requires write lock - modifies state)
+    pub async fn rig_and_reset_async(
+        &mut self,
+        start_root: Blake2b256Hash,
+        log: Log,
+    ) -> Result<(), SpaceError> {
+        let mut guard = self.inner.write().await;
+        guard.rig_and_reset(start_root, log)
+            .map_err(|e| SpaceError::InternalError { description: e.to_string() })
+    }
+
+    /// Async version of rig (requires write lock - sets up replay tables)
+    pub async fn rig_async(&self, log: Log) -> Result<(), SpaceError> {
+        let guard = self.inner.write().await;
+        guard.rig(log)
+            .map_err(|e| SpaceError::InternalError { description: e.to_string() })
+    }
+
+    /// Async version of check_replay_data (uses read lock - validation only)
+    pub async fn check_replay_data_async(&self) -> Result<(), SpaceError> {
+        let guard = self.inner.read().await;
+        guard.check_replay_data()
+            .map_err(|e| SpaceError::InternalError { description: e.to_string() })
+    }
+
+    /// Check if in replay mode (uses read lock - no mutation)
+    pub async fn is_replay_async(&self) -> bool {
+        let guard = self.inner.read().await;
+        guard.is_replay()
+    }
+
+    /// Async version of update_produce (requires write lock - modifies state)
+    pub async fn update_produce_async(&mut self, produce: Produce) {
+        let mut guard = self.inner.write().await;
+        guard.update_produce(produce)
+    }
+}
+
+// ==========================================================================
+// AsyncSpaceAgent Implementation
+// ==========================================================================
+
+#[async_trait]
+impl<C, P, A, K> AsyncSpaceAgent<C, P, A, K> for ISpaceAdapter<C, P, A, K>
+where
+    C: ChannelBound + From<usize>,
+    P: Clone + Send + Sync + 'static,
+    A: Clone + Send + Sync + 'static,
+    K: Clone + Send + Sync + 'static,
+{
+    fn space_id(&self) -> &SpaceId {
+        &self.space_id
+    }
+
+    fn qualifier(&self) -> SpaceQualifier {
+        self.qualifier
+    }
+
+    async fn gensym(&mut self) -> Result<C, SpaceError> {
+        let counter = self.gensym_counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
+        Ok(C::from(counter))
+    }
+
+    async fn produce(
+        &mut self,
+        channel: C,
+        data: A,
+        persist: bool,
+        priority: Option<usize>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>, Produce)>, SpaceError> {
+        self.produce_async(channel, data, persist, priority).await
+    }
+
+    async fn consume(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+        persist: bool,
+        peeks: BTreeSet<i32>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>)>, SpaceError> {
+        self.consume_async(channels, patterns, continuation, persist, peeks).await
+    }
+
+    async fn install(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+    ) -> Result<Option<(K, Vec<A>)>, SpaceError> {
+        self.install_async(channels, patterns, continuation).await
+    }
+
+    async fn get_data(&self, channel: &C) -> Vec<Datum<A>> {
+        self.get_data_async(channel).await
+    }
+
+    async fn get_waiting_continuations(&self, channels: Vec<C>) -> Vec<WaitingContinuation<P, K>> {
+        self.get_waiting_continuations_async(channels).await
+    }
+
+    async fn get_joins(&self, channel: C) -> Vec<Vec<C>> {
+        self.get_joins_async(channel).await
+    }
+}
+
+// ==========================================================================
+// AsyncCheckpointableSpace Implementation
+// ==========================================================================
+
+#[async_trait]
+impl<C, P, A, K> AsyncCheckpointableSpace<C, P, A, K> for ISpaceAdapter<C, P, A, K>
+where
+    C: ChannelBound + From<usize>,
+    P: Clone + Send + Sync + 'static,
+    A: Clone + Send + Sync + 'static,
+    K: Clone + Send + Sync + 'static,
+{
+    async fn create_checkpoint(&mut self) -> Result<Checkpoint, SpaceError> {
+        self.create_checkpoint_async().await
+    }
+
+    async fn create_soft_checkpoint(&mut self) -> SoftCheckpoint<C, P, A, K> {
+        self.create_soft_checkpoint_async().await
+    }
+
+    async fn revert_to_soft_checkpoint(
+        &mut self,
+        checkpoint: SoftCheckpoint<C, P, A, K>,
+    ) -> Result<(), SpaceError> {
+        self.revert_to_soft_checkpoint_async(checkpoint).await
+    }
+
+    async fn reset(&mut self, root: &Blake2b256Hash) -> Result<(), SpaceError> {
+        self.reset_async(root).await
+    }
+
+    async fn clear(&mut self) -> Result<(), SpaceError> {
+        self.clear_async().await
+    }
+}
+
+// ==========================================================================
+// AsyncReplayableSpace Implementation
+// ==========================================================================
+
+#[async_trait]
+impl<C, P, A, K> AsyncReplayableSpace<C, P, A, K> for ISpaceAdapter<C, P, A, K>
+where
+    C: ChannelBound + From<usize>,
+    P: Clone + Send + Sync + 'static,
+    A: Clone + Send + Sync + 'static,
+    K: Clone + Send + Sync + 'static,
+{
+    async fn rig_and_reset(
+        &mut self,
+        start_root: Blake2b256Hash,
+        log: Log,
+    ) -> Result<(), SpaceError> {
+        self.rig_and_reset_async(start_root, log).await
+    }
+
+    async fn rig(&self, log: Log) -> Result<(), SpaceError> {
+        self.rig_async(log).await
+    }
+
+    async fn check_replay_data(&self) -> Result<(), SpaceError> {
+        self.check_replay_data_async().await
+    }
+
+    fn is_replay(&self) -> bool {
+        // This trait method is synchronous, so the async lock cannot be awaited
+        // here. Use a non-blocking read instead of hard-coding `false`: if the
+        // lock is momentarily contended we conservatively report `false`.
+        // Callers that need an exact answer should use `is_replay_async`.
+        self.inner.try_read().map(|guard| guard.is_replay()).unwrap_or(false)
+    }
+
+    async fn update_produce(&mut self, produce: Produce) {
+        self.update_produce_async(produce).await
+    }
+}
+
+/// Convert RSpaceError to SpaceError
+impl From<RSpaceError> for SpaceError {
+    fn from(err: RSpaceError) -> Self {
+        SpaceError::InternalError { description: err.to_string() }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_adapter_space_id() {
+        // This is a compile-time check that ISpaceAdapter can be created
+        // A full test would require a mock ISpace implementation
+    }
+}
diff --git a/rholang/src/rust/interpreter/spaces/agent.rs b/rholang/src/rust/interpreter/spaces/agent.rs
new file mode 100644
index 000000000..a0af54392
--- /dev/null
+++ b/rholang/src/rust/interpreter/spaces/agent.rs
@@ -0,0 +1,372 @@
+//! Layer 3-4: Space Agent Traits
+//!
+//! This module defines the core traits for space operations:
+//! - `SpaceAgent`: Core produce/consume operations (Layer 3)
+//! - `CheckpointableSpace`: State management (Layer 4)
+//! - `ReplayableSpace`: Deterministic replay (Layer 4)
+//!
+//! These traits abstract over the underlying storage implementation,
+//! allowing different space configurations to be used interchangeably.
+
+use std::collections::BTreeSet;
+
+use super::errors::SpaceError;
+use super::types::{ChannelBound, ContinuationBound, DataBound, PatternBound, SpaceId, SpaceQualifier};
+
+use models::rhoapi::EFunction;
+use rspace_plus_plus::rspace::{
+    checkpoint::{Checkpoint, SoftCheckpoint},
+    hashing::blake2b256_hash::Blake2b256Hash,
+    internal::{Datum, WaitingContinuation},
+    rspace_interface::{ContResult, RSpaceResult},
+    trace::{Log, event::Produce},
+};
+
+// ==========================================================================
+// Layer 3: Space Agent Core Trait
+// ==========================================================================
+
+/// Core trait for space operations.
+///
+/// This is the primary interface for interacting with a space. It provides:
+/// - `produce`: Send data to a channel
+/// - `consume`: Receive data from channels (with pattern matching)
+/// - `gensym`: Generate unique channel names
+/// - `install`: Install persistent continuations
+///
+/// Type parameters:
+/// - `C`: Channel type
+/// - `P`: Pattern type
+/// - `A`: Data type
+/// - `K`: Continuation type
+pub trait SpaceAgent<C, P, A, K>: Send + Sync
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    /// Get the space ID for this agent.
+    fn space_id(&self) -> &SpaceId;
+
+    /// Get the qualifier for this space.
+    fn qualifier(&self) -> SpaceQualifier;
+
+    /// Generate a new unique channel name in this space.
+    ///
+    /// The behavior depends on the outer storage type:
+    /// - HashMap: Returns a unique random-based name
+    /// - Array: Returns the next index, or `OutOfNames` if full
+    /// - Vector: Returns the next index, growing the vector
+    fn gensym(&mut self) -> Result<C, SpaceError>;
+
+    /// Produce data on a channel.
+    ///
+    /// If a matching continuation is waiting, it is triggered and the match
+    /// result is returned. Otherwise, the data is stored at the channel.
+    ///
+    /// # Arguments
+    /// - `channel`: The channel to send on
+    /// - `data`: The data to send
+    /// - `persist`: Whether the data should persist after being consumed
+    /// - `priority`: Optional priority level for PriorityQueue collections (0 = highest)
+    ///
+    /// # Returns
+    /// - `Ok(Some(...))` if a continuation was triggered
+    /// - `Ok(None)` if the data was stored
+    /// - `Err(...)` on error (e.g., Cell already full)
+    fn produce(
+        &mut self,
+        channel: C,
+        data: A,
+        persist: bool,
+        priority: Option<usize>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>, Produce)>, SpaceError>;
+
+    /// Consume data from channels.
+    ///
+    /// Searches for data matching the given patterns on the given channels.
+    /// If matches are found, the continuation is triggered immediately.
+    /// Otherwise, the continuation is stored to wait for matching data.
+    ///
+    /// # Arguments
+    /// - `channels`: The channels to receive from
+    /// - `patterns`: The patterns to match (one per channel)
+    /// - `continuation`: The continuation to execute on match
+    /// - `persist`: Whether the continuation should persist after triggering
+    /// - `peeks`: Set of channel indices to peek (don't consume)
+    ///
+    /// # Returns
+    /// - `Ok(Some(...))` if matching data was found
+    /// - `Ok(None)` if the continuation was stored
+    /// - `Err(...)` on error
+    fn consume(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+        persist: bool,
+        peeks: BTreeSet<i32>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>)>, SpaceError>;
+
+    /// Consume data from channels with pattern modifiers (sim, rank, etc.).
+    ///
+    /// This method extends the standard consume operation with modifier-based
+    /// matching. Modifiers are represented as EFunction calls:
+    /// - `sim(query, metric, threshold, ...)`: VectorDB similarity matching
+    /// - `rank(query, function, params...)`: Result ranking/filtering
+    ///
+    /// # Arguments
+    /// - `channels`: The channels to receive from
+    /// - `patterns`: The patterns to match (one per channel)
+    /// - `modifiers`: Pattern modifiers for each channel (as EFunction calls)
+    /// - `continuation`: The continuation to execute on match
+    /// - `persist`: Whether the continuation should persist after triggering
+    /// - `peeks`: Set of channel indices to peek (don't consume)
+    ///
+    /// # Returns
+    /// - `Ok(Some(...))` if matching data was found
+    /// - `Ok(None)` if the continuation was stored
+    /// - `Err(...)` on error
+    ///
+    /// # Default Implementation
+    /// Falls back to regular consume, ignoring modifiers.
+    /// Spaces that support VectorDB collections should override this.
+    fn consume_with_modifiers(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        _modifiers: Vec<Vec<EFunction>>,
+        continuation: K,
+        persist: bool,
+        peeks: BTreeSet<i32>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>)>, SpaceError> {
+        // Default implementation: fall back to standard consume
+        // VectorDB spaces should override this to implement modifier-based matching
+        self.consume(channels, patterns, continuation, persist, peeks)
+    }
+
+    /// Install a persistent continuation.
+    ///
+    /// Similar to consume with `persist=true`, but always stores the continuation
+    /// even if matching data exists.
+    ///
+    /// # Arguments
+    /// - `channels`: The channels to receive from
+    /// - `patterns`: The patterns to match
+    /// - `continuation`: The continuation to install
+    ///
+    /// # Returns
+    /// - `Ok(Some(...))` if matching data was found (continuation still installed)
+    /// - `Ok(None)` if no matching data
+    fn install(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+    ) -> Result<Option<(K, Vec<A>)>, SpaceError>;
+
+    /// Get data stored at a channel.
+    fn get_data(&self, channel: &C) -> Vec<Datum<A>>;
+
+    /// Get waiting continuations for channels.
+    fn get_waiting_continuations(&self, channels: Vec<C>) -> Vec<WaitingContinuation<P, K>>;
+
+    /// Get join patterns for a channel.
+    fn get_joins(&self, channel: C) -> Vec<Vec<C>>;
+}
+
+// ==========================================================================
+// Layer 4: Checkpointable Space
+// ==========================================================================
+
+/// Extension trait for checkpointing capabilities.
+///
+/// Checkpoints capture the state of a space at a point in time, allowing
+/// rollback for speculative execution or recovery from failures.
+pub trait CheckpointableSpace<C, P, A, K>: SpaceAgent<C, P, A, K>
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    /// Create a persistent checkpoint.
+    ///
+    /// This captures the current state and writes it to the history trie.
+    /// The checkpoint can be used to reset the space to this state later.
+    fn create_checkpoint(&mut self) -> Result<Checkpoint, SpaceError>;
+
+    /// Create a soft (non-persistent) checkpoint.
+    ///
+    /// This is faster than a full checkpoint but doesn't persist to storage.
+    /// Useful for speculative execution that may be rolled back.
+    fn create_soft_checkpoint(&mut self) -> SoftCheckpoint<C, P, A, K>;
+
+    /// Revert to a soft checkpoint.
+    ///
+    /// Restores the space to the state captured in the soft checkpoint.
+    fn revert_to_soft_checkpoint(
+        &mut self,
+        checkpoint: SoftCheckpoint<C, P, A, K>,
+    ) -> Result<(), SpaceError>;
+
+    /// Reset to a checkpoint by its merkle root.
+    ///
+    /// Restores the space to a previously created checkpoint.
+    fn reset(&mut self, root: &Blake2b256Hash) -> Result<(), SpaceError>;
+
+    /// Clear all data and continuations.
+    ///
+    /// Does not affect the history trie.
+    fn clear(&mut self) -> Result<(), SpaceError>;
+}
+
+// ==========================================================================
+// Layer 4: Replayable Space
+// ==========================================================================
+
+/// Extension trait for deterministic replay.
+///
+/// Replay is used to re-execute a sequence of operations deterministically,
+/// typically for validation or recovery. The space is "rigged" with a log
+/// of expected operations and verifies that replay matches the log.
+pub trait ReplayableSpace<C, P, A, K>: CheckpointableSpace<C, P, A, K>
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    /// Rig the space for replay and reset to a starting state.
+    ///
+    /// # Arguments
+    /// - `start_root`: The merkle root to reset to before replay
+    /// - `log`: The log of operations to replay
+    fn rig_and_reset(
+        &mut self,
+        start_root: Blake2b256Hash,
+        log: Log,
+    ) -> Result<(), SpaceError>;
+
+    /// Rig the space for replay without resetting.
+    ///
+    /// Note: Takes `&self` not `&mut self` per spec.
+    fn rig(&self, log: Log) -> Result<(), SpaceError>;
+
+    /// Check that replay data matches expectations.
+    ///
+    /// Verifies that all operations in the log were replayed correctly.
+    fn check_replay_data(&self) -> Result<(), SpaceError>;
+
+    /// Check if the space is in replay mode.
+    fn is_replay(&self) -> bool;
+
+    /// Update produce result during replay.
+    ///
+    /// Called after a produce operation in replay mode to record the result.
+    fn update_produce(&mut self, produce: Produce);
+}
+
+// ==========================================================================
+// Dynamic Space Agent (Type-Erased)
+// ==========================================================================
+
+/// Type-erased space agent for use in the registry.
+///
+/// This allows different space implementations to be stored in the same
+/// collection. Uses the same type parameters as the interpreter.
+pub type DynSpaceAgent<C, P, A, K> = Box<dyn SpaceAgent<C, P, A, K>>;
+
+/// Blanket implementation for boxed space agents.
+///
+/// This allows `Box` to be used where `SpaceAgent` is expected,
+/// enabling type-erased storage in registries and factories.
+impl<C, P, A, K> SpaceAgent<C, P, A, K> for Box<dyn SpaceAgent<C, P, A, K>>
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    fn space_id(&self) -> &SpaceId {
+        (**self).space_id()
+    }
+
+    fn qualifier(&self) -> SpaceQualifier {
+        (**self).qualifier()
+    }
+
+    fn gensym(&mut self) -> Result<C, SpaceError> {
+        (**self).gensym()
+    }
+
+    fn produce(
+        &mut self,
+        channel: C,
+        data: A,
+        persist: bool,
+        priority: Option<usize>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>, Produce)>, SpaceError> {
+        (**self).produce(channel, data, persist, priority)
+    }
+
+    fn consume(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+        persist: bool,
+        peeks: BTreeSet<i32>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>)>, SpaceError> {
+        (**self).consume(channels, patterns, continuation, persist, peeks)
+    }
+
+    fn consume_with_modifiers(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        modifiers: Vec<Vec<EFunction>>,
+        continuation: K,
+        persist: bool,
+        peeks: BTreeSet<i32>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>)>, SpaceError> {
+        (**self).consume_with_modifiers(channels, patterns, modifiers, continuation, persist, peeks)
+    }
+
+    fn install(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+    ) -> Result<Option<(K, Vec<A>)>, SpaceError> {
+        (**self).install(channels, patterns, continuation)
+    }
+
+    fn get_data(&self, channel: &C) -> Vec<Datum<A>> {
+        (**self).get_data(channel)
+    }
+
+    fn get_waiting_continuations(&self, channels: Vec<C>) -> Vec<WaitingContinuation<P, K>> {
+        (**self).get_waiting_continuations(channels)
+    }
+
+    fn get_joins(&self, channel: C) -> Vec<Vec<C>> {
+        (**self).get_joins(channel)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Basic compile-time check that traits are object-safe
+    fn _check_object_safety<C, P, A, K>(_: &dyn SpaceAgent<C, P, A, K>)
+    where
+        C: ChannelBound,
+        P: PatternBound,
+        A: DataBound,
+        K: ContinuationBound,
+    {
+    }
+}
diff --git a/rholang/src/rust/interpreter/spaces/async_agent.rs b/rholang/src/rust/interpreter/spaces/async_agent.rs
new file mode 100644
index 000000000..f6de17ddb
--- /dev/null
+++ b/rholang/src/rust/interpreter/spaces/async_agent.rs
@@ -0,0 +1,258 @@
+//! Async Space Agent Traits
+//!
+//! This module provides async versions of the SpaceAgent trait hierarchy
+//! for use with tokio-based runtime where mutex acquisition is async.
+//!
+//! These traits mirror the synchronous versions in `agent.rs` but use
+//! async methods, allowing proper integration with the async ISpace
+//! implementation from rspace++.
+
+use std::collections::BTreeSet;
+
+use async_trait::async_trait;
+
+use super::errors::SpaceError;
+use super::types::{ChannelBound, ContinuationBound, DataBound, PatternBound, SpaceId, SpaceQualifier};
+
+use rspace_plus_plus::rspace::{
+    checkpoint::{Checkpoint, SoftCheckpoint},
+    hashing::blake2b256_hash::Blake2b256Hash,
+    internal::{Datum, WaitingContinuation},
+    rspace_interface::{ContResult, RSpaceResult},
+    trace::{Log, event::Produce},
+};
+
+// ==========================================================================
+// Async Layer 3: Space Agent Core Trait
+// ==========================================================================
+
+/// Async version of the core trait for space operations.
+///
+/// This is the primary async interface for interacting with a space. It provides:
+/// - `produce`: Send data to a channel
+/// - `consume`: Receive data from channels (with pattern matching)
+/// - `gensym`: Generate unique channel names
+/// - `install`: Install persistent continuations
+///
+/// Type parameters:
+/// - `C`: Channel type
+/// - `P`: Pattern type
+/// - `A`: Data type
+/// - `K`: Continuation type
+#[async_trait]
+pub trait AsyncSpaceAgent<C, P, A, K>: Send + Sync
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    /// Get the space ID for this agent.
+    fn space_id(&self) -> &SpaceId;
+
+    /// Get the qualifier for this space.
+    fn qualifier(&self) -> SpaceQualifier;
+
+    /// Generate a new unique channel name in this space.
+    ///
+    /// The behavior depends on the outer storage type:
+    /// - HashMap: Returns a unique random-based name
+    /// - Array: Returns the next index, or `OutOfNames` if full
+    /// - Vector: Returns the next index, growing the vector
+    async fn gensym(&mut self) -> Result<C, SpaceError>;
+
+    /// Produce data on a channel.
+    ///
+    /// If a matching continuation is waiting, it is triggered and the match
+    /// result is returned. Otherwise, the data is stored at the channel.
+    ///
+    /// # Arguments
+    /// - `channel`: The channel to send on
+    /// - `data`: The data to send
+    /// - `persist`: Whether the data should persist after being consumed
+    /// - `priority`: Optional priority level for PriorityQueue collections (0 = highest)
+    ///
+    /// # Returns
+    /// - `Ok(Some(...))` if a continuation was triggered
+    /// - `Ok(None)` if the data was stored
+    /// - `Err(...)` on error (e.g., Cell already full)
+    async fn produce(
+        &mut self,
+        channel: C,
+        data: A,
+        persist: bool,
+        priority: Option<usize>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>, Produce)>, SpaceError>;
+
+    /// Consume data from channels.
+    ///
+    /// Searches for data matching the given patterns on the given channels.
+    /// If matches are found, the continuation is triggered immediately.
+    /// Otherwise, the continuation is stored to wait for matching data.
+    ///
+    /// # Arguments
+    /// - `channels`: The channels to receive from
+    /// - `patterns`: The patterns to match (one per channel)
+    /// - `continuation`: The continuation to execute on match
+    /// - `persist`: Whether the continuation should persist after triggering
+    /// - `peeks`: Set of channel indices to peek (don't consume)
+    ///
+    /// # Returns
+    /// - `Ok(Some(...))` if matching data was found
+    /// - `Ok(None)` if the continuation was stored
+    /// - `Err(...)` on error
+    async fn consume(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+        persist: bool,
+        peeks: BTreeSet<i32>,
+    ) -> Result<Option<(ContResult<C, P, K>, Vec<RSpaceResult<C, A>>)>, SpaceError>;
+
+    /// Install a persistent continuation.
+    ///
+    /// Similar to consume with `persist=true`, but always stores the continuation
+    /// even if matching data exists.
+    ///
+    /// # Arguments
+    /// - `channels`: The channels to receive from
+    /// - `patterns`: The patterns to match
+    /// - `continuation`: The continuation to install
+    ///
+    /// # Returns
+    /// - `Ok(Some(...))` if matching data was found (continuation still installed)
+    /// - `Ok(None)` if no matching data
+    async fn install(
+        &mut self,
+        channels: Vec<C>,
+        patterns: Vec<P>,
+        continuation: K,
+    ) -> Result<Option<(K, Vec<A>)>, SpaceError>;
+
+    /// Get data stored at a channel.
+    async fn get_data(&self, channel: &C) -> Vec<Datum<A>>;
+
+    /// Get waiting continuations for channels.
+    async fn get_waiting_continuations(&self, channels: Vec<C>) -> Vec<WaitingContinuation<P, K>>;
+
+    /// Get join patterns for a channel.
+    async fn get_joins(&self, channel: C) -> Vec<Vec<C>>;
+}
+
+// ==========================================================================
+// Async Layer 4: Checkpointable Space
+// ==========================================================================
+
+/// Async extension trait for checkpointing capabilities.
+///
+/// Checkpoints capture the state of a space at a point in time, allowing
+/// rollback for speculative execution or recovery from failures.
+#[async_trait]
+pub trait AsyncCheckpointableSpace<C, P, A, K>: AsyncSpaceAgent<C, P, A, K>
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    /// Create a persistent checkpoint.
+    ///
+    /// This captures the current state and writes it to the history trie.
+    /// The checkpoint can be used to reset the space to this state later.
+    async fn create_checkpoint(&mut self) -> Result<Checkpoint, SpaceError>;
+
+    /// Create a soft (non-persistent) checkpoint.
+    ///
+    /// This is faster than a full checkpoint but doesn't persist to storage.
+    /// Useful for speculative execution that may be rolled back.
+    async fn create_soft_checkpoint(&mut self) -> SoftCheckpoint<C, P, A, K>;
+
+    /// Revert to a soft checkpoint.
+    ///
+    /// Restores the space to the state captured in the soft checkpoint.
+    async fn revert_to_soft_checkpoint(
+        &mut self,
+        checkpoint: SoftCheckpoint<C, P, A, K>,
+    ) -> Result<(), SpaceError>;
+
+    /// Reset to a checkpoint by its merkle root.
+    ///
+    /// Restores the space to a previously created checkpoint.
+    async fn reset(&mut self, root: &Blake2b256Hash) -> Result<(), SpaceError>;
+
+    /// Clear all data and continuations.
+    ///
+    /// Does not affect the history trie.
+    async fn clear(&mut self) -> Result<(), SpaceError>;
+}
+
+// ==========================================================================
+// Async Layer 4: Replayable Space
+// ==========================================================================
+
+/// Async extension trait for deterministic replay.
+///
+/// Replay is used to re-execute a sequence of operations deterministically,
+/// typically for validation or recovery. The space is "rigged" with a log
+/// of expected operations and verifies that replay matches the log.
+#[async_trait]
+pub trait AsyncReplayableSpace<C, P, A, K>: AsyncCheckpointableSpace<C, P, A, K>
+where
+    C: ChannelBound,
+    P: PatternBound,
+    A: DataBound,
+    K: ContinuationBound,
+{
+    /// Rig the space for replay and reset to a starting state.
+    ///
+    /// # Arguments
+    /// - `start_root`: The merkle root to reset to before replay
+    /// - `log`: The log of operations to replay
+    async fn rig_and_reset(
+        &mut self,
+        start_root: Blake2b256Hash,
+        log: Log,
+    ) -> Result<(), SpaceError>;
+
+    /// Rig the space for replay without resetting.
+    async fn rig(&self, log: Log) -> Result<(), SpaceError>;
+
+    /// Check that replay data matches expectations.
+    ///
+    /// Verifies that all operations in the log were replayed correctly.
+    async fn check_replay_data(&self) -> Result<(), SpaceError>;
+
+    /// Check if the space is in replay mode.
+    fn is_replay(&self) -> bool;
+
+    /// Update produce result during replay.
+    ///
+    /// Called after a produce operation in replay mode to record the result.
+    async fn update_produce(&mut self, produce: Produce);
+}
+
+// ==========================================================================
+// Dynamic Async Space Agent (Type-Erased)
+// ==========================================================================
+
+/// Type-erased async space agent for use in the registry.
+///
+/// This allows different async space implementations to be stored in the same
+/// collection. Uses the same type parameters as the interpreter.
+pub type DynAsyncSpaceAgent<C, P, A, K> = Box<dyn AsyncSpaceAgent<C, P, A, K>>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Basic compile-time check that async traits are object-safe
+    fn _check_object_safety<C, P, A, K>(_: &dyn AsyncSpaceAgent<C, P, A, K>)
+    where
+        C: Clone + Eq + std::hash::Hash + Send + Sync + 'static,
+        P: Clone + Send + Sync + 'static,
+        A: Clone + Send + Sync + 'static,
+        K: Clone + Send + Sync + 'static,
+    {
+    }
+}
diff --git a/rholang/src/rust/interpreter/spaces/charging_agent.rs b/rholang/src/rust/interpreter/spaces/charging_agent.rs
new file mode 100644
index 000000000..ae46bae3b
--- /dev/null
+++ b/rholang/src/rust/interpreter/spaces/charging_agent.rs
@@ -0,0 +1,816 @@
+//! Charging Space Agent - Automatic Phlogiston Metering
+//!
+//! This module provides a wrapper around SpaceAgent that automatically charges
+//! phlogiston (gas) for all operations. It ensures that resource consumption
+//! is metered and bounded.
+//!
+//! # Formal Correspondence
+//! - `Phlogiston.v`: Charge preservation and non-negativity invariants
+//! - `GenericRSpace.v`: Integration of gas accounting with space operations
+//! - `Safety/Properties.v`: Resource exhaustion safety properties
+//!
+//! # Design
+//! The `ChargingSpaceAgent` wraps any `SpaceAgent` implementation
+//! and intercepts all operations to:
+//! 1. Calculate the phlogiston cost
+//! 2. Attempt to charge from the meter
+//! 3. Either proceed with the operation or return OutOfPhlogiston error
+//!
+//! This pattern follows the decorator design pattern, allowing gas accounting
+//! to be composed with any space implementation.
+ +use std::collections::{BTreeSet, HashMap}; +use std::hash::Hash; +use std::sync::Arc; + +use serde::{de::DeserializeOwned, Serialize}; + +use super::agent::{SpaceAgent, CheckpointableSpace, ReplayableSpace}; +use super::errors::SpaceError; +use super::phlogiston::{PhlogistonMeter, Operation}; +use super::types::{ChannelBound, ContinuationBound, DataBound, PatternBound, SpaceId, SpaceQualifier}; + +use rspace_plus_plus::rspace::{ + checkpoint::{Checkpoint, SoftCheckpoint}, + errors::RSpaceError, + hashing::blake2b256_hash::Blake2b256Hash, + internal::{Datum, Row, WaitingContinuation}, + rspace_interface::{ContResult, ISpace, MaybeConsumeResult, MaybeProduceResult, RSpaceResult}, + trace::{Log, event::Produce}, +}; + +// ============================================================================= +// Charging Space Agent +// ============================================================================= + +/// A wrapper that charges phlogiston for all space operations. +/// +/// This implements the `SpaceAgent` trait by delegating to an inner space +/// while charging the appropriate gas cost for each operation. +/// +/// # Type Parameters +/// - `S`: The underlying space agent type +/// - `C`: Channel type +/// - `P`: Pattern type +/// - `A`: Data type +/// - `K`: Continuation type +/// +/// # Invariants (from Phlogiston.v) +/// - All operations charge before executing (charge-before-execute) +/// - Failed charges result in OutOfPhlogiston error +/// - Successful operations reduce meter balance by operation cost +/// - Total consumed = sum of all successful operation costs +pub struct ChargingSpaceAgent +where + S: SpaceAgent, + C: ChannelBound, + P: PatternBound, + A: DataBound, + K: ContinuationBound, +{ + /// The underlying space agent. + inner: S, + /// The phlogiston meter for tracking gas consumption. + meter: Arc, + /// Phantom data for type parameters. 
+ _phantom: std::marker::PhantomData<(C, P, A, K)>, +} + +impl ChargingSpaceAgent +where + S: SpaceAgent, + C: ChannelBound, + P: PatternBound, + A: DataBound, + K: ContinuationBound, +{ + /// Create a new charging agent wrapping the given space. + /// + /// # Arguments + /// * `inner` - The underlying space agent + /// * `meter` - The phlogiston meter to use for charging + pub fn new(inner: S, meter: Arc) -> Self { + ChargingSpaceAgent { + inner, + meter, + _phantom: std::marker::PhantomData, + } + } + + /// Create a charging agent with unlimited phlogiston (for testing). + pub fn unlimited(inner: S) -> Self { + Self::new(inner, Arc::new(PhlogistonMeter::unlimited())) + } + + /// Get a reference to the underlying space agent. + pub fn inner(&self) -> &S { + &self.inner + } + + /// Get a mutable reference to the underlying space agent. + pub fn inner_mut(&mut self) -> &mut S { + &mut self.inner + } + + /// Get the phlogiston meter. + pub fn meter(&self) -> &PhlogistonMeter { + &self.meter + } + + /// Get the current phlogiston balance. + pub fn balance(&self) -> u64 { + self.meter.balance() + } + + /// Get total phlogiston consumed. + pub fn total_consumed(&self) -> u64 { + self.meter.total_consumed() + } + + /// Charge for an operation, returning error if insufficient phlogiston. 
+ fn charge(&self, operation: &Operation) -> Result<(), SpaceError> { + self.meter.charge(operation) + } +} + +impl SpaceAgent for ChargingSpaceAgent +where + S: SpaceAgent, + C: ChannelBound, + P: PatternBound, + A: DataBound, + K: ContinuationBound, +{ + fn space_id(&self) -> &SpaceId { + self.inner.space_id() + } + + fn qualifier(&self) -> SpaceQualifier { + self.inner.qualifier() + } + + fn gensym(&mut self) -> Result { + // Charge for channel creation + self.charge(&Operation::CreateChannel)?; + self.inner.gensym() + } + + fn produce( + &mut self, + channel: C, + data: A, + persist: bool, + priority: Option, + ) -> Result, Vec>, Produce)>, SpaceError> { + // Estimate data size for send cost + // In a real implementation, we'd serialize and measure the actual size + // For now, use a reasonable estimate based on type + let data_size = std::mem::size_of::(); + self.charge(&Operation::Send { data_size })?; + + self.inner.produce(channel, data, persist, priority) + } + + fn consume( + &mut self, + channels: Vec, + patterns: Vec

, + continuation: K, + persist: bool, + peeks: BTreeSet, + ) -> Result, Vec>)>, SpaceError> { + // Charge for receive and pattern matching + self.charge(&Operation::Receive)?; + + // Charge for pattern matching based on pattern count + let pattern_size = patterns.len(); + if pattern_size > 0 { + self.charge(&Operation::Match { pattern_size })?; + } + + self.inner.consume(channels, patterns, continuation, persist, peeks) + } + + fn install( + &mut self, + channels: Vec, + patterns: Vec

, + continuation: K, + ) -> Result)>, SpaceError> { + // Install is like consume but persistent - charge accordingly + self.charge(&Operation::Receive)?; + + let pattern_size = patterns.len(); + if pattern_size > 0 { + self.charge(&Operation::Match { pattern_size })?; + } + + self.inner.install(channels, patterns, continuation) + } + + fn get_data(&self, channel: &C) -> Vec> { + // Read operations are free (for now) as they don't modify state + // Could add lookup cost if needed + self.inner.get_data(channel) + } + + fn get_waiting_continuations(&self, channels: Vec) -> Vec> { + // Read operations are free + self.inner.get_waiting_continuations(channels) + } + + fn get_joins(&self, channel: C) -> Vec> { + // Read operations are free + self.inner.get_joins(channel) + } +} + +// ============================================================================= +// Checkpointable Implementation +// ============================================================================= + +impl CheckpointableSpace for ChargingSpaceAgent +where + S: CheckpointableSpace, + C: ChannelBound, + P: PatternBound, + A: DataBound, + K: ContinuationBound, +{ + fn create_checkpoint(&mut self) -> Result { + self.charge(&Operation::Checkpoint)?; + self.inner.create_checkpoint() + } + + fn create_soft_checkpoint(&mut self) -> SoftCheckpoint { + // Soft checkpoints are cheaper, could use a different cost + // For now, they're free as they don't persist + self.inner.create_soft_checkpoint() + } + + fn revert_to_soft_checkpoint( + &mut self, + checkpoint: SoftCheckpoint, + ) -> Result<(), SpaceError> { + // Reverting is free (part of soft checkpoint semantics) + self.inner.revert_to_soft_checkpoint(checkpoint) + } + + fn reset(&mut self, root: &Blake2b256Hash) -> Result<(), SpaceError> { + // Reset charges similar to replay + self.charge(&Operation::Replay { operation_count: 1 })?; + self.inner.reset(root) + } + + fn clear(&mut self) -> Result<(), SpaceError> { + // Clear is a management operation, 
could charge if needed + self.inner.clear() + } +} + +// ============================================================================= +// Replayable Implementation +// ============================================================================= + +impl ReplayableSpace for ChargingSpaceAgent +where + S: ReplayableSpace, + C: ChannelBound, + P: PatternBound, + A: DataBound, + K: ContinuationBound, +{ + fn rig_and_reset( + &mut self, + start_root: Blake2b256Hash, + log: Log, + ) -> Result<(), SpaceError> { + // Charge based on log size for replay + let operation_count = estimate_log_operations(&log); + self.charge(&Operation::Replay { operation_count })?; + self.inner.rig_and_reset(start_root, log) + } + + fn rig(&self, log: Log) -> Result<(), SpaceError> { + // Rigging without reset is a setup operation + // Note: This takes &self so we can't charge (would need interior mutability) + // In production, consider using a lock or making this &mut self + self.inner.rig(log) + } + + fn check_replay_data(&self) -> Result<(), SpaceError> { + // Checking is a validation operation, no charge + self.inner.check_replay_data() + } + + fn is_replay(&self) -> bool { + self.inner.is_replay() + } + + fn update_produce(&mut self, produce: Produce) { + // Update during replay is part of the replayed operation + self.inner.update_produce(produce) + } +} + +/// Estimate the number of operations in a log for charging purposes. +fn estimate_log_operations(log: &Log) -> usize { + // Log is Vec, so the count is simply the vector length + log.len() +} + +// ============================================================================= +// ISpace Implementation (for compatibility with RhoISpace) +// ============================================================================= + +/// ISpace implementation for ChargingSpaceAgent. +/// +/// This allows ChargingSpaceAgent to be used where `Box>` is expected, +/// enabling phlogiston metering for user-created spaces that use the ISpace interface. 
+/// +/// # Charging Behavior +/// - `produce`, `consume`, `consume_with_similarity`: Charge based on data/pattern size +/// - `install`: Charge for receive + pattern matching +/// - `create_checkpoint`: Charge for checkpoint creation +/// - `rig_and_reset`: Charge based on log size +/// - Read operations (`get_data`, `get_joins`, etc.): Free (no state modification) +/// - `clear`, `reset`: Free management operations +impl ISpace for ChargingSpaceAgent +where + S: SpaceAgent + ISpace, + C: ChannelBound + Eq + Hash + AsRef<[u8]> + Serialize + DeserializeOwned, + P: PatternBound + Serialize + DeserializeOwned, + A: DataBound + Serialize + DeserializeOwned, + K: ContinuationBound + Serialize + DeserializeOwned, +{ + fn create_checkpoint(&mut self) -> Result { + self.charge(&Operation::Checkpoint) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + ISpace::create_checkpoint(&mut self.inner) + } + + fn get_data(&self, channel: &C) -> Vec> { + // Read operations are free + ISpace::get_data(&self.inner, channel) + } + + fn get_waiting_continuations(&self, channels: Vec) -> Vec> { + // Read operations are free + ISpace::get_waiting_continuations(&self.inner, channels) + } + + fn get_joins(&self, channel: C) -> Vec> { + // Read operations are free + ISpace::get_joins(&self.inner, channel) + } + + fn clear(&mut self) -> Result<(), RSpaceError> { + // Clear is a management operation, no charge + ISpace::clear(&mut self.inner) + } + + fn reset(&mut self, root: &Blake2b256Hash) -> Result<(), RSpaceError> { + // Reset charges similar to replay + self.charge(&Operation::Replay { operation_count: 1 }) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + ISpace::reset(&mut self.inner, root) + } + + fn consume_result( + &mut self, + channel: Vec, + pattern: Vec

, + ) -> Result)>, RSpaceError> { + // Charge for receive + match + self.charge(&Operation::Receive) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + let pattern_size = pattern.len(); + if pattern_size > 0 { + self.charge(&Operation::Match { pattern_size }) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + } + ISpace::consume_result(&mut self.inner, channel, pattern) + } + + fn to_map(&self) -> HashMap, Row> { + // Read operation, no charge + ISpace::to_map(&self.inner) + } + + fn create_soft_checkpoint(&mut self) -> SoftCheckpoint { + // Soft checkpoints are free (no persistence) + ISpace::create_soft_checkpoint(&mut self.inner) + } + + fn revert_to_soft_checkpoint( + &mut self, + checkpoint: SoftCheckpoint, + ) -> Result<(), RSpaceError> { + // Reverting is free + ISpace::revert_to_soft_checkpoint(&mut self.inner, checkpoint) + } + + fn consume( + &mut self, + channels: Vec, + patterns: Vec

, + continuation: K, + persist: bool, + peeks: BTreeSet, + ) -> Result, RSpaceError> { + // Charge for receive + self.charge(&Operation::Receive) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + + // Charge for pattern matching + let pattern_size = patterns.len(); + if pattern_size > 0 { + self.charge(&Operation::Match { pattern_size }) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + } + + ISpace::consume(&mut self.inner, channels, patterns, continuation, persist, peeks) + } + + fn consume_with_modifiers( + &mut self, + channels: Vec, + patterns: Vec

, + modifiers: Vec>, + continuation: K, + persist: bool, + peeks: BTreeSet, + ) -> Result, RSpaceError> { + // Charge for receive + self.charge(&Operation::Receive) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + + // Charge for pattern matching + let pattern_size = patterns.len(); + if pattern_size > 0 { + self.charge(&Operation::Match { pattern_size }) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + } + + // Charge extra for VectorDB operations if any modifiers present + for modifier_bytes in &modifiers { + if !modifier_bytes.is_empty() { + // Estimate dimensions from serialized modifier size + let dimensions = modifier_bytes.len() / std::mem::size_of::(); + self.charge(&Operation::VectorDbSearch { dimensions }) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + } + } + + ISpace::consume_with_modifiers(&mut self.inner, channels, patterns, modifiers, continuation, persist, peeks) + } + + fn produce( + &mut self, + channel: C, + data: A, + persist: bool, + priority: Option, + ) -> Result, RSpaceError> { + // Estimate data size for send cost + let data_size = std::mem::size_of::(); + self.charge(&Operation::Send { data_size }) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + + ISpace::produce(&mut self.inner, channel, data, persist, priority) + } + + fn install( + &mut self, + channels: Vec, + patterns: Vec

, + continuation: K, + ) -> Result)>, RSpaceError> { + // Install is like consume but persistent + self.charge(&Operation::Receive) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + + let pattern_size = patterns.len(); + if pattern_size > 0 { + self.charge(&Operation::Match { pattern_size }) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + } + + ISpace::install(&mut self.inner, channels, patterns, continuation) + } + + fn rig_and_reset(&mut self, start_root: Blake2b256Hash, log: Log) -> Result<(), RSpaceError> { + // Charge based on log size for replay + let operation_count = estimate_log_operations(&log); + self.charge(&Operation::Replay { operation_count }) + .map_err(|e| RSpaceError::InterpreterError(e.to_string()))?; + ISpace::rig_and_reset(&mut self.inner, start_root, log) + } + + fn rig(&self, log: Log) -> Result<(), RSpaceError> { + // Rigging is a setup operation, no charge (takes &self anyway) + ISpace::rig(&self.inner, log) + } + + fn check_replay_data(&self) -> Result<(), RSpaceError> { + // Validation only, no charge + ISpace::check_replay_data(&self.inner) + } + + fn is_replay(&self) -> bool { + ISpace::is_replay(&self.inner) + } + + fn update_produce(&mut self, produce: Produce) { + // Update during replay is part of replayed operation + ISpace::update_produce(&mut self.inner, produce) + } +} + +// ============================================================================= +// Builder for Charging Agent +// ============================================================================= + +/// Builder for creating charging space agents with custom configuration. +pub struct ChargingAgentBuilder { + inner: Option, + meter: Option>, + initial_limit: Option, +} + +impl ChargingAgentBuilder { + /// Create a new builder. + pub fn new() -> Self { + ChargingAgentBuilder { + inner: None, + meter: None, + initial_limit: None, + } + } + + /// Set the inner space agent. 
+ pub fn with_space(mut self, space: S) -> Self { + self.inner = Some(space); + self + } + + /// Set a shared phlogiston meter. + pub fn with_meter(mut self, meter: Arc) -> Self { + self.meter = Some(meter); + self + } + + /// Set the initial phlogiston limit (creates a new meter). + pub fn with_limit(mut self, limit: u64) -> Self { + self.initial_limit = Some(limit); + self + } + + /// Build the charging agent. + /// + /// # Errors + /// + /// Returns `SpaceError::BuilderIncomplete` if no inner space was provided. + /// + /// # Examples + /// + /// ```ignore + /// let charging_agent = ChargingAgentBuilder::new() + /// .with_space(inner_space) + /// .with_limit(1_000_000) + /// .build()?; + /// ``` + pub fn build(self) -> Result, SpaceError> + where + S: SpaceAgent, + C: ChannelBound, + P: PatternBound, + A: DataBound, + K: ContinuationBound, + { + let inner = self.inner.ok_or(SpaceError::BuilderIncomplete { + builder: "ChargingAgentBuilder", + missing_field: "inner (use with_space())", + })?; + let meter = self.meter.unwrap_or_else(|| { + let limit = self.initial_limit.unwrap_or(10_000_000); + Arc::new(PhlogistonMeter::new(limit)) + }); + + Ok(ChargingSpaceAgent::new(inner, meter)) + } + + /// Build the charging agent, panicking if incomplete. + /// + /// This is a convenience method for cases where you're certain the builder + /// is complete. Prefer `build()` for production code. + /// + /// # Panics + /// + /// Panics if no inner space was provided. 
+ pub fn build_unchecked(self) -> ChargingSpaceAgent + where + S: SpaceAgent, + C: ChannelBound, + P: PatternBound, + A: DataBound, + K: ContinuationBound, + { + self.build().expect("ChargingAgentBuilder incomplete") + } +} + +impl Default for ChargingAgentBuilder { + fn default() -> Self { + Self::new() + } +} + +// ============================================================================= +// Tests +// ============================================================================= + +#[cfg(test)] +mod tests { + use super::*; + + // Mock space agent for testing + struct MockSpaceAgent { + id: SpaceId, + gensym_count: usize, + } + + impl MockSpaceAgent { + fn new() -> Self { + MockSpaceAgent { + id: SpaceId::default_space(), + gensym_count: 0, + } + } + } + + impl SpaceAgent for MockSpaceAgent { + fn space_id(&self) -> &SpaceId { + &self.id + } + + fn qualifier(&self) -> SpaceQualifier { + SpaceQualifier::Default + } + + fn gensym(&mut self) -> Result { + self.gensym_count += 1; + Ok(self.gensym_count as u64) + } + + fn produce( + &mut self, + _channel: u64, + _data: String, + _persist: bool, + _priority: Option, + ) -> Result, Vec>, Produce)>, SpaceError> { + Ok(None) + } + + fn consume( + &mut self, + _channels: Vec, + _patterns: Vec, + _continuation: String, + _persist: bool, + _peeks: BTreeSet, + ) -> Result, Vec>)>, SpaceError> { + Ok(None) + } + + fn install( + &mut self, + _channels: Vec, + _patterns: Vec, + _continuation: String, + ) -> Result)>, SpaceError> { + Ok(None) + } + + fn get_data(&self, _channel: &u64) -> Vec> { + vec![] + } + + fn get_waiting_continuations(&self, _channels: Vec) -> Vec> { + vec![] + } + + fn get_joins(&self, _channel: u64) -> Vec> { + vec![] + } + } + + #[test] + fn test_charging_agent_gensym_charges() { + let mock = MockSpaceAgent::new(); + let meter = Arc::new(PhlogistonMeter::new(1000)); + let mut agent = ChargingSpaceAgent::new(mock, meter.clone()); + + let initial = agent.balance(); + assert!(agent.gensym().is_ok()); + + 
// Balance should have decreased by CHANNEL_CREATE_COST + assert!(agent.balance() < initial); + assert_eq!(agent.total_consumed(), super::super::phlogiston::CHANNEL_CREATE_COST); + } + + #[test] + fn test_charging_agent_out_of_phlogiston() { + let mock = MockSpaceAgent::new(); + // Very low limit + let meter = Arc::new(PhlogistonMeter::new(10)); + let mut agent = ChargingSpaceAgent::new(mock, meter); + + // Should fail due to insufficient phlogiston + let result = agent.gensym(); + assert!(result.is_err()); + + match result { + Err(SpaceError::OutOfPhlogiston { required, available, .. }) => { + assert_eq!(available, 10); + assert!(required > 10); + } + _ => panic!("Expected OutOfPhlogiston error"), + } + } + + #[test] + fn test_charging_agent_produce_charges() { + let mock = MockSpaceAgent::new(); + let meter = Arc::new(PhlogistonMeter::new(10_000)); + let mut agent = ChargingSpaceAgent::new(mock, meter); + + let initial = agent.balance(); + assert!(agent.produce(1, "test".to_string(), false, None).is_ok()); + + assert!(agent.balance() < initial); + assert!(agent.total_consumed() > 0); + } + + #[test] + fn test_charging_agent_consume_charges() { + let mock = MockSpaceAgent::new(); + let meter = Arc::new(PhlogistonMeter::new(10_000)); + let mut agent = ChargingSpaceAgent::new(mock, meter); + + let initial = agent.balance(); + assert!(agent.consume( + vec![1], + vec!["pattern".to_string()], + "cont".to_string(), + false, + BTreeSet::new() + ).is_ok()); + + // Should charge for both receive and match + assert!(agent.balance() < initial); + assert!(agent.total_consumed() > super::super::phlogiston::RECEIVE_BASE_COST); + } + + #[test] + fn test_builder() { + let mock = MockSpaceAgent::new(); + let agent: ChargingSpaceAgent<_, u64, String, String, String> = ChargingAgentBuilder::new() + .with_space(mock) + .with_limit(5000) + .build() + .expect("Builder should succeed with all required fields"); + + assert_eq!(agent.balance(), 5000); + } + + #[test] + fn 
test_builder_incomplete_returns_error() { + let result: Result, _> = + ChargingAgentBuilder::new() + .with_limit(5000) + // Missing .with_space() + .build(); + + assert!(result.is_err()); + if let Err(SpaceError::BuilderIncomplete { builder, missing_field }) = result { + assert_eq!(builder, "ChargingAgentBuilder"); + assert!(missing_field.contains("inner")); + } else { + panic!("Expected BuilderIncomplete error"); + } + } + + #[test] + fn test_unlimited_agent() { + let mock = MockSpaceAgent::new(); + let mut agent: ChargingSpaceAgent<_, u64, String, String, String> = + ChargingSpaceAgent::unlimited(mock); + + // Should be able to do many operations without running out + for _ in 0..1000 { + assert!(agent.gensym().is_ok()); + } + } +} diff --git a/rholang/src/rust/interpreter/spaces/generic_rspace.rs b/rholang/src/rust/interpreter/spaces/generic_rspace.rs new file mode 100644 index 000000000..a7d23cf41 --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/generic_rspace.rs @@ -0,0 +1,3846 @@ +//! GenericRSpace - Parameterized RSpace Implementation +//! +//! This module implements the core `GenericRSpace` struct as specified in the +//! "Reifying RSpaces" specification (Lines 641-715). It provides a flexible, +//! parameterized implementation of the RSpace tuple space. +//! +//! # Type Parameters +//! +//! - `CS`: ChannelStore - How channels are indexed (HashMap, PathMap, Array, etc.) +//! - `M`: Match - Pattern matching strategy (RholangMatch, VectorDBMatch, etc.) +//! - `C`: Channel type +//! - `P`: Pattern type +//! - `A`: Data type +//! - `K`: Continuation type +//! - `DC`: DataCollection - How data is stored at each channel (Bag, Queue, Stack, etc.) +//! - `CC`: ContinuationCollection - How continuations are stored (Bag, Queue, Stack, etc.) +//! +//! # Design +//! +//! The GenericRSpace combines: +//! - A ChannelStore for channel indexing and data/continuation storage +//! - A Matcher for pattern matching semantics +//! 
- Optional HistoryStore for checkpointing +//! - SpaceQualifier for persistence and concurrency behavior +//! +//! This design enables creating spaces with different combinations of: +//! - Storage strategies (HashMap for O(1), PathMap for hierarchical, etc.) +//! - Matching semantics (structural, similarity-based, etc.) +//! - Persistence behavior (Default, Temp, Seq) + +use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::fmt::Debug; +use std::hash::Hash; + +use smallvec::SmallVec; +use dashmap::DashMap; +use serde::{Serialize, Deserialize, de::DeserializeOwned}; +use std::collections::HashSet; +use rspace_plus_plus::rspace::{ + checkpoint::{Checkpoint, SoftCheckpoint}, + hashing::blake2b256_hash::Blake2b256Hash, + hot_store::HotStoreState, + internal::{Datum, WaitingContinuation, MultisetMultiMap}, + rspace_interface::{ContResult, RSpaceResult}, + trace::{Log, event::{Produce, Event, IOEvent, COMM}}, +}; + +use models::rhoapi::{EFunction, GPrivate, GUnforgeable, ListParWithRandom, Par, g_unforgeable::UnfInstance}; +use uuid::Uuid; + +use super::agent::{CheckpointableSpace, ReplayableSpace, SpaceAgent}; +use super::channel_store::ChannelStore; +use super::collections::{ContinuationCollection, DataCollection, EmbeddingType, SimilarityCollection, SimilarityMetric}; +use super::errors::SpaceError; +use super::history::BoxedHistoryStore; +use super::matcher::Match; +use super::types::{BoxedTheory, SpaceConfig, SpaceId, SpaceQualifier, Validatable, TheoryValidator, get_path_suffix, SuffixKey}; +use super::similarity_extraction::{ + extract_embedding_from_map, extract_channel_id_from_par, + compute_cosine_similarity, compute_dot_product, compute_euclidean_similarity, + compute_manhattan_similarity, compute_hamming_similarity, compute_jaccard_similarity, +}; + +// Re-export for backward compatibility +pub use super::similarity_extraction::ExtractedModifiers; + +// Also re-export extract_modifiers_from_efunctions for backward compatibility +pub use 
super::similarity_extraction::extract_modifiers_from_efunctions; + +// ============================================================================= +// GenericRSpace Struct +// ============================================================================= + +/// Generic RSpace parameterized by storage strategy and matcher. +/// +/// This is the core implementation of a reified RSpace. It provides the full +/// `SpaceAgent` and `CheckpointableSpace` interfaces while being flexible +/// enough to support different storage and matching strategies. +/// +/// # Examples +/// +/// ```ignore +/// // Create a HashMap-based space with exact matching +/// let store = HashMapChannelStore::new(BagDataCollection::new, BagContinuationCollection::new); +/// let matcher = ExactMatch::new(); +/// let space = GenericRSpace::new( +/// store, +/// matcher, +/// SpaceId::default_space(), +/// SpaceQualifier::Default, +/// ); +/// ``` +pub struct GenericRSpace +where + CS: ChannelStore, + M: Match, +{ + /// The channel store for data and continuation storage + channel_store: CS, + + /// The pattern matcher + matcher: M, + + /// Unique identifier for this space + space_id: SpaceId, + + /// Qualifier determining persistence and concurrency behavior + qualifier: SpaceQualifier, + + /// Optional history store for checkpointing (None for temp spaces) + history_store: Option, + + /// Soft checkpoint for speculative execution rollback + soft_checkpoint: Option>, + + /// Replay log for deterministic replay (if in replay mode) + replay_log: Option, + + /// Whether this space is in replay mode + is_replay: bool, + + /// Replay data for tracking COMM events during replay verification. + /// Maps IOEvents to their associated COMM events from the log. + /// + /// Note: MultisetMultiMap uses DashMap internally, providing lock-free concurrent access. + /// No outer RwLock needed since all MultisetMultiMap methods take &self. 
+ replay_data: MultisetMultiMap, + + /// Optional theory for data validation before storage. + /// + /// If set, all data sent to this space will be validated against the theory + /// before being stored. Invalid data will be rejected with a TheoryValidationError. + /// + /// Formal Correspondence: GenericRSpace.v (produce_validates_data) + theory: Option, + + /// Stack of channel store snapshots for nested soft checkpoints. + /// + /// Each soft checkpoint stores a clone of the channel store at that point. + /// Reverting pops and restores from this stack. + soft_checkpoint_stack: Vec<(CS, usize)>, // (channel_store_snapshot, gensym_counter_at_checkpoint) + + /// Similarity query matrices for efficient batch similarity computation. + /// + /// Keyed by channel, each matrix stores normalized query embeddings from + /// waiting similarity-based continuations. When `produce()` stores new data, + /// it queries these matrices using SIMD-optimized matrix-vector multiplication + /// to find matching continuations. + /// + /// # Architecture + /// + /// This enables the store-first approach for similarity matching: + /// 1. `produce()` stores data in VectorDB (embedding gets normalized) + /// 2. `produce()` queries the channel's matrix with the normalized embedding + /// 3. SIMD batch computation finds all queries with similarity >= threshold + /// 4. Best-matching continuation is fired + /// + /// The counter tracks the next continuation ID to assign. + /// + /// # Lazy Allocation + /// Wrapped in Option to save ~100 bytes per space that doesn't use similarity queries. + /// Most spaces don't use VectorDB/similarity features, so this is None by default. + similarity_queries: Option>, + + /// Counter for generating unique continuation IDs for similarity queries. + next_continuation_id: std::sync::atomic::AtomicUsize, + + /// Registry of lazy result producers for similarity query result channels. 
+ /// + /// When a similarity query matches, instead of returning all results immediately, + /// we create a lazy result channel and store a `LazyResultProducer` here. + /// When consumers try to read from the lazy channel, the producer provides + /// documents one at a time, enabling: + /// + /// - **Lazy evaluation**: Documents computed on-demand + /// - **Early termination**: Consumer can stop after first N results + /// - **Backpressure**: Production rate controlled by consumption + /// + /// # Key Format + /// + /// The key is the result channel ID (GPrivate bytes). When `consume()` is called + /// on a channel, we check if it's a lazy result channel by looking up its ID here. + /// + /// # Lifecycle + /// + /// 1. `consume_with_similarity()` creates result channel + `LazyResultProducer` + /// 2. Producer is stored here with result channel ID as key + /// 3. Consumer calls `for (@doc <- resultCh)` which triggers `consume()` + /// 4. `consume()` detects lazy channel, calls `producer.produce_next()` + /// 5. Next document is produced to the channel + /// 6. When exhausted, Nil is produced and entry is removed + /// + /// # Lazy Allocation + /// Wrapped in Option to save memory when not using lazy result channels. + /// Only allocated when the first similarity query creates a lazy channel. + lazy_producers: Option, super::collections::LazyResultProducer>>, +} + +// ============================================================================= +// Type Aliases for Common Collection Types +// ============================================================================= +// +// These aliases reduce the 8 type parameters of GenericRSpace to more manageable +// configurations for common use cases. 
+ +// Collection type aliases (internal) +type BagDC = super::collections::BagDataCollection; +type BagCC = super::collections::BagContinuationCollection; +type QueueDC = super::collections::QueueDataCollection; +type QueueCC = super::collections::QueueContinuationCollection; +type StackDC = super::collections::StackDataCollection; +type StackCC = super::collections::StackContinuationCollection; +type SetDC = super::collections::SetDataCollection; +type SetCC = super::collections::SetContinuationCollection; + +/// Backward-compatible type alias for GenericRSpace with Bag collections. +/// +/// This preserves a convenient type alias for GenericRSpace with a specific +/// channel store that uses Bag collections. +/// +/// # Example +/// ```ignore +/// // Using BagRSpace with HashMapChannelStore +/// type MySpace = BagRSpace, ExactMatch>; +/// ``` +pub type BagRSpace = GenericRSpace; + +/// GenericRSpace with FIFO Queue collections. +/// +/// Use this for spaces where data and continuation processing order matters. +/// First-in-first-out semantics ensure fair processing. +pub type QueueRSpace = GenericRSpace; + +/// GenericRSpace with LIFO Stack collections. +/// +/// Use this for spaces where last-in-first-out processing is desired, +/// such as recursive computation or depth-first exploration. +pub type StackRSpace = GenericRSpace; + +/// GenericRSpace with Set collections (idempotent operations). +/// +/// Use this for spaces where duplicate data is not meaningful. +/// Produces to the same channel with identical data are idempotent. +pub type SetRSpace = GenericRSpace; + +// ============================================================================= +// Concrete Channel Store + Collection Type Aliases +// ============================================================================= +// +// These aliases fix both the channel store and collection types, requiring only +// the matcher, channel, pattern, data, and continuation types. 
+
+use super::channel_store::{HashMapChannelStore, PathMapChannelStore, VectorChannelStore};
+
+// NOTE(review): the alias bodies below show unbalanced angle brackets
+// (e.g. `HashMapChannelStore, BagCC>`) — generic argument lists were
+// evidently stripped in this rendering; restore from the canonical source.
+
+/// HashMap-based space with Bag collections.
+///
+/// The most common configuration: O(1) channel lookup with unordered data storage.
+///
+/// # Type Parameters
+/// - `M`: Matcher (e.g., ExactMatch, VectorDBMatch)
+/// - `C`: Channel type
+/// - `P`: Pattern type
+/// - `A`: Data type
+/// - `K`: Continuation type
+///
+/// # Example
+/// ```ignore
+/// type MySpace = HashMapBagSpace, Par, Par, ListParWithRandom, TaggedContinuation>;
+/// ```
+pub type HashMapBagSpace = GenericRSpace<
+    HashMapChannelStore, BagCC>,
+    M,
+>;
+
+/// HashMap-based space with Queue collections (FIFO).
+///
+/// O(1) channel lookup with FIFO data processing order.
+pub type HashMapQueueSpace = GenericRSpace<
+    HashMapChannelStore, QueueCC>,
+    M,
+>;
+
+/// HashMap-based space with Stack collections (LIFO).
+///
+/// O(1) channel lookup with LIFO data processing order.
+pub type HashMapStackSpace = GenericRSpace<
+    HashMapChannelStore, StackCC>,
+    M,
+>;
+
+/// HashMap-based space with Set collections (idempotent).
+///
+/// O(1) channel lookup with idempotent data storage.
+pub type HashMapSetSpace = GenericRSpace<
+    HashMapChannelStore, SetCC>,
+    M,
+>;
+
+/// PathMap-based space with Bag collections.
+///
+/// Hierarchical channel addressing with prefix semantics.
+/// Channels are `Vec` paths, enabling prefix aggregation.
+///
+/// # Example
+/// ```ignore
+/// // A consume on @[0,1] can match data at @[0,1,2], @[0,1,3], etc.
+/// type HierarchicalSpace = PathMapBagSpace, Par, ListParWithRandom, TaggedContinuation>;
+/// ```
+pub type PathMapBagSpace = GenericRSpace<
+    PathMapChannelStore, BagCC>,
+    M,
+>;
+
+/// PathMap-based space with Queue collections.
+pub type PathMapQueueSpace = GenericRSpace<
+    PathMapChannelStore, QueueCC>,
+    M,
+>;
+
+/// Vector-based space with Bag collections.
+///
+/// Uses integer channel indices for O(1) lookup with dense allocation.
+/// Best when channels are sequential integers starting from 0.
+/// For Rholang integration, use `VectorChannelStore` directly.
+pub type VectorBagSpace = GenericRSpace<
+    VectorChannelStore, BagCC>,
+    M,
+>;
+
+/// Vector-based space with Queue collections.
+/// For Rholang integration, use `VectorChannelStore` directly.
+pub type VectorQueueSpace = GenericRSpace<
+    VectorChannelStore, QueueCC>,
+    M,
+>;
+
+// =============================================================================
+// Prefix Match Result
+// =============================================================================
+
+/// Internal struct for tracking match results with prefix semantics.
+///
+/// When prefix semantics are enabled (e.g., PathMap storage), a consume on
+/// channel `@[0,1]` may find data at `@[0,1,2]`. This struct tracks both
+/// the consume pattern channel and the actual channel where data was found.
+///
+/// # Fields
+/// - `consume_channel_idx`: Index into the consume channels array
+/// - `actual_channel`: The channel where data was actually found
+/// - `data`: The matched data
+/// - `suffix_key`: The path suffix (empty for exact matches)
+/// - `is_peek`: Whether this was a peek operation
+// NOTE(review): `PrefixMatchResult` is declared with no generic parameter
+// list here, yet its fields use `C` and `A` — the `<C, A>` list appears to
+// have been stripped in this rendering; restore from the canonical source.
+#[derive(Clone, Debug)]
+struct PrefixMatchResult {
+    /// Index of the consume channel in the pattern
+    consume_channel_idx: usize,
+    /// The actual channel where data was found (may differ from consume channel)
+    actual_channel: C,
+    /// The matched data
+    data: A,
+    /// Suffix key for prefix matches (empty if exact match)
+    suffix_key: SuffixKey,
+    /// Whether this was a peek operation
+    is_peek: bool,
+}
+
+// =============================================================================
+// Serialized State
+// =============================================================================
+
+/// Serializable representation of GenericRSpace state for checkpointing.
+///
+/// This struct captures the essential state that needs to be persisted:
+/// - All data collections indexed by channel
+/// - All continuation collections indexed by channel pattern
+/// - All join patterns
+/// - The gensym counter for generating unique names
+/// - The space qualifier for persistence behavior
+/// - The space ID
+///
+/// # Formal Correspondence
+/// - `Checkpoint.v`: checkpoint_preserves_state theorem
+/// - `CheckpointReplay.tla`: HardCheckpoint action
+// NOTE(review): field types below show partially-stripped generics (e.g.
+// `Vec<(Vec, Vec<(Vec, K, bool)>)>`) — the elementwise `<C>`/`<P>` argument
+// lists were evidently lost in this rendering; restore from the canonical
+// source.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct SerializedState {
+    /// Data collections: (channel, list of (data, persist_flag))
+    pub data: Vec<(C, Vec<(A, bool)>)>,
+
+    /// Continuation collections: (channel_pattern, list of (patterns, continuation, persist_flag))
+    pub continuations: Vec<(Vec, Vec<(Vec, K, bool)>)>,
+
+    /// Join patterns: (channel, list of join patterns involving this channel)
+    pub joins: Vec<(C, Vec>)>,
+
+    /// The gensym counter for unique name generation
+    pub gensym_counter: usize,
+
+    /// The space qualifier (Default, Temp, Seq)
+    pub qualifier: SpaceQualifier,
+
+    /// The space ID
+    pub space_id: Vec,
+}
+
+impl GenericRSpace
+where
+    CS: ChannelStore,
+    M: Match,
+    C: Clone + Eq + Hash + Send + Sync,
+    P: Clone + Send + Sync,
+    A: Clone + Send + Sync + std::fmt::Debug + 'static,
+    K: Clone + Send + Sync,
+    DC: DataCollection + Default + Clone + Send + Sync + 'static,
+    CC: ContinuationCollection + Default + Clone + Send + Sync,
+{
+    /// Create a new GenericRSpace with the given configuration.
+    ///
+    /// The returned space has no history store, no theory, and no replay
+    /// state; similarity-query and lazy-producer maps are left unallocated
+    /// until first use.
+    ///
+    /// # Arguments
+    /// - `channel_store`: The channel store implementation
+    /// - `matcher`: The pattern matcher implementation
+    /// - `space_id`: Unique identifier for this space
+    /// - `qualifier`: Persistence and concurrency behavior
+    pub fn new(
+        channel_store: CS,
+        matcher: M,
+        space_id: SpaceId,
+        qualifier: SpaceQualifier,
+    ) -> Self {
+        GenericRSpace {
+            channel_store,
+            matcher,
+            space_id,
+            qualifier,
+            history_store: None,
+            soft_checkpoint: None,
+            replay_log: None,
+            is_replay: false,
+            replay_data: MultisetMultiMap::empty(),
+            theory: None,
+            soft_checkpoint_stack: Vec::new(),
+            similarity_queries: None, // Lazy allocation - only allocated when needed
+            next_continuation_id: std::sync::atomic::AtomicUsize::new(0),
+            lazy_producers: None, // Lazy allocation - only allocated when needed
+        }
+    }
+
+    /// Create a new GenericRSpace with a history store for checkpointing.
+    ///
+    /// Identical to `new()` except that `history_store` is set, enabling
+    /// checkpoint/restore support.
+    ///
+    /// # Arguments
+    /// - `channel_store`: The channel store implementation
+    /// - `matcher`: The pattern matcher implementation
+    /// - `space_id`: Unique identifier for this space
+    /// - `qualifier`: Persistence and concurrency behavior
+    /// - `history_store`: The history store for checkpointing
+    pub fn with_history(
+        channel_store: CS,
+        matcher: M,
+        space_id: SpaceId,
+        qualifier: SpaceQualifier,
+        history_store: BoxedHistoryStore,
+    ) -> Self {
+        GenericRSpace {
+            channel_store,
+            matcher,
+            space_id,
+            qualifier,
+            history_store: Some(history_store),
+            soft_checkpoint: None,
+            replay_log: None,
+            is_replay: false,
+            replay_data: MultisetMultiMap::empty(),
+            theory: None,
+            soft_checkpoint_stack: Vec::new(),
+            similarity_queries: None, // Lazy allocation - only allocated when needed
+            next_continuation_id: std::sync::atomic::AtomicUsize::new(0),
+            lazy_producers: None, // Lazy allocation - only allocated when needed
+        }
+    }
+
+    /// Set a theory for data validation on this space.
+    ///
+    /// When a theory is set, all data produced to this space will be validated
+    /// against the theory before being stored. Invalid data will be rejected
+    /// with a `SpaceError::TheoryValidationError`.
+    ///
+    /// # Arguments
+    /// - `theory`: The theory to validate data against
+    ///   (NOTE(review): the parameter is shown as bare `Option` — the inner
+    ///   type, presumably `BoxedTheory`, was stripped in this rendering)
+    ///
+    /// # Example
+    /// ```ignore
+    /// let theory = SimpleTypeTheory::new("NatTheory", vec!["Nat".to_string()]);
+    /// space.set_theory(Some(Box::new(theory)));
+    /// ```
+    pub fn set_theory(&mut self, theory: Option) {
+        self.theory = theory;
+    }
+
+    /// Get a reference to the theory (if any).
+    pub fn theory(&self) -> Option<&BoxedTheory> {
+        self.theory.as_ref()
+    }
+
+    /// Check if this space has a theory configured.
+    pub fn has_theory(&self) -> bool {
+        self.theory.is_some()
+    }
+
+    /// Validate data against this space's theory.
+    ///
+    /// If no theory is configured, validation always succeeds.
+    /// If a theory is configured, the data is validated against it.
+    ///
+    /// # Arguments
+    /// - `data`: The data to validate, implementing Validatable
+    ///
+    /// # Returns
+    /// - `Ok(())` if validation passes or no theory is configured
+    /// - `Err(SpaceError::TheoryValidationError)` if validation fails
+    ///
+    /// # Formal Correspondence
+    /// GenericRSpace.v: `validate_data_sound` theorem
+    // NOTE(review): `&V` is used but no `<V: ...>` parameter list is visible —
+    // the generic list appears stripped in this rendering.
+    pub fn validate(&self, data: &V) -> Result<(), SpaceError> {
+        match &self.theory {
+            Some(theory) => theory.validate_data(data),
+            None => Ok(()), // No theory = accept everything
+        }
+    }
+
+    /// Validate data using a string representation.
+    ///
+    /// This is useful when the data type doesn't implement Validatable,
+    /// but you can provide a string representation for validation.
+    ///
+    /// # Arguments
+    /// - `term`: The string representation of the data to validate
+    ///
+    /// # Returns
+    /// - `Ok(())` if validation passes or no theory is configured
+    /// - `Err(SpaceError::TheoryValidationError)` if validation fails
+    pub fn validate_term(&self, term: &str) -> Result<(), SpaceError> {
+        match &self.theory {
+            Some(theory) => {
+                // Wrap the theory's own validation error with the theory name
+                // and offending term for diagnostics.
+                theory.validate(term).map_err(|validation_error| {
+                    SpaceError::TheoryValidationError {
+                        theory_name: theory.name().to_string(),
+                        validation_error,
+                        term: term.to_string(),
+                    }
+                })
+            }
+            None => Ok(()), // No theory = accept everything
+        }
+    }
+
+    /// Get a reference to the matcher.
+    pub fn matcher(&self) -> &M {
+        &self.matcher
+    }
+
+    /// Get a reference to the channel store.
+    pub fn channel_store(&self) -> &CS {
+        &self.channel_store
+    }
+
+    /// Get a mutable reference to the channel store.
+    pub fn channel_store_mut(&mut self) -> &mut CS {
+        &mut self.channel_store
+    }
+
+    /// Check if this space has a history store configured.
+    pub fn has_history_store(&self) -> bool {
+        self.history_store.is_some()
+    }
+
+    /// Get the allocation mode for `new` bindings within this space.
+    ///
+    /// Delegates to the channel store's allocation mode. 
Used by `eval_new()`
+    /// to determine whether to use random IDs or index-based allocation.
+    pub fn allocation_mode(&self) -> super::types::AllocationMode {
+        self.channel_store.allocation_mode()
+    }
+
+    /// Get the space configuration.
+    ///
+    /// Returns a configuration reflecting the current space settings.
+    /// The theory (if any) is cloned into the returned configuration.
+    ///
+    /// NOTE(review): the `outer`/`data_collection`/`continuation_collection`
+    /// fields are hard-coded here (HashMap/Bag/Bag) regardless of the actual
+    /// store in use — see the inline comment; confirm whether runtime
+    /// detection is planned.
+    pub fn config(&self) -> SpaceConfig {
+        SpaceConfig {
+            outer: super::types::OuterStorageType::HashMap, // Default, would need runtime detection
+            data_collection: super::types::InnerCollectionType::Bag,
+            continuation_collection: super::types::InnerCollectionType::Bag,
+            qualifier: self.qualifier,
+            theory: self.theory.as_ref().map(|t| t.clone_box()),
+            gas_config: super::types::GasConfiguration::default(), // Gas config tracked by ChargingSpaceAgent wrapper
+        }
+    }
+
+    // =========================================================================
+    // Internal Helper Methods
+    // =========================================================================
+
+    /// Find a fireable continuation for the given channel and data, checking similarity requirements.
+    ///
+    /// This method verifies that ALL channels in the join pattern have matching data
+    /// before returning a result. It also checks similarity requirements stored with
+    /// continuations. When a continuation was registered with similarity patterns
+    /// (via `consume_with_similarity`), this method verifies that the incoming data
+    /// meets the similarity threshold before allowing the continuation to fire.
+    ///
+    /// NOTE: For VectorDB channels with waiting similarity queries, prefer using
+    /// `try_produce_with_similarity_matrix()` first, which uses SIMD-optimized
+    /// matrix operations for batch similarity computation.
+    ///
+    /// # Arguments
+    /// * `channel` - The channel where data is being produced
+    /// * `data` - The data being produced
+    ///
+    /// # Returns
+    /// * `Some((patterns, continuation, persist, join_channels))` - A continuation that can fire
+    /// * `None` - No fireable continuation found
+    // NOTE(review): the return type below reads `Option<(Vec, K, bool, Vec)>`
+    // — the element types of the two `Vec`s (presumably `<P>` and `<C>`) were
+    // stripped in this rendering; restore from the canonical source.
+    fn find_fireable_continuation_with_similarity(
+        &self,
+        channel: &C,
+        data: &A,
+    ) -> Option<(Vec, K, bool, Vec)>
+    where
+        P: Clone,
+        K: Clone,
+        A: 'static,
+    {
+        use std::any::Any;
+        use super::collections::{StoredSimilarityInfo, VectorDBDataCollection};
+
+        // Get all join patterns that include this channel
+        let joins = self.channel_store.get_joins(channel);
+
+        for join_channels in joins {
+            // Get the continuation collection for this join pattern
+            if let Some(cont_coll) = self.channel_store.get_continuation_collection(&join_channels)
+            {
+                // Check each continuation with its similarity info
+                for (patterns, cont, persist, similarity_opt) in cont_coll.all_continuations_with_similarity() {
+                    // Find which pattern corresponds to our channel
+                    if let Some(idx) = join_channels.iter().position(|c| c == channel) {
+                        if idx < patterns.len() {
+                            // Check if our data matches this pattern
+                            if !self.matcher.matches(&patterns[idx], data) {
+                                continue;
+                            }
+
+                            // Check similarity requirements if present
+                            if let Some(similarity_info) = similarity_opt {
+                                // Get the similarity requirement for this channel
+                                if idx < similarity_info.embeddings.len() {
+                                    if let Some((query_embedding, threshold, resolved_metric, _top_k)) = &similarity_info.embeddings[idx] {
+                                        // Extract embedding from the incoming data
+                                        // First, try to get VectorDB collection for embedding type info
+                                        let data_embedding = self.extract_embedding_from_data(channel, data);
+
+                                        if let Ok(data_emb) = data_embedding {
+                                            // Use resolved metric if provided, otherwise default to cosine
+                                            let similarity = match resolved_metric.as_ref().unwrap_or(&SimilarityMetric::Cosine) {
+                                                SimilarityMetric::Cosine => compute_cosine_similarity(query_embedding, &data_emb),
+                                                SimilarityMetric::DotProduct => compute_dot_product(query_embedding, &data_emb),
+                                                SimilarityMetric::Euclidean => compute_euclidean_similarity(query_embedding, &data_emb),
+                                                SimilarityMetric::Manhattan => compute_manhattan_similarity(query_embedding, &data_emb),
+                                                SimilarityMetric::Hamming => compute_hamming_similarity(query_embedding, &data_emb),
+                                                SimilarityMetric::Jaccard => compute_jaccard_similarity(query_embedding, &data_emb),
+                                            };
+
+                                            if similarity < *threshold {
+                                                // Similarity not met - skip this continuation
+                                                continue;
+                                            }
+                                        } else {
+                                            // Failed to extract embedding - skip this continuation
+                                            tracing::warn!(
+                                                "find_fireable_continuation_with_similarity: failed to extract embedding from data"
+                                            );
+                                            continue;
+                                        }
+                                    }
+                                }
+                            }
+
+                            // Pattern matches and similarity satisfied - check if all OTHER channels have data
+                            let mut all_channels_have_data = true;
+                            for (i, ch) in join_channels.iter().enumerate() {
+                                if ch == channel {
+                                    // This is our channel - we have the data being produced
+                                    continue;
+                                }
+                                if i >= patterns.len() {
+                                    continue;
+                                }
+
+                                // Check if this other channel has matching data
+                                // Note: For joins with multiple channels, we'd also need to check
+                                // similarity on those channels. For now, we use standard pattern matching.
+                                let pattern = &patterns[i];
+                                let has_data = self.channel_store
+                                    .get_data_collection(ch)
+                                    .map(|dc| dc.peek(|a| self.matcher.matches(pattern, a)).is_some())
+                                    .unwrap_or(false);
+                                if !has_data {
+                                    all_channels_have_data = false;
+                                    break;
+                                }
+                            }
+
+                            // Only return this continuation if all channels have data
+                            if all_channels_have_data {
+                                return Some((
+                                    patterns.to_vec(),
+                                    cont.clone(),
+                                    persist,
+                                    join_channels.clone(),
+                                ));
+                            }
+                            // Otherwise, continue searching for another continuation
+                        }
+                    }
+                }
+            }
+        }
+
+        None
+    }
+
+    /// Extract embedding from data for similarity comparison.
+    ///
+    /// This helper attempts to extract the embedding from the data being produced,
+    /// using the VectorDB collection's configuration to determine the embedding type.
+    // NOTE(review): the return type reads `Result, SpaceError>` — the Ok type
+    // (presumably `Vec<f32>`) was stripped in this rendering.
+    fn extract_embedding_from_data(&self, channel: &C, data: &A) -> Result, SpaceError>
+    where
+        A: 'static,
+    {
+        use std::any::Any;
+        use super::collections::VectorDBDataCollection;
+
+        // Get the data collection for this channel
+        let dc = self.channel_store.get_data_collection(channel).ok_or_else(|| {
+            SpaceError::InvalidConfiguration {
+                description: "No data collection found for channel".to_string(),
+            }
+        })?;
+
+        // Downcast to VectorDB collection to get embedding config
+        let dc_any: &dyn Any = dc;
+        if let Some(vec_dc) = dc_any.downcast_ref::>() {
+            let embedding_type = vec_dc.embedding_type();
+            let dimensions = vec_dc.embedding_dimensions();
+
+            // Try to downcast data to extract embedding
+            let data_any: &dyn Any = data;
+
+            // First try: ListParWithRandom (actual type used by RhoISpace)
+            if let Some(list_par) = data_any.downcast_ref::() {
+                if let Some(data_par) = list_par.pars.first() {
+                    return extract_embedding_from_map(data_par, embedding_type, dimensions);
+                }
+            }
+
+            // Fallback: direct Par downcast
+            if let Some(data_par) = data_any.downcast_ref::() {
+                return extract_embedding_from_map(data_par, embedding_type, dimensions);
+            }
+
+            Err(SpaceError::EmbeddingExtractionError {
+                description: "Data type not supported for embedding extraction".to_string(),
+            })
+        } else {
+            Err(SpaceError::InvalidConfiguration {
+                description: "Channel does not have a VectorDB collection".to_string(),
+            })
+        }
+    }
+
+    /// Try to produce data using the store-first similarity matrix approach.
+    ///
+    /// This method implements efficient similarity matching by:
+    /// 1. Storing the data first in the VectorDB collection
+    /// 2. Querying the similarity matrix for matching continuations
+    /// 3. 
Firing the best-matching continuation if found
+    ///
+    /// This is more efficient than the on-the-fly approach because:
+    /// - Data embedding is normalized once during storage
+    /// - Similarity is computed via SIMD-optimized matrix-vector multiplication
+    /// - All waiting queries are checked in a single batch operation
+    ///
+    /// # Arguments
+    /// - `channel`: The channel to produce on
+    /// - `data`: The data being produced
+    /// - `persist`: Whether the data persists after matching
+    ///
+    /// # Returns
+    /// - `Some(Ok(...))`: A matching continuation was found and fired
+    /// - `Some(Err(...))`: An error occurred during processing
+    /// - `None`: No similarity queries for this channel (caller should use fallback)
+    // NOTE(review): the return type below has stripped generic arguments
+    // (unbalanced `>` in `Option, Vec>, Produce)>, SpaceError>>`); restore
+    // from the canonical source.
+    fn try_produce_with_similarity_matrix(
+        &mut self,
+        channel: &C,
+        data: A,
+        persist: bool,
+    ) -> Option, Vec>, Produce)>, SpaceError>>
+    where
+        A: Clone + 'static,
+        P: Clone,
+        K: Clone,
+    {
+        use std::any::Any;
+        use super::collections::{ContinuationId, SimilarityQueryMatrix, VectorDBDataCollection};
+
+        // Check if this channel has any waiting similarity queries
+        // Lazy allocation: return None early if similarity_queries hasn't been allocated
+        let query_matrix = self.similarity_queries.as_ref()?.get(channel)?;
+        if query_matrix.is_empty() {
+            return None;
+        }
+
+        // Get the VectorDB data collection for this channel
+        let dc = self.channel_store.get_or_create_data_collection(channel);
+        let dc_any: &mut dyn Any = dc;
+        let vec_dc = dc_any.downcast_mut::>()?;
+
+        // Extract embedding from data
+        let embedding_type = vec_dc.embedding_type();
+        let dimensions = vec_dc.embedding_dimensions();
+
+        // Try to extract embedding from data
+        let data_any: &dyn Any = &data;
+        let embedding = if let Some(list_par) = data_any.downcast_ref::() {
+            if let Some(data_par) = list_par.pars.first() {
+                match extract_embedding_from_map(data_par, embedding_type, dimensions) {
+                    Ok(emb) => emb,
+                    Err(e) => return Some(Err(e)),
+                }
+            } else {
+                return Some(Err(SpaceError::InvalidConfiguration {
+                    description: "ListParWithRandom contains no Par values for embedding extraction".to_string(),
+                }));
+            }
+        } else if let Some(data_par) = data_any.downcast_ref::() {
+            match extract_embedding_from_map(data_par, embedding_type, dimensions) {
+                Ok(emb) => emb,
+                Err(e) => return Some(Err(e)),
+            }
+        } else {
+            // Not a VectorDB-compatible data type
+            return None;
+        };
+
+        // Store data first and get the index
+        let data_idx = match vec_dc.put_with_embedding_returning_index(data.clone(), embedding, persist) {
+            Ok(idx) => idx,
+            Err(e) => return Some(Err(e)),
+        };
+
+        // Get the normalized embedding we just stored
+        let normalized_embedding = match vec_dc.get_normalized_embedding(data_idx) {
+            Some(emb) => emb,
+            None => {
+                return Some(Err(SpaceError::InvalidConfiguration {
+                    description: "Failed to retrieve normalized embedding after storage".to_string(),
+                }));
+            }
+        };
+
+        // Query the similarity matrix for matching continuations (SIMD-optimized)
+        // Re-borrow query_matrix since we dropped the mutable borrow above
+        let query_matrix = self.similarity_queries.as_ref()?.get(channel)?;
+        let matches = query_matrix.find_matching_queries(&normalized_embedding);
+
+        if matches.is_empty() {
+            // No matching continuation found - data stays in VectorDB
+            return Some(Ok(None));
+        }
+
+        // Find the best match (highest similarity score)
+        let (cont_id, channel_idx, similarity, cont_persist) = matches
+            .into_iter()
+            .max_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal))?;
+
+        // Get the continuation from the continuation collection
+        // For single-channel queries, the join pattern is just [channel]
+        let join_channels = vec![channel.clone()];
+
+        // Find and retrieve the continuation (will be removed if not persistent)
+        let cont_coll = self.channel_store.get_continuation_collection_mut(&join_channels)?;
+        let (patterns, cont, _persist, _similarity_info) = if cont_persist {
+            // Persistent - just peek, don't remove
+            let all = cont_coll.all_continuations_with_similarity();
+            // Find by ContinuationId - we need to match the correct one
+            // For now, just take the first matching one (we'll refine this later)
+            // Note: all.first() returns Option<&(...)> so we need (*k).clone() to get K (owned)
+            all.first().map(|(p, k, persist, sim)| (p.to_vec(), (*k).clone(), *persist, sim.cloned()))?
+        } else {
+            // Non-persistent - remove the continuation
+            cont_coll.find_and_remove_with_similarity(|_p, _k| true)?
+        };
+
+        // Remove data from VectorDB if not persistent
+        if !persist {
+            // Re-borrow as mutable
+            let dc = self.channel_store.get_data_collection_mut(channel)?;
+            let dc_any: &mut dyn Any = dc;
+            if let Some(vec_dc) = dc_any.downcast_mut::>() {
+                vec_dc.remove_by_index(data_idx);
+            }
+        }
+
+        // Remove the query from the similarity matrix if not persistent
+        if !cont_persist {
+            if let Some(query_matrix) = self.similarity_queries.as_mut().and_then(|m| m.get_mut(channel)) {
+                query_matrix.remove_query(cont_id);
+            }
+
+            // Remove join pattern if no more continuations
+            let should_remove_join = self.channel_store
+                .get_continuation_collection(&join_channels)
+                .map(|cc| cc.is_empty())
+                .unwrap_or(true);
+
+            if should_remove_join {
+                self.channel_store.remove_join(&join_channels);
+            }
+        }
+
+        // Build the result
+        let matched_data = vec![RSpaceResult {
+            channel: channel.clone(),
+            matched_datum: data.clone(),
+            removed_datum: data,
+            persistent: persist,
+            suffix_key: None,
+        }];
+
+        let cont_result = ContResult {
+            continuation: cont,
+            persistent: cont_persist,
+            peek: false,
+            channels: join_channels,
+            patterns,
+        };
+
+        Some(Ok(Some((cont_result, matched_data, Produce::default()))))
+    }
+
+    /// Find a matching continuation at a prefix path for the given channel and data.
+    ///
+    /// When prefix semantics are enabled (PathMap storage), this method searches
+    /// for continuations waiting at prefix paths of the produce channel.
+
+    /// # Example
+    /// If produce is on `@[0,1,2]`, this will check for continuations at:
+    /// - `@[0,1]` (immediate prefix)
+    /// - `@[0]` (longer prefix)
+    /// - `@[]` (root prefix, if applicable)
+    ///
+    /// # Returns
+    /// - `Some((patterns, continuation, persist, join_channels, prefix_channel, match_idx, suffix_key))`:
+    ///   The matching continuation with the prefix path where it was found and
+    ///   the computed suffix key (the path difference between the produce
+    ///   channel and the prefix)
+    /// - `None`: No matching continuation found at any prefix
+    ///
+    /// # Formal Correspondence
+    /// - `PathMapStore.v`: `produce_finds_prefix_continuation` theorem
+    // NOTE(review): the return type below has stripped generic arguments
+    // (bare `Vec`s); restore from the canonical source.
+    fn find_matching_continuation_at_prefix(
+        &self,
+        channel: &C,
+        data: &A,
+    ) -> Option<(Vec, K, bool, Vec, C, usize, SuffixKey)>
+    where
+        P: Clone,
+        K: Clone,
+        C: AsRef<[u8]>,
+    {
+        if !self.channel_store.supports_prefix_semantics() {
+            return None;
+        }
+
+        // Get all prefix paths for the produce channel
+        let prefixes = self.channel_store.channel_prefixes(channel);
+
+        for prefix in prefixes {
+            // Skip the exact channel (already checked by find_matching_continuation)
+            if &prefix == channel {
+                continue;
+            }
+
+            // First, check for single-channel continuations at the prefix
+            // Single-channel consumes like `for (@x <- @[0,1])` don't register joins,
+            // they just store a continuation at [prefix] directly.
+            let single_channel_key = vec![prefix.clone()];
+            if let Some(cont_coll) = self.channel_store.get_continuation_collection(&single_channel_key) {
+                for (patterns, cont, persist) in cont_coll.all_continuations() {
+                    if !patterns.is_empty() {
+                        // Check if our data matches this pattern
+                        if self.matcher.matches(&patterns[0], data) {
+                            // Compute suffix key: difference between produce channel and prefix
+                            // E.g., channel=[0,1,2], prefix=[0,1] -> suffix=[2]
+                            let suffix_key = get_path_suffix(prefix.as_ref(), channel.as_ref())
+                                .unwrap_or_default();
+                            return Some((
+                                patterns.to_vec(),
+                                cont.clone(),
+                                persist,
+                                single_channel_key,
+                                prefix.clone(),
+                                0, // idx is always 0 for single-channel
+                                suffix_key,
+                            ));
+                        }
+                    }
+                }
+            }
+
+            // Then, check join patterns that include this prefix channel
+            let joins = self.channel_store.get_joins(&prefix);
+
+            for join_channels in joins {
+                // Get the continuation collection for this join pattern
+                if let Some(cont_coll) = self.channel_store.get_continuation_collection(&join_channels)
+                {
+                    // Check each continuation
+                    for (patterns, cont, persist) in cont_coll.all_continuations() {
+                        // Find which pattern corresponds to the prefix channel
+                        if let Some(idx) = join_channels.iter().position(|c| c == &prefix) {
+                            if idx < patterns.len() {
+                                // Check if our data matches this pattern
+                                if self.matcher.matches(&patterns[idx], data) {
+                                    // Compute suffix key: difference between produce channel and prefix
+                                    // E.g., channel=[0,1,2], prefix=[0,1] -> suffix=[2]
+                                    let suffix_key = get_path_suffix(prefix.as_ref(), channel.as_ref())
+                                        .unwrap_or_default();
+                                    return Some((
+                                        patterns.to_vec(),
+                                        cont.clone(),
+                                        persist,
+                                        join_channels.clone(),
+                                        prefix.clone(),
+                                        idx,
+                                        suffix_key,
+                                    ));
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        None
+    }
+
+    /// Check if all channels in a join have matching data.
+    ///
+    /// Non-destructive: every channel is only peeked, never mutated. Returns
+    /// the matched data per channel, or `None` if any channel lacks a match.
+    fn check_all_channels_match(
+        &self,
+        channels: &[C],
+        patterns: &[P],
+        peeks: &BTreeSet,
+    ) -> Option>
+    where
+        A: Clone,
+    {
+        let mut matched_data = Vec::with_capacity(channels.len());
+
+        for (i, (ch, pattern)) in channels.iter().zip(patterns.iter()).enumerate() {
+            let is_peek = peeks.contains(&(i as i32));
+
+            if let Some(data_coll) = self.channel_store.get_data_collection(ch) {
+                // Try to find matching data
+                let matcher = &self.matcher;
+                let found = if is_peek {
+                    data_coll.peek(|a| matcher.matches(pattern, a)).cloned()
+                } else {
+                    // For non-peek, we need the data but can't remove yet
+                    // (we'll remove in a second pass if all channels match)
+                    data_coll.peek(|a| matcher.matches(pattern, a)).cloned()
+                };
+
+                if let Some(data) = found {
+                    matched_data.push(data);
+                } else {
+                    return None; // This channel has no matching data
+                }
+            } else {
+                return None; // No data collection for this channel
+            }
+        }
+
+        Some(matched_data)
+    }
+
+    /// Remove matched data from all channels (for successful consume). 
+    /// Second pass of the two-pass consume: removes (or, for peek channels,
+    /// merely clones) the datum matching each pattern. Assumes
+    /// `check_all_channels_match` already succeeded for these channels.
+    fn remove_matched_data(
+        &mut self,
+        channels: &[C],
+        patterns: &[P],
+        peeks: &BTreeSet,
+    ) -> Vec
+    where
+        A: Clone,
+    {
+        let mut removed = Vec::with_capacity(channels.len());
+
+        for (i, (ch, pattern)) in channels.iter().zip(patterns.iter()).enumerate() {
+            let is_peek = peeks.contains(&(i as i32));
+
+            if let Some(data_coll) = self.channel_store.get_data_collection_mut(ch) {
+                if is_peek {
+                    // For peek, just get the data without removing
+                    if let Some(data) = data_coll.peek(|a| self.matcher.matches(pattern, a)) {
+                        removed.push(data.clone());
+                    }
+                } else {
+                    // For non-peek, remove the data
+                    if let Some(data) =
+                        data_coll.find_and_remove(|a| self.matcher.matches(pattern, a))
+                    {
+                        removed.push(data);
+                    }
+                }
+            }
+        }
+
+        removed
+    }
+
+    /// Atomically check and remove matching data from all channels in a single pass.
+    ///
+    /// This combines `check_all_channels_match` and `remove_matched_data` into a
+    /// single atomic operation, eliminating the TOCTOU race condition and improving
+    /// performance by avoiding the second pass.
+    ///
+    /// # Algorithm
+    /// 1. For each channel, atomically find and remove matching data (or peek for peek channels)
+    /// 2. If any channel fails to match, rollback all previously removed data
+    /// 3. Return None if rollback occurred, Some(data) if all channels matched
+    ///
+    /// # Performance
+    /// - Single pass instead of two passes
+    /// - O(n) where n is the number of channels
+    /// - Uses SmartDataStorage's O(1) swap_remove in Eager mode
+    ///
+    /// # Formal Correspondence
+    /// - Implements atomic consume semantics from `GenericRSpace.v`
+    /// - Ensures linearizability: either all channels match or none are modified
+    fn check_and_remove_matched_data_atomic(
+        &mut self,
+        channels: &[C],
+        patterns: &[P],
+        peeks: &BTreeSet,
+    ) -> Option>
+    where
+        A: Clone,
+    {
+        // Invariant: `matched_data` gains exactly one entry per channel
+        // processed (peeked or removed), in channel order; `removed_from`
+        // records only the non-peek removals as (channel index, channel).
+        let mut matched_data: Vec = Vec::with_capacity(channels.len());
+        let mut removed_from: Vec<(usize, C)> = Vec::with_capacity(channels.len());
+
+        for (i, (ch, pattern)) in channels.iter().zip(patterns.iter()).enumerate() {
+            let is_peek = peeks.contains(&(i as i32));
+
+            if let Some(data_coll) = self.channel_store.get_data_collection_mut(ch) {
+                let matcher = &self.matcher;
+
+                if is_peek {
+                    // For peek, just find without removing
+                    if let Some(data) = data_coll.peek(|a| matcher.matches(pattern, a)) {
+                        matched_data.push(data.clone());
+                    } else {
+                        // No match - rollback and return None
+                        self.rollback_removed_data(&removed_from, &matched_data, peeks);
+                        return None;
+                    }
+                } else {
+                    // Atomic find and remove
+                    if let Some(data) = data_coll.find_and_remove(|a| matcher.matches(pattern, a)) {
+                        matched_data.push(data);
+                        removed_from.push((i, ch.clone()));
+                    } else {
+                        // No match - rollback and return None
+                        self.rollback_removed_data(&removed_from, &matched_data, peeks);
+                        return None;
+                    }
+                }
+            } else {
+                // No data collection for this channel - rollback and return None
+                self.rollback_removed_data(&removed_from, &matched_data, peeks);
+                return None;
+            }
+        }
+
+        Some(matched_data)
+    }
+
+    /// Rollback helper: put back removed data on failure.
+    ///
+    /// Called when atomic matching fails mid-operation to restore the space
+    /// to its previous state. 
+ #[inline] + fn rollback_removed_data( + &mut self, + removed_from: &[(usize, C)], + matched_data: &[A], + peeks: &BTreeSet, + ) where + A: Clone, + { + for (idx, (i, ch)) in removed_from.iter().enumerate() { + // Only rollback non-peek removes (peeks didn't modify anything) + if !peeks.contains(&(*i as i32)) { + if let Some(data_coll) = self.channel_store.get_data_collection_mut(ch) { + // Best effort rollback - ignore errors + let _ = data_coll.put(matched_data[idx].clone()); + } + } + } + } + + // ========================================================================= + // Prefix-Aware Matching Methods + // ========================================================================= + + /// Check if all channels have matching data, including prefix descendants. + /// + /// When prefix semantics are enabled, this method searches not just the + /// exact channels but also all descendant channels (paths that have the + /// consume channel as a prefix). + /// + /// # Example + /// For PathMap with consume on `@[0,1]`, this will find data at: + /// - `@[0,1]` (exact match) + /// - `@[0,1,2]` (descendant with suffix `[2]`) + /// - `@[0,1,3,4]` (descendant with suffix `[3,4]`) + /// + /// # Formal Correspondence + /// - `PathMapStore.v`: `consume_finds_descendant_data` theorem + fn check_all_channels_match_with_prefix( + &self, + channels: &[C], + patterns: &[P], + peeks: &BTreeSet, + ) -> Option>> + where + A: Clone, + { + let mut results = Vec::with_capacity(channels.len()); + + for (i, (ch, pattern)) in channels.iter().zip(patterns.iter()).enumerate() { + let is_peek = peeks.contains(&(i as i32)); + + // First, try exact match on the consume channel + if let Some(data_coll) = self.channel_store.get_data_collection(ch) { + if let Some(data) = data_coll.peek(|a| self.matcher.matches(pattern, a)) { + results.push(PrefixMatchResult { + consume_channel_idx: i, + actual_channel: ch.clone(), + data: data.clone(), + suffix_key: Vec::new(), // Empty suffix = exact match + 
is_peek, + }); + continue; + } + } + + // If no exact match and prefix semantics enabled, check descendants + if self.channel_store.supports_prefix_semantics() { + let descendants = self.channel_store.channels_with_prefix(ch); + let mut found = false; + + for descendant in descendants { + // Skip the exact channel (already checked above) + if &descendant == ch { + continue; + } + + if let Some(data_coll) = self.channel_store.get_data_collection(&descendant) { + if let Some(data) = data_coll.peek(|a| self.matcher.matches(pattern, a)) { + // Compute suffix key using the channel store's implementation + // For PathMap stores, this converts channels to paths and computes the suffix + let suffix_key = self.channel_store.compute_suffix_key(ch, &descendant) + .unwrap_or_default(); + + results.push(PrefixMatchResult { + consume_channel_idx: i, + actual_channel: descendant.clone(), + data: data.clone(), + suffix_key, + is_peek, + }); + found = true; + break; // Found a match for this channel + } + } + } + + if !found { + return None; // No match for this channel + } + } else { + return None; // No exact match and no prefix semantics + } + } + + Some(results) + } + + /// Check if all channels have matching data with suffix key computation. + /// + /// This is the specialized version for PathMap stores where the channel + /// type is `Vec` and suffix keys can be computed. 
    ///
    /// # Formal Correspondence
    /// - `PathMapStore.v`: `consume_computes_suffix_key` theorem
    #[allow(dead_code)] // May be needed for PathMap prefix semantics integration
    fn check_all_channels_match_with_suffix(
        &self,
        channels: &[C],
        patterns: &[P],
        peeks: &BTreeSet<i32>,
    ) -> Option<Vec<PrefixMatchResult<C, A>>>
    where
        A: Clone,
        C: AsRef<[u8]>,
    {
        let mut results = Vec::with_capacity(channels.len());

        for (i, (ch, pattern)) in channels.iter().zip(patterns.iter()).enumerate() {
            let is_peek = peeks.contains(&(i as i32));

            // First, try exact match on the consume channel
            if let Some(data_coll) = self.channel_store.get_data_collection(ch) {
                if let Some(data) = data_coll.peek(|a| self.matcher.matches(pattern, a)) {
                    results.push(PrefixMatchResult {
                        consume_channel_idx: i,
                        actual_channel: ch.clone(),
                        data: data.clone(),
                        suffix_key: Vec::new(), // Empty suffix = exact match
                        is_peek,
                    });
                    continue;
                }
            }

            // If no exact match and prefix semantics enabled, check descendants
            if self.channel_store.supports_prefix_semantics() {
                let descendants = self.channel_store.channels_with_prefix(ch);
                let mut found = false;

                for descendant in descendants {
                    // Skip the exact channel (already checked above)
                    if &descendant == ch {
                        continue;
                    }

                    if let Some(data_coll) = self.channel_store.get_data_collection(&descendant) {
                        if let Some(data) = data_coll.peek(|a| self.matcher.matches(pattern, a)) {
                            // Compute suffix key directly from the raw path bytes
                            // (this variant bypasses the channel store and uses
                            // the byte-level helper).
                            let suffix = get_path_suffix(ch.as_ref(), descendant.as_ref())
                                .unwrap_or_default();

                            results.push(PrefixMatchResult {
                                consume_channel_idx: i,
                                actual_channel: descendant.clone(),
                                data: data.clone(),
                                suffix_key: suffix,
                                is_peek,
                            });
                            found = true;
                            break; // Found a match for this channel
                        }
                    }
                }

                if !found {
                    return None; // No match for this channel
                }
            } else {
                return None; // No exact match and no prefix semantics
            }
        }

        Some(results)
    }

    /// Remove matched data based on prefix match results.
    ///
    /// Unlike `remove_matched_data`, this method removes data from the
    /// actual channels where matches were found, which may differ from
    /// the consume pattern channels when prefix semantics are enabled.
    /// Peeked results are returned without removal.
    ///
    /// # Formal Correspondence
    /// - `PathMapStore.v`: `consume_removes_from_actual_path` theorem
    #[allow(dead_code)] // Kept for reference; replaced by atomic version
    fn remove_matched_data_from_prefix_results(
        &mut self,
        results: &[PrefixMatchResult<C, A>],
        patterns: &[P],
    ) -> Vec<(A, SuffixKey)>
    where
        A: Clone,
    {
        let mut removed = Vec::with_capacity(results.len());

        for result in results {
            if result.is_peek {
                // For peek, return data without removing
                removed.push((result.data.clone(), result.suffix_key.clone()));
            } else {
                // Remove from the actual channel where data was found.
                // NOTE(review): if the datum was concurrently consumed between
                // peek and removal, this silently yields fewer results than
                // `results` — the TOCTOU race the atomic version fixes.
                if let Some(data_coll) = self.channel_store.get_data_collection_mut(&result.actual_channel) {
                    let pattern = &patterns[result.consume_channel_idx];
                    if let Some(data) = data_coll.find_and_remove(|a| self.matcher.matches(pattern, a)) {
                        removed.push((data, result.suffix_key.clone()));
                    }
                }
            }
        }

        removed
    }

    /// Atomic prefix-aware consume that fuses find and remove operations.
    ///
    /// This method solves the TOCTOU race condition in prefix-aware consume where
    /// concurrent consumers with overlapping prefixes (e.g., @[0] and @[0,1]) could
    /// both peek the same data at a descendant channel (@[0,1,2]), but only one
    /// would successfully remove it, leaving the other with incomplete results.
    ///
    /// # Algorithm
    ///
    /// For each consume channel:
    /// 1. Try exact match first using `find_and_remove` (atomic)
    /// 2. If no exact match and prefix semantics enabled, try descendants
    /// 3. If removal fails mid-operation (race detected), rollback and return None
    ///
    /// When this method returns `None`, the caller should store the continuation
    /// as a wait pattern rather than firing with incomplete bindings.
    ///
    /// # Formal Correspondence
    /// - `PathMapStore.v`: `consume_atomic` lemma
    /// - Fixes TOCTOU race between `check_all_channels_match_with_prefix` and
    ///   `remove_matched_data_from_prefix_results`
    fn check_and_consume_with_prefix_atomic(
        &mut self,
        channels: &[C],
        patterns: &[P],
        peeks: &BTreeSet<i32>,
    ) -> Option<Vec<(C, A, SuffixKey)>>
    where
        A: Clone,
    {
        // One (actual channel, datum, suffix key) entry per consume channel,
        // pushed in channel order.
        let mut results: Vec<(C, A, SuffixKey)> = Vec::with_capacity(channels.len());
        // NOTE(review): `removed_channels` is recorded but never read; the
        // rollback below reconstructs removals from `results` + `peeks` instead.
        let mut removed_channels: Vec<C> = Vec::with_capacity(channels.len());

        for (i, (ch, pattern)) in channels.iter().zip(patterns.iter()).enumerate() {
            let is_peek = peeks.contains(&(i as i32));

            // Track whether we found a match for this channel
            let mut found = false;

            // First, try exact match on the consume channel
            if let Some(data_coll) = self.channel_store.get_data_collection_mut(ch) {
                if is_peek {
                    // For peek, just check if matching data exists
                    if let Some(data) = data_coll.peek(|a| self.matcher.matches(pattern, a)) {
                        results.push((ch.clone(), data.clone(), Vec::new()));
                        found = true;
                    }
                } else {
                    // Atomic find and remove for exact match
                    if let Some(data) = data_coll.find_and_remove(|a| self.matcher.matches(pattern, a)) {
                        results.push((ch.clone(), data, Vec::new()));
                        removed_channels.push(ch.clone());
                        found = true;
                    }
                }
            }

            // If no exact match and prefix semantics enabled, check descendants
            if !found && self.channel_store.supports_prefix_semantics() {
                let descendants = self.channel_store.channels_with_prefix(ch);

                for descendant in descendants {
                    // Skip the exact channel (already checked above)
                    if &descendant == ch {
                        continue;
                    }

                    if is_peek {
                        // For peek, just check if matching data exists
                        if let Some(data_coll) = self.channel_store.get_data_collection(&descendant) {
                            if let Some(data) = data_coll.peek(|a| self.matcher.matches(pattern, a)) {
                                let suffix_key = self.channel_store.compute_suffix_key(ch, &descendant)
                                    .unwrap_or_default();
                                results.push((descendant.clone(), data.clone(), suffix_key));
                                found = true;
                                break;
                            }
                        }
                    } else {
                        // Atomic find and remove for prefix match
                        if let Some(data_coll) = self.channel_store.get_data_collection_mut(&descendant) {
                            if let Some(data) = data_coll.find_and_remove(|a| self.matcher.matches(pattern, a)) {
                                let suffix_key = self.channel_store.compute_suffix_key(ch, &descendant)
                                    .unwrap_or_default();
                                results.push((descendant.clone(), data, suffix_key));
                                removed_channels.push(descendant.clone());
                                found = true;
                                break;
                            }
                        }
                    }
                }
            }

            // If no match found for this channel, we must rollback and return None
            if !found {
                // Rollback: put back all removed data. `results` holds exactly
                // one entry per already-processed channel, so its index lines
                // up with the channel index used in `peeks`.
                for (idx, (removed_ch, removed_data, _)) in results.into_iter().enumerate() {
                    // Only rollback non-peek removes
                    if !peeks.contains(&(idx as i32)) {
                        if let Some(data_coll) = self.channel_store.get_data_collection_mut(&removed_ch) {
                            // Ignore errors during rollback - best effort
                            let _ = data_coll.put(removed_data);
                        }
                    }
                }
                return None;
            }
        }

        Some(results)
    }

    /// Serialize the current state for checkpointing.
    ///
    /// Exports all data, continuations, joins, and the gensym counter to a
    /// serializable format, then encodes it using bincode.
    ///
    /// # Type Requirements
    /// - `C`, `P`, `A`, `K` must implement `Serialize`
    ///
    /// # Formal Correspondence
    /// - `Checkpoint.v`: checkpoint_preserves_state theorem
    /// - `CheckpointReplay.tla`: HardCheckpoint action
    ///
    /// # Returns
    /// The serialized state as bytes, or an empty vector if serialization fails.
    fn serialize_state(&self) -> Vec<u8>
    where
        C: Serialize,
        P: Serialize,
        A: Serialize,
        K: Serialize,
    {
        // Export data from channel store.
        // Each channel maps to its flattened datum list; the `bool` is the
        // persist flag, which this export path always records as `false`
        // because `all_data` does not expose per-datum persistence.
        let exported_data = self.channel_store.export_data();
        let data: Vec<(C, Vec<(A, bool)>)> = exported_data
            .into_iter()
            .map(|(channel, data_collection)| {
                // Convert data collection to serializable format.
                // Each item in BagDataCollection is just the data itself.
                let items: Vec<(A, bool)> = data_collection
                    .all_data()
                    .into_iter()
                    .map(|a| (a.clone(), false)) // Default persist = false
                    .collect();
                (channel, items)
            })
            .collect();

        // Export continuations from channel store, keyed by their join
        // (the full channel list the continuation waits on).
        let exported_conts = self.channel_store.export_continuations();
        let continuations: Vec<(Vec<C>, Vec<(Vec<P>, K, bool)>)> = exported_conts
            .into_iter()
            .map(|(channels, cont_collection)| {
                // Convert continuation collection to serializable format
                let items: Vec<(Vec<P>, K, bool)> = cont_collection
                    .all_continuations()
                    .into_iter()
                    .map(|(patterns, k, persist)| (patterns.to_vec(), k.clone(), persist))
                    .collect();
                (channels, items)
            })
            .collect();

        // Export joins from channel store
        let joins = self.channel_store.export_joins();

        // Create serialized state, including the gensym counter so that
        // restored spaces continue generating fresh channel names, and the
        // space identity so deserialization can validate it.
        let state = SerializedState {
            data,
            continuations,
            joins,
            gensym_counter: self.channel_store.gensym_counter(),
            qualifier: self.qualifier,
            space_id: self.space_id.0.clone(),
        };

        // Serialize using bincode. On failure this logs and returns an empty
        // vector rather than propagating an error — callers treat an empty
        // state as "nothing to restore".
        bincode::serialize(&state).unwrap_or_else(|e| {
            tracing::error!("Failed to serialize space state: {}", e);
            Vec::new()
        })
    }
+ fn deserialize_state(&mut self, state: &[u8]) -> Result<(), SpaceError> + where + C: DeserializeOwned, + P: DeserializeOwned, + A: DeserializeOwned, + K: DeserializeOwned, + { + // Deserialize using bincode + let serialized: SerializedState = bincode::deserialize(state) + .map_err(|e| SpaceError::DeserializationError { + message: format!("Failed to deserialize space state: {}", e), + })?; + + // Validate space ID matches (optional, for safety) + if serialized.space_id != self.space_id.0 { + return Err(SpaceError::DeserializationError { + message: format!( + "Space ID mismatch: expected {:?}, got {:?}", + self.space_id.0, serialized.space_id + ), + }); + } + + // Import data into channel store + // Convert serialized format back to the appropriate DataCollection type + let data_collections: Vec<(C, DC)> = serialized + .data + .into_iter() + .map(|(channel, items)| { + let mut collection = DC::default(); + for (data, _persist) in items { + let _ = collection.put(data); + } + (channel, collection) + }) + .collect(); + self.channel_store.import_data(data_collections); + + // Import continuations into channel store + // Convert serialized format back to the appropriate ContinuationCollection type + let cont_collections: Vec<(Vec, CC)> = serialized + .continuations + .into_iter() + .map(|(channels, items)| { + let mut collection = CC::default(); + for (patterns, continuation, persist) in items { + collection.put(patterns, continuation, persist); + } + (channels, collection) + }) + .collect(); + self.channel_store.import_continuations(cont_collections); + + // Import joins + self.channel_store.import_joins(serialized.joins); + + // Restore gensym counter - use max to prevent name collisions + // with channels that were generated after the checkpoint. + // This ensures restored state never reuses channel names that + // were temporarily in use before the rollback. 
+ let current_counter = self.channel_store.gensym_counter(); + self.channel_store.set_gensym_counter(std::cmp::max(current_counter, serialized.gensym_counter)); + + // Restore qualifier + self.qualifier = serialized.qualifier; + + Ok(()) + } +} + +// ============================================================================= +// Clone Implementation +// ============================================================================= + +impl Clone for GenericRSpace +where + CS: ChannelStore + Clone, + M: Match + Clone, + C: Clone + Eq + Hash + Send + Sync + 'static, + P: Clone + Send + Sync, + A: Clone + Send + Sync + std::fmt::Debug + 'static, + K: Clone + Send + Sync, + DC: DataCollection + Default + Clone + Send + Sync + 'static, + CC: ContinuationCollection + Default + Clone + Send + Sync, +{ + fn clone(&self) -> Self { + GenericRSpace { + channel_store: self.channel_store.clone(), + matcher: self.matcher.clone(), + space_id: self.space_id.clone(), + qualifier: self.qualifier, + history_store: self.history_store.clone(), + soft_checkpoint: self.soft_checkpoint.clone(), + replay_log: self.replay_log.clone(), + is_replay: self.is_replay, + replay_data: self.replay_data.clone(), + theory: self.theory.as_ref().map(|t| t.clone_box()), + soft_checkpoint_stack: self.soft_checkpoint_stack.clone(), + similarity_queries: self.similarity_queries.clone(), + next_continuation_id: std::sync::atomic::AtomicUsize::new( + self.next_continuation_id.load(std::sync::atomic::Ordering::Relaxed) // Relaxed sufficient for clone + ), + lazy_producers: self.lazy_producers.clone(), + } + } +} + +// ============================================================================= +// Debug Implementation +// ============================================================================= + +impl Debug for GenericRSpace +where + CS: ChannelStore + Debug, + M: Match + Debug, + C: Clone + Eq + Hash + Send + Sync + Debug + 'static, + P: Clone + Send + Sync + Debug, + A: Clone + Send + Sync + 
std::fmt::Debug, + K: Clone + Send + Sync + Debug, + DC: DataCollection + Default + Clone + Send + Sync + 'static, + CC: ContinuationCollection + Default + Clone + Send + Sync, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("GenericRSpace") + .field("space_id", &self.space_id) + .field("qualifier", &self.qualifier) + .field("has_history_store", &self.history_store.is_some()) + .field("is_replay", &self.is_replay) + .field("matcher", &self.matcher.matcher_name()) + .field("theory", &self.theory.as_ref().map(|t| t.name())) + .finish() + } +} + +// ============================================================================= +// SpaceAgent Implementation +// ============================================================================= + +impl SpaceAgent for GenericRSpace +where + CS: ChannelStore, + M: Match, + C: Clone + Eq + Hash + Send + Sync + AsRef<[u8]> + 'static, + P: Clone + PartialEq + Send + Sync + 'static, + A: Clone + Send + Sync + std::fmt::Debug + 'static, + K: Clone + Send + Sync + 'static, + DC: DataCollection + Default + Clone + Send + Sync + 'static, + CC: ContinuationCollection + Default + Clone + Send + Sync, +{ + fn space_id(&self) -> &SpaceId { + &self.space_id + } + + fn qualifier(&self) -> SpaceQualifier { + self.qualifier + } + + fn gensym(&mut self) -> Result { + self.channel_store.gensym(&self.space_id) + } + + fn produce( + &mut self, + channel: C, + data: A, + persist: bool, + priority: Option, + ) -> Result, Vec>, Produce)>, SpaceError> { + // Store priority for later use when storing data + let _priority = priority; + + // Validate data against theory before storing + if let Some(ref theory) = self.theory { + // Use Validatable trait for proper type-aware validation if available. + // For ListParWithRandom, this produces strings like "Nat(42)" or "String(hello)" + // that the theory can properly validate. 
+ use std::any::TypeId; + use models::rhoapi::ListParWithRandom; + use super::types::Validatable; + + let term = if TypeId::of::() == TypeId::of::() { + // SAFETY: We've verified the type matches + let data_ref: &A = &data; + let ptr = data_ref as *const A as *const ListParWithRandom; + let lp = unsafe { &*ptr }; + lp.to_validatable_string() + } else { + // Fallback to Debug format for other types + format!("{:?}", data) + }; + + self.validate_term(&term)?; + } + + // 0. Try the efficient matrix-based approach first for VectorDB similarity queries + // + // This uses the store-first architecture with SIMD-optimized matrix operations: + // 1. Store data in VectorDB (normalizes embedding, adds to matrix) + // 2. Query similarity matrix for matching continuations + // 3. Fire best match if found, otherwise data stays stored + // + // Returns: + // - Some(result): Handled by matrix approach (success or error) + // - None: No similarity queries for this channel, use standard produce + if let Some(result) = self.try_produce_with_similarity_matrix(&channel, data.clone(), persist) { + return result; + } + + // 1. Check for a continuation that can actually fire (all join channels have data) + // This uses find_fireable_continuation_with_similarity which: + // - Checks data availability on all channels + // - Verifies similarity requirements for VectorDB queries + if let Some((patterns, cont, cont_persist, join_channels)) = + self.find_fireable_continuation_with_similarity(&channel, &data) + { + // 2. 
Found a fireable match - remove data from all channels + // SmallVec: most joins have ≤4 channels, avoiding heap allocation + let mut matched_data: SmallVec<[RSpaceResult; 4]> = SmallVec::new(); + for (i, ch) in join_channels.iter().enumerate() { + if ch == &channel { + // This is our channel - use the data we're producing + matched_data.push(RSpaceResult { + channel: channel.clone(), + matched_datum: data.clone(), + removed_datum: data.clone(), + persistent: persist, + suffix_key: None, // Exact match + }); + } else if i < patterns.len() { + // Another channel in the join - remove matching data + if let Some(dc) = self.channel_store.get_data_collection_mut(ch) { + let pattern = &patterns[i]; + if let Some(found_data) = + dc.find_and_remove(|a| self.matcher.matches(pattern, a)) + { + matched_data.push(RSpaceResult { + channel: ch.clone(), + matched_datum: found_data.clone(), + removed_datum: found_data, + persistent: false, // Retrieved data is not persistent + suffix_key: None, // Exact match + }); + } + } + } + } + + // 3. Remove the continuation if not persistent + if !cont_persist { + let should_remove_join = if let Some(cc) = self.channel_store.get_continuation_collection_mut(&join_channels) + { + // Note: We compare patterns only since cont is a clone and pointer comparison won't work + cc.find_and_remove(|p, _k| p == patterns.as_slice()); + // Only remove join if no more continuations remain for this join pattern + cc.is_empty() + } else { + true // No collection means join can be removed + }; + + // Only remove join pattern if no more continuations exist for it + if should_remove_join { + self.channel_store.remove_join(&join_channels); + } + } + + // 4. Build ContResult + let cont_result = ContResult { + continuation: cont, + persistent: cont_persist, + peek: false, + channels: join_channels, + patterns, + }; + + let produce_event = Produce::default(); + + return Ok(Some((cont_result, matched_data.into_vec(), produce_event))); + } + + // 5. 
Check for matching continuation at prefix paths (PathMap prefix semantics) + // This enables produce on @[0,1,2] to trigger continuation at @[0,1] + // + // Formal Correspondence: PathMapStore.v (produce_triggers_prefix_continuation) + if let Some((patterns, cont, cont_persist, join_channels, _prefix_channel, match_idx, suffix_key)) = + self.find_matching_continuation_at_prefix(&channel, &data) + { + // Found a match at a prefix path - collect data from all channels in the join + // SmallVec: most joins have ≤4 channels, avoiding heap allocation + let mut matched_data: SmallVec<[RSpaceResult; 4]> = SmallVec::new(); + + for (i, ch) in join_channels.iter().enumerate() { + if i == match_idx { + // This is the prefix channel - use the data we're producing + // The result channel is the actual produce channel, not the prefix + // Include suffix key for data wrapping per design spec: + // Data at @[0,1,2] consumed at @[0,1] should become [2, data] + matched_data.push(RSpaceResult { + channel: channel.clone(), + matched_datum: data.clone(), + removed_datum: data.clone(), + persistent: persist, + suffix_key: if suffix_key.is_empty() { None } else { Some(suffix_key.clone()) }, + }); + } else if i < patterns.len() { + // Another channel in the join - try to find matching data + if let Some(dc) = self.channel_store.get_data_collection_mut(ch) { + let pattern = &patterns[i]; + if let Some(found_data) = + dc.find_and_remove(|a| self.matcher.matches(pattern, a)) + { + matched_data.push(RSpaceResult { + channel: ch.clone(), + matched_datum: found_data.clone(), + removed_datum: found_data, + persistent: false, + suffix_key: None, // Other channels in join are exact matches + }); + } + } + } + } + + // Remove the continuation if not persistent + if !cont_persist { + let should_remove_join = if let Some(cc) = self.channel_store.get_continuation_collection_mut(&join_channels) + { + // Note: We compare patterns only since cont is a clone and pointer comparison won't work + 
cc.find_and_remove(|p, _k| p == patterns.as_slice()); + // Only remove join if no more continuations remain for this join pattern + cc.is_empty() + } else { + true // No collection means join can be removed + }; + + // Only remove join pattern if no more continuations exist for it + if should_remove_join { + self.channel_store.remove_join(&join_channels); + } + } + + // Build ContResult - note that channels refer to the continuation's join channels + // (which includes the prefix), but the result contains the actual produce channel + let cont_result = ContResult { + continuation: cont, + persistent: cont_persist, + peek: false, + channels: join_channels, + patterns, + }; + + let produce_event = Produce::default(); + + return Ok(Some((cont_result, matched_data.into_vec(), produce_event))); + } + + // 6. No match found at exact or prefix paths - store the data + let dc = self.channel_store.get_or_create_data_collection(&channel); + + // Check if this is a VectorDB collection requiring embedding extraction + // Uses runtime type downcasting to detect VectorDBDataCollection + use std::any::Any; + use super::collections::VectorDBDataCollection; + + let dc_any: &mut dyn Any = dc; + if let Some(vec_dc) = dc_any.downcast_mut::>() { + // VectorDB collection detected - try to extract embedding from data + // The data type A in RhoISpace is ListParWithRandom, not Par directly. + // ListParWithRandom wraps Vec with random state for deterministic execution. + let data_any: &dyn Any = &data; + + // Try to extract embedding from data + // Note: Not all data sent to a VectorDB space has embeddings (e.g., sync channels like ready!(Nil)) + // We gracefully fallback to standard storage for non-vector data. 
+ let embedding_result = if let Some(list_par) = data_any.downcast_ref::() { + // Extract the first Par from the list + if let Some(data_par) = list_par.pars.first() { + let embedding_type = vec_dc.embedding_type(); + let dimensions = vec_dc.embedding_dimensions(); + extract_embedding_from_map(data_par, embedding_type, dimensions) + } else { + Err(SpaceError::EmbeddingExtractionError { + description: "ListParWithRandom contains no Par values".to_string(), + }) + } + } + // Fallback: try direct Par downcast (for unit tests or other contexts) + else if let Some(data_par) = data_any.downcast_ref::() { + let embedding_type = vec_dc.embedding_type(); + let dimensions = vec_dc.embedding_dimensions(); + extract_embedding_from_map(data_par, embedding_type, dimensions) + } + else { + Err(SpaceError::EmbeddingExtractionError { + description: format!( + "Data type not supported for embedding extraction: {:?}", + std::any::type_name::() + ), + }) + }; + + match embedding_result { + Ok(embedding) => { + // Store with embedding for similarity indexing + vec_dc.put_with_embedding_and_persist(data, embedding, persist)?; + } + Err(_) => { + // Graceful fallback: store without embedding for non-vector data + // This allows sync channels like ready!(Nil) to work within VectorDB spaces + vec_dc.put_with_persist(data, persist)?; + } + } + } else { + // Check if this is a PriorityQueue collection that needs priority handling + use super::collections::PriorityQueueDataCollection; + + let dc_any: &mut dyn Any = dc; + if let Some(pq_dc) = dc_any.downcast_mut::>() { + // PriorityQueue collection - use priority if specified + match priority { + Some(p) => pq_dc.put_with_priority_and_persist(data, p, persist)?, + None => pq_dc.put_with_persist(data, persist)?, + } + } else { + // Standard data collection - use put with persistence flag + // Priority is ignored for non-PriorityQueue collections + dc.put_with_persist(data, persist)?; + } + } + + Ok(None) + } + + fn consume( + &mut self, + 
channels: Vec, + patterns: Vec

, + continuation: K, + persist: bool, + peeks: BTreeSet, + ) -> Result, Vec>)>, SpaceError> { + if channels.len() != patterns.len() { + return Err(SpaceError::InvalidConfiguration { + description: format!( + "Channels and patterns length mismatch: {} vs {}", + channels.len(), + patterns.len() + ), + }); + } + + // ===================================================================== + // LAZY CHANNEL HANDLING + // ===================================================================== + // Check if any channel is a lazy result channel from a similarity query. + // Lazy channels are created in consume_with_similarity and registered + // in lazy_producers. When a consumer does `for (@doc <- resultCh)` on + // such a channel, we intercept here and lazily retrieve the next document. + // + // This enables TRUE lazy evaluation: + // - Documents are retrieved on-demand (not eagerly) + // - Early termination works (stop consuming = no more retrieval) + // - Memory efficient (only requested documents are cloned) + use std::any::Any; + use super::collections::VectorDBDataCollection; + + // For single-channel consumes, check if it's a lazy result channel + if channels.len() == 1 { + // Try to extract channel ID by downcasting to Par + let channel_ref: &C = &channels[0]; + let channel_any: &dyn Any = channel_ref; + if let Some(par) = channel_any.downcast_ref::() { + if let Some(channel_id) = extract_channel_id_from_par(par) { + // Check if this channel has a lazy producer + // Lazy allocation: skip if lazy_producers hasn't been allocated + let mut should_remove_producer = false; + let mut lazy_result: Option<(ContResult, Vec>)> = None; + + if let Some(producer) = self.lazy_producers.as_mut().and_then(|m| m.get_mut(&channel_id)) { + // Get the next document index from the producer + if let Some((doc_idx, score)) = producer.next_index() { + // Retrieve the document from the source VectorDB + let source_channel = producer.source_channel().clone(); + + // Get the VectorDB 
collection via the source channel + if let Some(dc) = self.channel_store.get_data_collection_mut(&source_channel) { + let dc_any: &mut dyn Any = dc; + if let Some(vec_dc) = dc_any.downcast_mut::>() { + // Retrieve document by index, respecting persistence semantics + // Persistent docs (stored with !!) return clone without removal + // Non-persistent docs (stored with !) are tombstoned + if let Some((data, _embedding)) = vec_dc.get_or_remove_by_index(doc_idx) { + tracing::info!( + "consume: lazy retrieval of doc {} (score={:.3}) from VectorDB", + doc_idx, score + ); + + // Build result with the retrieved document + // The data is already of type A (e.g., ListParWithRandom for RhoISpace) + let result = RSpaceResult { + channel: channels[0].clone(), + matched_datum: data.clone(), + removed_datum: data, + persistent: false, + suffix_key: None, + }; + + let cont_result = ContResult { + continuation: continuation.clone(), + persistent: persist, + peek: !peeks.is_empty(), + channels: channels.clone(), + patterns: patterns.clone(), + }; + + // Check if producer is exhausted after this retrieval + if producer.is_exhausted() { + should_remove_producer = true; + } + + lazy_result = Some((cont_result, vec![result])); + } else { + tracing::warn!( + "consume: lazy channel doc {} not found in VectorDB (may have been consumed)", + doc_idx + ); + } + } + } + } else { + // Producer exhausted - mark for removal + // The consume will then block waiting for data on this channel, + // which is the correct Rholang semantics for an empty channel. 
+ should_remove_producer = true; + // Fall through to normal consume behavior (will block waiting) + } + } + + // Remove producer outside the mutable borrow scope if needed + if should_remove_producer { + if let Some(m) = self.lazy_producers.as_mut() { + m.remove(&channel_id); + } + } + + // Return lazy result if we got one + if let Some(result) = lazy_result { + return Ok(Some(result)); + } + } + } + } + + // Try prefix-aware matching first if the store supports it + // This enables consume on @[0,1] to find data at @[0,1,2], @[0,1,3], etc. + // + // Uses atomic check_and_consume_with_prefix_atomic to avoid TOCTOU race: + // Previously, check_all_channels_match_with_prefix would peek data, then + // remove_matched_data_from_prefix_results would remove it. Concurrent + // consumers with overlapping prefixes could both peek the same data, + // but only one would successfully remove, leaving the other incomplete. + if self.channel_store.supports_prefix_semantics() { + if let Some(atomic_results) = self.check_and_consume_with_prefix_atomic(&channels, &patterns, &peeks) { + // Build result - use the actual channel where data was found + // Include suffix key for data wrapping per design spec: + // Data at @[0,1,2] consumed at @[0,1] should become [2, data] + let results: Vec> = atomic_results + .into_iter() + .map(|(channel, data, suffix_key)| { + RSpaceResult { + channel, + matched_datum: data.clone(), + removed_datum: data, + persistent: false, + // Suffix key: None for exact matches, Some([...]) for prefix matches + suffix_key: if suffix_key.is_empty() { None } else { Some(suffix_key) }, + } + }) + .collect(); + + let cont_result = ContResult { + continuation, + persistent: persist, + peek: !peeks.is_empty(), + channels: channels.clone(), + patterns, + }; + + return Ok(Some((cont_result, results))); + } + } else { + // Use exact-match semantics for non-prefix stores (HashMap, Array, etc.) 
+ // Single-pass atomic check-and-remove eliminates TOCTOU race and improves performance + if let Some(removed_data) = + self.check_and_remove_matched_data_atomic(&channels, &patterns, &peeks) + { + // Build result - no suffix key for exact matches + let results: Vec> = channels + .iter() + .zip(removed_data.into_iter()) + .map(|(ch, data)| RSpaceResult { + channel: ch.clone(), + matched_datum: data.clone(), + removed_datum: data, + persistent: false, + suffix_key: None, // Exact match - no suffix key + }) + .collect(); + + let cont_result = ContResult { + continuation, + persistent: persist, + peek: !peeks.is_empty(), + channels: channels.clone(), + patterns, + }; + + return Ok(Some((cont_result, results))); + } + } + + // No match found - store the continuation + // First, register join patterns + for _ch in &channels { + self.channel_store.put_join(channels.clone()); + // Break after first - put_join adds to all channels in the join + break; + } + + // Then store the continuation + let cc = self + .channel_store + .get_or_create_continuation_collection(&channels); + cc.put(patterns, continuation, persist); + + Ok(None) + } + + fn consume_with_modifiers( + &mut self, + channels: Vec, + patterns: Vec

, + modifiers: Vec>, + continuation: K, + persist: bool, + peeks: BTreeSet, + ) -> Result, Vec>)>, SpaceError> { + // Check if any pattern modifier is present + let has_modifiers = modifiers.iter().any(|m| !m.is_empty()); + + if !has_modifiers { + // No pattern modifiers - use standard consume + return self.consume(channels, patterns, continuation, persist, peeks); + } + + // Extract and validate pattern modifiers + // This prepares the data for VectorDB matching and provides early error detection + // Store (embedding, resolved_threshold, optional_metric, optional_top_k) + // threshold is resolved now so we can store it with the continuation + use std::any::Any; + use super::collections::{SimilarityMetric, VectorDBDataCollection, StoredSimilarityInfo}; + + let mut extracted_patterns: Vec, f32, Option, Option)>> = Vec::with_capacity(modifiers.len()); + + for (mods, channel) in modifiers.iter().zip(channels.iter()) { + if !mods.is_empty() { + // Extract modifier information from EFunction calls + let extracted = extract_modifiers_from_efunctions(mods)?; + + // Get the query embedding (required) + let embedding = extracted.query_embedding.ok_or_else(|| SpaceError::SimilarityMatchError { + reason: "Pattern modifier requires a query embedding".to_string(), + })?; + + // Use extracted metric or default + let resolved_metric = extracted.metric; + + // Use extracted threshold or resolve from collection's default + let resolved_threshold = if let Some(threshold) = extracted.threshold { + threshold + } else { + // Get threshold from VectorDB collection if available + if let Some(dc) = self.channel_store.get_data_collection(channel) { + let dc_any: &dyn Any = dc; + if let Some(vec_dc) = dc_any.downcast_ref::>() { + vec_dc.default_threshold() + } else { + 0.5 // Fallback for non-VectorDB collections + } + } else { + 0.5 // Fallback if collection doesn't exist yet + } + }; + + // Use extracted top_k if present + let top_k = extracted.top_k; + + 
extracted_patterns.push(Some((embedding, resolved_threshold, resolved_metric, top_k))); + } else { + extracted_patterns.push(None); + } + } + + // For each channel with a similarity pattern, try VectorDB matching + // When top-K is specified, we collect indices for lazy retrieval + // + // LAZY CHANNEL SEMANTICS: + // Instead of eagerly retrieving all documents, we: + // 1. Compute similarity scores and collect indices (not documents) + // 2. Create a lazy result channel (GPrivate) + // 3. Store a LazyResultProducer in the registry + // 4. Return the lazy channel to the continuation + // 5. Documents are retrieved on-demand when the channel is consumed + let mut similarity_results: Vec<(usize, Vec<(usize, f32)>, C)> = Vec::new(); // (channel_idx, sorted_indices, source_channel) + + for (i, (channel, extracted)) in channels.iter().zip(extracted_patterns.iter()).enumerate() { + if let Some((embedding, resolved_threshold, resolved_metric, top_k)) = extracted { + if let Some(dc) = self.channel_store.get_data_collection_mut(channel) { + // Try to downcast to VectorDBDataCollection + let dc_any: &mut dyn Any = dc; + if let Some(vec_dc) = dc_any.downcast_mut::>() { + let k = top_k.unwrap_or(1); + + // Use the new query() method which delegates to backend. + // This supports per-query metric override via resolved_metric. 
+ let similarity_fn = resolved_metric.as_ref().map(|m| m.as_str()); + let ranking_fn = Some("topk"); + // Use backend ResolvedArg (Int variant, not Integer) + let params = vec![super::ResolvedArg::Int(k as i64)]; + + // LAZY: Get indices only (no document retrieval or removal yet) + let indices = match vec_dc.query( + embedding, + similarity_fn, + Some(*resolved_threshold), + ranking_fn, + ¶ms, + ) { + Ok(results) => results, + Err(e) => { + tracing::warn!( + "VectorDB query failed on channel {}: {}", + i, e + ); + vec![] + } + }; + + if !indices.is_empty() { + tracing::info!( + "VectorDB similarity match on channel {}: found {} indices (threshold: {}, k: {}, metric: {:?})", + i, indices.len(), resolved_threshold, k, similarity_fn + ); + // Store indices along with source channel for lazy retrieval + similarity_results.push((i, indices, channel.clone())); + continue; + } + } else { + tracing::warn!( + "Similarity pattern on non-VectorDB collection at channel {} - using standard match", + i + ); + } + } + // No similarity match found for this channel - leave empty (will fail all_required_matched check) + } + // No similarity pattern for this channel - skip (will use standard matching later if needed) + } + + // Check if we got similarity matches for all channels that required them + let channels_with_patterns: Vec = extracted_patterns.iter() + .enumerate() + .filter_map(|(i, p)| if p.is_some() { Some(i) } else { None }) + .collect(); + let matched_channels: std::collections::HashSet = similarity_results.iter() + .map(|(i, _, _)| *i) + .collect(); + let all_required_matched = channels_with_patterns.iter() + .all(|i| matched_channels.contains(i)); + + if all_required_matched && !similarity_results.is_empty() { + // LAZY CHANNEL SEMANTICS: + // For each channel with similarity matches, create a lazy result channel. + // The continuation receives the lazy channel(s), and documents are retrieved + // on-demand when the channel is consumed via `for (@doc <- resultCh)`. 
+ // SmallVec: most joins have ≤4 channels, avoiding heap allocation + let mut results: SmallVec<[RSpaceResult; 4]> = SmallVec::new(); + + for (channel_idx, sorted_indices, source_channel) in similarity_results { + let pattern_channel = &channels[channel_idx]; + + // Create unforgeable result channel using UUID + let result_channel_id = Uuid::new_v4().as_bytes().to_vec(); + let result_channel_par = Par::default().with_unforgeables(vec![GUnforgeable { + unf_instance: Some(UnfInstance::GPrivateBody(GPrivate { + id: result_channel_id.clone(), + })), + }]); + + // Create lazy producer with indices and source channel + let producer = super::collections::LazyResultProducer::new( + sorted_indices.clone(), + source_channel, + ); + + tracing::info!( + "consume_with_similarity: created lazy channel {:?} with {} indices for channel {}", + &result_channel_id[..8], producer.total_matches(), channel_idx + ); + + // Store producer in registry keyed by result channel ID + // Lazy allocation: create HashMap on first use + self.lazy_producers + .get_or_insert_with(HashMap::new) + .insert(result_channel_id, producer); + + // Create result with lazy channel wrapped in the data type A + // For RhoISpace, A is ListParWithRandom, so we construct it and use Any to convert. + // The continuation body will receive this channel and consume from it. + // + // We use runtime type checking since A is generic but we know it must be + // ListParWithRandom for VectorDB lazy channels to work. 
+ use std::any::Any; + let lazy_result_lpwr = ListParWithRandom { + pars: vec![result_channel_par], + random_state: Vec::new(), // No random state needed for lazy channel + }; + + // Convert ListParWithRandom to type A using Any downcast + // This works because A is ListParWithRandom for RhoISpace + let lazy_result_any: Box = Box::new(lazy_result_lpwr.clone()); + if let Ok(lazy_result) = lazy_result_any.downcast::() { + results.push(RSpaceResult { + channel: pattern_channel.clone(), + matched_datum: (*lazy_result).clone(), + removed_datum: *lazy_result, + persistent: false, + suffix_key: None, + }); + } else { + tracing::warn!( + "consume_with_similarity: lazy channel feature requires A=ListParWithRandom, \ + falling back to standard matching for channel {}", + channel_idx + ); + // Skip this channel - will fall through to standard matching + continue; + } + } + + if !results.is_empty() { + tracing::info!( + "consume_with_similarity: returning {} lazy result channel(s)", + results.len() + ); + let cont_result = ContResult { + continuation, + persistent: persist, + peek: !peeks.is_empty(), + channels: channels.clone(), + patterns, + }; + return Ok(Some((cont_result, results.into_vec()))); + } + } + + // No similarity match found - store the continuation with similarity info + // so that produce() can wake it up when matching data arrives. + // + // Previously this was an interim fix that just returned None without storing, + // but now we implement the full fix by storing similarity patterns with the + // continuation and checking them in find_fireable_continuation(). 
+ if extracted_patterns.iter().any(|p| p.is_some()) { + // Register join patterns + for _ch in &channels { + self.channel_store.put_join(channels.clone()); + break; + } + + // Generate a unique continuation ID for the query matrix + use super::collections::{ContinuationId, SimilarityQueryMatrix}; + let cont_id = ContinuationId( + self.next_continuation_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed) // Relaxed sufficient - uniqueness only + ); + + // Add queries to the SimilarityQueryMatrix for each channel with a similarity pattern + // This enables the efficient matrix-based matching in produce() + for (channel_idx, (channel, extracted)) in channels.iter().zip(extracted_patterns.iter()).enumerate() { + if let Some((embedding, resolved_threshold, _resolved_metric, _top_k)) = extracted { + // Get embedding dimensions from the VectorDB collection + let dimensions = if let Some(dc) = self.channel_store.get_data_collection(channel) { + let dc_any: &dyn Any = dc; + if let Some(vec_dc) = dc_any.downcast_ref::>() { + vec_dc.embedding_dimensions() + } else { + embedding.len() // Use query embedding dimensions as fallback + } + } else { + embedding.len() + }; + + // Get or create the SimilarityQueryMatrix for this channel + // Lazy allocation: create HashMap on first use + let query_matrix = self.similarity_queries + .get_or_insert_with(HashMap::new) + .entry(channel.clone()) + .or_insert_with(|| SimilarityQueryMatrix::new(dimensions)); + + // Add the query to the matrix + if let Err(e) = query_matrix.add_query(embedding, *resolved_threshold, cont_id, channel_idx, persist) { + tracing::warn!( + "consume_with_similarity: failed to add query to matrix: {:?}", + e + ); + } + } + } + + // Store the continuation with similarity info for later matching + let similarity_info = StoredSimilarityInfo::new(extracted_patterns); + let cc = self + .channel_store + .get_or_create_continuation_collection(&channels); + cc.put_with_similarity(patterns, continuation, persist, 
Some(similarity_info)); + + return Ok(None); + } + + // No similarity patterns at all - use standard consume + self.consume(channels, patterns, continuation, persist, peeks) + } + + fn install( + &mut self, + channels: Vec, + patterns: Vec

, + continuation: K, + ) -> Result)>, SpaceError> { + // Install is like consume with persist=true, but always stores + // First, register join patterns + for _ch in &channels { + self.channel_store.put_join(channels.clone()); + break; + } + + // Store as persistent continuation + let cc = self + .channel_store + .get_or_create_continuation_collection(&channels); + cc.put(patterns.clone(), continuation.clone(), true); + + // Check if there's already matching data (single-pass atomic operation) + let peeks = BTreeSet::new(); + if let Some(removed_data) = + self.check_and_remove_matched_data_atomic(&channels, &patterns, &peeks) + { + return Ok(Some((continuation, removed_data))); + } + + Ok(None) + } + + fn get_data(&self, channel: &C) -> Vec> { + self.channel_store + .get_data_collection(channel) + .map(|dc| { + dc.all_data() + .into_iter() + .map(|a| Datum { + a: a.clone(), + persist: false, + source: Produce::default(), + }) + .collect() + }) + .unwrap_or_default() + } + + fn get_waiting_continuations(&self, channels: Vec) -> Vec> { + self.channel_store + .get_continuation_collection(&channels) + .map(|cc| { + cc.all_continuations() + .into_iter() + .map(|(patterns, cont, persist)| WaitingContinuation { + patterns: patterns.to_vec(), + continuation: cont.clone(), + persist, + source: Default::default(), + peeks: BTreeSet::new(), + }) + .collect() + }) + .unwrap_or_default() + } + + fn get_joins(&self, channel: C) -> Vec> { + self.channel_store.get_joins(&channel) + } +} + +// ============================================================================= +// CheckpointableSpace Implementation +// ============================================================================= + +impl CheckpointableSpace for GenericRSpace +where + CS: ChannelStore + Clone, + M: Match + Clone, + C: Clone + Eq + Hash + Send + Sync + Serialize + DeserializeOwned + AsRef<[u8]> + 'static, + P: Clone + PartialEq + Send + Sync + Serialize + DeserializeOwned + 'static, + A: Clone + Send + Sync 
+ std::fmt::Debug + Serialize + DeserializeOwned + 'static, + K: Clone + Send + Sync + Serialize + DeserializeOwned + 'static, + DC: DataCollection + Default + Clone + Send + Sync + 'static, + CC: ContinuationCollection + Default + Clone + Send + Sync, +{ + fn create_checkpoint(&mut self) -> Result { + // All spaces support checkpointing, but Temp spaces have ephemeral data + // that is cleared on restore (only the qualifier and empty collections persist) + let state = self.serialize_state(); + let root = Blake2b256Hash::new(&state); + + // Store in history if available + if let Some(ref store) = self.history_store { + store.store(root.clone(), &state)?; + } + + Ok(Checkpoint { + root, + log: Log::default(), + }) + } + + fn create_soft_checkpoint(&mut self) -> SoftCheckpoint { + // Store a snapshot of the channel store for later restoration + let gensym_counter = self.channel_store.gensym_counter(); + self.soft_checkpoint_stack.push((self.channel_store.snapshot(), gensym_counter)); + + // Return a SoftCheckpoint token (the actual data is in our stack) + // The HotStoreState is left empty since we use our own snapshotting + let cache_snapshot = HotStoreState { + continuations: DashMap::new(), + installed_continuations: DashMap::new(), + data: DashMap::new(), + joins: DashMap::new(), + installed_joins: DashMap::new(), + }; + SoftCheckpoint { + cache_snapshot, + log: Log::default(), + produce_counter: BTreeMap::new(), + } + } + + fn revert_to_soft_checkpoint( + &mut self, + _checkpoint: SoftCheckpoint, + ) -> Result<(), SpaceError> { + // Pop and restore from our snapshot stack + if let Some((snapshot, snapshot_gensym_counter)) = self.soft_checkpoint_stack.pop() { + // Keep track of current counter before restoring + let current_counter = self.channel_store.gensym_counter(); + self.channel_store = snapshot; + // Use max to prevent name collisions with channels generated after checkpoint + self.channel_store.set_gensym_counter(std::cmp::max(current_counter, 
snapshot_gensym_counter));
+            Ok(())
+        } else {
+            Err(SpaceError::CheckpointError {
+                description: "No soft checkpoint to revert to".to_string(),
+            })
+        }
+    }
+
+    /// Restore the space to a previously checkpointed state identified by `root`.
+    ///
+    /// Fails if no history store is configured, or if `root` is unknown to it.
+    fn reset(&mut self, root: &Blake2b256Hash) -> Result<(), SpaceError> {
+        // Retrieve state from history store
+        if let Some(ref store) = self.history_store {
+            if !store.contains(root) {
+                return Err(SpaceError::CheckpointError {
+                    description: format!("Checkpoint root not found: {}", root),
+                });
+            }
+
+            let state = store.retrieve(root)?;
+            self.deserialize_state(&state)?;
+
+            Ok(())
+        } else {
+            Err(SpaceError::CheckpointError {
+                description: "No history store configured for reset".to_string(),
+            })
+        }
+    }
+
+    /// Drop all tuplespace state (data, continuations, joins) and any pending
+    /// soft checkpoints.
+    fn clear(&mut self) -> Result<(), SpaceError> {
+        self.channel_store.clear();
+        // BUGFIX: also drain the soft-checkpoint stack. create_soft_checkpoint()
+        // pushes snapshots onto soft_checkpoint_stack and revert_to_soft_checkpoint()
+        // pops them; leaving entries in place would let a revert performed after
+        // clear() resurrect pre-clear state.
+        self.soft_checkpoint_stack.clear();
+        self.soft_checkpoint = None;
+        Ok(())
+    }
+}
+
+// =============================================================================
+// ReplayableSpace Implementation
+// =============================================================================
+
+impl ReplayableSpace for GenericRSpace
+where
+    CS: ChannelStore + Clone,
+    M: Match + Clone,
+    C: Clone + Eq + Hash + Send + Sync + Serialize + DeserializeOwned + AsRef<[u8]> + 'static,
+    P: Clone + PartialEq + Send + Sync + Serialize + DeserializeOwned + 'static,
+    A: Clone + Send + Sync + std::fmt::Debug + Serialize + DeserializeOwned + 'static,
+    K: Clone + Send + Sync + Serialize + DeserializeOwned + 'static,
+    DC: DataCollection + Default + Clone + Send + Sync + 'static,
+    CC: ContinuationCollection + Default + Clone + Send + Sync,
+{
+    /// Prime replay bookkeeping from `log`, then reset the space to `start_root`.
+    fn rig_and_reset(&mut self, start_root: Blake2b256Hash, log: Log) -> Result<(), SpaceError> {
+        // Call rig first to populate replay_data
+        ReplayableSpace::rig(self, log.clone())?;
+
+        // Reset to the starting state
+        CheckpointableSpace::reset(self, &start_root)?;
+
+        // Store the log for replay verification
+        self.replay_log = Some(log);
+        self.is_replay = true;
+
+        Ok(())
+    }
+
+    fn rig(&self, log: Log) -> Result<(), SpaceError> {
+        // Partition log into io_events and comm_events
+        let (io_events, comm_events): (Vec<_>, Vec<_>) =
+            log.iter().partition(|event| match event {
+                Event::IoEvent(IOEvent::Produce(_)) => true,
+                Event::IoEvent(IOEvent::Consume(_)) => true,
+                Event::Comm(_) => false,
+            });
+
+        // Create set of IOEvents for lookup
+        let new_stuff: HashSet<_> = io_events.into_iter().collect();
+
+        // Clear and populate replay_data (MultisetMultiMap uses DashMap, methods take &self)
+        // NOTE: this is why rig() can take &self despite mutating replay bookkeeping —
+        // DashMap provides interior mutability.
+        self.replay_data.clear();
+
+        for event in comm_events {
+            match event {
+                Event::Comm(comm) => {
+                    let (consume, produces) = (comm.consume.clone(), comm.produces.clone());
+                    let mut io_events: Vec = produces
+                        .into_iter()
+                        .map(IOEvent::Produce)
+                        .collect();
+                    // Ordering matters: the consume event is bound first, followed by
+                    // the produces that participated in the COMM.
+                    io_events.insert(0, IOEvent::Consume(consume));
+
+                    for io_event in io_events {
+                        let io_event_converted = Event::IoEvent(io_event.clone());
+                        // Only bind IO events that actually appear in the log being replayed.
+                        if new_stuff.contains(&io_event_converted) {
+                            self.replay_data.add_binding(io_event, comm.clone());
+                        }
+                    }
+                }
+                // Unreachable by construction (the partition above routes only
+                // Event::Comm here); kept as a defensive error rather than a panic.
+                _ => return Err(SpaceError::ReplayError {
+                    description: "Only COMM events expected in replay log".to_string(),
+                }),
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Verify that replay consumed every COMM event that rig() registered.
+    fn check_replay_data(&self) -> Result<(), SpaceError> {
+        if !self.is_replay {
+            return Err(SpaceError::ReplayError {
+                description: "Not in replay mode".to_string(),
+            });
+        }
+
+        // Verify that all COMM events from replay_log were consumed
+        // (MultisetMultiMap uses DashMap, methods take &self - no lock needed)
+        if self.replay_data.is_empty() {
+            Ok(())
+        } else {
+            Err(SpaceError::ReplayError {
+                description: format!(
+                    "Unused COMM event: replay_data has {} elements left",
+                    self.replay_data.map.len()
+                ),
+            })
+        }
+    }
+
+    fn is_replay(&self) -> bool {
+        self.is_replay
+    }
+
+    fn update_produce(&mut self, produce_ref: Produce) {
+        // Record the produce result for replay verification by updating
+        // matching produce refs in the replay log
+        // (both standalone IO produce events and produces embedded in COMM events).
+        if let Some(ref mut log) = self.replay_log {
+            for event in log.iter_mut() {
+                match event {
Event::IoEvent(IOEvent::Produce(produce)) => { + if produce.hash == produce_ref.hash { + *produce = produce_ref.clone(); + } + } + Event::Comm(comm) => { + for produce in comm.produces.iter_mut() { + if produce.hash == produce_ref.hash { + *produce = produce_ref.clone(); + } + } + } + _ => {} + } + } + } + } +} + +// ============================================================================= +// Builder Pattern +// ============================================================================= + +/// Builder for constructing GenericRSpace instances. +/// +/// The builder supports two usage patterns: +/// 1. **Simple (Bag collections)**: Use default DC/CC parameters for backward compatibility +/// 2. **Advanced (custom collections)**: Specify DC/CC to use Queue, Stack, Set, etc. +/// +/// # Example with default Bag collections +/// ```ignore +/// let space = GenericRSpaceBuilder::<_, _, Channel, Pattern, Data, Continuation>::new() +/// .with_channel_store(store) +/// .with_matcher(matcher) +/// .build()?; +/// ``` +/// +/// # Example with custom collections +/// ```ignore +/// let space = GenericRSpaceBuilder::<_, _, Channel, Pattern, Data, Cont, QueueDC, QueueCC>::new() +/// .with_channel_store(store) +/// .with_matcher(matcher) +/// .build()?; +/// ``` +pub struct GenericRSpaceBuilder +where + CS: ChannelStore, + M: Match, +{ + channel_store: Option, + matcher: Option, + space_id: Option, + qualifier: SpaceQualifier, + history_store: Option, + theory: Option, +} + +impl GenericRSpaceBuilder +where + CS: ChannelStore, + M: Match, +{ + /// Create a new builder with default values. + pub fn new() -> Self { + GenericRSpaceBuilder { + channel_store: None, + matcher: None, + space_id: None, + qualifier: SpaceQualifier::Default, + history_store: None, + theory: None, + } + } + + /// Set the channel store. + pub fn with_channel_store(mut self, store: CS) -> Self { + self.channel_store = Some(store); + self + } + + /// Set the matcher. 
+ pub fn with_matcher(mut self, matcher: M) -> Self { + self.matcher = Some(matcher); + self + } + + /// Set the space ID. + pub fn with_space_id(mut self, id: SpaceId) -> Self { + self.space_id = Some(id); + self + } + + /// Set the qualifier. + pub fn with_qualifier(mut self, qualifier: SpaceQualifier) -> Self { + self.qualifier = qualifier; + self + } + + /// Set the history store. + pub fn with_history_store(mut self, store: BoxedHistoryStore) -> Self { + self.history_store = Some(store); + self + } + + /// Set the theory for data validation. + /// + /// When a theory is set, all data produced to the space will be validated + /// against it before being stored. Invalid data will be rejected. + pub fn with_theory(mut self, theory: BoxedTheory) -> Self { + self.theory = Some(theory); + self + } + + /// Build the GenericRSpace. + /// + /// # Errors + /// + /// Returns `SpaceError::BuilderIncomplete` if channel_store or matcher is not set. + /// + /// # Examples + /// + /// ```ignore + /// let space = GenericRSpaceBuilder::new() + /// .with_channel_store(store) + /// .with_matcher(matcher) + /// .build()?; + /// ``` + pub fn build(self) -> Result, SpaceError> { + let channel_store = self.channel_store.ok_or(SpaceError::BuilderIncomplete { + builder: "GenericRSpaceBuilder", + missing_field: "channel_store", + })?; + let matcher = self.matcher.ok_or(SpaceError::BuilderIncomplete { + builder: "GenericRSpaceBuilder", + missing_field: "matcher", + })?; + let space_id = self.space_id.unwrap_or_else(SpaceId::default_space); + + let mut space = GenericRSpace::new(channel_store, matcher, space_id, self.qualifier); + + if let Some(history_store) = self.history_store { + space.history_store = Some(history_store); + } + + if let Some(theory) = self.theory { + space.theory = Some(theory); + } + + Ok(space) + } + + /// Build the GenericRSpace, panicking if incomplete. + /// + /// This is a convenience method for cases where you're certain the builder + /// is complete. 
Prefer `build()` for production code. + /// + /// # Panics + /// + /// Panics if channel_store or matcher is not set. + pub fn build_unchecked(self) -> GenericRSpace { + self.build().expect("GenericRSpaceBuilder incomplete") + } +} + +impl Default for GenericRSpaceBuilder +where + CS: ChannelStore, + M: Match, +{ + fn default() -> Self { + Self::new() + } +} + +// ============================================================================= +// ISpace Implementation +// ============================================================================= +// +// This implementation allows GenericRSpace to be used where ISpace is expected, +// enabling integration with the existing reducer infrastructure. + +use rspace_plus_plus::rspace::{ + errors::RSpaceError, + internal::Row, + rspace_interface::{ISpace, MaybeConsumeResult}, +}; + +impl ISpace for GenericRSpace +where + CS: ChannelStore + Clone, + M: Match + Clone, + C: Clone + Eq + Hash + Send + Sync + Serialize + DeserializeOwned + AsRef<[u8]> + 'static, + P: Clone + PartialEq + Send + Sync + Serialize + DeserializeOwned + 'static, + A: Clone + Send + Sync + std::fmt::Debug + Serialize + DeserializeOwned + 'static, + K: Clone + Send + Sync + Serialize + DeserializeOwned + 'static, + DC: DataCollection + Default + Clone + Send + Sync + 'static, + CC: ContinuationCollection + Default + Clone + Send + Sync, +{ + fn create_checkpoint(&mut self) -> Result { + CheckpointableSpace::create_checkpoint(self) + .map_err(|e| RSpaceError::InterpreterError(e.to_string())) + } + + fn get_data(&self, channel: &C) -> Vec> { + SpaceAgent::get_data(self, channel) + } + + fn get_waiting_continuations(&self, channels: Vec) -> Vec> { + SpaceAgent::get_waiting_continuations(self, channels) + } + + fn get_joins(&self, channel: C) -> Vec> { + SpaceAgent::get_joins(self, channel) + } + + fn clear(&mut self) -> Result<(), RSpaceError> { + CheckpointableSpace::clear(self) + .map_err(|e| RSpaceError::InterpreterError(e.to_string())) + } + + fn 
reset(&mut self, root: &Blake2b256Hash) -> Result<(), RSpaceError> { + CheckpointableSpace::reset(self, root) + .map_err(|e| RSpaceError::InterpreterError(e.to_string())) + } + + fn consume_result( + &mut self, + channels: Vec, + patterns: Vec

, + ) -> Result)>, RSpaceError> { + // consume_result is a peek-style operation that returns data without storing continuation. + // It finds a waiting continuation for the channels and returns matching data if available. + // + // Semantics: + // 1. Find waiting continuations for these channels + // 2. Find one whose patterns match the provided patterns + // 3. Look for data matching those patterns on each channel + // 4. Return (continuation.k, matched_data) if all succeed + + // Step 1: Get waiting continuations for these channels + let waiting_continuations = SpaceAgent::get_waiting_continuations(self, channels.clone()); + + // Step 2: Find a continuation with matching patterns + let matching_continuation = waiting_continuations.iter().find(|wc| { + wc.patterns.len() == patterns.len() + && wc + .patterns + .iter() + .zip(patterns.iter()) + .all(|(wc_pat, pat)| wc_pat == pat) + }); + + let continuation = match matching_continuation { + Some(wc) => wc.continuation.clone(), + None => { + // No matching continuation found - nothing to return + return Ok(None); + } + }; + + // Step 3: Find matching data on each channel + // SmallVec optimization: most joins have ≤4 channels, stack-allocate for common case + let mut matched_data: SmallVec<[A; 4]> = SmallVec::new(); + for (ch, pattern) in channels.iter().zip(patterns.iter()) { + let data = SpaceAgent::get_data(self, ch); + let matching = data.iter().find(|d| self.matcher.matches(pattern, &d.a)); + if let Some(datum) = matching { + matched_data.push(datum.a.clone()); + } else { + // No matching data on this channel + return Ok(None); + } + } + + // Step 4: Return the continuation and matched data + Ok(Some((continuation, matched_data.into_vec()))) + } + + fn to_map(&self) -> HashMap, Row> { + // Convert channel store state to a HashMap + // This is primarily for debugging and inspection + let map = HashMap::new(); + + // For each channel, collect data and continuations + // The channel_store doesn't expose iteration 
directly, so this is approximate + // A full implementation would need ChannelStore to expose its channels + + map + } + + fn create_soft_checkpoint(&mut self) -> SoftCheckpoint { + CheckpointableSpace::create_soft_checkpoint(self) + } + + fn revert_to_soft_checkpoint( + &mut self, + checkpoint: SoftCheckpoint, + ) -> Result<(), RSpaceError> { + CheckpointableSpace::revert_to_soft_checkpoint(self, checkpoint) + .map_err(|e| RSpaceError::InterpreterError(e.to_string())) + } + + fn consume( + &mut self, + channels: Vec, + patterns: Vec

, + continuation: K, + persist: bool, + peeks: BTreeSet, + ) -> Result, Vec>)>, RSpaceError> { + SpaceAgent::consume(self, channels, patterns, continuation, persist, peeks) + .map_err(|e| RSpaceError::InterpreterError(e.to_string())) + } + + fn consume_with_modifiers( + &mut self, + channels: Vec, + patterns: Vec

,
+        modifiers_bytes: Vec>,
+        continuation: K,
+        persist: bool,
+        peeks: BTreeSet,
+    ) -> Result, RSpaceError> {
+        use prost::Message;
+
+        // Deserialize the pattern modifiers from bytes.
+        // Each byte buffer contains length-prefixed (4-byte LE) serialized
+        // EFunction messages; an empty buffer means "no modifiers" for that channel.
+        let deserialized_modifiers: Vec> = modifiers_bytes
+            .into_iter()
+            .map(|bytes| {
+                if bytes.is_empty() {
+                    vec![]
+                } else {
+                    // Decode length-prefixed EFunction messages
+                    let mut efunctions = Vec::new();
+                    let mut offset = 0;
+                    while offset + 4 <= bytes.len() {
+                        // Read 4-byte little-endian length
+                        let len = u32::from_le_bytes([
+                            bytes[offset],
+                            bytes[offset + 1],
+                            bytes[offset + 2],
+                            bytes[offset + 3],
+                        ]) as usize;
+                        offset += 4;
+
+                        if offset + len <= bytes.len() {
+                            // BUGFIX: previously a decode failure was swallowed by
+                            // `if let Ok(..)`, silently dropping the modifier. Log it
+                            // so malformed payloads are diagnosable; decoding of the
+                            // remaining messages still proceeds (best-effort).
+                            match EFunction::decode(&bytes[offset..offset + len]) {
+                                Ok(ef) => efunctions.push(ef),
+                                Err(e) => tracing::warn!(
+                                    "consume_with_modifiers: failed to decode EFunction modifier: {}",
+                                    e
+                                ),
+                            }
+                            offset += len;
+                        } else {
+                            // Declared length overruns the buffer: the payload is
+                            // truncated/corrupt. Log and stop rather than mis-decode.
+                            tracing::warn!(
+                                "consume_with_modifiers: truncated modifier buffer \
+                                 (declared len {} at offset {}, buffer len {})",
+                                len, offset, bytes.len()
+                            );
+                            break;
+                        }
+                    }
+                    efunctions
+                }
+            })
+            .collect();
+
+        // Call the SpaceAgent consume_with_modifiers method with deserialized modifiers
+        SpaceAgent::consume_with_modifiers(self, channels, patterns, deserialized_modifiers, continuation, persist, peeks)
+            .map_err(|e: SpaceError| RSpaceError::InterpreterError(e.to_string()))
+    }
+
+    fn produce(
+        &mut self,
+        channel: C,
+        data: A,
+        persist: bool,
+        priority: Option,
+    ) -> Result, Vec>, Produce)>, RSpaceError> {
+        SpaceAgent::produce(self, channel, data, persist, priority)
+            .map_err(|e| RSpaceError::InterpreterError(e.to_string()))
+    }
+
+    fn install(
+        &mut self,
+        channels: Vec,
+        patterns: Vec

, + continuation: K, + ) -> Result)>, RSpaceError> { + SpaceAgent::install(self, channels, patterns, continuation) + .map_err(|e| RSpaceError::InterpreterError(e.to_string())) + } + + fn rig_and_reset(&mut self, start_root: Blake2b256Hash, log: Log) -> Result<(), RSpaceError> { + ReplayableSpace::rig_and_reset(self, start_root, log) + .map_err(|e| RSpaceError::InterpreterError(e.to_string())) + } + + fn rig(&self, log: Log) -> Result<(), RSpaceError> { + ReplayableSpace::rig(self, log) + .map_err(|e| RSpaceError::InterpreterError(e.to_string())) + } + + fn check_replay_data(&self) -> Result<(), RSpaceError> { + ReplayableSpace::check_replay_data(self) + .map_err(|e| RSpaceError::InterpreterError(e.to_string())) + } + + fn is_replay(&self) -> bool { + ReplayableSpace::is_replay(self) + } + + fn update_produce(&mut self, produce: Produce) { + ReplayableSpace::update_produce(self, produce) + } +} + +// ============================================================================= +// Tests +// ============================================================================= + +#[cfg(test)] +mod tests { + use super::*; + use crate::rust::interpreter::spaces::channel_store::HashMapChannelStore; + use crate::rust::interpreter::spaces::collections::{ + BagContinuationCollection, BagDataCollection, + }; + use crate::rust::interpreter::spaces::matcher::WildcardMatch; + + use serde::{Serialize, Deserialize}; + + /// Test channel type - newtype wrapper around Vec that implements necessary traits + /// for both HashMap store (From for gensym) and PathMap suffix key semantics (AsRef<[u8]>). 
+ #[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] + struct TestChannel(Vec); + + impl From for TestChannel { + fn from(n: usize) -> Self { + TestChannel(vec![n as u8]) + } + } + + impl AsRef<[u8]> for TestChannel { + fn as_ref(&self) -> &[u8] { + &self.0 + } + } + + type TestPattern = i32; // Use same type as data for simpler testing + type TestData = i32; + type TestCont = String; + + /// Helper to create a test channel from a usize for convenience. + fn chan(n: usize) -> TestChannel { + TestChannel::from(n) + } + + fn create_test_space() -> GenericRSpace< + HashMapChannelStore, BagContinuationCollection>, + WildcardMatch, + > { + let store = HashMapChannelStore::new(BagDataCollection::new, BagContinuationCollection::new); + let matcher = WildcardMatch::::new(); + GenericRSpace::new(store, matcher, SpaceId::default_space(), SpaceQualifier::Default) + } + + #[test] + fn test_generic_rspace_creation() { + let space = create_test_space(); + assert_eq!(space.qualifier(), SpaceQualifier::Default); + assert!(!space.has_history_store()); + } + + #[test] + fn test_gensym() { + let mut space = create_test_space(); + + let ch1 = space.gensym().expect("gensym should succeed"); + let ch2 = space.gensym().expect("gensym should succeed"); + let ch3 = space.gensym().expect("gensym should succeed"); + + assert_ne!(ch1, ch2); + assert_ne!(ch2, ch3); + assert_ne!(ch1, ch3); + } + + #[test] + fn test_produce_stores_data() { + let mut space = create_test_space(); + + let result = SpaceAgent::produce(&mut space, chan(0), 42, false, None); + assert!(result.is_ok()); + assert!(result.unwrap().is_none()); // No matching continuation + + let data = SpaceAgent::get_data(&space, &chan(0)); + assert_eq!(data.len(), 1); + assert_eq!(data[0].a, 42); + } + + #[test] + fn test_consume_stores_continuation() { + let mut space = create_test_space(); + + let channels = vec![chan(0)]; + let patterns = vec![0i32]; // Use i32 pattern + let continuation = 
"cont".to_string(); + + let result = SpaceAgent::consume(&mut space, channels.clone(), patterns, continuation, false, BTreeSet::new()); + assert!(result.is_ok()); + assert!(result.unwrap().is_none()); // No matching data + + let conts = SpaceAgent::get_waiting_continuations(&space, channels); + assert_eq!(conts.len(), 1); + } + + #[test] + fn test_get_joins() { + let mut space = create_test_space(); + + // Register a join by consuming on multiple channels + let channels = vec![chan(0), chan(1)]; + let patterns = vec![1i32, 2i32]; // Use i32 patterns + let continuation = "cont".to_string(); + + let _ = SpaceAgent::consume(&mut space, channels.clone(), patterns, continuation, false, BTreeSet::new()); + + let joins = SpaceAgent::get_joins(&space, chan(0)); + assert!(!joins.is_empty()); + assert!(joins.contains(&channels)); + } + + #[test] + fn test_clear() { + let mut space = create_test_space(); + + SpaceAgent::produce(&mut space, chan(0), 42, false, None).expect("produce should succeed"); + assert!(!SpaceAgent::get_data(&space, &chan(0)).is_empty()); + + CheckpointableSpace::clear(&mut space).expect("clear should succeed"); + assert!(SpaceAgent::get_data(&space, &chan(0)).is_empty()); + } + + #[test] + fn test_builder() { + let store = HashMapChannelStore::::new( + BagDataCollection::new, BagContinuationCollection::new, + ); + let matcher = WildcardMatch::::new(); + + let space: GenericRSpace<_, _> = + GenericRSpaceBuilder::new() + .with_channel_store(store) + .with_matcher(matcher) + .with_space_id(SpaceId::new(vec![1, 2, 3])) + .with_qualifier(SpaceQualifier::Temp) + .build() + .expect("Builder should succeed with all required fields"); + + assert_eq!(space.qualifier(), SpaceQualifier::Temp); + } + + #[test] + fn test_builder_incomplete_returns_error() { + // Missing channel_store + let result = GenericRSpaceBuilder::< + HashMapChannelStore, BagContinuationCollection>, + WildcardMatch + >::new() + .with_matcher(WildcardMatch::::new()) + .build(); + + 
assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + matches!(err, SpaceError::BuilderIncomplete { builder: "GenericRSpaceBuilder", .. }), + "Expected BuilderIncomplete error for missing channel_store, got: {:?}", err + ); + + // Missing matcher + let store = HashMapChannelStore::::new( + BagDataCollection::new, BagContinuationCollection::new, + ); + let result = GenericRSpaceBuilder::<_, WildcardMatch>::new() + .with_channel_store(store) + .build(); + + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!( + matches!(err, SpaceError::BuilderIncomplete { builder: "GenericRSpaceBuilder", .. }), + "Expected BuilderIncomplete error for missing matcher, got: {:?}", err + ); + } + + #[test] + fn test_temp_space_checkpoint_succeeds() { + // Temp spaces now support checkpointing - data is ephemeral and cleared on restore + let store = HashMapChannelStore::::new( + BagDataCollection::new, BagContinuationCollection::new, + ); + let matcher = WildcardMatch::::new(); + + let mut space: GenericRSpace<_, _> = + GenericRSpace::new(store, matcher, SpaceId::default_space(), SpaceQualifier::Temp); + + let result: Result = CheckpointableSpace::create_checkpoint(&mut space); + assert!(result.is_ok(), "Temp spaces should support checkpointing"); + } + + #[test] + fn test_debug_output() { + let space = create_test_space(); + let debug = format!("{:?}", space); + assert!(debug.contains("GenericRSpace")); + assert!(debug.contains("Default")); + } + + #[test] + fn test_theory_not_configured() { + let space = create_test_space(); + assert!(!space.has_theory()); + assert!(space.theory().is_none()); + + // Validation should pass when no theory is configured + let result = space.validate_term("anything"); + assert!(result.is_ok()); + } + + #[test] + fn test_theory_validation_passes() { + use crate::rust::interpreter::spaces::types::SimpleTypeTheory; + + let mut space = create_test_space(); + let theory = SimpleTypeTheory::new("TestTheory", 
vec!["Nat".to_string(), "Int".to_string()]); + space.set_theory(Some(Box::new(theory))); + + assert!(space.has_theory()); + assert!(space.theory().is_some()); + assert_eq!(space.theory().unwrap().name(), "TestTheory"); + + // These should pass validation + assert!(space.validate_term("Nat(42)").is_ok()); + assert!(space.validate_term("Int(-5)").is_ok()); + } + + #[test] + fn test_theory_validation_fails() { + use crate::rust::interpreter::spaces::types::SimpleTypeTheory; + + let mut space = create_test_space(); + let theory = SimpleTypeTheory::new("NatOnly", vec!["Nat".to_string()]); + space.set_theory(Some(Box::new(theory))); + + // This should fail validation + let result = space.validate_term("String(hello)"); + assert!(result.is_err()); + + if let Err(SpaceError::TheoryValidationError { theory_name, term, .. }) = result { + assert_eq!(theory_name, "NatOnly"); + assert_eq!(term, "String(hello)"); + } else { + panic!("Expected TheoryValidationError"); + } + } + + #[test] + fn test_builder_with_theory() { + use crate::rust::interpreter::spaces::types::SimpleTypeTheory; + + let store = HashMapChannelStore::::new( + BagDataCollection::new, BagContinuationCollection::new, + ); + let matcher = WildcardMatch::::new(); + let theory = SimpleTypeTheory::new("BuilderTheory", vec!["Test".to_string()]); + + let space: GenericRSpace<_, _> = + GenericRSpaceBuilder::new() + .with_channel_store(store) + .with_matcher(matcher) + .with_theory(Box::new(theory)) + .build() + .expect("Builder should succeed with all required fields"); + + assert!(space.has_theory()); + assert_eq!(space.theory().unwrap().name(), "BuilderTheory"); + } + + #[test] + fn test_config_includes_theory() { + use crate::rust::interpreter::spaces::types::SimpleTypeTheory; + + let mut space = create_test_space(); + let theory = SimpleTypeTheory::new("ConfigTheory", vec!["Test".to_string()]); + space.set_theory(Some(Box::new(theory))); + + let config = space.config(); + assert!(config.theory.is_some()); + 
assert_eq!(config.theory.as_ref().unwrap().name(), "ConfigTheory"); + } + + #[test] + fn test_debug_with_theory() { + use crate::rust::interpreter::spaces::types::SimpleTypeTheory; + + let mut space = create_test_space(); + let theory = SimpleTypeTheory::new("DebugTheory", vec![]); + space.set_theory(Some(Box::new(theory))); + + let debug = format!("{:?}", space); + assert!(debug.contains("DebugTheory")); + } + + // ========================================================================= + // PathMap Prefix Semantics Tests + // ========================================================================= + + use crate::rust::interpreter::spaces::channel_store::PathMapChannelStore; + + /// Create a PathMap-based space for testing prefix semantics. + fn create_pathmap_space() -> GenericRSpace< + PathMapChannelStore, BagContinuationCollection>, + WildcardMatch, + > { + let store = PathMapChannelStore::new(BagDataCollection::new, BagContinuationCollection::new); + let matcher = WildcardMatch::::new(); + GenericRSpace::new(store, matcher, SpaceId::default_space(), SpaceQualifier::Default) + } + + #[test] + fn test_pathmap_space_supports_prefix_semantics() { + let space = create_pathmap_space(); + assert!(space.channel_store().supports_prefix_semantics()); + } + + #[test] + fn test_consume_finds_data_at_descendant_path() { + // Test that consume on @[0,1] finds data at @[0,1,2] + let mut space = create_pathmap_space(); + + // Produce data at @[0,1,2] (a child path) + let child_path = vec![0u8, 1, 2]; + SpaceAgent::produce(&mut space, child_path.clone(), "hello".to_string(), false, None) + .expect("produce should succeed"); + + // Verify data exists at the child path + assert_eq!(SpaceAgent::get_data(&space, &child_path).len(), 1); + + // Consume on @[0,1] (a prefix path) + let prefix_path = vec![0u8, 1]; + let result = SpaceAgent::consume( + &mut space, + vec![prefix_path.clone()], + vec!["*".to_string()], // Wildcard pattern + "continuation".to_string(), + false, + 
BTreeSet::new(), + ).expect("consume should succeed"); + + // Should find the data at the child path + assert!(result.is_some()); + let (cont_result, rspace_results) = result.unwrap(); + + // The result should reference the actual path where data was found + assert_eq!(rspace_results.len(), 1); + assert_eq!(rspace_results[0].channel, child_path); + assert_eq!(rspace_results[0].matched_datum, "hello"); + + // Data should be removed from the child path + assert_eq!(SpaceAgent::get_data(&space, &child_path).len(), 0); + + // The continuation patterns should reference the original consume channels + assert_eq!(cont_result.channels, vec![prefix_path]); + } + + #[test] + fn test_consume_exact_match_takes_priority() { + // When data exists at both the exact path and a descendant, + // exact match should be found first + let mut space = create_pathmap_space(); + + // Produce data at both @[0,1] and @[0,1,2] + let exact_path = vec![0u8, 1]; + let child_path = vec![0u8, 1, 2]; + + SpaceAgent::produce(&mut space, child_path.clone(), "from_child".to_string(), false, None) + .expect("produce should succeed"); + SpaceAgent::produce(&mut space, exact_path.clone(), "from_exact".to_string(), false, None) + .expect("produce should succeed"); + + // Consume on @[0,1] should find the exact match first + let result = SpaceAgent::consume( + &mut space, + vec![exact_path.clone()], + vec!["*".to_string()], + "continuation".to_string(), + false, + BTreeSet::new(), + ).expect("consume should succeed"); + + assert!(result.is_some()); + let (_, rspace_results) = result.unwrap(); + assert_eq!(rspace_results[0].channel, exact_path); + assert_eq!(rspace_results[0].matched_datum, "from_exact"); + + // Exact path data should be removed, child path data should remain + assert_eq!(SpaceAgent::get_data(&space, &exact_path).len(), 0); + assert_eq!(SpaceAgent::get_data(&space, &child_path).len(), 1); + } + + #[test] + fn test_consume_stores_continuation_when_no_data() { + // When no matching data 
exists, continuation should be stored + let mut space = create_pathmap_space(); + + let prefix_path = vec![0u8, 1]; + let result = SpaceAgent::consume( + &mut space, + vec![prefix_path.clone()], + vec!["*".to_string()], + "waiting".to_string(), + false, + BTreeSet::new(), + ).expect("consume should succeed"); + + // No immediate match + assert!(result.is_none()); + + // Continuation should be stored + let conts = SpaceAgent::get_waiting_continuations(&space, vec![prefix_path]); + assert_eq!(conts.len(), 1); + assert_eq!(conts[0].continuation, "waiting"); + } + + #[test] + fn test_consume_with_multiple_descendants() { + // Test that consume finds data when multiple descendants exist + let mut space = create_pathmap_space(); + + // Produce data at @[0,1,2] and @[0,1,3] + SpaceAgent::produce(&mut space, vec![0u8, 1, 2], "data_2".to_string(), false, None) + .expect("produce should succeed"); + SpaceAgent::produce(&mut space, vec![0u8, 1, 3], "data_3".to_string(), false, None) + .expect("produce should succeed"); + + // Consume on @[0,1] should find one of them + let prefix_path = vec![0u8, 1]; + let result = SpaceAgent::consume( + &mut space, + vec![prefix_path], + vec!["*".to_string()], + "continuation".to_string(), + false, + BTreeSet::new(), + ).expect("consume should succeed"); + + assert!(result.is_some()); + let (_, rspace_results) = result.unwrap(); + + // Should have consumed one piece of data + assert_eq!(rspace_results.len(), 1); + + // One data should remain, one should be consumed + let remaining_2 = SpaceAgent::get_data(&space, &vec![0u8, 1, 2]).len(); + let remaining_3 = SpaceAgent::get_data(&space, &vec![0u8, 1, 3]).len(); + assert_eq!(remaining_2 + remaining_3, 1); + } + + #[test] + fn test_hashmap_space_unchanged() { + // Verify that HashMap-based spaces still use exact matching + let mut space = create_test_space(); + + // Produce data at channel 1 + SpaceAgent::produce(&mut space, chan(1), 42, false, None).expect("produce should succeed"); + + // 
Consume on channel 0 should NOT find data at channel 1 + let result = SpaceAgent::consume( + &mut space, + vec![chan(0)], + vec![0i32], // Pattern that would match anything in WildcardMatch + "continuation".to_string(), + false, + BTreeSet::new(), + ).expect("consume should succeed"); + + // No match should be found (no prefix semantics) + assert!(result.is_none()); + + // Data at channel 1 should still exist + assert_eq!(SpaceAgent::get_data(&space, &chan(1)).len(), 1); + } + + #[test] + fn test_pathmap_spec_example() { + // Test from spec lines 159-192: + // @[0, 1, 2]!({|"hi"|}) | @[0, 1, 2]!({|"hello"|}) | @[0, 1, 3]!({|"there"|}) + // When receiving on @[0,1]: + // for( x <- @[0, 1] ) { P } + // Gets x bound to data from one of the descendant paths + let mut space = create_pathmap_space(); + + // Store data at child paths as per spec + SpaceAgent::produce(&mut space, vec![0u8, 1, 2], "hi".to_string(), false, None) + .expect("produce should succeed"); + SpaceAgent::produce(&mut space, vec![0u8, 1, 2], "hello".to_string(), false, None) + .expect("produce should succeed"); + SpaceAgent::produce(&mut space, vec![0u8, 1, 3], "there".to_string(), false, None) + .expect("produce should succeed"); + + // Verify data is stored + assert_eq!(SpaceAgent::get_data(&space, &vec![0u8, 1, 2]).len(), 2); + assert_eq!(SpaceAgent::get_data(&space, &vec![0u8, 1, 3]).len(), 1); + + // Consume on prefix @[0,1] - should find data from descendants + let result = SpaceAgent::consume( + &mut space, + vec![vec![0u8, 1]], + vec!["*".to_string()], + "consumer".to_string(), + false, + BTreeSet::new(), + ).expect("consume should succeed"); + + assert!(result.is_some()); + let (_, rspace_results) = result.unwrap(); + + // Should get one piece of data from one of the child paths + assert_eq!(rspace_results.len(), 1); + let data = &rspace_results[0].matched_datum; + assert!(data == "hi" || data == "hello" || data == "there"); + + // One less total data after consume + let remaining = 
SpaceAgent::get_data(&space, &vec![0u8, 1, 2]).len() + + SpaceAgent::get_data(&space, &vec![0u8, 1, 3]).len(); + assert_eq!(remaining, 2); + } + + // ========================================================================= + // Phase 4 Tests: Produce triggers continuations at prefix paths + // ========================================================================= + // Phase 4 is IMPLEMENTED: produce() checks prefix paths for continuations + // via find_matching_continuation_at_prefix(). Data at @[0,1,2] triggers + // continuations waiting on @[0,1]. + + #[test] + fn test_produce_on_child_stores_data_when_no_prefix_continuation() { + // When there's no waiting continuation, produce stores data + let mut space = create_pathmap_space(); + + // Produce on @[0,1,2] with no waiting continuations + let result = SpaceAgent::produce(&mut space, vec![0u8, 1, 2], "hello".to_string(), false, None) + .expect("produce should succeed"); + + // No continuation triggered - data stored + assert!(result.is_none()); + assert_eq!(SpaceAgent::get_data(&space, &vec![0u8, 1, 2]).len(), 1); + } + + #[test] + fn test_produce_finds_continuation_at_exact_path() { + // Produce should trigger continuation at exact path (existing behavior) + let mut space = create_pathmap_space(); + + let path = vec![0u8, 1, 2]; + + // Set up a waiting continuation at exact path + let consume_result = SpaceAgent::consume( + &mut space, + vec![path.clone()], + vec!["*".to_string()], + "waiting".to_string(), + false, + BTreeSet::new(), + ).expect("consume should succeed"); + assert!(consume_result.is_none()); // No data yet, continuation stored + + // Produce on the same path should trigger the continuation + let produce_result = SpaceAgent::produce(&mut space, path.clone(), "data".to_string(), false, None) + .expect("produce should succeed"); + + assert!(produce_result.is_some()); + let (cont_result, rspace_results, _) = produce_result.unwrap(); + assert_eq!(cont_result.continuation, "waiting"); + 
assert_eq!(rspace_results[0].matched_datum, "data"); + } + + #[test] + fn test_produce_on_child_triggers_prefix_continuation() { + // After Phase 4: produce on @[0,1,2] should trigger continuation at @[0,1] + let mut space = create_pathmap_space(); + + // Set up a waiting continuation at prefix path @[0,1] + let consume_result = SpaceAgent::consume( + &mut space, + vec![vec![0u8, 1]], + vec!["*".to_string()], + "prefix_consumer".to_string(), + false, + BTreeSet::new(), + ).expect("consume should succeed"); + assert!(consume_result.is_none()); // No data yet, continuation stored + + // Verify continuation is stored + assert_eq!(SpaceAgent::get_waiting_continuations(&space, vec![vec![0u8, 1]]).len(), 1); + + // Produce on child path @[0,1,2] should trigger the prefix continuation + let produce_result = SpaceAgent::produce(&mut space, vec![0u8, 1, 2], "hello".to_string(), false, None) + .expect("produce should succeed"); + + // This assertion will pass after Phase 4 is implemented + assert!(produce_result.is_some()); + let (cont_result, rspace_results, _) = produce_result.unwrap(); + assert_eq!(cont_result.continuation, "prefix_consumer"); + assert_eq!(rspace_results[0].matched_datum, "hello"); + // The actual channel should be the child path + assert_eq!(rspace_results[0].channel, vec![0u8, 1, 2]); + + // Continuation should be removed + assert_eq!(SpaceAgent::get_waiting_continuations(&space, vec![vec![0u8, 1]]).len(), 0); + } +} diff --git a/rholang/src/rust/interpreter/spaces/history.rs b/rholang/src/rust/interpreter/spaces/history.rs new file mode 100644 index 000000000..b85b60608 --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/history.rs @@ -0,0 +1,759 @@ +//! History Repository Abstraction for Reified RSpaces +//! +//! This module provides an abstract interface for storing and retrieving space state +//! at Merkle roots. It enables checkpointing and replay functionality for spaces. +//! +//! # Design +//! +//! 
The `HistoryStore` trait provides a simple key-value interface where: +//! - Keys are Blake2b256Hash Merkle roots +//! - Values are serialized space state +//! +//! This abstraction enables: +//! - Multiple backend implementations (in-memory, LMDB, etc.) +//! - Testable checkpointing without heavy dependencies +//! - Clean separation between space logic and persistence +//! +//! # Implementations +//! +//! - `InMemoryHistoryStore`: For testing and ephemeral spaces +//! - `RSpaceHistoryStoreAdapter`: Wraps rspace++'s HistoryRepository for production + +use std::collections::{HashMap, VecDeque}; +use std::fmt::Debug; +use std::sync::{Arc, RwLock}; + +use rspace_plus_plus::rspace::hashing::blake2b256_hash::Blake2b256Hash; + +use super::errors::SpaceError; + +// ============================================================================= +// Core Trait: HistoryStore +// ============================================================================= + +/// Abstract repository for checkpoint/replay history. +/// +/// This trait provides a simple interface for storing and retrieving serialized +/// space state indexed by Merkle roots (Blake2b256Hash). +/// +/// # Thread Safety +/// +/// Implementations must be `Send + Sync` to support concurrent access from +/// multiple space instances. +/// +/// # Usage +/// +/// ```ignore +/// let store = InMemoryHistoryStore::new(); +/// +/// // Serialize and store space state +/// let state = serialize_space(&space); +/// let root = Blake2b256Hash::new(&state); +/// store.store(root.clone(), &state)?; +/// +/// // Later, retrieve and restore +/// let restored_state = store.retrieve(&root)?; +/// deserialize_space(&restored_state); +/// ``` +pub trait HistoryStore: Send + Sync + Debug { + /// Store serialized state at a Merkle root. + /// + /// # Arguments + /// - `root`: The Merkle root (hash of the state) + /// - `state`: The serialized state bytes + /// + /// # Errors + /// Returns `SpaceError::CheckpointError` if storage fails. 
+ fn store(&self, root: Blake2b256Hash, state: &[u8]) -> Result<(), SpaceError>; + + /// Retrieve serialized state for a Merkle root. + /// + /// # Arguments + /// - `root`: The Merkle root to look up + /// + /// # Errors + /// Returns `SpaceError::CheckpointError` if the root is not found. + fn retrieve(&self, root: &Blake2b256Hash) -> Result, SpaceError>; + + /// Check if a Merkle root exists in the store. + /// + /// # Arguments + /// - `root`: The Merkle root to check + /// + /// # Returns + /// `true` if the root exists, `false` otherwise. + fn contains(&self, root: &Blake2b256Hash) -> bool; + + /// Clear all stored history. + /// + /// This removes all stored state, but does not affect the current space state. + /// Use with caution as this is irreversible. + /// + /// # Errors + /// Returns `SpaceError::CheckpointError` if clearing fails. + fn clear(&self) -> Result<(), SpaceError>; + + /// Get the number of stored checkpoints. + fn checkpoint_count(&self) -> usize; + + /// Get all stored roots (for debugging/enumeration). + fn list_roots(&self) -> Vec; +} + +// ============================================================================= +// In-Memory Implementation +// ============================================================================= + +/// In-memory history store for testing and ephemeral spaces. +/// +/// This implementation stores state in a thread-safe HashMap. It does not +/// persist across process restarts. +/// +/// # Thread Safety +/// +/// Uses `RwLock` for interior mutability, allowing concurrent reads. 
+/// +/// # Example +/// +/// ```ignore +/// let store = InMemoryHistoryStore::new(); +/// +/// let state = vec![1, 2, 3, 4]; +/// let root = Blake2b256Hash::new(&state); +/// +/// store.store(root.clone(), &state)?; +/// assert!(store.contains(&root)); +/// +/// let retrieved = store.retrieve(&root)?; +/// assert_eq!(state, retrieved); +/// ``` +#[derive(Debug)] +pub struct InMemoryHistoryStore { + /// Map from Merkle root to serialized state + states: RwLock>>, +} + +impl InMemoryHistoryStore { + /// Create a new empty in-memory history store. + pub fn new() -> Self { + InMemoryHistoryStore { + states: RwLock::new(HashMap::new()), + } + } + + /// Create a history store with pre-populated state. + /// + /// Useful for testing scenarios where initial history is needed. + pub fn with_initial_state(states: HashMap>) -> Self { + InMemoryHistoryStore { + states: RwLock::new(states), + } + } +} + +impl Default for InMemoryHistoryStore { + fn default() -> Self { + Self::new() + } +} + +impl HistoryStore for InMemoryHistoryStore { + fn store(&self, root: Blake2b256Hash, state: &[u8]) -> Result<(), SpaceError> { + let mut states = self.states.write().map_err(|e| SpaceError::CheckpointError { + description: format!("Failed to acquire write lock: {}", e), + })?; + + states.insert(root, state.to_vec()); + Ok(()) + } + + fn retrieve(&self, root: &Blake2b256Hash) -> Result, SpaceError> { + let states = self.states.read().map_err(|e| SpaceError::CheckpointError { + description: format!("Failed to acquire read lock: {}", e), + })?; + + states.get(root).cloned().ok_or_else(|| SpaceError::CheckpointError { + description: format!("Merkle root not found: {}", root), + }) + } + + fn contains(&self, root: &Blake2b256Hash) -> bool { + self.states + .read() + .map(|states| states.contains_key(root)) + .unwrap_or(false) + } + + fn clear(&self) -> Result<(), SpaceError> { + let mut states = self.states.write().map_err(|e| SpaceError::CheckpointError { + description: format!("Failed to 
acquire write lock: {}", e), + })?; + + states.clear(); + Ok(()) + } + + fn checkpoint_count(&self) -> usize { + self.states.read().map(|s| s.len()).unwrap_or(0) + } + + fn list_roots(&self) -> Vec { + self.states + .read() + .map(|s| s.keys().cloned().collect()) + .unwrap_or_default() + } +} + +// ============================================================================= +// Bounded History Store (with capacity limit) +// ============================================================================= + +/// History store with a maximum capacity and O(1) eviction. +/// +/// When the capacity is reached, the oldest entries are evicted (LRU-like behavior). +/// Useful for limiting memory usage in long-running processes. +/// +/// # Performance +/// Uses `VecDeque` for O(1) eviction at the front, improving over the previous +/// O(n) `Vec::remove(0)` approach. +#[derive(Debug)] +pub struct BoundedHistoryStore { + /// Map from Merkle root to serialized state + states: RwLock>>, + /// Insertion order for LRU eviction (VecDeque for O(1) pop_front) + insertion_order: RwLock>, + /// Maximum number of entries to keep + max_capacity: usize, +} + +impl BoundedHistoryStore { + /// Create a new bounded history store with the given capacity. + /// + /// # Arguments + /// - `max_capacity`: Maximum number of checkpoints to retain + pub fn new(max_capacity: usize) -> Self { + BoundedHistoryStore { + states: RwLock::new(HashMap::with_capacity(max_capacity)), + insertion_order: RwLock::new(VecDeque::with_capacity(max_capacity)), + max_capacity, + } + } + + /// Get the maximum capacity of this store. 
+    pub fn capacity(&self) -> usize {
+        self.max_capacity
+    }
+}
+
+impl HistoryStore for BoundedHistoryStore {
+    fn store(&self, root: Blake2b256Hash, state: &[u8]) -> Result<(), SpaceError> {
+        let mut states = self.states.write().map_err(|e| SpaceError::CheckpointError {
+            description: format!("Failed to acquire write lock: {}", e),
+        })?;
+
+        let mut order = self.insertion_order.write().map_err(|e| {
+            SpaceError::CheckpointError {
+                description: format!("Failed to acquire insertion order lock: {}", e),
+            }
+        })?;
+
+        // If key already exists, update it and move to end of order (no eviction needed)
+        let is_update = states.contains_key(&root);
+        if is_update {
+            order.retain(|r| r != &root);
+        } else {
+            // New key - evict oldest if at capacity (O(1) with VecDeque::pop_front)
+            while states.len() >= self.max_capacity {
+                if let Some(oldest_root) = order.pop_front() {
+                    states.remove(&oldest_root);
+                } else {
+                    break;
+                }
+            }
+        }
+
+        states.insert(root.clone(), state.to_vec());
+        order.push_back(root);
+
+        Ok(())
+    }
+
+    fn retrieve(&self, root: &Blake2b256Hash) -> Result<Vec<u8>, SpaceError> {
+        let states = self.states.read().map_err(|e| SpaceError::CheckpointError {
+            description: format!("Failed to acquire read lock: {}", e),
+        })?;
+
+        states.get(root).cloned().ok_or_else(|| SpaceError::CheckpointError {
+            description: format!("Merkle root not found: {}", root),
+        })
+    }
+
+    fn contains(&self, root: &Blake2b256Hash) -> bool {
+        self.states
+            .read()
+            .map(|states| states.contains_key(root))
+            .unwrap_or(false)
+    }
+
+    fn clear(&self) -> Result<(), SpaceError> {
+        let mut states = self.states.write().map_err(|e| SpaceError::CheckpointError {
+            description: format!("Failed to acquire write lock: {}", e),
+        })?;
+
+        let mut order = self.insertion_order.write().map_err(|e| {
+            SpaceError::CheckpointError {
+                description: format!("Failed to acquire insertion order lock: {}", e),
+            }
+        })?;
+
+        states.clear();
+        order.clear();
+        Ok(())
+    }
+
+    fn checkpoint_count(&self) -> usize {
+        self.states.read().map(|s| s.len()).unwrap_or(0)
+    }
+
+    fn list_roots(&self) -> Vec<Blake2b256Hash> {
+        // Return in insertion order (oldest first)
+        // Convert VecDeque to Vec for API compatibility
+        self.insertion_order
+            .read()
+            .map(|o| o.iter().cloned().collect())
+            .unwrap_or_default()
+    }
+}
+
+// =============================================================================
+// Null History Store (no-op for temp spaces)
+// =============================================================================
+
+/// No-op history store for temporary spaces that don't need checkpointing.
+///
+/// All operations succeed but nothing is actually stored. Useful for
+/// `SpaceQualifier::Temp` spaces where persistence is not needed.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct NullHistoryStore;
+
+impl NullHistoryStore {
+    /// Create a new null history store.
+    pub fn new() -> Self {
+        NullHistoryStore
+    }
+}
+
+impl HistoryStore for NullHistoryStore {
+    fn store(&self, _root: Blake2b256Hash, _state: &[u8]) -> Result<(), SpaceError> {
+        // No-op: temp spaces don't persist checkpoints
+        Ok(())
+    }
+
+    fn retrieve(&self, root: &Blake2b256Hash) -> Result<Vec<u8>, SpaceError> {
+        // Always fails: nothing is stored
+        Err(SpaceError::CheckpointError {
+            description: format!(
+                "NullHistoryStore does not store checkpoints. Root {} not found.",
+                root
+            ),
+        })
+    }
+
+    fn contains(&self, _root: &Blake2b256Hash) -> bool {
+        // Nothing is ever stored
+        false
+    }
+
+    fn clear(&self) -> Result<(), SpaceError> {
+        // No-op: nothing to clear
+        Ok(())
+    }
+
+    fn checkpoint_count(&self) -> usize {
+        0
+    }
+
+    fn list_roots(&self) -> Vec<Blake2b256Hash> {
+        Vec::new()
+    }
+}
+
+// =============================================================================
+// Type-Erased Boxed History Store
+// =============================================================================
+
+/// Type-erased history store for dynamic dispatch.
+/// +/// This allows storing different `HistoryStore` implementations in the same +/// container, useful for the `GenericRSpace` struct. +pub type BoxedHistoryStore = Arc; + +/// Create a boxed in-memory history store. +pub fn boxed_in_memory() -> BoxedHistoryStore { + Arc::new(InMemoryHistoryStore::new()) +} + +/// Create a boxed bounded history store. +pub fn boxed_bounded(max_capacity: usize) -> BoxedHistoryStore { + Arc::new(BoundedHistoryStore::new(max_capacity)) +} + +/// Create a boxed null history store. +pub fn boxed_null() -> BoxedHistoryStore { + Arc::new(NullHistoryStore::new()) +} + +// ============================================================================= +// History Store with Verification +// ============================================================================= + +/// History store wrapper that verifies state integrity on retrieval. +/// +/// This wrapper computes the hash of retrieved state and verifies it matches +/// the requested root. Useful for detecting corruption. +#[derive(Debug)] +pub struct VerifyingHistoryStore { + inner: H, +} + +impl VerifyingHistoryStore { + /// Create a new verifying history store wrapping the given store. + pub fn new(inner: H) -> Self { + VerifyingHistoryStore { inner } + } + + /// Get a reference to the inner store. 
+ pub fn inner(&self) -> &H { + &self.inner + } +} + +impl HistoryStore for VerifyingHistoryStore { + fn store(&self, root: Blake2b256Hash, state: &[u8]) -> Result<(), SpaceError> { + // Verify the root matches the state hash + let computed_root = Blake2b256Hash::new(state); + if computed_root != root { + return Err(SpaceError::CheckpointError { + description: format!( + "Root mismatch on store: expected {}, computed {}", + root, computed_root + ), + }); + } + + self.inner.store(root, state) + } + + fn retrieve(&self, root: &Blake2b256Hash) -> Result, SpaceError> { + let state = self.inner.retrieve(root)?; + + // Verify the retrieved state matches the expected root + let computed_root = Blake2b256Hash::new(&state); + if &computed_root != root { + return Err(SpaceError::CheckpointError { + description: format!( + "Corrupted state: expected root {}, computed {}", + root, computed_root + ), + }); + } + + Ok(state) + } + + fn contains(&self, root: &Blake2b256Hash) -> bool { + self.inner.contains(root) + } + + fn clear(&self) -> Result<(), SpaceError> { + self.inner.clear() + } + + fn checkpoint_count(&self) -> usize { + self.inner.checkpoint_count() + } + + fn list_roots(&self) -> Vec { + self.inner.list_roots() + } +} + +// ============================================================================= +// Tests +// ============================================================================= + +#[cfg(test)] +mod tests { + use super::*; + + fn make_test_state(id: u8) -> (Blake2b256Hash, Vec) { + let state = vec![id; 32]; + let root = Blake2b256Hash::new(&state); + (root, state) + } + + // ========================================================================= + // InMemoryHistoryStore tests + // ========================================================================= + + #[test] + fn test_in_memory_store_roundtrip() { + let store = InMemoryHistoryStore::new(); + let (root, state) = make_test_state(42); + + store.store(root.clone(), &state).expect("store should 
succeed"); + assert!(store.contains(&root)); + + let retrieved = store.retrieve(&root).expect("retrieve should succeed"); + assert_eq!(state, retrieved); + } + + #[test] + fn test_in_memory_store_not_found() { + let store = InMemoryHistoryStore::new(); + let (root, _) = make_test_state(99); + + let result = store.retrieve(&root); + assert!(result.is_err()); + assert!(!store.contains(&root)); + } + + #[test] + fn test_in_memory_store_clear() { + let store = InMemoryHistoryStore::new(); + let (root1, state1) = make_test_state(1); + let (root2, state2) = make_test_state(2); + + store.store(root1.clone(), &state1).expect("store 1"); + store.store(root2.clone(), &state2).expect("store 2"); + assert_eq!(store.checkpoint_count(), 2); + + store.clear().expect("clear should succeed"); + assert_eq!(store.checkpoint_count(), 0); + assert!(!store.contains(&root1)); + assert!(!store.contains(&root2)); + } + + #[test] + fn test_in_memory_store_overwrite() { + let store = InMemoryHistoryStore::new(); + let (root, state1) = make_test_state(1); + + store.store(root.clone(), &state1).expect("store 1"); + + // Overwrite with different data (same root for testing) + let state2 = vec![2; 32]; + store.store(root.clone(), &state2).expect("store 2"); + + let retrieved = store.retrieve(&root).expect("retrieve"); + assert_eq!(state2, retrieved); + assert_eq!(store.checkpoint_count(), 1); + } + + #[test] + fn test_in_memory_store_list_roots() { + let store = InMemoryHistoryStore::new(); + let (root1, state1) = make_test_state(1); + let (root2, state2) = make_test_state(2); + + store.store(root1.clone(), &state1).expect("store 1"); + store.store(root2.clone(), &state2).expect("store 2"); + + let roots = store.list_roots(); + assert_eq!(roots.len(), 2); + assert!(roots.contains(&root1)); + assert!(roots.contains(&root2)); + } + + // ========================================================================= + // BoundedHistoryStore tests + // 
========================================================================= + + #[test] + fn test_bounded_store_eviction() { + let store = BoundedHistoryStore::new(2); + + let (root1, state1) = make_test_state(1); + let (root2, state2) = make_test_state(2); + let (root3, state3) = make_test_state(3); + + store.store(root1.clone(), &state1).expect("store 1"); + store.store(root2.clone(), &state2).expect("store 2"); + assert_eq!(store.checkpoint_count(), 2); + + // This should evict root1 + store.store(root3.clone(), &state3).expect("store 3"); + assert_eq!(store.checkpoint_count(), 2); + + // root1 should be gone + assert!(!store.contains(&root1)); + assert!(store.contains(&root2)); + assert!(store.contains(&root3)); + } + + #[test] + fn test_bounded_store_insertion_order() { + let store = BoundedHistoryStore::new(3); + + let (root1, state1) = make_test_state(1); + let (root2, state2) = make_test_state(2); + let (root3, state3) = make_test_state(3); + + store.store(root1.clone(), &state1).expect("store 1"); + store.store(root2.clone(), &state2).expect("store 2"); + store.store(root3.clone(), &state3).expect("store 3"); + + let roots = store.list_roots(); + assert_eq!(roots, vec![root1, root2, root3]); + } + + #[test] + fn test_bounded_store_update_moves_to_end() { + let store = BoundedHistoryStore::new(3); + + let (root1, state1) = make_test_state(1); + let (root2, state2) = make_test_state(2); + let (root3, state3) = make_test_state(3); + + store.store(root1.clone(), &state1).expect("store 1"); + store.store(root2.clone(), &state2).expect("store 2"); + store.store(root3.clone(), &state3).expect("store 3"); + + // Re-store root1, should move to end + store.store(root1.clone(), &state1).expect("re-store 1"); + + let roots = store.list_roots(); + assert_eq!(roots, vec![root2.clone(), root3.clone(), root1.clone()]); + + // Now add root4, should evict root2 (oldest) + let (root4, state4) = make_test_state(4); + store.store(root4.clone(), &state4).expect("store 4"); + + 
assert!(!store.contains(&root2)); + assert!(store.contains(&root3)); + assert!(store.contains(&root1)); + assert!(store.contains(&root4)); + } + + // ========================================================================= + // NullHistoryStore tests + // ========================================================================= + + #[test] + fn test_null_store_noop() { + let store = NullHistoryStore::new(); + let (root, state) = make_test_state(42); + + // Store succeeds + store.store(root.clone(), &state).expect("store should succeed"); + + // But nothing is stored + assert!(!store.contains(&root)); + assert_eq!(store.checkpoint_count(), 0); + + // Retrieve fails + let result = store.retrieve(&root); + assert!(result.is_err()); + } + + // ========================================================================= + // VerifyingHistoryStore tests + // ========================================================================= + + #[test] + fn test_verifying_store_valid() { + let inner = InMemoryHistoryStore::new(); + let store = VerifyingHistoryStore::new(inner); + + let state = vec![1, 2, 3, 4, 5]; + let root = Blake2b256Hash::new(&state); + + store.store(root.clone(), &state).expect("store should succeed"); + + let retrieved = store.retrieve(&root).expect("retrieve should succeed"); + assert_eq!(state, retrieved); + } + + #[test] + fn test_verifying_store_rejects_mismatched_root() { + let inner = InMemoryHistoryStore::new(); + let store = VerifyingHistoryStore::new(inner); + + let state = vec![1, 2, 3, 4, 5]; + let wrong_root = Blake2b256Hash::new(&[9, 9, 9]); // Different hash + + let result = store.store(wrong_root, &state); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("Root mismatch")); + } + + // ========================================================================= + // Thread safety tests + // ========================================================================= + + #[test] + fn test_concurrent_access() { + use 
std::thread; + + let store = Arc::new(InMemoryHistoryStore::new()); + let mut handles = vec![]; + + // Spawn multiple writers + for i in 0..10u8 { + let store_clone = Arc::clone(&store); + handles.push(thread::spawn(move || { + let (root, state) = make_test_state(i); + store_clone.store(root, &state).expect("store failed"); + })); + } + + // Wait for all writers + for handle in handles { + handle.join().expect("thread panicked"); + } + + assert_eq!(store.checkpoint_count(), 10); + + // Concurrent readers + let mut read_handles = vec![]; + let roots = store.list_roots(); + + for root in roots { + let store_clone = Arc::clone(&store); + read_handles.push(thread::spawn(move || { + let result = store_clone.retrieve(&root); + assert!(result.is_ok()); + })); + } + + for handle in read_handles { + handle.join().expect("read thread panicked"); + } + } + + // ========================================================================= + // Boxed store tests + // ========================================================================= + + #[test] + fn test_boxed_stores_are_compatible() { + let stores: Vec = vec![ + boxed_in_memory(), + boxed_bounded(10), + boxed_null(), + ]; + + let (root, state) = make_test_state(42); + + for store in stores { + // All stores should accept the same interface + let _ = store.store(root.clone(), &state); + let _ = store.contains(&root); + let _ = store.checkpoint_count(); + } + } +} diff --git a/rholang/src/rust/interpreter/spaces/mod.rs b/rholang/src/rust/interpreter/spaces/mod.rs index 2007834fe..4e50462d2 100644 --- a/rholang/src/rust/interpreter/spaces/mod.rs +++ b/rholang/src/rust/interpreter/spaces/mod.rs @@ -1,24 +1,43 @@ -//! Spaces Module - Foundation Traits, Collections & Outer Storage +//! Multi-Space RSpace Integration Module //! -//! This module provides the foundation for reified RSpaces. +//! This module implements the 6-layer trait hierarchy for reified RSpaces. 
-pub mod collections; +pub mod adapter; +pub mod agent; +pub mod async_agent; pub mod channel_store; +pub mod charging_agent; +pub mod collections; pub mod errors; +pub mod generic_rspace; +pub mod history; pub mod matcher; +pub mod phlogiston; +pub mod prelude; +pub mod registry; +pub mod similarity_extraction; pub mod types; pub mod vectordb; // Re-exports for convenience pub use types::{ + InnerCollectionType, + OuterStorageType, + SpaceConfig, + SpaceId, + SpaceQualifier, ChannelBound, PatternBound, DataBound, ContinuationBound, SpaceParamBound, - SpaceId, - InnerCollectionType, - OuterStorageType, + Theory, + NullTheory, + GasConfiguration, }; pub use errors::SpaceError; +pub use agent::SpaceAgent; +pub use async_agent::AsyncSpaceAgent; pub use channel_store::ChannelStore; +pub use generic_rspace::GenericRSpace; +pub use registry::SpaceRegistry; diff --git a/rholang/src/rust/interpreter/spaces/phlogiston.rs b/rholang/src/rust/interpreter/spaces/phlogiston.rs new file mode 100644 index 000000000..928f95ab4 --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/phlogiston.rs @@ -0,0 +1,471 @@ +//! Phlogiston (Gas) Accounting for Reified RSpaces +//! +//! This module implements the gas/phlogiston accounting system that measures and limits +//! computational resource usage in RSpace operations. It ensures that processes cannot +//! consume unbounded resources by requiring payment for each operation. +//! +//! # Formal Correspondence +//! - `Phlogiston.v`: Core phlogiston invariants (non-negativity, charge preservation) +//! - `GenericRSpace.v`: Integration with space operations +//! - `Safety/Properties.v`: Gas accounting safety properties +//! +//! # Design Overview +//! The phlogiston system consists of: +//! 1. **Cost Functions**: Define the gas cost of each operation type +//! 2. **PhlogistonMeter**: Tracks gas consumption and enforces limits +//! 3. **ChargingSpaceAgent**: Wrapper that charges for operations +//! +//! # Invariants +//! 
- Phlogiston balance is always non-negative (enforced by type system with u64) +//! - Charge operations preserve non-negativity (checked before deduction) +//! - All space operations have defined costs + +use std::sync::atomic::{AtomicU64, Ordering}; +use super::errors::SpaceError; + +// ============================================================================= +// Cost Constants +// ============================================================================= + +/// Base cost for sending a message to a channel. +/// This covers the fundamental overhead of routing and storing the message. +pub const SEND_BASE_COST: u64 = 100; + +/// Cost per byte of data being sent. +pub const SEND_PER_BYTE_COST: u64 = 1; + +/// Base cost for receiving (consuming) from a channel. +pub const RECEIVE_BASE_COST: u64 = 100; + +/// Base cost for pattern matching operations. +pub const MATCH_BASE_COST: u64 = 50; + +/// Cost per pattern element matched. +pub const MATCH_PER_ELEMENT_COST: u64 = 10; + +/// Base cost for creating a new channel. +pub const CHANNEL_CREATE_COST: u64 = 200; + +/// Base cost for freeing a channel. +pub const CHANNEL_FREE_COST: u64 = 50; + +/// Base cost for creating a checkpoint. +pub const CHECKPOINT_COST: u64 = 500; + +/// Base cost for replaying from a checkpoint. +pub const REPLAY_BASE_COST: u64 = 100; + +/// Cost per operation replayed. +pub const REPLAY_PER_OP_COST: u64 = 10; + +/// Base cost for space creation. +pub const SPACE_CREATE_COST: u64 = 1000; + +/// Cost for looking up a channel. +pub const LOOKUP_COST: u64 = 20; + +/// Cost for VectorDB similarity search (base). +pub const VECTORDB_SEARCH_BASE_COST: u64 = 200; + +/// Cost per dimension in VectorDB similarity search. +pub const VECTORDB_SEARCH_PER_DIM_COST: u64 = 5; + +/// Cost for priority queue insertion (includes heapify). +pub const PRIORITY_QUEUE_INSERT_COST: u64 = 50; + +/// Cost for priority queue pop. 
+pub const PRIORITY_QUEUE_POP_COST: u64 = 30; + +// ============================================================================= +// Operation Types for Cost Calculation +// ============================================================================= + +/// Types of operations that consume phlogiston. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Operation { + /// Sending data to a channel. + Send { + /// Size of the data being sent in bytes. + data_size: usize, + }, + /// Receiving data from a channel. + Receive, + /// Pattern matching operation. + Match { + /// Number of elements in the pattern. + pattern_size: usize, + }, + /// Creating a new channel. + CreateChannel, + /// Freeing a channel. + FreeChannel, + /// Creating a checkpoint. + Checkpoint, + /// Replaying from a checkpoint. + Replay { + /// Number of operations to replay. + operation_count: usize, + }, + /// Creating a new space. + CreateSpace, + /// Looking up a channel. + Lookup, + /// VectorDB similarity search. + VectorDbSearch { + /// Dimensionality of vectors. + dimensions: usize, + }, + /// Priority queue insert. + PriorityQueueInsert, + /// Priority queue pop. + PriorityQueuePop, + /// Custom operation with explicit cost. + Custom { + /// Name of the operation. + name: String, + /// Cost of the operation. + cost: u64, + }, +} + +impl Operation { + /// Calculate the gas cost for this operation. + /// + /// # Returns + /// The gas cost in phlogiston units. 
+ pub fn cost(&self) -> u64 { + match self { + Operation::Send { data_size } => { + SEND_BASE_COST + (*data_size as u64) * SEND_PER_BYTE_COST + } + Operation::Receive => RECEIVE_BASE_COST, + Operation::Match { pattern_size } => { + MATCH_BASE_COST + (*pattern_size as u64) * MATCH_PER_ELEMENT_COST + } + Operation::CreateChannel => CHANNEL_CREATE_COST, + Operation::FreeChannel => CHANNEL_FREE_COST, + Operation::Checkpoint => CHECKPOINT_COST, + Operation::Replay { operation_count } => { + REPLAY_BASE_COST + (*operation_count as u64) * REPLAY_PER_OP_COST + } + Operation::CreateSpace => SPACE_CREATE_COST, + Operation::Lookup => LOOKUP_COST, + Operation::VectorDbSearch { dimensions } => { + VECTORDB_SEARCH_BASE_COST + (*dimensions as u64) * VECTORDB_SEARCH_PER_DIM_COST + } + Operation::PriorityQueueInsert => PRIORITY_QUEUE_INSERT_COST, + Operation::PriorityQueuePop => PRIORITY_QUEUE_POP_COST, + Operation::Custom { cost, .. } => *cost, + } + } + + /// Get a human-readable description of the operation. + pub fn description(&self) -> String { + match self { + Operation::Send { data_size } => format!("send({} bytes)", data_size), + Operation::Receive => "receive".to_string(), + Operation::Match { pattern_size } => format!("match({} elements)", pattern_size), + Operation::CreateChannel => "create_channel".to_string(), + Operation::FreeChannel => "free_channel".to_string(), + Operation::Checkpoint => "checkpoint".to_string(), + Operation::Replay { operation_count } => format!("replay({} ops)", operation_count), + Operation::CreateSpace => "create_space".to_string(), + Operation::Lookup => "lookup".to_string(), + Operation::VectorDbSearch { dimensions } => format!("vector_search({} dims)", dimensions), + Operation::PriorityQueueInsert => "priority_queue_insert".to_string(), + Operation::PriorityQueuePop => "priority_queue_pop".to_string(), + Operation::Custom { name, .. 
} => name.clone(), + } + } +} + +// ============================================================================= +// Phlogiston Meter +// ============================================================================= + +/// A meter that tracks phlogiston consumption and enforces limits. +/// +/// The meter is thread-safe and uses atomic operations for concurrent access. +/// It maintains a balance that starts at the initial limit and decreases as +/// operations are charged. +/// +/// # Invariants (from Phlogiston.v) +/// - `balance >= 0` (enforced by u64 type) +/// - `charge(amount)` only succeeds if `balance >= amount` +/// - Total consumed = initial_limit - current_balance +#[derive(Debug)] +pub struct PhlogistonMeter { + /// Current phlogiston balance (remaining gas). + balance: AtomicU64, + /// Initial limit for tracking total consumption. + initial_limit: u64, + /// Total amount consumed (for reporting). + total_consumed: AtomicU64, +} + +impl PhlogistonMeter { + /// Create a new phlogiston meter with the given initial balance. + /// + /// # Arguments + /// * `initial_limit` - The starting phlogiston balance. + pub fn new(initial_limit: u64) -> Self { + PhlogistonMeter { + balance: AtomicU64::new(initial_limit), + initial_limit, + total_consumed: AtomicU64::new(0), + } + } + + /// Create a meter with unlimited phlogiston (for testing or privileged operations). + pub fn unlimited() -> Self { + PhlogistonMeter { + balance: AtomicU64::new(u64::MAX), + initial_limit: u64::MAX, + total_consumed: AtomicU64::new(0), + } + } + + /// Get the current phlogiston balance. + pub fn balance(&self) -> u64 { + self.balance.load(Ordering::Relaxed) + } + + /// Get the initial limit. + pub fn initial_limit(&self) -> u64 { + self.initial_limit + } + + /// Get the total amount consumed. + pub fn total_consumed(&self) -> u64 { + self.total_consumed.load(Ordering::Relaxed) + } + + /// Check if a charge of the given amount is possible. 
+ /// + /// # Arguments + /// * `amount` - The amount to check. + /// + /// # Returns + /// `true` if the balance is sufficient. + pub fn can_charge(&self, amount: u64) -> bool { + self.balance.load(Ordering::Relaxed) >= amount + } + + /// Attempt to charge phlogiston for an operation. + /// + /// This atomically deducts the amount from the balance if sufficient + /// funds are available. + /// + /// # Arguments + /// * `operation` - The operation being charged. + /// + /// # Returns + /// - `Ok(())` if the charge succeeded + /// - `Err(SpaceError::OutOfPhlogiston)` if insufficient balance + /// + /// # Formal Correspondence + /// Implements `charge_preserves_non_negative` from Phlogiston.v + pub fn charge(&self, operation: &Operation) -> Result<(), SpaceError> { + let amount = operation.cost(); + self.charge_amount(amount, &operation.description()) + } + + /// Charge a specific amount with a description. + /// + /// # Arguments + /// * `amount` - The amount to charge. + /// * `description` - Description of the operation for error messages. 
+ /// + /// # Returns + /// - `Ok(())` if the charge succeeded + /// - `Err(SpaceError::OutOfPhlogiston)` if insufficient balance + pub fn charge_amount(&self, amount: u64, description: &str) -> Result<(), SpaceError> { + // Use compare-exchange loop for atomic deduction + loop { + let current = self.balance.load(Ordering::Relaxed); + + if current < amount { + return Err(SpaceError::OutOfPhlogiston { + required: amount, + available: current, + operation: description.to_string(), + }); + } + + let new_balance = current - amount; + match self.balance.compare_exchange_weak( + current, + new_balance, + Ordering::Release, // Release is sufficient - CAS retry loop handles races + Ordering::Relaxed, + ) { + Ok(_) => { + self.total_consumed.fetch_add(amount, Ordering::Relaxed); + return Ok(()); + } + Err(_) => continue, // Retry if another thread modified + } + } + } + + /// Refund phlogiston (e.g., for operations that were rolled back). + /// + /// # Arguments + /// * `amount` - The amount to refund. + /// + /// # Note + /// This can increase balance beyond initial_limit if called multiple times. + /// In practice, refunds should only restore previously charged amounts. + pub fn refund(&self, amount: u64) { + self.balance.fetch_add(amount, Ordering::Relaxed); + // Note: We don't decrease total_consumed for refunds + // This keeps accurate accounting of gross consumption + } + + /// Reset the meter to its initial state. 
+ pub fn reset(&self) { + self.balance.store(self.initial_limit, Ordering::Relaxed); + self.total_consumed.store(0, Ordering::Relaxed); + } +} + +impl Clone for PhlogistonMeter { + fn clone(&self) -> Self { + PhlogistonMeter { + balance: AtomicU64::new(self.balance.load(Ordering::Relaxed)), + initial_limit: self.initial_limit, + total_consumed: AtomicU64::new(self.total_consumed.load(Ordering::Relaxed)), + } + } +} + +impl Default for PhlogistonMeter { + fn default() -> Self { + // Default to 1 million units + Self::new(1_000_000) + } +} + +// ============================================================================= +// Gas Configuration +// ============================================================================= + +/// Configuration for phlogiston/gas accounting. +#[derive(Debug, Clone)] +pub struct GasConfig { + /// Initial gas limit for new transactions. + pub initial_limit: u64, + /// Whether to enforce gas limits (can be disabled for testing). + pub enabled: bool, + /// Cost multiplier (for chain economics). + pub cost_multiplier: f64, +} + +impl Default for GasConfig { + fn default() -> Self { + GasConfig { + initial_limit: 10_000_000, + enabled: true, + cost_multiplier: 1.0, + } + } +} + +impl GasConfig { + /// Create a configuration with unlimited gas (for testing). + pub fn unlimited() -> Self { + GasConfig { + initial_limit: u64::MAX, + enabled: false, + cost_multiplier: 1.0, + } + } + + /// Create a meter from this configuration. 
+ pub fn create_meter(&self) -> PhlogistonMeter { + if self.enabled { + PhlogistonMeter::new(self.initial_limit) + } else { + PhlogistonMeter::unlimited() + } + } +} + +// ============================================================================= +// Tests +// ============================================================================= + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_operation_costs() { + assert_eq!(Operation::Send { data_size: 0 }.cost(), SEND_BASE_COST); + assert_eq!( + Operation::Send { data_size: 100 }.cost(), + SEND_BASE_COST + 100 * SEND_PER_BYTE_COST + ); + assert_eq!(Operation::Receive.cost(), RECEIVE_BASE_COST); + assert_eq!( + Operation::Match { pattern_size: 5 }.cost(), + MATCH_BASE_COST + 5 * MATCH_PER_ELEMENT_COST + ); + } + + #[test] + fn test_meter_charge_success() { + let meter = PhlogistonMeter::new(1000); + + assert!(meter.charge(&Operation::Receive).is_ok()); + assert_eq!(meter.balance(), 1000 - RECEIVE_BASE_COST); + assert_eq!(meter.total_consumed(), RECEIVE_BASE_COST); + } + + #[test] + fn test_meter_charge_failure() { + let meter = PhlogistonMeter::new(50); + + let result = meter.charge(&Operation::Receive); + assert!(result.is_err()); + + match result { + Err(SpaceError::OutOfPhlogiston { required, available, .. 
}) => { + assert_eq!(required, RECEIVE_BASE_COST); + assert_eq!(available, 50); + } + _ => panic!("Expected OutOfPhlogiston error"), + } + } + + #[test] + fn test_meter_refund() { + let meter = PhlogistonMeter::new(1000); + + meter.charge(&Operation::Receive).unwrap(); + assert_eq!(meter.balance(), 1000 - RECEIVE_BASE_COST); + + meter.refund(50); + assert_eq!(meter.balance(), 1000 - RECEIVE_BASE_COST + 50); + } + + #[test] + fn test_unlimited_meter() { + let meter = PhlogistonMeter::unlimited(); + + // Should be able to charge a huge amount + assert!(meter.charge_amount(1_000_000_000, "test").is_ok()); + assert!(meter.balance() > 1_000_000_000); + } + + #[test] + fn test_gas_config() { + let config = GasConfig::default(); + let meter = config.create_meter(); + + assert_eq!(meter.initial_limit(), config.initial_limit); + + let unlimited = GasConfig::unlimited().create_meter(); + assert_eq!(unlimited.initial_limit(), u64::MAX); + } +} diff --git a/rholang/src/rust/interpreter/spaces/prelude.rs b/rholang/src/rust/interpreter/spaces/prelude.rs new file mode 100644 index 000000000..1a3173c6f --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/prelude.rs @@ -0,0 +1,41 @@ +//! Prelude module for common reified RSpaces imports. +//! +//! This module provides a curated set of the most commonly used types and traits +//! for working with reified RSpaces. Use `use rholang::spaces::prelude::*;` for +//! quick access to essential items. +//! +//! For less common items, import from specific submodules: +//! - `use rholang::spaces::agent::*;` - Core agent traits +//! - `use rholang::spaces::collections::*;` - Collection types +//! - `use rholang::spaces::matcher::*;` - Pattern matching +//! - `use rholang::spaces::factory::*;` - Space construction +//! 
- `use rholang::spaces::vectordb::*;` - Vector database integration + +// Core types +pub use super::types::{ + SpaceId, + SpaceQualifier, + SpaceConfig, + InnerCollectionType, + OuterStorageType, +}; + +// Core traits +pub use super::agent::{SpaceAgent, CheckpointableSpace, ReplayableSpace}; +pub use super::errors::SpaceError; + +// Main implementation +pub use super::generic_rspace::{GenericRSpace, GenericRSpaceBuilder}; + +// Pattern matching +pub use super::matcher::{Match, ExactMatch}; + +// Checkpointing +pub use rspace_plus_plus::rspace::checkpoint::{Checkpoint, SoftCheckpoint}; +pub use rspace_plus_plus::rspace::hashing::blake2b256_hash::Blake2b256Hash; + +// Factory +pub use super::factory::{SpaceFactory, config_from_urn, urn_from_config}; + +// Registry +pub use super::registry::SpaceRegistry; diff --git a/rholang/src/rust/interpreter/spaces/registry.rs b/rholang/src/rust/interpreter/spaces/registry.rs new file mode 100644 index 000000000..a38972d17 --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/registry.rs @@ -0,0 +1,1838 @@ +//! Space Registry +//! +//! The `SpaceRegistry` is the central component for managing multiple tuple spaces +//! within a Rholang runtime. It provides: +//! +//! - Space creation and lookup +//! - Channel-to-space routing +//! - Use block stack for scoped default spaces +//! - Checkpoint coordination across spaces +//! +//! # Channel Routing +//! +//! Each channel is associated with exactly one space. The registry tracks this +//! mapping and routes operations to the correct space. Channels from different +//! spaces cannot participate in the same join pattern. +//! +//! # Use Blocks +//! +//! Use blocks establish a scoped default space for channel creation: +//! +//! ```rholang +//! use space_1 { +//! new ch in { ch!(42) } // ch created in space_1 +//! } +//! ``` +//! +//! The use block stack is thread-local (task-local in async contexts). 
+ +use std::collections::{BTreeSet, HashMap}; +use std::sync::{Arc, RwLock}; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; +use dashmap::DashMap; + +use super::errors::SpaceError; +use super::types::{SpaceConfig, SpaceId, SpaceQualifier}; + +// Re-export Blake2b256Hash if available from rspace++ +// For now, use a type alias that can be replaced with the actual type +pub type MerkleRoot = [u8; 32]; + +// ========================================================================== +// Channel Ownership +// ========================================================================== + +/// Information about a channel's ownership. +#[derive(Clone, Debug)] +pub struct ChannelInfo { + /// The space this channel belongs to + pub space_id: SpaceId, + + /// The qualifier of the space (for quick access) + pub qualifier: SpaceQualifier, +} + +// ========================================================================== +// Space Entry +// ========================================================================== + +/// Entry for a registered space. +#[derive(Clone, Debug)] +pub struct SpaceEntry { + /// The space ID + pub id: SpaceId, + + /// Configuration used to create the space + pub config: SpaceConfig, + + /// Whether this is the default space + pub is_default: bool, +} + +// ========================================================================== +// Operation Logging (TLA+ CheckpointReplay.tla lines 114-138) +// ========================================================================== + +/// Types of operations that can be logged for replay. 
+/// +/// These correspond to the operation types defined in CheckpointReplay.tla: +/// ```tla +/// OperationType == {"Produce", "Consume", "Install"} +/// ``` +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum OperationType { + /// Produce operation: send data to a channel + Produce { + space_id: SpaceId, + channel: Vec, + data: Vec, + persist: bool, + }, + /// Consume operation: receive data from channels + Consume { + space_id: SpaceId, + channels: Vec>, + patterns: Vec>, + persist: bool, + peeks: BTreeSet, + }, + /// Install operation: install a persistent continuation + Install { + space_id: SpaceId, + channels: Vec>, + patterns: Vec>, + }, +} + +impl OperationType { + /// Get the space ID for this operation. + pub fn space_id(&self) -> &SpaceId { + match self { + OperationType::Produce { space_id, .. } => space_id, + OperationType::Consume { space_id, .. } => space_id, + OperationType::Install { space_id, .. } => space_id, + } + } + + /// Get a short description of the operation type. + pub fn type_name(&self) -> &'static str { + match self { + OperationType::Produce { .. } => "Produce", + OperationType::Consume { .. } => "Consume", + OperationType::Install { .. } => "Install", + } + } +} + +/// Log of operations for replay. +/// +/// Corresponds to the Log type in CheckpointReplay.tla: +/// ```tla +/// TypeOK == /\ log \in Seq(Operation) +/// ``` +#[derive(Clone, Debug, Default)] +pub struct OperationLog { + /// The sequence of operations + operations: Vec, +} + +impl OperationLog { + /// Create a new empty operation log. + pub fn new() -> Self { + OperationLog { + operations: Vec::new(), + } + } + + /// Append an operation to the log. + pub fn append(&mut self, op: OperationType) { + self.operations.push(op); + } + + /// Get the number of operations in the log. + pub fn len(&self) -> usize { + self.operations.len() + } + + /// Check if the log is empty. + pub fn is_empty(&self) -> bool { + self.operations.is_empty() + } + + /// Get an operation by index. 
+ pub fn get(&self, index: usize) -> Option<&OperationType> { + self.operations.get(index) + } + + /// Get all operations. + pub fn operations(&self) -> &[OperationType] { + &self.operations + } + + /// Clear the log. + pub fn clear(&mut self) { + self.operations.clear(); + } + + /// Create an iterator over operations. + pub fn iter(&self) -> impl Iterator { + self.operations.iter() + } +} + +// ========================================================================== +// Replay State Machine (TLA+ CheckpointReplay.tla lines 193-231) +// ========================================================================== + +/// Replay mode state. +/// +/// Corresponds to the replay state in CheckpointReplay.tla: +/// ```tla +/// ReplayMode == replayMode \in BOOLEAN +/// ReplayIndex == replayIndex \in Nat +/// ``` +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ReplayState { + /// Normal operation (not replaying) + Normal, + /// Actively replaying from a log + Replaying { + /// Current position in the log + index: usize, + /// Total operations to replay + total: usize, + }, + /// Replay completed successfully + Completed, + /// Replay failed with an error + Failed, +} + +impl ReplayState { + /// Check if in replay mode. + pub fn is_replaying(&self) -> bool { + matches!(self, ReplayState::Replaying { .. }) + } + + /// Check if replay is complete (successfully or with failure). + pub fn is_finished(&self) -> bool { + matches!(self, ReplayState::Completed | ReplayState::Failed) + } +} + +// ========================================================================== +// Soft Checkpoint (TLA+ CheckpointReplay.tla lines 157-179) +// ========================================================================== + +/// Soft (non-persistent) checkpoint for speculative execution. +/// +/// This captures the registry state without persisting to storage, +/// allowing fast rollback for speculative execution that may be reverted. 
+/// +/// Corresponds to CheckpointReplay.tla: +/// ```tla +/// SoftCheckpoint == [spaces: SUBSET Space, channels: ChannelMap] +/// ``` +/// +/// # Performance Note +/// +/// Now uses HashMap directly since DashMap doesn't support Arc-based sharing. +/// Checkpoints require O(n) copy but lookups are significantly faster. +#[derive(Clone, Debug)] +pub struct SoftRegistryCheckpoint { + /// Snapshot of registered spaces + spaces: HashMap, + + /// Snapshot of channel ownership + channel_ownership: HashMap, ChannelInfo>, + + /// Block height at checkpoint + block_height: usize, + + /// Timestamp when checkpoint was created + timestamp: u64, + + /// Operation log length at checkpoint (for truncation on revert) + log_length: usize, +} + +impl SoftRegistryCheckpoint { + /// Get the block height at this checkpoint. + pub fn block_height(&self) -> usize { + self.block_height + } + + /// Get the timestamp when this checkpoint was created. + pub fn timestamp(&self) -> u64 { + self.timestamp + } + + /// Get the operation log length at this checkpoint. + pub fn log_length(&self) -> usize { + self.log_length + } +} + +// ========================================================================== +// Use Block Stack +// ========================================================================== + +/// Stack of default spaces for use blocks. +/// +/// This is managed per-task/thread to support concurrent evaluation. +#[derive(Clone, Debug, Default)] +pub struct UseBlockStack { + stack: Vec, +} + +impl UseBlockStack { + /// Create a new empty use block stack. + pub fn new() -> Self { + UseBlockStack { stack: Vec::new() } + } + + /// Push a new default space onto the stack. + pub fn push(&mut self, space_id: SpaceId) { + self.stack.push(space_id); + } + + /// Pop the current default space from the stack. + /// + /// Returns `None` if the stack is empty. + pub fn pop(&mut self) -> Option { + self.stack.pop() + } + + /// Get the current default space. 
+ /// + /// Returns `None` if no use block is active. + pub fn current(&self) -> Option<&SpaceId> { + self.stack.last() + } + + /// Check if the stack is empty. + pub fn is_empty(&self) -> bool { + self.stack.is_empty() + } + + /// Get the depth of the use block stack. + pub fn depth(&self) -> usize { + self.stack.len() + } +} + +// ========================================================================== +// Space Registry +// ========================================================================== + +/// Central registry for managing multiple tuple spaces. +/// +/// The registry maintains: +/// - A collection of registered spaces +/// - Channel-to-space mappings +/// - The default space (for backward compatibility) +/// - Operation logging for deterministic replay (CheckpointReplay.tla) +/// - Soft checkpoints for speculative execution +/// +/// Thread safety is provided through internal locking. +/// +/// # Performance Note +/// +/// The `spaces` and `channel_ownership` collections use DashMap for fine-grained +/// concurrent access without global locking. Checkpoints now require O(n) copies +/// but lookups are significantly faster under concurrent workloads. +/// +/// # Performance +/// Uses DashMap for per-shard locking, providing 8-10x throughput improvement +/// on 16+ cores compared to RwLock. 
+pub struct SpaceRegistry { + /// Registered spaces by ID (DashMap for concurrent access) + spaces: DashMap, + + /// Channel ownership: maps channel hashes to space IDs + /// Uses a hash of the channel for the key to support different channel types + /// (DashMap for concurrent access) + channel_ownership: DashMap, ChannelInfo>, + + /// The default space ID (created at initialization) + default_space_id: SpaceId, + + /// Use block stacks per thread/task + /// In a real implementation, this would use thread-local or task-local storage + use_block_stacks: RwLock>, + + // ====================================================================== + // Checkpoint/Replay State (TLA+ CheckpointReplay.tla) + // ====================================================================== + + /// Current block height (incremented on each checkpoint) + block_height: AtomicUsize, + + /// Whether the registry is in replay mode + replay_mode: AtomicBool, + + /// Current index in the replay log + replay_index: AtomicUsize, + + /// Operation log for replay + operation_log: RwLock, + + /// Soft checkpoint for speculative execution + soft_checkpoint: RwLock>, + + /// Last committed merkle root + last_merkle_root: RwLock, +} + +impl Default for SpaceRegistry { + fn default() -> Self { + Self::new() + } +} + +impl SpaceRegistry { + /// Create a new space registry with a default space. 
+ pub fn new() -> Self { + let default_id = SpaceId::default_space(); + let default_entry = SpaceEntry { + id: default_id.clone(), + config: SpaceConfig::default(), + is_default: true, + }; + + let spaces = DashMap::new(); + spaces.insert(default_id.clone(), default_entry); + + SpaceRegistry { + spaces, + channel_ownership: DashMap::new(), + default_space_id: default_id, + use_block_stacks: RwLock::new(HashMap::new()), + // Checkpoint/replay state + block_height: AtomicUsize::new(0), + replay_mode: AtomicBool::new(false), + replay_index: AtomicUsize::new(0), + operation_log: RwLock::new(OperationLog::new()), + soft_checkpoint: RwLock::new(None), + last_merkle_root: RwLock::new([0u8; 32]), + } + } + + /// Get the default space ID. + pub fn default_space_id(&self) -> &SpaceId { + &self.default_space_id + } + + /// Register a new space. + /// + /// # Arguments + /// - `space_id`: Unique identifier for the space + /// - `config`: Configuration for the space + /// + /// # Returns + /// - `Ok(())` if registration succeeded + /// - `Err(...)` if a space with this ID already exists + pub fn register_space(&self, space_id: SpaceId, config: SpaceConfig) -> Result<(), SpaceError> { + // Use DashMap's entry API for atomic check-and-insert + if self.spaces.contains_key(&space_id) { + return Err(SpaceError::InvalidConfiguration { + description: format!("Space {} already registered", space_id), + }); + } + + self.spaces.insert( + space_id.clone(), + SpaceEntry { + id: space_id, + config, + is_default: false, + }, + ); + + Ok(()) + } + + /// Get a space entry by ID. + pub fn get_space(&self, space_id: &SpaceId) -> Option { + self.spaces.get(space_id).map(|r| r.value().clone()) + } + + /// Check if a space exists. + pub fn space_exists(&self, space_id: &SpaceId) -> bool { + self.spaces.contains_key(space_id) + } + + /// Get all registered space IDs. 
+ pub fn all_space_ids(&self) -> Vec { + self.spaces.iter().map(|r| r.key().clone()).collect() + } + + // ====================================================================== + // Channel Ownership + // ====================================================================== + + /// Register a channel as belonging to a space. + /// + /// # Arguments + /// - `channel_hash`: Hash of the channel (for type-agnostic storage) + /// - `space_id`: The space this channel belongs to + /// - `qualifier`: The qualifier of the space + pub fn register_channel( + &self, + channel_hash: Vec, + space_id: SpaceId, + qualifier: SpaceQualifier, + ) { + self.channel_ownership.insert( + channel_hash, + ChannelInfo { + space_id, + qualifier, + }, + ); + } + + /// Get the space for a channel. + /// + /// # Arguments + /// - `channel_hash`: Hash of the channel + /// + /// # Returns + /// - `Some(ChannelInfo)` if the channel is registered + /// - `None` if the channel is unknown + pub fn get_channel_space(&self, channel_hash: &[u8]) -> Option { + self.channel_ownership.get(channel_hash).map(|r| r.value().clone()) + } + + /// Resolve the space for a channel, defaulting to the current use block or default space. + /// + /// # Arguments + /// - `channel_hash`: Hash of the channel (or None for new channels) + /// - `task_id`: The current task ID for use block lookup + /// + /// # Returns + /// The resolved space ID + pub fn resolve_space( + &self, + channel_hash: Option<&[u8]>, + task_id: u64, + ) -> SpaceId { + // If channel is known, use its space + if let Some(hash) = channel_hash { + if let Some(info) = self.get_channel_space(hash) { + return info.space_id; + } + } + + // Otherwise, use current use block or default + self.current_default_space(task_id) + .unwrap_or_else(|| self.default_space_id.clone()) + } + + /// Check that all channels belong to the same space. + /// + /// Used to validate join patterns. 
+ /// + /// # Arguments + /// - `channel_hashes`: Hashes of the channels in the join + /// + /// # Returns + /// - `Ok(SpaceId)` if all channels are in the same space + /// - `Err(...)` if channels are from different spaces + pub fn verify_same_space(&self, channel_hashes: &[Vec]) -> Result { + if channel_hashes.is_empty() { + return Err(SpaceError::InvalidConfiguration { + description: "Empty channel list".to_string(), + }); + } + + // Use DashMap's get() for lock-free access + let first_space = self.channel_ownership + .get(&channel_hashes[0]) + .map(|info| info.space_id.clone()) + .ok_or_else(|| SpaceError::ChannelNotFound { + description: "First channel not found".to_string(), + })?; + + for (i, hash) in channel_hashes.iter().enumerate().skip(1) { + let space = self.channel_ownership + .get(hash) + .map(|info| info.space_id.clone()) + .ok_or_else(|| SpaceError::ChannelNotFound { + description: format!("Channel {} not found", i), + })?; + + if space != first_space { + return Err(SpaceError::CrossSpaceJoinNotAllowed { + description: format!( + "Channel {} is in space {} but channel 0 is in space {}", + i, space, first_space + ), + }); + } + } + + Ok(first_space) + } + + // ====================================================================== + // Use Block Stack + // ====================================================================== + + /// Push a space onto the use block stack for a task. + pub fn push_use_block(&self, task_id: u64, space_id: SpaceId) { + let mut stacks = self.use_block_stacks.write().unwrap(); + stacks + .entry(task_id) + .or_insert_with(UseBlockStack::new) + .push(space_id); + } + + /// Pop a space from the use block stack for a task. + pub fn pop_use_block(&self, task_id: u64) -> Option { + let mut stacks = self.use_block_stacks.write().unwrap(); + stacks.get_mut(&task_id).and_then(|stack| stack.pop()) + } + + /// Get the current default space for a task. 
+ pub fn current_default_space(&self, task_id: u64) -> Option { + self.use_block_stacks + .read() + .unwrap() + .get(&task_id) + .and_then(|stack| stack.current().cloned()) + } + + /// Get the use block depth for a task. + pub fn use_block_depth(&self, task_id: u64) -> usize { + self.use_block_stacks + .read() + .unwrap() + .get(&task_id) + .map(|stack| stack.depth()) + .unwrap_or(0) + } + + /// Clean up use block stack for a completed task. + pub fn cleanup_task(&self, task_id: u64) { + self.use_block_stacks.write().unwrap().remove(&task_id); + } + + // ====================================================================== + // Checkpointing (TLA+ CheckpointReplay.tla lines 149-155) + // ====================================================================== + + /// Create a registry checkpoint. + /// + /// This captures the current state of space registrations and channel + /// ownership for restoration later. Includes merkle root and block height + /// as required by the TLA+ specification. + /// + /// # Arguments + /// - `merkle_root`: The merkle root of the current state + pub fn create_checkpoint(&self, merkle_root: MerkleRoot) -> RegistryCheckpoint { + let block_height = self.block_height.fetch_add(1, Ordering::SeqCst); + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + + // Update stored merkle root + *self.last_merkle_root.write().unwrap() = merkle_root; + + // Copy DashMap contents to HashMap for checkpoint + let spaces: HashMap = self.spaces + .iter() + .map(|r| (r.key().clone(), r.value().clone())) + .collect(); + + let channel_ownership: HashMap, ChannelInfo> = self.channel_ownership + .iter() + .map(|r| (r.key().clone(), r.value().clone())) + .collect(); + + RegistryCheckpoint { + spaces, + channel_ownership, + merkle_root, + block_height, + timestamp, + } + } + + /// Create a checkpoint with a default (zero) merkle root. 
+ /// + /// This is a convenience method for cases where the merkle root is not + /// computed externally. + pub fn create_checkpoint_default(&self) -> RegistryCheckpoint { + self.create_checkpoint([0u8; 32]) + } + + /// Restore from a checkpoint. + /// + /// This restores space registrations and channel ownership. + /// Note: Individual space states must be restored separately. + pub fn restore_checkpoint(&self, checkpoint: RegistryCheckpoint) { + // Clear and repopulate DashMap from checkpoint HashMap + self.spaces.clear(); + for (k, v) in checkpoint.spaces { + self.spaces.insert(k, v); + } + + self.channel_ownership.clear(); + for (k, v) in checkpoint.channel_ownership { + self.channel_ownership.insert(k, v); + } + + *self.last_merkle_root.write().unwrap() = checkpoint.merkle_root; + // Note: block_height is not restored; it only increases + } + + /// Get the current block height. + pub fn block_height(&self) -> usize { + self.block_height.load(Ordering::SeqCst) + } + + /// Get the last committed merkle root. + pub fn last_merkle_root(&self) -> MerkleRoot { + *self.last_merkle_root.read().unwrap() + } + + // ====================================================================== + // Soft Checkpoints (TLA+ CheckpointReplay.tla lines 157-179) + // ====================================================================== + + /// Create a soft (non-persistent) checkpoint. + /// + /// This is faster than a full checkpoint and suitable for speculative + /// execution that may be rolled back. 
+ pub fn create_soft_checkpoint(&self) -> Result<(), SpaceError> { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + + let checkpoint = SoftRegistryCheckpoint { + spaces: self.spaces.iter().map(|r| (r.key().clone(), r.value().clone())).collect(), + channel_ownership: self.channel_ownership.iter().map(|r| (r.key().clone(), r.value().clone())).collect(), + block_height: self.block_height.load(Ordering::SeqCst), + timestamp, + log_length: self.operation_log.read().unwrap().len(), + }; + + *self.soft_checkpoint.write().unwrap() = Some(checkpoint); + Ok(()) + } + + /// Revert to the soft checkpoint. + /// + /// Restores the registry state to when the soft checkpoint was created. + pub fn revert_to_soft_checkpoint(&self) -> Result<(), SpaceError> { + let checkpoint = self.soft_checkpoint.write().unwrap().take(); + + match checkpoint { + Some(cp) => { + self.spaces.clear(); + for (k, v) in cp.spaces { + self.spaces.insert(k, v); + } + + self.channel_ownership.clear(); + for (k, v) in cp.channel_ownership { + self.channel_ownership.insert(k, v); + } + + // Truncate operation log to checkpoint length + let mut log = self.operation_log.write().unwrap(); + while log.len() > cp.log_length { + log.operations.pop(); + } + + Ok(()) + } + None => Err(SpaceError::CheckpointError { + description: "No soft checkpoint to revert to".to_string(), + }), + } + } + + /// Commit (discard) the soft checkpoint. + /// + /// This makes the changes since the soft checkpoint permanent. + pub fn commit_soft_checkpoint(&self) -> Result<(), SpaceError> { + let checkpoint = self.soft_checkpoint.write().unwrap().take(); + + match checkpoint { + Some(_) => Ok(()), + None => Err(SpaceError::CheckpointError { + description: "No soft checkpoint to commit".to_string(), + }), + } + } + + /// Check if a soft checkpoint is active. 
+ pub fn has_soft_checkpoint(&self) -> bool { + self.soft_checkpoint.read().unwrap().is_some() + } + + // ====================================================================== + // Replay State Machine (TLA+ CheckpointReplay.tla lines 193-231) + // ====================================================================== + + /// Enter replay mode with the given operation log. + /// + /// # Arguments + /// - `log`: The operation log to replay + pub fn enter_replay_mode(&self, log: OperationLog) -> Result<(), SpaceError> { + if self.replay_mode.load(Ordering::SeqCst) { + return Err(SpaceError::InternalError { + description: "Already in replay mode".to_string(), + }); + } + + *self.operation_log.write().unwrap() = log; + self.replay_index.store(0, Ordering::SeqCst); + self.replay_mode.store(true, Ordering::SeqCst); + + Ok(()) + } + + /// Get the next operation to replay, if any. + /// + /// Returns `None` if replay is complete or not in replay mode. + pub fn replay_next_operation(&self) -> Option { + if !self.replay_mode.load(Ordering::SeqCst) { + return None; + } + + let index = self.replay_index.load(Ordering::SeqCst); + let log = self.operation_log.read().unwrap(); + + if index >= log.len() { + return None; + } + + let op = log.get(index).cloned(); + self.replay_index.fetch_add(1, Ordering::SeqCst); + op + } + + /// Get the current replay state. + pub fn replay_state(&self) -> ReplayState { + if !self.replay_mode.load(Ordering::SeqCst) { + return ReplayState::Normal; + } + + let index = self.replay_index.load(Ordering::SeqCst); + let total = self.operation_log.read().unwrap().len(); + + if index >= total { + ReplayState::Completed + } else { + ReplayState::Replaying { index, total } + } + } + + /// Check if in replay mode. + pub fn is_replay_mode(&self) -> bool { + self.replay_mode.load(Ordering::SeqCst) + } + + /// Exit replay mode. + /// + /// Should be called after replay is complete or on error. 
+ pub fn exit_replay_mode(&self) -> Result<(), SpaceError> { + if !self.replay_mode.load(Ordering::SeqCst) { + return Err(SpaceError::InternalError { + description: "Not in replay mode".to_string(), + }); + } + + self.replay_mode.store(false, Ordering::SeqCst); + self.replay_index.store(0, Ordering::SeqCst); + self.operation_log.write().unwrap().clear(); + + Ok(()) + } + + /// Check that replay data matches expectations. + /// + /// Should be called after replay is complete to verify correctness. + pub fn check_replay_data(&self) -> Result<(), SpaceError> { + let state = self.replay_state(); + + match state { + ReplayState::Completed => Ok(()), + ReplayState::Normal => Err(SpaceError::InternalError { + description: "Not in replay mode".to_string(), + }), + ReplayState::Replaying { index, total } => Err(SpaceError::InternalError { + description: format!("Replay incomplete: {} of {} operations", index, total), + }), + ReplayState::Failed => Err(SpaceError::InternalError { + description: "Replay failed".to_string(), + }), + } + } + + // ====================================================================== + // Operation Logging + // ====================================================================== + + /// Log an operation for potential replay. + /// + /// This should be called after each successful operation when not in replay mode. + pub fn log_operation(&self, op: OperationType) { + if !self.replay_mode.load(Ordering::SeqCst) { + self.operation_log.write().unwrap().append(op); + } + } + + /// Get the current operation log. + pub fn operation_log(&self) -> OperationLog { + self.operation_log.read().unwrap().clone() + } + + /// Clear the operation log. 
+ pub fn clear_operation_log(&self) { + self.operation_log.write().unwrap().clear(); + } + + // ====================================================================== + // Seq Mobility Enforcement (TLA+ SpaceCoordination.tla lines 172-184) + // ====================================================================== + + /// Check if a channel is Seq (non-mobile). + /// + /// Seq channels cannot be sent across space boundaries. + /// + /// Corresponds to SpaceCoordination.tla: + /// ```tla + /// IsSeqChannel(c) == channelQualifier[c] = "Seq" + /// ``` + pub fn is_seq_channel(&self, channel_hash: &[u8]) -> bool { + self.get_channel_space(channel_hash) + .map(|info| info.qualifier == SpaceQualifier::Seq) + .unwrap_or(false) + } + + /// Validate that channels can be sent (no Seq channels). + /// + /// This enforces the mobility constraint from the TLA+ specification: + /// ```tla + /// ValidSendChannels(channels) == \A c \in channels : ~IsSeqChannel(c) + /// ``` + /// + /// # Arguments + /// - `channel_hashes`: Hashes of channels to validate + /// + /// # Returns + /// - `Ok(())` if all channels are mobile + /// - `Err(...)` if any channel is Seq (non-mobile) + pub fn validate_send_channels(&self, channel_hashes: &[Vec]) -> Result<(), SpaceError> { + for hash in channel_hashes { + if self.is_seq_channel(hash) { + return Err(SpaceError::SeqChannelNotMobile { + description: format!( + "Channel {:?} is Seq and cannot be sent", + hash + ), + }); + } + } + Ok(()) + } + + /// Check if a channel is mobile (can be sent). + /// + /// Returns true for all qualifiers except Seq. + pub fn is_mobile_channel(&self, channel_hash: &[u8]) -> bool { + self.get_channel_space(channel_hash) + .map(|info| info.qualifier.is_mobile()) + .unwrap_or(true) // Unknown channels default to mobile + } +} + +/// Checkpoint of registry state (TLA+ CheckpointReplay.tla lines 149-155). +/// +/// # Performance Note +/// Now uses HashMap directly since DashMap doesn't support Arc-based sharing. 
+/// Checkpoints require O(n) copy but lookups are significantly faster. +#[derive(Clone, Debug)] +pub struct RegistryCheckpoint { + /// Snapshot of registered spaces + spaces: HashMap, + + /// Snapshot of channel ownership + channel_ownership: HashMap, ChannelInfo>, + + /// Merkle root of the state at checkpoint + merkle_root: MerkleRoot, + + /// Block height when checkpoint was created + block_height: usize, + + /// Unix timestamp when checkpoint was created + timestamp: u64, +} + +impl RegistryCheckpoint { + /// Get the merkle root. + pub fn merkle_root(&self) -> &MerkleRoot { + &self.merkle_root + } + + /// Get the block height. + pub fn block_height(&self) -> usize { + self.block_height + } + + /// Get the timestamp. + pub fn timestamp(&self) -> u64 { + self.timestamp + } + + /// Get the spaces snapshot. + pub fn spaces(&self) -> &HashMap { + &self.spaces + } + + /// Get the channel ownership snapshot. + pub fn channel_ownership(&self) -> &HashMap, ChannelInfo> { + &self.channel_ownership + } +} + +// ========================================================================== +// Multi-Space Checkpoint (Atomic Coordination) +// ========================================================================== + +use rspace_plus_plus::rspace::hashing::blake2b256_hash::Blake2b256Hash; + +/// Information about a space checkpoint (root hash for restoration). +#[derive(Clone, Debug)] +pub struct SpaceCheckpointInfo { + /// The merkle root of the space state at checkpoint + pub root: Blake2b256Hash, +} + +/// Atomic checkpoint across all spaces. +/// +/// This captures both the registry metadata AND the state of all individual spaces, +/// ensuring that the entire system can be restored atomically. If any space +/// fails to checkpoint, the entire operation is rolled back. 
+/// +/// # TLA+ Correspondence +/// +/// This implements an atomic multi-space checkpoint coordination that extends +/// the SpaceCoordination.tla specification: +/// +/// ```tla +/// AtomicMultiSpaceCheckpoint == +/// /\ \A s \in spaces: s.state = "ready" +/// /\ \A s \in spaces: CreateCheckpoint(s) +/// /\ \/ (\A s \in spaces: CheckpointSuccess(s)) +/// \/ (\A s \in spaces: RollbackCheckpoint(s)) +/// ``` +#[derive(Clone, Debug)] +pub struct MultiSpaceCheckpoint { + /// Registry metadata checkpoint + registry_checkpoint: RegistryCheckpoint, + + /// Individual space checkpoint roots, keyed by space ID bytes + space_checkpoints: HashMap, SpaceCheckpointInfo>, + + /// Block height when the multi-checkpoint was created + block_height: usize, + + /// Unix timestamp when created + timestamp: u64, +} + +impl MultiSpaceCheckpoint { + /// Create a new multi-space checkpoint. + pub fn new( + registry_checkpoint: RegistryCheckpoint, + space_checkpoints: HashMap, SpaceCheckpointInfo>, + ) -> Self { + Self { + block_height: registry_checkpoint.block_height(), + timestamp: registry_checkpoint.timestamp(), + registry_checkpoint, + space_checkpoints, + } + } + + /// Get the registry checkpoint. + pub fn registry_checkpoint(&self) -> &RegistryCheckpoint { + &self.registry_checkpoint + } + + /// Get the space checkpoints. + pub fn space_checkpoints(&self) -> &HashMap, SpaceCheckpointInfo> { + &self.space_checkpoints + } + + /// Get the block height. + pub fn block_height(&self) -> usize { + self.block_height + } + + /// Get the timestamp. + pub fn timestamp(&self) -> u64 { + self.timestamp + } + + /// Get the merkle root from the registry checkpoint. + pub fn merkle_root(&self) -> &MerkleRoot { + self.registry_checkpoint.merkle_root() + } + + /// Check if a specific space has a checkpoint. + pub fn has_space_checkpoint(&self, space_id: &[u8]) -> bool { + self.space_checkpoints.contains_key(space_id) + } + + /// Get the checkpoint info for a specific space. 
+ pub fn get_space_checkpoint(&self, space_id: &[u8]) -> Option<&SpaceCheckpointInfo> { + self.space_checkpoints.get(space_id) + } + + /// Get the number of spaces checkpointed. + pub fn num_spaces(&self) -> usize { + self.space_checkpoints.len() + } +} + +/// Result of a multi-space checkpoint operation. +#[derive(Clone, Debug)] +pub enum MultiSpaceCheckpointResult { + /// All spaces checkpointed successfully. + Success(MultiSpaceCheckpoint), + + /// Checkpoint failed, with rollback information. + /// + /// Contains the list of space IDs that were successfully checkpointed + /// before the failure (and have been rolled back). + PartialFailure { + /// The error that caused the failure + error: String, + + /// Space IDs that were successfully checkpointed before failure + checkpointed_spaces: Vec>, + + /// The space ID that failed + failed_space: Vec, + }, +} + +// ========================================================================== +// Multi-Space Checkpoint Helper Functions +// ========================================================================== + +use crate::rust::interpreter::rho_runtime::RhoISpace; + +/// Helper function to rollback previously checkpointed spaces on failure. +/// +/// This is a best-effort rollback - failures during rollback are silently ignored +/// since we're already in an error path. 
+fn rollback_checkpointed_spaces( + default_space: &RhoISpace, + space_store: &std::sync::RwLock, RhoISpace>>, + space_checkpoints: &HashMap, SpaceCheckpointInfo>, + checkpointed_space_ids: &[Vec], +) { + let default_id = SpaceId::default_space().as_bytes().to_vec(); + + for prev_id in checkpointed_space_ids { + if let Some(prev_checkpoint) = space_checkpoints.get(prev_id) { + if *prev_id == default_id { + // Rollback default space + if let Ok(mut default_locked) = default_space.try_lock() { + let _ = default_locked.reset(&prev_checkpoint.root); + } + } else { + // Rollback additional space + if let Ok(store) = space_store.read() { + if let Some(prev_space) = store.get(prev_id) { + if let Ok(mut prev_locked) = prev_space.try_lock() { + let _ = prev_locked.reset(&prev_checkpoint.root); + } + } + } + } + } + } +} + +/// Atomically checkpoint all spaces in the space store. +/// +/// This function implements a two-phase commit pattern: +/// 1. Phase 1: Attempt to checkpoint all spaces +/// 2. 
On failure: Rollback previously checkpointed spaces +/// +/// # Arguments +/// - `registry`: The space registry (for metadata checkpoint) +/// - `default_space`: The default space instance +/// - `space_store`: The map of additional space instances +/// - `merkle_root`: The merkle root for the registry checkpoint +/// +/// # Returns +/// - `Ok(MultiSpaceCheckpoint)` if all spaces checkpointed successfully +/// - `Err(SpaceError)` if any space failed (with rollback completed) +/// +/// # TLA+ Correspondence +/// +/// This implements atomic coordination across spaces: +/// ```tla +/// AtomicCheckpoint == +/// LET results == {CreateCheckpoint(s) : s \in spaces} +/// IN IF \A r \in results: r.success +/// THEN Success(results) +/// ELSE Rollback(results) /\ Failure +/// ``` +pub fn checkpoint_all_spaces( + registry: &SpaceRegistry, + default_space: &RhoISpace, + space_store: &std::sync::RwLock, RhoISpace>>, + merkle_root: MerkleRoot, +) -> Result { + let mut space_checkpoints: HashMap, SpaceCheckpointInfo> = HashMap::new(); + let mut checkpointed_space_ids: Vec> = Vec::new(); + + // Phase 1: Checkpoint the default space + let default_space_id = SpaceId::default_space().as_bytes().to_vec(); + match default_space.try_lock() { + Ok(mut space_locked) => { + match space_locked.create_checkpoint() { + Ok(checkpoint) => { + space_checkpoints.insert( + default_space_id.clone(), + SpaceCheckpointInfo { root: checkpoint.root }, + ); + checkpointed_space_ids.push(default_space_id); + } + Err(e) => { + return Err(SpaceError::CheckpointError { + description: format!("Failed to checkpoint default space: {:?}", e), + }); + } + } + } + Err(_) => { + return Err(SpaceError::CheckpointError { + description: "Failed to lock default space for checkpoint".to_string(), + }); + } + } + + // Phase 2: Checkpoint all additional spaces + // First, collect all space IDs to avoid borrow issues during error handling + let additional_space_ids: Vec> = { + let store_guard = 
space_store.read().map_err(|e| SpaceError::CheckpointError { + description: format!("Failed to lock space_store for reading: {}", e), + })?; + store_guard.keys().cloned().collect() + }; + + // Now checkpoint each additional space + for space_id in additional_space_ids { + let store_guard = space_store.read().map_err(|e| SpaceError::CheckpointError { + description: format!("Failed to lock space_store for reading: {}", e), + })?; + + let space = match store_guard.get(&space_id) { + Some(s) => s.clone(), + None => continue, // Space was removed between collecting IDs and now + }; + drop(store_guard); // Release lock before checkpoint operation + + match space.try_lock() { + Ok(mut space_locked) => { + match space_locked.create_checkpoint() { + Ok(checkpoint) => { + space_checkpoints.insert( + space_id.clone(), + SpaceCheckpointInfo { root: checkpoint.root }, + ); + checkpointed_space_ids.push(space_id.clone()); + } + Err(e) => { + // Rollback: Reset previously checkpointed spaces + rollback_checkpointed_spaces( + default_space, + space_store, + &space_checkpoints, + &checkpointed_space_ids, + ); + return Err(SpaceError::CheckpointError { + description: format!( + "Failed to checkpoint space {:?}: {:?}. Rollback completed for {} spaces.", + hex::encode(&space_id), + e, + checkpointed_space_ids.len() + ), + }); + } + } + } + Err(_) => { + // Space is locked - rollback and fail + rollback_checkpointed_spaces( + default_space, + space_store, + &space_checkpoints, + &checkpointed_space_ids, + ); + return Err(SpaceError::CheckpointError { + description: format!( + "Failed to lock space {:?} for checkpoint. Rollback completed.", + hex::encode(&space_id) + ), + }); + } + }; + } + + // All spaces checkpointed successfully - create the registry checkpoint + let registry_checkpoint = registry.create_checkpoint(merkle_root); + + Ok(MultiSpaceCheckpoint::new(registry_checkpoint, space_checkpoints)) +} + +/// Restore all spaces from a multi-space checkpoint. 
+/// +/// This function atomically restores the state of all spaces to match +/// the checkpoint. If any space fails to restore, the operation fails +/// but does not attempt rollback (the system may be in an inconsistent state). +/// +/// # Arguments +/// - `registry`: The space registry (for metadata restoration) +/// - `default_space`: The default space instance +/// - `space_store`: The map of additional space instances +/// - `checkpoint`: The multi-space checkpoint to restore from +/// +/// # Returns +/// - `Ok(())` if all spaces restored successfully +/// - `Err(SpaceError)` if any space failed to restore +pub fn restore_all_spaces( + registry: &SpaceRegistry, + default_space: &RhoISpace, + space_store: &std::sync::RwLock, RhoISpace>>, + checkpoint: &MultiSpaceCheckpoint, +) -> Result<(), SpaceError> { + // Restore registry metadata first + registry.restore_checkpoint(checkpoint.registry_checkpoint().clone()); + + // Restore default space + let default_space_id = SpaceId::default_space().as_bytes().to_vec(); + if let Some(space_checkpoint) = checkpoint.get_space_checkpoint(&default_space_id) { + match default_space.try_lock() { + Ok(mut space_locked) => { + space_locked.reset(&space_checkpoint.root).map_err(|e| { + SpaceError::CheckpointError { + description: format!("Failed to restore default space: {:?}", e), + } + })?; + } + Err(_) => { + return Err(SpaceError::CheckpointError { + description: "Failed to lock default space for restoration".to_string(), + }); + } + } + } + + // Restore additional spaces + let store_guard = space_store.read().map_err(|e| SpaceError::CheckpointError { + description: format!("Failed to lock space_store for reading: {}", e), + })?; + + for (space_id, space) in store_guard.iter() { + if let Some(space_checkpoint) = checkpoint.get_space_checkpoint(space_id) { + match space.try_lock() { + Ok(mut space_locked) => { + space_locked.reset(&space_checkpoint.root).map_err(|e| { + SpaceError::CheckpointError { + description: format!( + 
"Failed to restore space {:?}: {:?}", + hex::encode(space_id), + e + ), + } + })?; + } + Err(_) => { + return Err(SpaceError::CheckpointError { + description: format!( + "Failed to lock space {:?} for restoration", + hex::encode(space_id) + ), + }); + } + } + } + // Note: Spaces not in the checkpoint are left as-is + // This handles the case where new spaces were created after the checkpoint + } + + Ok(()) +} + +// ========================================================================== +// Thread-safe wrapper +// ========================================================================== + +/// Thread-safe reference to a space registry. +pub type SharedRegistry = Arc; + +/// Create a new shared registry. +pub fn create_shared_registry() -> SharedRegistry { + Arc::new(SpaceRegistry::new()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_registry_creation() { + let registry = SpaceRegistry::new(); + assert!(registry.space_exists(&SpaceId::default_space())); + } + + #[test] + fn test_register_space() { + let registry = SpaceRegistry::new(); + let space_id = SpaceId::new(vec![1, 2, 3, 4]); + + registry + .register_space(space_id.clone(), SpaceConfig::queue()) + .unwrap(); + + assert!(registry.space_exists(&space_id)); + let entry = registry.get_space(&space_id).unwrap(); + assert_eq!(entry.config.data_collection, super::super::types::InnerCollectionType::Queue); + } + + #[test] + fn test_duplicate_space_registration() { + let registry = SpaceRegistry::new(); + let space_id = SpaceId::new(vec![1, 2, 3, 4]); + + registry + .register_space(space_id.clone(), SpaceConfig::default()) + .unwrap(); + + let result = registry.register_space(space_id, SpaceConfig::default()); + assert!(result.is_err()); + } + + #[test] + fn test_channel_ownership() { + let registry = SpaceRegistry::new(); + let space_id = SpaceId::new(vec![5, 6, 7, 8]); + let channel_hash = vec![0xde, 0xad, 0xbe, 0xef]; + + registry.register_channel( + channel_hash.clone(), + 
space_id.clone(), + SpaceQualifier::Default, + ); + + let info = registry.get_channel_space(&channel_hash).unwrap(); + assert_eq!(info.space_id, space_id); + } + + #[test] + fn test_use_block_stack() { + let registry = SpaceRegistry::new(); + let task_id = 42; + let space1 = SpaceId::new(vec![1]); + let space2 = SpaceId::new(vec![2]); + + // Initially empty + assert!(registry.current_default_space(task_id).is_none()); + assert_eq!(registry.use_block_depth(task_id), 0); + + // Push space1 + registry.push_use_block(task_id, space1.clone()); + assert_eq!(registry.current_default_space(task_id), Some(space1.clone())); + assert_eq!(registry.use_block_depth(task_id), 1); + + // Push space2 + registry.push_use_block(task_id, space2.clone()); + assert_eq!(registry.current_default_space(task_id), Some(space2.clone())); + assert_eq!(registry.use_block_depth(task_id), 2); + + // Pop space2 + let popped = registry.pop_use_block(task_id); + assert_eq!(popped, Some(space2)); + assert_eq!(registry.current_default_space(task_id), Some(space1.clone())); + + // Pop space1 + let popped = registry.pop_use_block(task_id); + assert_eq!(popped, Some(space1)); + assert!(registry.current_default_space(task_id).is_none()); + } + + #[test] + fn test_verify_same_space() { + let registry = SpaceRegistry::new(); + let space_id = SpaceId::new(vec![1, 2, 3]); + + // Register channels in the same space + registry.register_channel(vec![1], space_id.clone(), SpaceQualifier::Default); + registry.register_channel(vec![2], space_id.clone(), SpaceQualifier::Default); + registry.register_channel(vec![3], space_id.clone(), SpaceQualifier::Default); + + let result = registry.verify_same_space(&[vec![1], vec![2], vec![3]]); + assert!(result.is_ok()); + assert_eq!(result.unwrap(), space_id); + } + + #[test] + fn test_verify_different_spaces() { + let registry = SpaceRegistry::new(); + let space1 = SpaceId::new(vec![1]); + let space2 = SpaceId::new(vec![2]); + + // Register channels in different spaces + 
registry.register_channel(vec![1], space1, SpaceQualifier::Default); + registry.register_channel(vec![2], space2, SpaceQualifier::Default); + + let result = registry.verify_same_space(&[vec![1], vec![2]]); + assert!(matches!( + result, + Err(SpaceError::CrossSpaceJoinNotAllowed { .. }) + )); + } + + #[test] + fn test_checkpoint_restore() { + let registry = SpaceRegistry::new(); + let space_id = SpaceId::new(vec![9, 9, 9]); + + registry + .register_space(space_id.clone(), SpaceConfig::stack()) + .unwrap(); + + let checkpoint = registry.create_checkpoint_default(); + + // Verify checkpoint has the space + assert!(checkpoint.spaces().contains_key(&space_id)); + + // Verify checkpoint has block height and merkle root + assert!(checkpoint.block_height() >= 0); + assert_eq!(checkpoint.merkle_root(), &[0u8; 32]); + + // Modify registry + let new_space = SpaceId::new(vec![8, 8, 8]); + registry + .register_space(new_space.clone(), SpaceConfig::default()) + .unwrap(); + + // Restore checkpoint + registry.restore_checkpoint(checkpoint); + + // New space should be gone + assert!(!registry.space_exists(&new_space)); + assert!(registry.space_exists(&space_id)); + } + + #[test] + fn test_soft_checkpoint() { + let registry = SpaceRegistry::new(); + let space_id = SpaceId::new(vec![5, 5, 5]); + + registry + .register_space(space_id.clone(), SpaceConfig::default()) + .unwrap(); + + // Create soft checkpoint + registry.create_soft_checkpoint().expect("create soft checkpoint should succeed"); + assert!(registry.has_soft_checkpoint()); + + // Make some changes + let new_space = SpaceId::new(vec![6, 6, 6]); + registry + .register_space(new_space.clone(), SpaceConfig::default()) + .unwrap(); + assert!(registry.space_exists(&new_space)); + + // Revert to soft checkpoint + registry.revert_to_soft_checkpoint().expect("revert should succeed"); + + // Changes should be gone + assert!(!registry.space_exists(&new_space)); + assert!(registry.space_exists(&space_id)); + 
assert!(!registry.has_soft_checkpoint()); + } + + #[test] + fn test_soft_checkpoint_commit() { + let registry = SpaceRegistry::new(); + + registry.create_soft_checkpoint().expect("create soft checkpoint should succeed"); + + let space_id = SpaceId::new(vec![7, 7, 7]); + registry + .register_space(space_id.clone(), SpaceConfig::default()) + .unwrap(); + + // Commit soft checkpoint + registry.commit_soft_checkpoint().expect("commit should succeed"); + + // Changes should persist + assert!(registry.space_exists(&space_id)); + assert!(!registry.has_soft_checkpoint()); + } + + #[test] + fn test_replay_mode() { + let registry = SpaceRegistry::new(); + + // Start in normal mode + assert!(!registry.is_replay_mode()); + assert_eq!(registry.replay_state(), ReplayState::Normal); + + // Create a simple log + let mut log = OperationLog::new(); + log.append(OperationType::Produce { + space_id: SpaceId::default_space(), + channel: vec![1, 2, 3], + data: vec![4, 5, 6], + persist: false, + }); + log.append(OperationType::Consume { + space_id: SpaceId::default_space(), + channels: vec![vec![1, 2, 3]], + patterns: vec![vec![]], + persist: false, + peeks: std::collections::BTreeSet::new(), + }); + + // Enter replay mode + registry.enter_replay_mode(log).expect("enter replay should succeed"); + assert!(registry.is_replay_mode()); + + // Get operations + let op1 = registry.replay_next_operation(); + assert!(op1.is_some()); + assert_eq!(op1.unwrap().type_name(), "Produce"); + + let op2 = registry.replay_next_operation(); + assert!(op2.is_some()); + assert_eq!(op2.unwrap().type_name(), "Consume"); + + // No more operations + let op3 = registry.replay_next_operation(); + assert!(op3.is_none()); + + // Verify completed state + assert!(matches!(registry.replay_state(), ReplayState::Completed)); + + // Exit replay mode + registry.exit_replay_mode().expect("exit replay should succeed"); + assert!(!registry.is_replay_mode()); + } + + #[test] + fn test_seq_mobility_enforcement() { + let registry 
= SpaceRegistry::new(); + let space_id = SpaceId::new(vec![1, 2, 3]); + + // Register a Seq channel + registry.register_channel(vec![1], space_id.clone(), SpaceQualifier::Seq); + + // Register a mobile channel + registry.register_channel(vec![2], space_id.clone(), SpaceQualifier::Default); + + // Seq channel should not be mobile + assert!(registry.is_seq_channel(&[1])); + assert!(!registry.is_mobile_channel(&[1])); + + // Default channel should be mobile + assert!(!registry.is_seq_channel(&[2])); + assert!(registry.is_mobile_channel(&[2])); + + // Validate send channels - should fail for Seq + let result = registry.validate_send_channels(&[vec![1]]); + assert!(matches!(result, Err(SpaceError::SeqChannelNotMobile { .. }))); + + // Validate send channels - should succeed for mobile + let result = registry.validate_send_channels(&[vec![2]]); + assert!(result.is_ok()); + } + + #[test] + fn test_operation_logging() { + let registry = SpaceRegistry::new(); + + // Log some operations + registry.log_operation(OperationType::Produce { + space_id: SpaceId::default_space(), + channel: vec![1], + data: vec![2], + persist: false, + }); + + registry.log_operation(OperationType::Install { + space_id: SpaceId::default_space(), + channels: vec![vec![3]], + patterns: vec![vec![]], + }); + + // Check log + let log = registry.operation_log(); + assert_eq!(log.len(), 2); + assert_eq!(log.get(0).unwrap().type_name(), "Produce"); + assert_eq!(log.get(1).unwrap().type_name(), "Install"); + + // Clear log + registry.clear_operation_log(); + assert!(registry.operation_log().is_empty()); + } + + #[test] + fn test_resolve_space() { + let registry = SpaceRegistry::new(); + let task_id = 100; + let space_id = SpaceId::new(vec![42]); + + // Register space and channel + registry + .register_space(space_id.clone(), SpaceConfig::default()) + .unwrap(); + registry.register_channel(vec![1, 2, 3], space_id.clone(), SpaceQualifier::Default); + + // Resolve known channel + let resolved = 
registry.resolve_space(Some(&[1, 2, 3]), task_id); + assert_eq!(resolved, space_id); + + // Resolve unknown channel defaults to default space + let resolved = registry.resolve_space(Some(&[9, 9, 9]), task_id); + assert_eq!(resolved, *registry.default_space_id()); + + // With use block, unknown channels use the use block space + let use_space = SpaceId::new(vec![77]); + registry.push_use_block(task_id, use_space.clone()); + let resolved = registry.resolve_space(Some(&[9, 9, 9]), task_id); + assert_eq!(resolved, use_space); + } + + // ========================================================================= + // Multi-Space Checkpoint Unit Tests + // ========================================================================= + + #[test] + fn test_space_checkpoint_info_creation() { + use rspace_plus_plus::rspace::hashing::blake2b256_hash::Blake2b256Hash; + + let root = Blake2b256Hash::new(&[1u8; 32]); + let info = SpaceCheckpointInfo { root: root.clone() }; + assert_eq!(info.root, root); + } + + #[test] + fn test_multi_space_checkpoint_creation() { + use rspace_plus_plus::rspace::hashing::blake2b256_hash::Blake2b256Hash; + + let registry = SpaceRegistry::new(); + let merkle_root = [2u8; 32]; + let registry_cp = registry.create_checkpoint(merkle_root); + + let mut space_checkpoints = HashMap::new(); + let default_id = SpaceId::default_space().as_bytes().to_vec(); + let root1 = Blake2b256Hash::new(&[3u8; 32]); + space_checkpoints.insert(default_id.clone(), SpaceCheckpointInfo { root: root1.clone() }); + + let multi_cp = MultiSpaceCheckpoint::new(registry_cp, space_checkpoints); + + // Verify getters work + assert!(multi_cp.space_checkpoints.contains_key(&default_id)); + assert_eq!(multi_cp.get_space_checkpoint(&default_id).unwrap().root, root1); + assert!(multi_cp.timestamp > 0); + } + + #[test] + fn test_multi_space_checkpoint_multiple_spaces() { + use rspace_plus_plus::rspace::hashing::blake2b256_hash::Blake2b256Hash; + + let registry = SpaceRegistry::new(); + let 
space1 = SpaceId::new(vec![1, 1, 1]); + let space2 = SpaceId::new(vec![2, 2, 2]); + + registry.register_space(space1.clone(), SpaceConfig::default()).unwrap(); + registry.register_space(space2.clone(), SpaceConfig::queue()).unwrap(); + + let merkle_root = [4u8; 32]; + let registry_cp = registry.create_checkpoint(merkle_root); + + let mut space_checkpoints = HashMap::new(); + let root1 = Blake2b256Hash::new(&[5u8; 32]); + let root2 = Blake2b256Hash::new(&[6u8; 32]); + let root_default = Blake2b256Hash::new(&[7u8; 32]); + + space_checkpoints.insert(SpaceId::default_space().as_bytes().to_vec(), SpaceCheckpointInfo { root: root_default.clone() }); + space_checkpoints.insert(space1.as_bytes().to_vec(), SpaceCheckpointInfo { root: root1.clone() }); + space_checkpoints.insert(space2.as_bytes().to_vec(), SpaceCheckpointInfo { root: root2.clone() }); + + let multi_cp = MultiSpaceCheckpoint::new(registry_cp, space_checkpoints); + + // Verify all spaces are checkpointed + assert_eq!(multi_cp.space_checkpoints.len(), 3); + assert_eq!(multi_cp.get_space_checkpoint(space1.as_bytes()).unwrap().root, root1); + assert_eq!(multi_cp.get_space_checkpoint(space2.as_bytes()).unwrap().root, root2); + assert_eq!(multi_cp.get_space_checkpoint(SpaceId::default_space().as_bytes()).unwrap().root, root_default); + + // Verify registry checkpoint contains all registered spaces + assert!(multi_cp.registry_checkpoint.spaces().contains_key(&space1)); + assert!(multi_cp.registry_checkpoint.spaces().contains_key(&space2)); + } + + #[test] + fn test_multi_space_checkpoint_get_nonexistent_space() { + use rspace_plus_plus::rspace::hashing::blake2b256_hash::Blake2b256Hash; + + let registry = SpaceRegistry::new(); + let registry_cp = registry.create_checkpoint([0u8; 32]); + + let mut space_checkpoints = HashMap::new(); + let root = Blake2b256Hash::new(&[1u8; 32]); + space_checkpoints.insert(SpaceId::default_space().as_bytes().to_vec(), SpaceCheckpointInfo { root }); + + let multi_cp = 
MultiSpaceCheckpoint::new(registry_cp, space_checkpoints); + + // Non-existent space should return None + let nonexistent = vec![99, 99, 99]; + assert!(multi_cp.get_space_checkpoint(&nonexistent).is_none()); + } + + #[test] + fn test_multi_space_checkpoint_result_enum() { + use rspace_plus_plus::rspace::hashing::blake2b256_hash::Blake2b256Hash; + + let registry = SpaceRegistry::new(); + let registry_cp = registry.create_checkpoint([0u8; 32]); + let space_checkpoints = HashMap::new(); + let multi_cp = MultiSpaceCheckpoint::new(registry_cp, space_checkpoints); + + // Test success variant + let success = MultiSpaceCheckpointResult::Success(multi_cp); + assert!(matches!(success, MultiSpaceCheckpointResult::Success(_))); + + // Test partial failure variant + let partial = MultiSpaceCheckpointResult::PartialFailure { + checkpointed_spaces: vec![SpaceId::default_space().as_bytes().to_vec()], + failed_space: vec![1], + error: "test error".to_string(), + }; + if let MultiSpaceCheckpointResult::PartialFailure { checkpointed_spaces, failed_space, error } = partial { + assert_eq!(checkpointed_spaces.len(), 1); + assert_eq!(failed_space, &[1]); + assert_eq!(error, "test error"); + } + } +} diff --git a/rholang/src/rust/interpreter/spaces/similarity_extraction.rs b/rholang/src/rust/interpreter/spaces/similarity_extraction.rs new file mode 100644 index 000000000..55cdabf3b --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/similarity_extraction.rs @@ -0,0 +1,842 @@ +//! Similarity Pattern Extraction Helpers +//! +//! This module provides functions for extracting similarity-related parameters +//! from Rholang Par values and EFunction modifiers. These functions are used +//! by GenericRSpace and the VectorDB subsystem to process similarity queries. +//! +//! # Extraction Functions +//! +//! ## Pattern Modifiers +//! - [`extract_modifiers_from_efunctions`]: Parse `sim()` and `rank()` EFunction calls +//! - [`ExtractedModifiers`]: Consolidated result of modifier extraction +//! 
+//! ## Par-based Extraction +//! - [`extract_embedding_from_par`]: Extract embedding vectors from Par values +//! - [`extract_number_from_par`]: Extract integer values from Par +//! - [`extract_threshold_from_par`]: Extract similarity thresholds +//! - [`extract_top_k_from_par`]: Extract top-K ranking limits +//! - [`extract_metric_from_par`]: Extract similarity metric specification +//! - [`extract_rank_function_from_par`]: Extract ranking function names +//! +//! ## VectorDB Data Extraction +//! - [`extract_embedding_from_map`]: Extract embeddings from map-structured data +//! - [`extract_channel_id_from_par`]: Extract GPrivate channel IDs +//! +//! # Similarity Computation +//! +//! - [`compute_cosine_similarity`]: Cosine similarity (normalized dot product) +//! - [`compute_dot_product`]: Raw dot product +//! - [`compute_euclidean_similarity`]: Euclidean distance-based similarity +//! - [`compute_manhattan_similarity`]: Manhattan (L1) distance-based similarity +//! - [`compute_hamming_similarity`]: Hamming distance for boolean vectors +//! - [`compute_jaccard_similarity`]: Jaccard similarity for set-like vectors + +use models::rhoapi::{EFunction, Expr, Par, expr::ExprInstance, g_unforgeable::UnfInstance}; +use models::rust::par_map_type_mapper::ParMapTypeMapper; + +use super::collections::{EmbeddingType, SimilarityMetric}; +use super::errors::SpaceError; + +// ============================================================================= +// Par-based Extraction Helpers +// ============================================================================= + +/// Extract an embedding vector from a Par. 
+/// +/// The embedding is expected to be in one of these forms: +/// - GString with comma-delimited floats (e.g., `"0.8,0.2,0.5"`) - used as-is +/// - EList of GInt values (e.g., `[80, 20, 50]`) - scaled 0-100, converted to 0.0-1.0 +/// - EList of expressions that evaluate to numbers +/// +/// # Arguments +/// - `par`: The Par containing the embedding +/// +/// # Returns +/// - `Ok(Vec)`: The extracted embedding vector +/// - `Err(SpaceError)`: If the Par doesn't contain a valid embedding +pub fn extract_embedding_from_par(par: &Option) -> Result, SpaceError> { + let par = par.as_ref().ok_or_else(|| SpaceError::SimilarityMatchError { + reason: "Similarity query embedding is missing".to_string(), + })?; + + // Try float string format first: "0.8,0.2,0.5" + for expr in &par.exprs { + if let Some(ExprInstance::GString(s)) = &expr.expr_instance { + let parts: Vec<&str> = s.split(',').map(|p| p.trim()).collect(); + let mut embedding = Vec::with_capacity(parts.len()); + for part in parts { + if let Ok(f) = part.parse::() { + embedding.push(f); + } else { + return Err(SpaceError::SimilarityMatchError { + reason: format!("Invalid float in embedding string: '{}'", part), + }); + } + } + if !embedding.is_empty() { + return Ok(embedding); + } + } + } + + // Fall back to EList with integers: [80, 20, 50] + for expr in &par.exprs { + if let Some(ExprInstance::EListBody(list)) = &expr.expr_instance { + let mut embedding = Vec::with_capacity(list.ps.len()); + for elem_par in &list.ps { + // Each element should be a Par containing a GInt + let value = extract_number_from_par(elem_par)?; + // Scale from 0-100 to 0.0-1.0 + embedding.push(value as f32 / 100.0); + } + return Ok(embedding); + } + } + + Err(SpaceError::SimilarityMatchError { + reason: "Similarity query embedding must be a list of numbers or comma-delimited float string".to_string(), + }) +} + +/// Extract a single number from a Par. 
+/// +/// # Arguments +/// - `par`: The Par containing the number +/// +/// # Returns +/// - `Ok(i64)`: The extracted number +/// - `Err(SpaceError)`: If the Par doesn't contain a valid number +pub fn extract_number_from_par(par: &Par) -> Result { + for expr in &par.exprs { + if let Some(ExprInstance::GInt(n)) = &expr.expr_instance { + return Ok(*n); + } + } + + Err(SpaceError::SimilarityMatchError { + reason: "Expected a number in similarity pattern".to_string(), + }) +} + +/// Extract a threshold from a Par. +/// +/// The threshold can be specified as: +/// - GString with float (e.g., `"0.5"`) - used as-is (must be 0.0-1.0) +/// - GInt (e.g., `50`) - scaled from 0-100 to 0.0-1.0 +/// +/// # Arguments +/// - `par`: The Par containing the threshold +/// +/// # Returns +/// - `Ok(f32)`: The extracted threshold (0.0 to 1.0) +/// - `Err(SpaceError)`: If the Par doesn't contain a valid threshold +pub fn extract_threshold_from_par(par: &Option) -> Result { + let par = par.as_ref().ok_or_else(|| SpaceError::SimilarityMatchError { + reason: "Explicit threshold value is missing".to_string(), + })?; + + // Try float string format first: "0.5" + for expr in &par.exprs { + if let Some(ExprInstance::GString(s)) = &expr.expr_instance { + if let Ok(f) = s.parse::() { + if f < 0.0 || f > 1.0 { + return Err(SpaceError::SimilarityMatchError { + reason: format!("Threshold string must be 0.0-1.0, got {}", f), + }); + } + return Ok(f); + } + } + } + + // Fall back to integer: 50 -> 0.5 + let value = extract_number_from_par(par)?; + + // Validate range and convert 0-100 to 0.0-1.0 + if value < 0 || value > 100 { + return Err(SpaceError::SimilarityMatchError { + reason: format!("Threshold must be 0-100, got {}", value), + }); + } + + Ok(value as f32 / 100.0) +} + +/// Extract a top-K value from a Par. +/// +/// The K value must be a positive integer indicating how many results to return. 
+/// +/// # Arguments +/// - `par`: The Par containing the K value +/// +/// # Returns +/// - `Ok(usize)`: The extracted K value (must be >= 1) +/// - `Err(SpaceError)`: If the Par doesn't contain a valid K value +pub fn extract_top_k_from_par(par: &Option) -> Result { + let par = par.as_ref().ok_or_else(|| SpaceError::SimilarityMatchError { + reason: "Top-K value is missing".to_string(), + })?; + + let value = extract_number_from_par(par)?; + + if value < 1 { + return Err(SpaceError::SimilarityMatchError { + reason: format!("Top-K value must be >= 1, got {}", value), + }); + } + + Ok(value as usize) +} + +/// Extract a similarity metric from a Par. +/// +/// The metric can be specified as a string from the supported set: +/// - `"cos"` or `"cosine"` -> Cosine similarity +/// - `"dot"` or `"dotproduct"` -> Dot product +/// - `"euc"` or `"euclidean"` or `"l2"` -> Euclidean distance-based similarity +/// - `"manhattan"` or `"l1"` -> Manhattan distance-based similarity +/// - `"hamming"` -> Hamming distance for boolean vectors +/// - `"jaccard"` -> Jaccard similarity for boolean vectors +/// +/// # Arguments +/// - `par`: The Par containing the metric string +/// +/// # Returns +/// - `Ok(SimilarityMetric)`: The extracted metric +/// - `Err(SpaceError)`: If the Par doesn't contain a valid metric string +pub fn extract_metric_from_par(par: &Option) -> Result { + let par = par.as_ref().ok_or_else(|| SpaceError::SimilarityMatchError { + reason: "Similarity metric value is missing".to_string(), + })?; + + // Extract string from Par + for expr in &par.exprs { + if let Some(ExprInstance::GString(s)) = &expr.expr_instance { + // Parse metric string (case-insensitive) + return match s.to_lowercase().as_str() { + "cos" | "cosine" => Ok(SimilarityMetric::Cosine), + "dot" | "dotproduct" | "dot_product" => Ok(SimilarityMetric::DotProduct), + "euc" | "euclidean" | "l2" => Ok(SimilarityMetric::Euclidean), + "manhattan" | "l1" => Ok(SimilarityMetric::Manhattan), + "hamming" => 
Ok(SimilarityMetric::Hamming), + "jaccard" => Ok(SimilarityMetric::Jaccard), + _ => Err(SpaceError::SimilarityMatchError { + reason: format!( + "Unknown similarity metric '{}'. Supported: cos, dot, euc, manhattan, hamming, jaccard", + s + ), + }), + }; + } + } + + Err(SpaceError::SimilarityMatchError { + reason: "Similarity metric must be a string (e.g., \"cos\", \"dot\", \"euc\")".to_string(), + }) +} + +/// Extract a rank function from a Par. +/// +/// Currently only `"topk"` is supported. +/// +/// # Arguments +/// - `par`: The Par containing the rank function string +/// +/// # Returns +/// - `Ok(String)`: The extracted rank function name +/// - `Err(SpaceError)`: If the Par doesn't contain a valid rank function +pub fn extract_rank_function_from_par(par: &Option) -> Result { + let par = par.as_ref().ok_or_else(|| SpaceError::SimilarityMatchError { + reason: "Rank function value is missing".to_string(), + })?; + + // Extract string from Par + for expr in &par.exprs { + if let Some(ExprInstance::GString(s)) = &expr.expr_instance { + // Validate rank function + return match s.to_lowercase().as_str() { + "topk" | "top_k" => Ok("topk".to_string()), + _ => Err(SpaceError::SimilarityMatchError { + reason: format!( + "Unknown rank function '{}'. Supported: topk", + s + ), + }), + }; + } + } + + Err(SpaceError::SimilarityMatchError { + reason: "Rank function must be a string (e.g., \"topk\")".to_string(), + }) +} + +// ============================================================================= +// Pattern Modifier Extraction from EFunction +// ============================================================================= + +/// Extracted pattern modifiers from EFunction calls. +/// +/// This struct consolidates the modifier information extracted from `sim` and `rank` +/// EFunction calls into a unified representation for space operations. 
+#[derive(Debug, Clone)] +pub struct ExtractedModifiers { + /// Query embedding vector (from first argument of sim or rank) + pub query_embedding: Option>, + /// Similarity metric (from sim function) + pub metric: Option, + /// Similarity threshold (from sim params) + pub threshold: Option, + /// Top-K limit (from rank params) + pub top_k: Option, + /// Rank function name (for future extensibility) + pub rank_function: Option, +} + +impl Default for ExtractedModifiers { + fn default() -> Self { + Self { + query_embedding: None, + metric: None, + threshold: None, + top_k: None, + rank_function: None, + } + } +} + +/// Extract modifier information from a list of EFunction calls. +/// +/// EFunction format: +/// - `sim(query, metric, threshold, ...)` -> extracts embedding, metric, threshold +/// - `rank(query, function, params...)` -> extracts embedding, rank function, top_k +/// +/// # Arguments +/// - `modifiers`: List of EFunction pattern modifiers +/// +/// # Returns +/// - `Ok(ExtractedModifiers)`: The extracted modifier values +/// - `Err(SpaceError)`: If extraction fails +pub fn extract_modifiers_from_efunctions(modifiers: &[EFunction]) -> Result { + let mut result = ExtractedModifiers::default(); + + for efunction in modifiers { + match efunction.function_name.as_str() { + "sim" => { + // sim(query, [metric, [threshold, ...]]) + // Arguments: [0]=query, [1]=metric (optional), [2]=threshold (optional) + if efunction.arguments.is_empty() { + return Err(SpaceError::SimilarityMatchError { + reason: "sim modifier requires at least a query argument".to_string(), + }); + } + + // Extract query embedding from first argument + result.query_embedding = Some(extract_embedding_from_par(&Some(efunction.arguments[0].clone()))?); + + // Extract metric from second argument if present + if efunction.arguments.len() > 1 { + result.metric = Some(extract_metric_from_par(&Some(efunction.arguments[1].clone()))?); + } + + // Extract threshold from third argument if present + if 
efunction.arguments.len() > 2 { + result.threshold = Some(extract_threshold_from_par(&Some(efunction.arguments[2].clone()))?); + } + } + "rank" => { + // rank(query, function, [params...]) + // Arguments: [0]=query, [1]=function, [2]=k (for topk) + if efunction.arguments.len() < 2 { + return Err(SpaceError::SimilarityMatchError { + reason: "rank modifier requires query and function arguments".to_string(), + }); + } + + // If we haven't extracted the query yet, use rank's query + if result.query_embedding.is_none() { + result.query_embedding = Some(extract_embedding_from_par(&Some(efunction.arguments[0].clone()))?); + } + + // Extract rank function from second argument + result.rank_function = Some(extract_rank_function_from_par(&Some(efunction.arguments[1].clone()))?); + + // Extract top-K from third argument if present + if efunction.arguments.len() > 2 { + result.top_k = Some(extract_top_k_from_par(&Some(efunction.arguments[2].clone()))?); + } else { + // Default to 1 for topk without explicit K + result.top_k = Some(1); + } + } + other => { + tracing::warn!("Unknown pattern modifier function: {}", other); + // Skip unknown modifiers for forward compatibility + } + } + } + + Ok(result) +} + +/// Extract the GPrivate ID from a channel Par. +/// +/// Channels in Rholang are represented as Par values containing a GUnforgeable +/// with a GPrivate body. This function extracts the unique ID bytes from such +/// a channel representation. 
+/// +/// # Arguments +/// - `channel`: The Par representing a channel (should contain a GPrivate) +/// +/// # Returns +/// - `Some(Vec)`: The GPrivate ID bytes if present +/// - `None`: If the Par doesn't contain a GPrivate channel +pub fn extract_channel_id_from_par(channel: &Par) -> Option> { + for unf in &channel.unforgeables { + if let Some(UnfInstance::GPrivateBody(g_private)) = &unf.unf_instance { + return Some(g_private.id.clone()); + } + } + None +} + +// ============================================================================= +// Map-based Embedding Extraction for VectorDB +// ============================================================================= + +/// Extract an embedding from a Par that contains a map with an "embedding" key. +/// +/// This function is used during produce to extract embeddings from data sent to +/// VectorDB-backed channels. The data format follows industry standards (Pinecone, +/// Qdrant, Weaviate) where records are stored as dictionaries with explicit fields. +/// +/// # Expected Rholang Data Format +/// +/// ```rholang +/// {"id": 0, "title": "Document Title", "embedding": [90, 5, 10, 20]} +/// ``` +/// +/// # Arguments +/// +/// - `par`: The Par containing the map with embedding data +/// - `embedding_type`: The expected embedding format (Boolean, Integer, Float) +/// - `dimensions`: The expected number of embedding dimensions +/// +/// # Returns +/// +/// - `Ok(Vec)`: The extracted embedding vector as floats +/// - `Err(SpaceError)`: If extraction fails (wrong format, missing key, etc.) 
+/// +/// # Embedding Type Handling +/// +/// - **Boolean**: `[0, 1, 1, 0]` → `[0.0, 1.0, 1.0, 0.0]` +/// - **Integer**: `[90, 5, 10, 20]` → `[0.9, 0.05, 0.1, 0.2]` (scaled from 0-100) +/// - **Float**: `"0.9,0.05,0.1,0.2"` → parsed to `Vec` +pub fn extract_embedding_from_map( + par: &Par, + embedding_type: EmbeddingType, + dimensions: usize, +) -> Result, SpaceError> { + // Create a key Par for "embedding" lookup + let embedding_key = Par::default().with_exprs(vec![Expr { + expr_instance: Some(ExprInstance::GString("embedding".to_string())), + }]); + + // Look for an EMap in the Par's expressions + for expr in &par.exprs { + if let Some(ExprInstance::EMapBody(emap)) = &expr.expr_instance { + // Convert to ParMap for key-value access + let par_map = ParMapTypeMapper::emap_to_par_map(emap.clone()); + + // Look up the "embedding" key + let embedding_par = par_map.ps.get_or_else(embedding_key.clone(), Par::default()); + + // Check if we got a valid result (not default) + if embedding_par == Par::default() { + return Err(SpaceError::EmbeddingExtractionError { + description: "VectorDB data must contain an 'embedding' key. \ + Expected format: {\"id\": ..., \"title\": ..., \"embedding\": [...]}".to_string(), + }); + } + + // Extract the embedding based on type + return extract_embedding_by_type(&embedding_par, embedding_type, dimensions); + } + } + + Err(SpaceError::EmbeddingExtractionError { + description: "VectorDB data must be a map with an 'embedding' key. \ + Expected format: {\"id\": ..., \"title\": ..., \"embedding\": [...]}".to_string(), + }) +} + +/// Extract embedding based on the configured embedding type. 
+fn extract_embedding_by_type( + par: &Par, + embedding_type: EmbeddingType, + dimensions: usize, +) -> Result, SpaceError> { + match embedding_type { + EmbeddingType::Boolean => extract_boolean_embedding(par, dimensions), + EmbeddingType::Integer => extract_integer_embedding(par, dimensions), + EmbeddingType::Float => extract_float_string_embedding(par, dimensions), + } +} + +/// Extract a boolean embedding: [0, 1, 1, 0] → [0.0, 1.0, 1.0, 0.0] +fn extract_boolean_embedding(par: &Par, dimensions: usize) -> Result, SpaceError> { + // Look for an EList in the Par + for expr in &par.exprs { + if let Some(ExprInstance::EListBody(list)) = &expr.expr_instance { + if list.ps.len() != dimensions { + return Err(SpaceError::DimensionMismatch { + expected: dimensions, + actual: list.ps.len(), + }); + } + + let mut embedding = Vec::with_capacity(dimensions); + for elem_par in &list.ps { + let value = extract_number_from_par(elem_par)?; + if value != 0 && value != 1 { + return Err(SpaceError::EmbeddingExtractionError { + description: format!( + "Boolean embedding type requires values 0 or 1, found: {}", + value + ), + }); + } + embedding.push(if value != 0 { 1.0 } else { 0.0 }); + } + return Ok(embedding); + } + } + + Err(SpaceError::EmbeddingExtractionError { + description: "Boolean embedding must be a list of 0/1 values".to_string(), + }) +} + +/// Extract an integer embedding: [90, 5, 10, 20] → [0.9, 0.05, 0.1, 0.2] +fn extract_integer_embedding(par: &Par, dimensions: usize) -> Result, SpaceError> { + // Look for an EList in the Par + for expr in &par.exprs { + if let Some(ExprInstance::EListBody(list)) = &expr.expr_instance { + if list.ps.len() != dimensions { + return Err(SpaceError::DimensionMismatch { + expected: dimensions, + actual: list.ps.len(), + }); + } + + let mut embedding = Vec::with_capacity(dimensions); + for elem_par in &list.ps { + let value = extract_number_from_par(elem_par)?; + if value < 0 || value > 100 { + return 
Err(SpaceError::EmbeddingExtractionError { + description: format!( + "Integer embedding type requires values in 0-100 range, found: {}", + value + ), + }); + } + // Scale from 0-100 to 0.0-1.0 + embedding.push(value as f32 / 100.0); + } + return Ok(embedding); + } + } + + Err(SpaceError::EmbeddingExtractionError { + description: "Integer embedding must be a list of integers (0-100 scale)".to_string(), + }) +} + +/// Extract a float string embedding: "0.9,0.05,0.1,0.2" → [0.9, 0.05, 0.1, 0.2] +fn extract_float_string_embedding(par: &Par, dimensions: usize) -> Result, SpaceError> { + // Look for a GString in the Par + for expr in &par.exprs { + if let Some(ExprInstance::GString(s)) = &expr.expr_instance { + let parts: Vec<&str> = s.split(',').collect(); + if parts.len() != dimensions { + return Err(SpaceError::DimensionMismatch { + expected: dimensions, + actual: parts.len(), + }); + } + + let mut embedding = Vec::with_capacity(dimensions); + for (i, part) in parts.iter().enumerate() { + let value = part.trim().parse::().map_err(|e| { + SpaceError::EmbeddingExtractionError { + description: format!( + "Failed to parse float at position {}: '{}' ({})", + i, part, e + ), + } + })?; + embedding.push(value); + } + return Ok(embedding); + } + } + + Err(SpaceError::EmbeddingExtractionError { + description: "Float embedding must be a comma-separated string of floats (e.g., \"0.1,0.2,0.3\")".to_string(), + }) +} + +// ============================================================================= +// Similarity Computation Helpers +// ============================================================================= + +/// Compute cosine similarity between two embedding vectors. +/// +/// Both vectors are L2-normalized before computing the dot product. +/// This matches the behavior of VectorDBDataCollection's similarity computation. 
///
/// # Arguments
/// - `a`: First embedding vector
/// - `b`: Second embedding vector
///
/// # Returns
/// Cosine similarity score in range [-1.0, 1.0], or 0.0 if either vector is zero.
pub fn compute_cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    // Compute L2 norms (NOTE(review): restored garbled `.sum::<f32>()` turbofish)
    let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();

    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    // Dot product of the raw vectors divided by the product of norms —
    // algebraically identical to normalizing both vectors first.
    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    dot / (norm_a * norm_b)
}

/// Compute dot product of two vectors.
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Dot product score, or 0.0 if vectors have different lengths or are empty.
pub fn compute_dot_product(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }
    a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
}

/// Compute Euclidean distance-based similarity of two vectors.
///
/// Converts Euclidean distance to a similarity score using: 1 / (1 + distance)
/// This ensures similarity is in range [0, 1], where 1 means identical vectors.
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Euclidean similarity score in range [0.0, 1.0], or 0.0 if vectors have different lengths or are empty.
pub fn compute_euclidean_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    // Compute Euclidean distance (L2 norm of the difference).
    let distance: f32 = a.iter()
        .zip(b.iter())
        .map(|(x, y)| (x - y).powi(2))
        .sum::<f32>()
        .sqrt();

    // Convert distance to similarity: 1 / (1 + distance)
    1.0 / (1.0 + distance)
}

/// Compute Manhattan distance-based similarity of two vectors.
///
/// Converts Manhattan distance to a similarity score using: 1 / (1 + distance)
/// This ensures similarity is in range [0, 1], where 1 means identical vectors.
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Manhattan similarity score in range [0.0, 1.0], or 0.0 if vectors have different lengths or are empty.
pub fn compute_manhattan_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    // Compute Manhattan distance (L1 norm)
    let distance: f32 = a.iter()
        .zip(b.iter())
        .map(|(x, y)| (x - y).abs())
        .sum();

    // Convert distance to similarity: 1 / (1 + distance)
    1.0 / (1.0 + distance)
}

/// Compute Hamming similarity of two vectors.
///
/// Hamming distance counts the number of positions where elements differ.
/// Similarity is computed as: 1 - (hamming_distance / length)
///
/// For float vectors, elements are considered different if their difference
/// exceeds a small epsilon (0.001).
///
/// # Arguments
/// * `a` - First vector
/// * `b` - Second vector
///
/// # Returns
/// Hamming similarity score in range [0.0, 1.0], or 0.0 if vectors have different lengths or are empty.
pub fn compute_hamming_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    const EPSILON: f32 = 0.001;
    let different_count = a.iter()
        .zip(b.iter())
        .filter(|(x, y)| (*x - *y).abs() > EPSILON)
        .count();

    1.0 - (different_count as f32 / a.len() as f32)
}

// Jaccard similarity (implemented below): |A ∩ B| / |A ∪ B|, where a float
// element counts as "present" when its magnitude exceeds a small epsilon.
// Returns a score in [0.0, 1.0]; 0.0 when lengths differ, inputs are empty,
// or both vectors are all-zero.
+pub fn compute_jaccard_similarity(a: &[f32], b: &[f32]) -> f32 { + if a.len() != b.len() || a.is_empty() { + return 0.0; + } + + const EPSILON: f32 = 0.001; + let mut intersection = 0; + let mut union = 0; + + for (x, y) in a.iter().zip(b.iter()) { + let a_present = x.abs() > EPSILON; + let b_present = y.abs() > EPSILON; + + if a_present || b_present { + union += 1; + if a_present && b_present { + intersection += 1; + } + } + } + + if union == 0 { + return 0.0; + } + + intersection as f32 / union as f32 +} + +/// Select and compute similarity using the specified metric. +/// +/// This is a convenience function that dispatches to the appropriate +/// similarity computation based on the metric enum. +/// +/// # Arguments +/// * `a` - First vector +/// * `b` - Second vector +/// * `metric` - The similarity metric to use +/// +/// # Returns +/// Similarity score according to the specified metric. +pub fn compute_similarity(a: &[f32], b: &[f32], metric: &SimilarityMetric) -> f32 { + match metric { + SimilarityMetric::Cosine => compute_cosine_similarity(a, b), + SimilarityMetric::DotProduct => compute_dot_product(a, b), + SimilarityMetric::Euclidean => compute_euclidean_similarity(a, b), + SimilarityMetric::Manhattan => compute_manhattan_similarity(a, b), + SimilarityMetric::Hamming => compute_hamming_similarity(a, b), + SimilarityMetric::Jaccard => compute_jaccard_similarity(a, b), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cosine_similarity_identical() { + let a = vec![1.0, 0.0, 0.0]; + let b = vec![1.0, 0.0, 0.0]; + let sim = compute_cosine_similarity(&a, &b); + assert!((sim - 1.0).abs() < 0.0001); + } + + #[test] + fn test_cosine_similarity_orthogonal() { + let a = vec![1.0, 0.0, 0.0]; + let b = vec![0.0, 1.0, 0.0]; + let sim = compute_cosine_similarity(&a, &b); + assert!(sim.abs() < 0.0001); + } + + #[test] + fn test_dot_product() { + let a = vec![1.0, 2.0, 3.0]; + let b = vec![4.0, 5.0, 6.0]; + let dot = compute_dot_product(&a, &b); 
+ assert!((dot - 32.0).abs() < 0.0001); // 1*4 + 2*5 + 3*6 = 32 + } + + #[test] + fn test_euclidean_similarity_identical() { + let a = vec![1.0, 2.0, 3.0]; + let b = vec![1.0, 2.0, 3.0]; + let sim = compute_euclidean_similarity(&a, &b); + assert!((sim - 1.0).abs() < 0.0001); + } + + #[test] + fn test_hamming_similarity() { + let a = vec![1.0, 0.0, 1.0, 0.0]; + let b = vec![1.0, 1.0, 1.0, 0.0]; + let sim = compute_hamming_similarity(&a, &b); + assert!((sim - 0.75).abs() < 0.0001); // 3/4 match + } + + #[test] + fn test_jaccard_similarity() { + let a = vec![1.0, 0.0, 1.0, 0.0]; + let b = vec![1.0, 1.0, 0.0, 0.0]; + let sim = compute_jaccard_similarity(&a, &b); + // Union: {0, 1, 2}, Intersection: {0} + assert!((sim - (1.0 / 3.0)).abs() < 0.0001); + } + + #[test] + fn test_extracted_modifiers_default() { + let mods = ExtractedModifiers::default(); + assert!(mods.query_embedding.is_none()); + assert!(mods.metric.is_none()); + assert!(mods.threshold.is_none()); + assert!(mods.top_k.is_none()); + assert!(mods.rank_function.is_none()); + } +} diff --git a/rholang/src/rust/interpreter/spaces/types/allocation.rs b/rholang/src/rust/interpreter/spaces/types/allocation.rs new file mode 100644 index 000000000..fd98d0798 --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/types/allocation.rs @@ -0,0 +1,115 @@ +//! Allocation Mode for Channel Creation +//! +//! This module defines how `new` bindings allocate channels within different +//! space types, as specified in the "Reifying RSpaces" design document. +//! +//! # Allocation Modes +//! +//! - **Random**: Default for most spaces. Uses Blake2b512Random for cryptographic IDs. +//! - **ArrayIndex**: For Array spaces. Sequential indices up to max_size, wrapped in Unforgeable. +//! - **VectorIndex**: For Vector spaces. Growing indices, wrapped in Unforgeable. +//! +//! # Unforgeable Wrapping +//! +//! Array and Vector indices are wrapped in `GPrivate` with format: +//! 
//! `[space_id bytes (32)] ++ [index big-endian (8)]` = 40 bytes total
//!
//! This ensures:
//! - Determinism (required for blockchain consensus)
//! - Unforgeability (cannot guess the space_id prefix)
//! - Efficient lookup (extract last 8 bytes for O(1) access)

use std::fmt;

/// Allocation mode for `new` bindings within a space.
///
/// Different outer storage types use different allocation strategies for
/// creating new channel names within `use space { new x in { ... } }` blocks.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum AllocationMode {
    /// Random allocation using Blake2b512Random.
    /// Used by: HashMap, PathMap, HashSet (and default space).
    Random,

    /// Sequential index allocation for fixed-size arrays.
    /// Returns indices 0, 1, 2, ... up to max_size-1.
    /// In non-cyclic mode, returns OutOfNames error when exhausted.
    /// In cyclic mode, wraps around to 0.
    /// Indices are wrapped in Unforgeable with space_id prefix.
    ArrayIndex {
        /// Maximum number of channels in the array
        max_size: usize,
        /// Whether to wrap around (cyclic) or error (non-cyclic)
        cyclic: bool,
    },

    /// Sequential index allocation for unbounded vectors.
    /// Returns indices 0, 1, 2, ... growing without limit.
    /// Only fails on out-of-memory.
    /// Indices are wrapped in Unforgeable with space_id prefix.
    VectorIndex,
}

impl AllocationMode {
    /// Returns true if this mode uses index-based allocation (wrapped in Unforgeable).
    pub fn is_indexed(&self) -> bool {
        matches!(self, AllocationMode::ArrayIndex { .. } | AllocationMode::VectorIndex)
    }

    /// Returns true if this mode uses random ID allocation.
    pub fn is_random(&self) -> bool {
        matches!(self, AllocationMode::Random)
    }
}

impl Default for AllocationMode {
    fn default() -> Self {
        AllocationMode::Random
    }
}

impl fmt::Display for AllocationMode {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            AllocationMode::Random => write!(f, "Random"),
            AllocationMode::ArrayIndex { max_size, cyclic } => {
                write!(f, "ArrayIndex(max={}, cyclic={})", max_size, cyclic)
            }
            AllocationMode::VectorIndex => write!(f, "VectorIndex"),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_allocation_mode_display() {
        assert_eq!(format!("{}", AllocationMode::Random), "Random");
        assert_eq!(
            format!("{}", AllocationMode::ArrayIndex { max_size: 10, cyclic: false }),
            "ArrayIndex(max=10, cyclic=false)"
        );
        assert_eq!(format!("{}", AllocationMode::VectorIndex), "VectorIndex");
    }

    #[test]
    fn test_is_indexed() {
        assert!(!AllocationMode::Random.is_indexed());
        assert!(AllocationMode::ArrayIndex { max_size: 10, cyclic: false }.is_indexed());
        assert!(AllocationMode::VectorIndex.is_indexed());
    }

    #[test]
    fn test_is_random() {
        assert!(AllocationMode::Random.is_random());
        assert!(!AllocationMode::ArrayIndex { max_size: 10, cyclic: false }.is_random());
        assert!(!AllocationMode::VectorIndex.is_random());
    }

    #[test]
    fn test_default() {
        assert_eq!(AllocationMode::default(), AllocationMode::Random);
    }
}

// ---------------------------------------------------------------------------
// NOTE(review): the lines below are unified-diff metadata for the next file
// (types/config.rs) that leaked into the source; kept as comments because raw
// patch headers are not valid Rust.
// diff --git a/rholang/src/rust/interpreter/spaces/types/config.rs b/rholang/src/rust/interpreter/spaces/types/config.rs
// new file mode 100644
// index 000000000..a5f684b31
// --- /dev/null
// +++ b/rholang/src/rust/interpreter/spaces/types/config.rs
// @@ -0,0 +1,959 @@
// ---------------------------------------------------------------------------

// Space Configuration (module docs for config.rs; written as line comments
// here because `//!` inner doc comments are only legal at the start of a file)
//
// This module defines SpaceConfig and GasConfiguration for configuring
// space behavior, storage types, and resource metering.
+ +use std::fmt; +use std::str::FromStr; + +use super::allocation::AllocationMode; +use super::collections::{InnerCollectionType, OuterStorageType}; +use super::qualifier::SpaceQualifier; +use super::theory::BoxedTheory; + +// ========================================================================== +// Gas/Phlogiston Configuration +// ========================================================================== + +/// Configuration for phlogiston (gas) accounting in a space. +/// +/// This determines whether and how resource consumption is metered for +/// operations within the space. +/// +/// # Formal Correspondence +/// - `Phlogiston.v`: Gas accounting invariants +/// - `Safety/Properties.v`: Resource exhaustion safety +#[derive(Clone, Debug)] +pub struct GasConfiguration { + /// Whether gas accounting is enabled. + pub enabled: bool, + + /// Initial gas limit for new operations. + pub initial_limit: u64, + + /// Cost multiplier for chain economics. + pub cost_multiplier: f64, +} + +impl Default for GasConfiguration { + fn default() -> Self { + GasConfiguration { + enabled: true, + initial_limit: 10_000_000, + cost_multiplier: 1.0, + } + } +} + +impl GasConfiguration { + /// Create a configuration with gas accounting disabled. + pub fn disabled() -> Self { + GasConfiguration { + enabled: false, + initial_limit: u64::MAX, + cost_multiplier: 1.0, + } + } + + /// Create a configuration with unlimited gas (for testing). + pub fn unlimited() -> Self { + GasConfiguration { + enabled: true, + initial_limit: u64::MAX, + cost_multiplier: 1.0, + } + } + + /// Create a configuration with a specific limit. + pub fn with_limit(limit: u64) -> Self { + GasConfiguration { + enabled: true, + initial_limit: limit, + cost_multiplier: 1.0, + } + } + + /// Set the cost multiplier. 
+ pub fn with_multiplier(mut self, multiplier: f64) -> Self { + self.cost_multiplier = multiplier; + self + } +} + +// ========================================================================== +// Space Configuration +// ========================================================================== + +/// Full space configuration combining outer and inner types. +/// +/// This struct captures all the configuration needed to create a space with +/// specific storage and behavior characteristics. +/// +/// # Theory Integration +/// The optional `theory` field allows spaces to validate data against a type +/// theory before accepting it. This enables: +/// - Typed tuple spaces where only well-typed data can be stored +/// - Contract validation for smart contract execution +/// - Schema enforcement for structured data +/// +/// When a theory is present, `produce` operations validate data against the +/// theory before storing, rejecting invalid data with `SpaceError::TheoryValidationError`. +/// +/// # Gas/Phlogiston Accounting +/// The `gas_config` field enables resource consumption metering. +/// Operations consume phlogiston based on their cost, and +/// `OutOfPhlogiston` errors are raised when limits are exceeded. +pub struct SpaceConfig { + /// Outer storage structure (channel indexing) + pub outer: OuterStorageType, + + /// Inner collection type for data at channels + pub data_collection: InnerCollectionType, + + /// Inner collection type for continuations at channels + pub continuation_collection: InnerCollectionType, + + /// Qualifier for persistence/concurrency behavior + pub qualifier: SpaceQualifier, + + /// Optional theory for data validation (MeTTaIL integration). + /// + /// When present, data is validated against this theory before being + /// accepted by the space. This enables typed tuple spaces. + pub theory: Option, + + /// Gas/phlogiston configuration. + /// + /// Operations consume phlogiston and are rejected if the limit is exceeded. 
+ pub gas_config: GasConfiguration, +} + +impl fmt::Debug for SpaceConfig { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("SpaceConfig") + .field("outer", &self.outer) + .field("data_collection", &self.data_collection) + .field("continuation_collection", &self.continuation_collection) + .field("qualifier", &self.qualifier) + .field("theory", &self.theory.as_ref().map(|t| t.name())) + .field("gas_config", &self.gas_config) + .finish() + } +} + +impl Clone for SpaceConfig { + fn clone(&self) -> Self { + SpaceConfig { + outer: self.outer.clone(), + data_collection: self.data_collection.clone(), + continuation_collection: self.continuation_collection.clone(), + qualifier: self.qualifier, + theory: self.theory.as_ref().map(|t| t.clone_box()), + gas_config: self.gas_config.clone(), + } + } +} + +impl Default for SpaceConfig { + fn default() -> Self { + SpaceConfig { + outer: OuterStorageType::PathMap, + data_collection: InnerCollectionType::Bag, + continuation_collection: InnerCollectionType::Bag, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } +} + +impl SpaceConfig { + // ====================================================================== + // Pre-defined space configurations matching spec URNs + // ====================================================================== + + /// `rho:space:HashMapBagSpace` - HashMap outer + Bag inner (original default) + pub fn hashmap_bag() -> Self { + SpaceConfig { + outer: OuterStorageType::HashMap, + data_collection: InnerCollectionType::Bag, + continuation_collection: InnerCollectionType::Bag, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:PathMapSpace` - PathMap outer + Bag inner (recommended default) + pub fn pathmap() -> Self { + SpaceConfig { + outer: OuterStorageType::PathMap, + data_collection: InnerCollectionType::Bag, + continuation_collection: 
InnerCollectionType::Bag, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:QueueSpace` - HashMap outer + Queue inner (FIFO) + pub fn queue() -> Self { + SpaceConfig { + outer: OuterStorageType::HashMap, + data_collection: InnerCollectionType::Queue, + continuation_collection: InnerCollectionType::Queue, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:StackSpace` - HashMap outer + Stack inner (LIFO) + pub fn stack() -> Self { + SpaceConfig { + outer: OuterStorageType::HashMap, + data_collection: InnerCollectionType::Stack, + continuation_collection: InnerCollectionType::Stack, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:SetSpace` - HashMap outer + Set inner (idempotent) + pub fn set() -> Self { + SpaceConfig { + outer: OuterStorageType::HashMap, + data_collection: InnerCollectionType::Set, + continuation_collection: InnerCollectionType::Set, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:CellSpace` - HashMap outer + Cell inner (exactly-once) + pub fn cell() -> Self { + SpaceConfig { + outer: OuterStorageType::HashMap, + data_collection: InnerCollectionType::Cell, + continuation_collection: InnerCollectionType::Cell, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:ArraySpace` - Array outer + Bag inner (fixed size) + pub fn array(max_size: usize, cyclic: bool) -> Self { + SpaceConfig { + outer: OuterStorageType::Array { max_size, cyclic }, + data_collection: InnerCollectionType::Bag, + continuation_collection: InnerCollectionType::Bag, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:VectorSpace` - Vector outer + Bag 
inner (unbounded) + pub fn vector() -> Self { + SpaceConfig { + outer: OuterStorageType::Vector, + data_collection: InnerCollectionType::Bag, + continuation_collection: InnerCollectionType::Bag, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:SeqSpace` - HashSet outer + Set inner (sequential) + /// For sequential processes with restricted channel mobility. + pub fn seq() -> Self { + SpaceConfig { + outer: OuterStorageType::HashSet, + data_collection: InnerCollectionType::Set, + continuation_collection: InnerCollectionType::Set, + qualifier: SpaceQualifier::Seq, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:TempSpace` - HashMap outer + Bag inner (non-persistent) + pub fn temp() -> Self { + SpaceConfig { + outer: OuterStorageType::HashMap, + data_collection: InnerCollectionType::Bag, + continuation_collection: InnerCollectionType::Bag, + qualifier: SpaceQualifier::Temp, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:PriorityQueueSpace` - HashMap outer + PriorityQueue inner + pub fn priority_queue(priorities: usize) -> Self { + SpaceConfig { + outer: OuterStorageType::HashMap, + data_collection: InnerCollectionType::PriorityQueue { priorities }, + continuation_collection: InnerCollectionType::PriorityQueue { priorities }, + qualifier: SpaceQualifier::Default, + theory: None, + gas_config: GasConfiguration::default(), + } + } + + /// `rho:space:VectorDBSpace` - HashMap outer + VectorDB inner (similarity matching) + /// + /// Uses the default "rho" backend (in-memory SIMD-optimized). + pub fn vector_db(dimensions: usize) -> Self { + Self::vector_db_with_backend(dimensions, "rho".to_string()) + } + + /// `rho:space:VectorDBSpace` with explicit backend specification. + /// + /// # Arguments + /// * `dimensions` - Dimensionality of embedding vectors + /// * `backend` - Backend name (e.g., "rho", "pinecone"). 
Looked up in BackendRegistry.
    ///
    /// # Example
    /// ```ignore
    /// // In Rholang:
    /// // VectorDBFactory!({"dimensions": 384, "backend": "pinecone", ...}, *space)
    ///
    /// let config = SpaceConfig::vector_db_with_backend(384, "pinecone".to_string());
    /// ```
    pub fn vector_db_with_backend(dimensions: usize, backend: String) -> Self {
        SpaceConfig {
            outer: OuterStorageType::HashMap,
            data_collection: InnerCollectionType::VectorDB { dimensions, backend },
            continuation_collection: InnerCollectionType::Bag, // Continuations use Bag
            qualifier: SpaceQualifier::Default,
            theory: None,
            gas_config: GasConfiguration::default(),
        }
    }

    // ======================================================================
    // Builder methods for custom configurations
    // ======================================================================

    /// Replace the outer storage type.
    pub fn with_outer(mut self, outer: OuterStorageType) -> Self {
        self.outer = outer;
        self
    }

    /// Replace the data collection type.
    pub fn with_data_collection(mut self, collection: InnerCollectionType) -> Self {
        self.data_collection = collection;
        self
    }

    /// Replace the continuation collection type.
    pub fn with_continuation_collection(mut self, collection: InnerCollectionType) -> Self {
        self.continuation_collection = collection;
        self
    }

    /// Replace the space qualifier.
    pub fn with_qualifier(mut self, qualifier: SpaceQualifier) -> Self {
        self.qualifier = qualifier;
        self
    }

    /// Set the theory for data validation.
    ///
    /// When a theory is set, all data entering the space via `produce` will
    /// be validated against the theory. Invalid data will be rejected with
    /// a `SpaceError::TheoryValidationError`.
    ///
    /// # Example
    /// ```ignore
    /// let theory = SimpleTypeTheory::new("NatTheory", vec!["Nat".to_string()]);
    /// let config = SpaceConfig::default().with_theory(Box::new(theory));
    /// ```
    pub fn with_theory(mut self, theory: BoxedTheory) -> Self {
        self.theory = Some(theory);
        self
    }

    /// Clear the theory (remove validation).
    pub fn without_theory(mut self) -> Self {
        self.theory = None;
        self
    }

    /// Set the gas/phlogiston configuration.
    ///
    /// When gas configuration is enabled, operations consume phlogiston
    /// and are rejected if the limit is exceeded.
    ///
    /// # Example
    /// ```ignore
    /// let config = SpaceConfig::default()
    ///     .with_gas(GasConfiguration::with_limit(1_000_000));
    /// ```
    pub fn with_gas(mut self, gas_config: GasConfiguration) -> Self {
        self.gas_config = gas_config;
        self
    }

    /// Set gas configuration with a specific limit.
    pub fn with_gas_limit(mut self, limit: u64) -> Self {
        self.gas_config = GasConfiguration::with_limit(limit);
        self
    }

    /// Enable unlimited gas (for testing).
    pub fn with_unlimited_gas(mut self) -> Self {
        self.gas_config = GasConfiguration::unlimited();
        self
    }

    /// Disable gas accounting.
    pub fn with_disabled_gas(mut self) -> Self {
        self.gas_config = GasConfiguration::disabled();
        self
    }

    /// Check if this space configuration supports persistence.
    /// Only `Default`-qualified spaces persist.
    pub fn is_persistent(&self) -> bool {
        self.qualifier == SpaceQualifier::Default
    }

    /// Check if this space configuration supports concurrent access.
    /// Everything except `Seq` spaces allows concurrency.
    pub fn is_concurrent(&self) -> bool {
        self.qualifier != SpaceQualifier::Seq
    }

    /// Check if channels in this space can be sent to other processes.
    /// `Seq` spaces restrict channel mobility.
    pub fn is_mobile(&self) -> bool {
        self.qualifier != SpaceQualifier::Seq
    }

    /// Check if this space has a theory for data validation.
    pub fn has_theory(&self) -> bool {
        self.theory.is_some()
    }

    /// Get the theory name if one is set.
    pub fn theory_name(&self) -> Option<&str> {
        self.theory.as_ref().map(|t| t.name())
    }

    /// Validate data against the theory (if present).
    ///
    /// Returns `Ok(())` if no theory is set or if the data validates.
    /// Returns `Err(description)` if validation fails.
    pub fn validate_data(&self, term: &str) -> Result<(), String> {
        // Absent theory means "accept everything".
        self.theory
            .as_ref()
            .map_or(Ok(()), |theory| theory.validate(term))
    }

    /// Check if gas accounting is enabled.
    pub fn has_gas(&self) -> bool {
        self.gas_config.enabled
    }

    /// Get the gas limit.
    pub fn gas_limit(&self) -> u64 {
        self.gas_config.initial_limit
    }

    /// Get the gas configuration.
    pub fn gas(&self) -> &GasConfiguration {
        &self.gas_config
    }

    /// Get the allocation mode for `new` bindings within this space.
    ///
    /// The allocation mode is derived from the outer storage type:
    /// - HashMap, PathMap, HashSet: Random allocation using Blake2b512Random
    /// - Array: Sequential indices up to max_size, wrapped in Unforgeable
    /// - Vector: Growing indices, wrapped in Unforgeable
    ///
    /// This determines how `new` bindings allocate channel names when
    /// executing inside a `use space { ... }` block.
    pub fn allocation_mode(&self) -> AllocationMode {
        match &self.outer {
            OuterStorageType::Vector => AllocationMode::VectorIndex,
            OuterStorageType::Array { max_size, cyclic } => AllocationMode::ArrayIndex {
                max_size: *max_size,
                cyclic: *cyclic,
            },
            OuterStorageType::HashMap
            | OuterStorageType::PathMap
            | OuterStorageType::HashSet => AllocationMode::Random,
        }
    }

    // ======================================================================
    // Configuration Validation
    // ======================================================================

    /// Validate that this configuration is internally consistent.
    ///
    /// This checks for invalid combinations of outer storage, inner collections,
    /// and qualifiers. 
Call this after building a configuration to ensure it's valid. + /// + /// # Invalid Combinations + /// + /// - `Seq` qualifier with `PathMap` outer storage: Seq requires restricted + /// channel mobility which PathMap doesn't enforce well. + /// - `VectorDB` collection with `PathMap` outer: VectorDB requires HashMap-style + /// channel indexing for embedding lookups. + /// - `Cell` collection with `PathMap` outer: Cell's exactly-once semantics + /// don't align well with PathMap's prefix aggregation. + /// + /// # Example + /// + /// ```ignore + /// // This will fail validation + /// let result = SpaceConfig::seq() + /// .with_outer(OuterStorageType::PathMap) + /// .validate(); + /// assert!(result.is_err()); + /// + /// // This is valid + /// let config = SpaceConfig::hashmap_bag().validate()?; + /// ``` + pub fn validate(&self) -> Result + where + Self: Clone, + { + // Check: Seq qualifier cannot use PathMap + if self.qualifier == SpaceQualifier::Seq + && matches!(self.outer, OuterStorageType::PathMap) + { + return Err(super::super::errors::SpaceError::InvalidConfiguration { + description: "Seq qualifier cannot use PathMap storage: Seq requires \ + restricted channel mobility which PathMap doesn't enforce" + .to_string(), + }); + } + + // Check: VectorDB collection requires HashMap-compatible outer storage + if matches!(self.data_collection, InnerCollectionType::VectorDB { .. }) { + match self.outer { + OuterStorageType::HashMap | OuterStorageType::HashSet | OuterStorageType::Vector => { + // OK: These support direct channel lookup needed for embedding queries + } + OuterStorageType::PathMap | OuterStorageType::Array { .. 
} => { + return Err(super::super::errors::SpaceError::InvalidConfiguration { + description: format!( + "VectorDB collection cannot use {} outer storage: VectorDB \ + requires HashMap-style channel indexing for embedding lookups", + self.outer + ), + }); + } + } + } + + // Check: Cell collection (exactly-once) doesn't work well with PathMap aggregation + if matches!(self.data_collection, InnerCollectionType::Cell) + && matches!(self.outer, OuterStorageType::PathMap) + { + return Err(super::super::errors::SpaceError::InvalidConfiguration { + description: "Cell collection cannot use PathMap outer storage: Cell's \ + exactly-once semantics conflict with PathMap's prefix aggregation" + .to_string(), + }); + } + + // Check: PriorityQueue needs at least 1 priority level + if let InnerCollectionType::PriorityQueue { priorities } = self.data_collection { + if priorities == 0 { + return Err(super::super::errors::SpaceError::InvalidConfiguration { + description: "PriorityQueue must have at least 1 priority level".to_string(), + }); + } + } + + // Check: VectorDB needs at least 1 dimension + if let InnerCollectionType::VectorDB { dimensions, .. } = self.data_collection { + if dimensions == 0 { + return Err(super::super::errors::SpaceError::InvalidConfiguration { + description: "VectorDB must have at least 1 dimension".to_string(), + }); + } + } + + // Check: Array with max_size 0 is useless + if let OuterStorageType::Array { max_size, .. } = self.outer { + if max_size == 0 { + return Err(super::super::errors::SpaceError::InvalidConfiguration { + description: "Array outer storage must have max_size > 0".to_string(), + }); + } + } + + Ok(self.clone()) + } + + /// Validate this configuration, consuming self on success. + /// + /// This is a consuming version of `validate()` for use in builder chains + /// where you want to propagate ownership. 
+ pub fn validated(self) -> Result { + self.validate() + } +} + +impl fmt::Display for SpaceConfig { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(ref theory) = self.theory { + write!( + f, + "SpaceConfig({}, data={}, cont={}, {}, theory={})", + self.outer, self.data_collection, self.continuation_collection, + self.qualifier, theory.name() + ) + } else { + write!( + f, + "SpaceConfig({}, data={}, cont={}, {})", + self.outer, self.data_collection, self.continuation_collection, self.qualifier + ) + } + } +} + +impl FromStr for SpaceConfig { + type Err = super::super::errors::SpaceError; + + /// Parse a SpaceConfig from a URN string. + /// + /// This enables idiomatic Rust parsing: + /// + /// # Examples + /// + /// ```ignore + /// // Using parse() + /// let config: SpaceConfig = "rho:space:HashMapBagSpace".parse()?; + /// + /// // Using parse() with extended format + /// let config: SpaceConfig = "rho:space:queue:pathmap:default".parse()?; + /// + /// // Using FromStr::from_str() + /// let config = SpaceConfig::from_str("rho:space:QueueSpace")?; + /// ``` + /// + /// # Supported URN Formats + /// + /// ## Short Format (legacy) + /// - `rho:space:HashMapBagSpace` - HashMap + Bag + /// - `rho:space:PathMapSpace` - PathMap + Bag + /// - `rho:space:QueueSpace` - HashMap + Queue + /// - `rho:space:StackSpace` - HashMap + Stack + /// - `rho:space:SetSpace` - HashMap + Set + /// - `rho:space:CellSpace` - HashMap + Cell + /// - `rho:space:VectorSpace` - Vector + Bag + /// - `rho:space:SeqSpace` - HashSet + Set (sequential) + /// - `rho:space:TempSpace` - HashMap + Bag (non-persistent) + /// - `rho:space:ArraySpace(n,cyclic)` - Array + Bag + /// - `rho:space:PriorityQueueSpace(n)` - HashMap + PriorityQueue + /// - `rho:space:VectorDBSpace(dims)` - HashMap + VectorDB + /// + /// ## Extended Format: `rho:space:{inner}:{outer}:{qualifier}` + /// - Inner: bag, queue, stack, set, cell, priorityqueue, vectordb + /// - Outer: hashmap, pathmap, array, 
vector, hashset + /// - Qualifier: default, temp, seq + /// + /// # Errors + /// + /// Returns `SpaceError::InvalidConfiguration` if the URN is invalid or + /// represents an unsupported combination. + fn from_str(s: &str) -> Result { + super::super::factory::config_from_urn(s).ok_or_else(|| { + super::super::errors::SpaceError::InvalidConfiguration { + description: format!("Unknown or invalid space URN: {}", s), + } + }) + } +} + +/// Convenience trait implementation for converting string references. +impl TryFrom<&str> for SpaceConfig { + type Error = super::super::errors::SpaceError; + + fn try_from(s: &str) -> Result { + s.parse() + } +} + +/// Convenience trait implementation for converting owned strings. +impl TryFrom for SpaceConfig { + type Error = super::super::errors::SpaceError; + + fn try_from(s: String) -> Result { + s.parse() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use super::super::theory::{NullTheory, SimpleTypeTheory}; + + #[test] + fn test_space_config_defaults() { + let config = SpaceConfig::default(); + assert_eq!(config.outer, OuterStorageType::PathMap); + assert_eq!(config.data_collection, InnerCollectionType::Bag); + assert!(config.is_persistent()); + assert!(config.is_concurrent()); + assert!(config.is_mobile()); + } + + #[test] + fn test_space_config_seq() { + let config = SpaceConfig::seq(); + assert_eq!(config.outer, OuterStorageType::HashSet); + assert_eq!(config.qualifier, SpaceQualifier::Seq); + assert!(!config.is_persistent()); + assert!(!config.is_concurrent()); + assert!(!config.is_mobile()); + } + + #[test] + fn test_space_config_builder() { + let config = SpaceConfig::default() + .with_outer(OuterStorageType::HashMap) + .with_data_collection(InnerCollectionType::Queue) + .with_qualifier(SpaceQualifier::Temp); + + assert_eq!(config.outer, OuterStorageType::HashMap); + assert_eq!(config.data_collection, InnerCollectionType::Queue); + assert_eq!(config.qualifier, SpaceQualifier::Temp); + } + + #[test] + fn 
test_space_config_with_theory() {
        let theory = SimpleTypeTheory::new("TestTheory", vec!["Int".to_string()]);
        let config = SpaceConfig::default().with_theory(Box::new(theory));

        assert!(config.has_theory());
        assert_eq!(config.theory_name(), Some("TestTheory"));

        // The attached theory accepts well-typed data and rejects the rest.
        assert!(config.validate_data("Int 123").is_ok());
        assert!(config.validate_data("String hello").is_err());
    }

    #[test]
    fn test_space_config_without_theory() {
        let config = SpaceConfig::default();

        assert!(!config.has_theory());
        assert_eq!(config.theory_name(), None);

        // Without a theory, all data passes validation.
        assert!(config.validate_data("anything").is_ok());
    }

    #[test]
    fn test_theory_builder_pattern() {
        let theory = NullTheory;
        let config = SpaceConfig::hashmap_bag()
            .with_qualifier(SpaceQualifier::Temp)
            .with_theory(Box::new(theory));

        assert!(config.has_theory());
        assert_eq!(config.qualifier, SpaceQualifier::Temp);

        // without_theory() removes a previously attached theory.
        let config2 = SpaceConfig::pathmap()
            .with_theory(Box::new(NullTheory))
            .without_theory();

        assert!(!config2.has_theory());
    }

    #[test]
    fn test_validate_seq_with_pathmap_fails() {
        let config = SpaceConfig::seq().with_outer(OuterStorageType::PathMap);
        let result = config.validate();
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_vectordb_with_pathmap_fails() {
        let config = SpaceConfig::vector_db(384).with_outer(OuterStorageType::PathMap);
        let result = config.validate();
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_cell_with_pathmap_fails() {
        let config = SpaceConfig::cell().with_outer(OuterStorageType::PathMap);
        let result = config.validate();
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_priority_queue_zero_priorities_fails() {
        let config = SpaceConfig::default()
            .with_data_collection(InnerCollectionType::PriorityQueue { priorities: 0 });
        let result = config.validate();
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_vectordb_zero_dimensions_fails() {
        let config =
SpaceConfig::vector_db_with_backend(0, "rho".to_string());
        let result = config.validate();
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_array_zero_size_fails() {
        let config = SpaceConfig::default().with_outer(OuterStorageType::Array {
            max_size: 0,
            cyclic: false,
        });
        let result = config.validate();
        assert!(result.is_err());
    }

    #[test]
    fn test_validate_valid_configs_pass() {
        // Every canonical constructor should produce a valid configuration.
        assert!(SpaceConfig::hashmap_bag().validate().is_ok());
        assert!(SpaceConfig::pathmap().validate().is_ok());
        assert!(SpaceConfig::queue().validate().is_ok());
        assert!(SpaceConfig::stack().validate().is_ok());
        assert!(SpaceConfig::set().validate().is_ok());
        assert!(SpaceConfig::cell().validate().is_ok());
        assert!(SpaceConfig::seq().validate().is_ok());
        assert!(SpaceConfig::temp().validate().is_ok());
        assert!(SpaceConfig::vector().validate().is_ok());
        assert!(SpaceConfig::array(10, false).validate().is_ok());
        assert!(SpaceConfig::priority_queue(3).validate().is_ok());
        assert!(SpaceConfig::vector_db(384).validate().is_ok());
    }

    #[test]
    fn test_validated_consumes_and_returns_config() {
        let config = SpaceConfig::hashmap_bag();
        let validated = config.validated().expect("validation should pass");
        assert_eq!(validated.outer, OuterStorageType::HashMap);
        assert_eq!(validated.data_collection, InnerCollectionType::Bag);
    }

    #[test]
    fn test_gas_configuration() {
        let config = SpaceConfig::default().with_gas_limit(1_000_000);
        assert!(config.has_gas());
        assert_eq!(config.gas_limit(), 1_000_000);

        // Unlimited gas is modelled as u64::MAX with gas still enabled.
        let config2 = SpaceConfig::default().with_unlimited_gas();
        assert!(config2.has_gas());
        assert_eq!(config2.gas_limit(), u64::MAX);

        let config3 = SpaceConfig::default().with_disabled_gas();
        assert!(!config3.has_gas());
    }

    // =========================================================================
    // FromStr / TryFrom Tests
    // =========================================================================

    #[test]
fn test_from_str_short_format() {
        // Short-format URNs map each legacy name to a fixed storage pair.
        let config: SpaceConfig = "rho:space:HashMapBagSpace".parse().expect("Should parse");
        assert_eq!(config.outer, OuterStorageType::HashMap);
        assert_eq!(config.data_collection, InnerCollectionType::Bag);

        let config: SpaceConfig = "rho:space:QueueSpace".parse().expect("Should parse");
        assert_eq!(config.data_collection, InnerCollectionType::Queue);

        let config: SpaceConfig = "rho:space:StackSpace".parse().expect("Should parse");
        assert_eq!(config.data_collection, InnerCollectionType::Stack);

        let config: SpaceConfig = "rho:space:SetSpace".parse().expect("Should parse");
        assert_eq!(config.data_collection, InnerCollectionType::Set);

        let config: SpaceConfig = "rho:space:CellSpace".parse().expect("Should parse");
        assert_eq!(config.data_collection, InnerCollectionType::Cell);
    }

    #[test]
    fn test_from_str_extended_format() {
        // Extended-format URNs spell out inner:outer:qualifier explicitly.
        let config: SpaceConfig = "rho:space:bag:hashmap:default".parse().expect("Should parse");
        assert_eq!(config.outer, OuterStorageType::HashMap);
        assert_eq!(config.data_collection, InnerCollectionType::Bag);

        let config: SpaceConfig = "rho:space:queue:pathmap:temp".parse().expect("Should parse");
        assert_eq!(config.outer, OuterStorageType::PathMap);
        assert_eq!(config.data_collection, InnerCollectionType::Queue);
        assert_eq!(config.qualifier, SpaceQualifier::Temp);
    }

    #[test]
    fn test_from_str_parametric() {
        // Parametric URNs carry their arguments in parentheses.
        let config: SpaceConfig = "rho:space:ArraySpace(500,true)".parse().expect("Should parse");
        match config.outer {
            OuterStorageType::Array { max_size, cyclic } => {
                assert_eq!(max_size, 500);
                assert!(cyclic);
            }
            _ => panic!("Expected Array outer type"),
        }

        let config: SpaceConfig = "rho:space:PriorityQueueSpace(4)".parse().expect("Should parse");
        match config.data_collection {
            InnerCollectionType::PriorityQueue { priorities } => {
assert_eq!(priorities, 4); + } + _ => panic!("Expected PriorityQueue collection type"), + } + } + + #[test] + fn test_from_str_invalid_urn() { + // Test that invalid URNs return errors + let result: Result = "invalid".parse(); + assert!(result.is_err()); + + let result: Result = "rho:space:UnknownSpace".parse(); + assert!(result.is_err()); + + let result: Result = "".parse(); + assert!(result.is_err()); + } + + #[test] + fn test_try_from_str() { + // Test TryFrom<&str> implementation + let config = SpaceConfig::try_from("rho:space:HashMapBagSpace").expect("Should convert"); + assert_eq!(config.outer, OuterStorageType::HashMap); + + let result = SpaceConfig::try_from("invalid"); + assert!(result.is_err()); + } + + #[test] + fn test_try_from_string() { + // Test TryFrom implementation + let urn = String::from("rho:space:QueueSpace"); + let config = SpaceConfig::try_from(urn).expect("Should convert"); + assert_eq!(config.data_collection, InnerCollectionType::Queue); + + let result = SpaceConfig::try_from(String::from("invalid")); + assert!(result.is_err()); + } + + #[test] + fn test_from_str_using_from_str_method() { + // Test using FromStr::from_str() directly + let config = SpaceConfig::from_str("rho:space:PathMapSpace").expect("Should parse"); + assert_eq!(config.outer, OuterStorageType::PathMap); + } +} diff --git a/rholang/src/rust/interpreter/spaces/types/mod.rs b/rholang/src/rust/interpreter/spaces/types/mod.rs index 85b03becb..0de49035d 100644 --- a/rholang/src/rust/interpreter/spaces/types/mod.rs +++ b/rholang/src/rust/interpreter/spaces/types/mod.rs @@ -1,11 +1,101 @@ -//! Type definitions for spaces module. +//! Core Types for Multi-Space RSpace Integration +//! +//! This module defines the fundamental types for the 6-layer trait hierarchy +//! as specified in the "Reifying RSpaces" specification. +//! +//! # Module Organization +//! +//! - `theory`: Theory and Validatable traits for MeTTaIL integration +//! 
- `collections`: Inner collection types (Bag, Queue, Stack, Set, Cell, etc.) +//! and outer storage types (HashMap, PathMap, Array, Vector, HashSet) +//! - `pathmap`: PathMap prefix aggregation and Par-to-Path encoding +//! - `qualifier`: Space qualifiers (Default, Temp, Seq) +//! - `id`: Space identification (SpaceId) +//! - `config`: Space configuration and gas configuration +//! - `raw_config`: Generic pass-through configuration for VectorDB backends -pub mod bounds; -pub mod id; -pub mod collections; -pub mod pathmap; +mod allocation; +mod bounds; +mod theory; +mod collections; +mod pathmap; +mod qualifier; +mod id; +mod config; +mod raw_config; -pub use bounds::*; +// ========================================================================== +// Re-exports for backward compatibility +// ========================================================================== + +// From allocation module +pub use allocation::AllocationMode; + +// From bounds module (type bound trait aliases) +pub use bounds::{ + ChannelBound, + PatternBound, + DataBound, + ContinuationBound, + SpaceParamBound, +}; + +// From theory module +pub use theory::{ + BoxedTheory, + Theory, + NullTheory, + SimpleTypeTheory, + Validatable, + TheoryValidator, + ValidationResult, +}; + +// From collections module +pub use collections::{ + InnerCollectionType, + OuterStorageType, + HyperparamSchema, +}; + +// From pathmap module +pub use pathmap::{ + // Aggregation types + SuffixKey, + AggregatedDatum, + // Path prefix utilities + get_path_suffix, + path_prefixes, + is_path_prefix, + path_element_boundaries, + // Varint encoding (for external use) + encode_varint, + decode_varint, + // Par-to-Path conversion + par_to_path, + path_to_par, + is_par_path, + // Path tag constants + path_tags, +}; + +// From qualifier module +pub use qualifier::SpaceQualifier; + +// From id module pub use id::SpaceId; -pub use collections::{InnerCollectionType, OuterStorageType}; -pub use pathmap::{PathMapKeyConversion, 
PathTrie}; + +// From config module +pub use config::{ + GasConfiguration, + SpaceConfig, +}; + +// From raw_config module (VectorDB pass-through configuration) +pub use raw_config::{ + RawConfigValue, + RawVectorDBConfig, + RawConfigError, + par_to_raw_config_value, + parse_raw_vectordb_config, +}; diff --git a/rholang/src/rust/interpreter/spaces/types/qualifier.rs b/rholang/src/rust/interpreter/spaces/types/qualifier.rs new file mode 100644 index 000000000..fccbfe845 --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/types/qualifier.rs @@ -0,0 +1,100 @@ +//! Space Qualifier +//! +//! This module defines the SpaceQualifier enum that controls persistence +//! and concurrency behavior for spaces. + +use std::fmt; + +// ========================================================================== +// LAYER 3: Space Qualifier (persistence and concurrency) +// ========================================================================== + +/// Qualifier for space behavior - controls persistence and concurrency. +/// +/// This determines how channels in the space behave with respect to: +/// - Persistence: Whether data survives across checkpoints +/// - Concurrency: Whether parallel access is allowed +/// - Mobility: Whether channel references can be sent to other spaces +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default, serde::Serialize, serde::Deserialize)] +pub enum SpaceQualifier { + /// Persistent, concurrent access (default behavior). + /// Data survives checkpoints, parallel processes can access simultaneously. + #[default] + Default, + + /// Non-persistent, concurrent access. + /// Data is cleared on checkpoint, but parallel access is allowed. + /// Useful for temporary computation that doesn't need to persist. + Temp, + + /// Non-persistent, sequential, restricted. + /// - Cannot be sent to other processes + /// - No concurrent access allowed + /// - Operations execute in strict sequence + /// Used for local mutable state that must not escape. 
+ Seq, +} + +impl SpaceQualifier { + /// Check if channels with this qualifier can be sent to other processes. + /// Seq channels are non-mobile and cannot be sent. + pub fn is_mobile(&self) -> bool { + !matches!(self, SpaceQualifier::Seq) + } + + /// Check if this qualifier supports persistent storage. + pub fn is_persistent(&self) -> bool { + matches!(self, SpaceQualifier::Default) + } + + /// Check if this qualifier supports concurrent access. + pub fn is_concurrent(&self) -> bool { + !matches!(self, SpaceQualifier::Seq) + } +} + +impl fmt::Display for SpaceQualifier { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SpaceQualifier::Default => write!(f, "default"), + SpaceQualifier::Temp => write!(f, "temp"), + SpaceQualifier::Seq => write!(f, "seq"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_qualifier_default() { + let q = SpaceQualifier::Default; + assert!(q.is_mobile()); + assert!(q.is_persistent()); + assert!(q.is_concurrent()); + } + + #[test] + fn test_qualifier_temp() { + let q = SpaceQualifier::Temp; + assert!(q.is_mobile()); + assert!(!q.is_persistent()); + assert!(q.is_concurrent()); + } + + #[test] + fn test_qualifier_seq() { + let q = SpaceQualifier::Seq; + assert!(!q.is_mobile()); + assert!(!q.is_persistent()); + assert!(!q.is_concurrent()); + } + + #[test] + fn test_qualifier_display() { + assert_eq!(format!("{}", SpaceQualifier::Default), "default"); + assert_eq!(format!("{}", SpaceQualifier::Temp), "temp"); + assert_eq!(format!("{}", SpaceQualifier::Seq), "seq"); + } +} diff --git a/rholang/src/rust/interpreter/spaces/types/raw_config.rs b/rholang/src/rust/interpreter/spaces/types/raw_config.rs new file mode 100644 index 000000000..de63e4675 --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/types/raw_config.rs @@ -0,0 +1,776 @@ +//! Raw Configuration Types for VectorDB Pass-Through +//! +//! This module provides generic configuration types that allow Rholang to pass +//! 
configuration to VectorDB backends without understanding their semantics. +//! +//! # Design Principle +//! +//! Rholang is agnostic to VectorDB-specific semantics. The interpreter: +//! 1. Parses the Rholang configuration map into a generic `RawVectorDBConfig` +//! 2. Passes the entire configuration to the VectorDB backend +//! 3. The backend validates and interprets its own parameters +//! +//! This decoupling allows different VectorDB backends to accept different +//! configurations without coupling Rholang to any specific implementation. +//! +//! # Example +//! +//! ```rholang +//! VectorDBFactory!({ +//! "dimensions": 384, +//! "metric": "cosine", +//! "threshold": "0.7", +//! "index": "pre_normalize" +//! }, *space) +//! ``` +//! +//! Rholang extracts `dimensions` (the only universal parameter) and passes +//! everything else (`metric`, `threshold`, `index`) as generic key-value pairs +//! for the VectorDB backend to interpret. + +use std::collections::HashMap; +use std::fmt; + +use models::rhoapi::{expr::ExprInstance, Par}; + +// ========================================================================== +// RawConfigValue - Generic Configuration Value +// ========================================================================== + +/// A generic configuration value that can represent any Rholang primitive or collection. +/// +/// This type allows Rholang to pass configuration values to VectorDB backends +/// without interpreting their meaning. The backend is responsible for validating +/// and converting these values to their appropriate types. 
+/// +/// # Supported Types +/// +/// | Rholang Type | RawConfigValue Variant | +/// |--------------|------------------------| +/// | String | `String(String)` | +/// | Integer | `Int(i64)` | +/// | Float* | `Float(f64)` | +/// | Boolean | `Bool(bool)` | +/// | List | `List(Vec<...>)` | +/// | Map | `Map(HashMap<...>)` | +/// +/// *Note: Rholang doesn't have native float literals; floats are typically +/// passed as strings like `"0.7"` and parsed by the backend. +#[derive(Debug, Clone, PartialEq)] +pub enum RawConfigValue { + /// A string value (e.g., `"cosine"`, `"0.7"`) + String(String), + + /// An integer value (e.g., `384`, `16`) + Int(i64), + + /// A floating-point value (for backends that parse numeric strings) + Float(f64), + + /// A boolean value (e.g., `true`, `false`) + Bool(bool), + + /// A list of values (e.g., `["pre_normalize", "cache_norms"]`) + List(Vec), + + /// A nested map of key-value pairs (e.g., `{"type": "hnsw", "max_connections": 32}`) + Map(HashMap), +} + +impl fmt::Display for RawConfigValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RawConfigValue::String(s) => write!(f, "\"{}\"", s), + RawConfigValue::Int(i) => write!(f, "{}", i), + RawConfigValue::Float(n) => write!(f, "{}", n), + RawConfigValue::Bool(b) => write!(f, "{}", b), + RawConfigValue::List(items) => { + write!(f, "[")?; + for (i, item) in items.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", item)?; + } + write!(f, "]") + } + RawConfigValue::Map(map) => { + write!(f, "{{")?; + for (i, (k, v)) in map.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "\"{}\": {}", k, v)?; + } + write!(f, "}}") + } + } + } +} + +impl RawConfigValue { + /// Try to get the value as a string reference. + pub fn as_str(&self) -> Option<&str> { + match self { + RawConfigValue::String(s) => Some(s), + _ => None, + } + } + + /// Try to get the value as an integer. 
+ pub fn as_int(&self) -> Option { + match self { + RawConfigValue::Int(i) => Some(*i), + _ => None, + } + } + + /// Try to get the value as a float. + pub fn as_float(&self) -> Option { + match self { + RawConfigValue::Float(f) => Some(*f), + RawConfigValue::Int(i) => Some(*i as f64), + _ => None, + } + } + + /// Try to get the value as a boolean. + pub fn as_bool(&self) -> Option { + match self { + RawConfigValue::Bool(b) => Some(*b), + _ => None, + } + } + + /// Try to get the value as a list reference. + pub fn as_list(&self) -> Option<&[RawConfigValue]> { + match self { + RawConfigValue::List(list) => Some(list), + _ => None, + } + } + + /// Try to get the value as a map reference. + pub fn as_map(&self) -> Option<&HashMap> { + match self { + RawConfigValue::Map(map) => Some(map), + _ => None, + } + } + + /// Check if this value is a string. + pub fn is_string(&self) -> bool { + matches!(self, RawConfigValue::String(_)) + } + + /// Check if this value is an integer. + pub fn is_int(&self) -> bool { + matches!(self, RawConfigValue::Int(_)) + } + + /// Check if this value is a float. + pub fn is_float(&self) -> bool { + matches!(self, RawConfigValue::Float(_)) + } + + /// Check if this value is a boolean. + pub fn is_bool(&self) -> bool { + matches!(self, RawConfigValue::Bool(_)) + } + + /// Check if this value is a list. + pub fn is_list(&self) -> bool { + matches!(self, RawConfigValue::List(_)) + } + + /// Check if this value is a map. + pub fn is_map(&self) -> bool { + matches!(self, RawConfigValue::Map(_)) + } + + /// Try to parse this value as a float, handling string conversion. + /// + /// This is useful for VectorDB backends that need to parse threshold values + /// which may be passed as strings like `"0.7"`. 
+ pub fn try_as_float(&self) -> Option { + match self { + RawConfigValue::Float(f) => Some(*f), + RawConfigValue::Int(i) => Some(*i as f64), + RawConfigValue::String(s) => s.parse::().ok(), + _ => None, + } + } + + /// Try to parse this value as a usize, handling string conversion. + pub fn try_as_usize(&self) -> Option { + match self { + RawConfigValue::Int(i) if *i >= 0 => Some(*i as usize), + RawConfigValue::String(s) => s.parse::().ok(), + _ => None, + } + } +} + +// ========================================================================== +// RawVectorDBConfig - Top-Level Configuration +// ========================================================================== + +/// Raw configuration extracted from Rholang for VectorDB construction. +/// +/// This struct contains the minimal universal parameter (`dimensions`) that +/// all VectorDB implementations require, plus a generic `params` map for +/// backend-specific configuration. +/// +/// # Rholang Extraction +/// +/// Rholang only extracts `dimensions` from the configuration map. All other +/// parameters are passed through as-is in the `params` map: +/// +/// ```rholang +/// VectorDBFactory!({ +/// "dimensions": 384, // Extracted by Rholang +/// "metric": "cosine", // Passed to backend +/// "threshold": "0.7", // Passed to backend +/// "index": "pre_normalize" // Passed to backend +/// }, *space) +/// ``` +/// +/// # Backend Interpretation +/// +/// The VectorDB backend receives the `params` map and interprets: +/// - `metric`: Similarity metric (cosine, euclidean, etc.) +/// - `threshold`: Similarity threshold +/// - `embedding_type`: Data type of embeddings +/// - `index` / `indices`: Index optimization configuration +/// - Any other backend-specific parameters +#[derive(Debug, Clone, Default)] +pub struct RawVectorDBConfig { + /// Required: embedding dimensions (the only universal parameter). + /// + /// This is the only parameter that Rholang understands. 
All VectorDB + /// implementations need to know the dimensionality of embeddings. + pub dimensions: usize, + + /// All other configuration as key-value pairs (passed to backend as-is). + /// + /// The VectorDB backend is responsible for validating and interpreting + /// these parameters according to its own configuration schema. + pub params: HashMap, +} + +impl RawVectorDBConfig { + /// Create a new RawVectorDBConfig with the given dimensions. + pub fn new(dimensions: usize) -> Self { + RawVectorDBConfig { + dimensions, + params: HashMap::new(), + } + } + + /// Create a RawVectorDBConfig with dimensions and parameters. + pub fn with_params(dimensions: usize, params: HashMap) -> Self { + RawVectorDBConfig { dimensions, params } + } + + /// Get a parameter by key. + pub fn get(&self, key: &str) -> Option<&RawConfigValue> { + self.params.get(key) + } + + /// Check if a parameter exists. + pub fn contains_key(&self, key: &str) -> bool { + self.params.contains_key(key) + } + + /// Insert a parameter. + pub fn insert(&mut self, key: impl Into, value: RawConfigValue) { + self.params.insert(key.into(), value); + } + + /// Get a string parameter. + pub fn get_string(&self, key: &str) -> Option<&str> { + self.params.get(key).and_then(|v| v.as_str()) + } + + /// Get an integer parameter. + pub fn get_int(&self, key: &str) -> Option { + self.params.get(key).and_then(|v| v.as_int()) + } + + /// Get a float parameter, handling string conversion. + pub fn get_float(&self, key: &str) -> Option { + self.params.get(key).and_then(|v| v.try_as_float()) + } + + /// Get a boolean parameter. + pub fn get_bool(&self, key: &str) -> Option { + self.params.get(key).and_then(|v| v.as_bool()) + } +} + +// ========================================================================== +// Par Conversion +// ========================================================================== + +/// Error type for configuration parsing. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RawConfigError { + /// A required parameter is missing. + MissingRequired { parameter: String }, + + /// A parameter has an invalid type. + InvalidType { parameter: String, expected: String, actual: String }, + + /// A parameter has an invalid value. + InvalidValue { parameter: String, description: String }, + + /// Failed to extract a string key from a map. + InvalidMapKey { description: String }, + + /// The configuration is not a map. + NotAMap, + + /// General conversion error. + ConversionError { description: String }, +} + +impl fmt::Display for RawConfigError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + RawConfigError::MissingRequired { parameter } => { + write!(f, "Missing required parameter: '{}'", parameter) + } + RawConfigError::InvalidType { parameter, expected, actual } => { + write!(f, "Invalid type for '{}': expected {}, got {}", parameter, expected, actual) + } + RawConfigError::InvalidValue { parameter, description } => { + write!(f, "Invalid value for '{}': {}", parameter, description) + } + RawConfigError::InvalidMapKey { description } => { + write!(f, "Invalid map key: {}", description) + } + RawConfigError::NotAMap => { + write!(f, "Configuration must be a map") + } + RawConfigError::ConversionError { description } => { + write!(f, "Configuration conversion error: {}", description) + } + } + } +} + +impl std::error::Error for RawConfigError {} + +/// Convert a Par to a RawConfigValue (recursive). +/// +/// This function recursively converts a Rholang Par expression to a generic +/// RawConfigValue that can be passed to VectorDB backends. 
+pub fn par_to_raw_config_value(par: &Par) -> Result { + // Check expressions (most common case for data) + for expr in &par.exprs { + if let Some(ref instance) = expr.expr_instance { + return expr_instance_to_raw_config_value(instance); + } + } + + // Empty par or unsupported structure + Err(RawConfigError::ConversionError { + description: "Unsupported Par structure for configuration value".to_string(), + }) +} + +/// Convert an ExprInstance to a RawConfigValue. +fn expr_instance_to_raw_config_value(instance: &ExprInstance) -> Result { + match instance { + ExprInstance::GString(s) => Ok(RawConfigValue::String(s.clone())), + + ExprInstance::GInt(i) => Ok(RawConfigValue::Int(*i)), + + ExprInstance::GBool(b) => Ok(RawConfigValue::Bool(*b)), + + ExprInstance::EListBody(elist) => { + let items: Result, _> = elist.ps + .iter() + .map(par_to_raw_config_value) + .collect(); + Ok(RawConfigValue::List(items?)) + } + + ExprInstance::EMapBody(emap) => { + let mut map = HashMap::new(); + for kv in &emap.kvs { + // Extract key (must be a string) + let key = if let Some(ref key_par) = kv.key { + extract_string_from_par(key_par)? + } else { + return Err(RawConfigError::InvalidMapKey { + description: "Map key is None".to_string(), + }); + }; + + // Extract value + let value = if let Some(ref value_par) = kv.value { + par_to_raw_config_value(value_par)? + } else { + return Err(RawConfigError::ConversionError { + description: format!("Map value for key '{}' is None", key), + }); + }; + + map.insert(key, value); + } + Ok(RawConfigValue::Map(map)) + } + + // Tuples are converted to lists + ExprInstance::ETupleBody(etuple) => { + let items: Result, _> = etuple.ps + .iter() + .map(par_to_raw_config_value) + .collect(); + Ok(RawConfigValue::List(items?)) + } + + _ => Err(RawConfigError::ConversionError { + description: format!("Unsupported expression type for configuration: {:?}", + std::mem::discriminant(instance)), + }), + } +} + +/// Extract a string from a Par (for map keys). 
+fn extract_string_from_par(par: &Par) -> Result { + for expr in &par.exprs { + if let Some(ExprInstance::GString(s)) = &expr.expr_instance { + return Ok(s.clone()); + } + } + Err(RawConfigError::InvalidMapKey { + description: "Expected string key in map".to_string(), + }) +} + +/// Parse a RawVectorDBConfig from a Par (the Rholang configuration map). +/// +/// This function extracts `dimensions` (the only universal parameter) and +/// passes everything else through as generic configuration values. +/// +/// # Arguments +/// +/// * `par` - The Par representing the configuration map +/// +/// # Returns +/// +/// A `RawVectorDBConfig` with `dimensions` and all other parameters in `params`. +/// +/// # Errors +/// +/// Returns an error if: +/// - The Par is not a map +/// - The `dimensions` parameter is missing +/// - The `dimensions` parameter is not a valid positive integer +pub fn parse_raw_vectordb_config(par: &Par) -> Result { + // First, convert the entire Par to a RawConfigValue + let config_value = par_to_raw_config_value(par)?; + + // Must be a map + let config_map = match config_value { + RawConfigValue::Map(m) => m, + _ => return Err(RawConfigError::NotAMap), + }; + + // Extract dimensions (required) + let dimensions = match config_map.get("dimensions") { + Some(RawConfigValue::Int(i)) if *i > 0 => *i as usize, + Some(RawConfigValue::String(s)) => { + s.parse::().map_err(|_| RawConfigError::InvalidValue { + parameter: "dimensions".to_string(), + description: format!("Cannot parse '{}' as positive integer", s), + })? 
+ } + Some(other) => { + return Err(RawConfigError::InvalidType { + parameter: "dimensions".to_string(), + expected: "positive integer".to_string(), + actual: format!("{:?}", std::mem::discriminant(other)), + }); + } + None => { + return Err(RawConfigError::MissingRequired { + parameter: "dimensions".to_string(), + }); + } + }; + + // Build params map (everything except dimensions) + let mut params = HashMap::new(); + for (key, value) in config_map { + if key != "dimensions" { + params.insert(key, value); + } + } + + Ok(RawVectorDBConfig { dimensions, params }) +} + +// ========================================================================== +// Tests +// ========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + use models::rhoapi::{Expr, EMap, KeyValuePair}; + + fn make_string_par(s: &str) -> Par { + Par { + exprs: vec![Expr { + expr_instance: Some(ExprInstance::GString(s.to_string())), + }], + ..Default::default() + } + } + + fn make_int_par(i: i64) -> Par { + Par { + exprs: vec![Expr { + expr_instance: Some(ExprInstance::GInt(i)), + }], + ..Default::default() + } + } + + fn make_bool_par(b: bool) -> Par { + Par { + exprs: vec![Expr { + expr_instance: Some(ExprInstance::GBool(b)), + }], + ..Default::default() + } + } + + fn make_map_par(kvs: Vec<(String, Par)>) -> Par { + let kvs = kvs + .into_iter() + .map(|(k, v)| KeyValuePair { + key: Some(make_string_par(&k)), + value: Some(v), + }) + .collect(); + Par { + exprs: vec![Expr { + expr_instance: Some(ExprInstance::EMapBody(EMap { + kvs, + locally_free: vec![], + connective_used: false, + remainder: None, + })), + }], + ..Default::default() + } + } + + #[test] + fn test_raw_config_value_string() { + let value = RawConfigValue::String("hello".to_string()); + assert_eq!(value.as_str(), Some("hello")); + assert!(value.is_string()); + assert!(!value.is_int()); + } + + #[test] + fn test_raw_config_value_int() { + let value = RawConfigValue::Int(42); + 
        assert_eq!(value.as_int(), Some(42));
        // An Int should also widen to a float losslessly.
        assert_eq!(value.as_float(), Some(42.0));
        assert!(value.is_int());
    }

    #[test]
    fn test_raw_config_value_bool() {
        let value = RawConfigValue::Bool(true);
        assert_eq!(value.as_bool(), Some(true));
        assert!(value.is_bool());
    }

    #[test]
    fn test_raw_config_value_list() {
        let value = RawConfigValue::List(vec![
            RawConfigValue::String("a".to_string()),
            RawConfigValue::String("b".to_string()),
        ]);
        assert!(value.is_list());
        assert_eq!(value.as_list().map(|l| l.len()), Some(2));
    }

    #[test]
    fn test_raw_config_value_map() {
        let mut map = HashMap::new();
        map.insert("key".to_string(), RawConfigValue::Int(123));
        let value = RawConfigValue::Map(map);
        assert!(value.is_map());
        assert_eq!(
            value.as_map().and_then(|m| m.get("key")).and_then(|v| v.as_int()),
            Some(123)
        );
    }

    // try_as_float coerces: Float passes through, Int widens, numeric strings
    // parse, everything else is None.
    #[test]
    fn test_raw_config_value_try_as_float() {
        assert_eq!(RawConfigValue::Float(0.5).try_as_float(), Some(0.5));
        assert_eq!(RawConfigValue::Int(42).try_as_float(), Some(42.0));
        assert_eq!(RawConfigValue::String("0.7".to_string()).try_as_float(), Some(0.7));
        assert_eq!(RawConfigValue::String("invalid".to_string()).try_as_float(), None);
        assert_eq!(RawConfigValue::Bool(true).try_as_float(), None);
    }

    // try_as_usize rejects negative ints and unparsable strings.
    #[test]
    fn test_raw_config_value_try_as_usize() {
        assert_eq!(RawConfigValue::Int(42).try_as_usize(), Some(42));
        assert_eq!(RawConfigValue::Int(-1).try_as_usize(), None);
        assert_eq!(RawConfigValue::String("100".to_string()).try_as_usize(), Some(100));
        assert_eq!(RawConfigValue::String("bad".to_string()).try_as_usize(), None);
    }

    // Display formatting: strings are quoted, scalars are bare, lists bracketed.
    #[test]
    fn test_raw_config_value_display() {
        assert_eq!(format!("{}", RawConfigValue::String("test".to_string())), "\"test\"");
        assert_eq!(format!("{}", RawConfigValue::Int(42)), "42");
        assert_eq!(format!("{}", RawConfigValue::Bool(true)), "true");
        assert_eq!(
            format!("{}", RawConfigValue::List(vec![RawConfigValue::Int(1), RawConfigValue::Int(2)])),
            "[1, 2]"
        );
    }

    #[test]
    fn test_raw_vectordb_config_new() {
        let config = RawVectorDBConfig::new(384);
        assert_eq!(config.dimensions, 384);
        assert!(config.params.is_empty());
    }

    #[test]
    fn test_raw_vectordb_config_with_params() {
        let mut params = HashMap::new();
        params.insert("metric".to_string(), RawConfigValue::String("cosine".to_string()));
        params.insert("threshold".to_string(), RawConfigValue::String("0.7".to_string()));

        let config = RawVectorDBConfig::with_params(384, params);
        assert_eq!(config.dimensions, 384);
        assert_eq!(config.get_string("metric"), Some("cosine"));
        // get_float is expected to coerce the string "0.7" — same path as
        // try_as_float above.
        assert_eq!(config.get_float("threshold"), Some(0.7));
    }

    #[test]
    fn test_par_to_raw_config_value_string() {
        let par = make_string_par("hello");
        let value = par_to_raw_config_value(&par).expect("conversion failed");
        assert_eq!(value.as_str(), Some("hello"));
    }

    #[test]
    fn test_par_to_raw_config_value_int() {
        let par = make_int_par(42);
        let value = par_to_raw_config_value(&par).expect("conversion failed");
        assert_eq!(value.as_int(), Some(42));
    }

    #[test]
    fn test_par_to_raw_config_value_bool() {
        let par = make_bool_par(true);
        let value = par_to_raw_config_value(&par).expect("conversion failed");
        assert_eq!(value.as_bool(), Some(true));
    }

    #[test]
    fn test_par_to_raw_config_value_map() {
        let par = make_map_par(vec![
            ("key1".to_string(), make_string_par("value1")),
            ("key2".to_string(), make_int_par(42)),
        ]);
        let value = par_to_raw_config_value(&par).expect("conversion failed");
        let map = value.as_map().expect("should be map");
        assert_eq!(map.get("key1").and_then(|v| v.as_str()), Some("value1"));
        assert_eq!(map.get("key2").and_then(|v| v.as_int()), Some(42));
    }

    #[test]
    fn test_parse_raw_vectordb_config_basic() {
        let par = make_map_par(vec![
            ("dimensions".to_string(), make_int_par(384)),
            ("metric".to_string(), make_string_par("cosine")),
        ]);

        let config =
parse_raw_vectordb_config(&par).expect("parse failed"); + assert_eq!(config.dimensions, 384); + assert_eq!(config.get_string("metric"), Some("cosine")); + assert!(!config.contains_key("dimensions")); // dimensions removed from params + } + + #[test] + fn test_parse_raw_vectordb_config_with_index() { + let par = make_map_par(vec![ + ("dimensions".to_string(), make_int_par(128)), + ("metric".to_string(), make_string_par("euclidean")), + ("threshold".to_string(), make_string_par("0.8")), + ("index".to_string(), make_string_par("pre_normalize")), + ]); + + let config = parse_raw_vectordb_config(&par).expect("parse failed"); + assert_eq!(config.dimensions, 128); + assert_eq!(config.get_string("metric"), Some("euclidean")); + assert_eq!(config.get_float("threshold"), Some(0.8)); + assert_eq!(config.get_string("index"), Some("pre_normalize")); + } + + #[test] + fn test_parse_raw_vectordb_config_missing_dimensions() { + let par = make_map_par(vec![ + ("metric".to_string(), make_string_par("cosine")), + ]); + + let result = parse_raw_vectordb_config(&par); + assert!(matches!( + result, + Err(RawConfigError::MissingRequired { parameter }) if parameter == "dimensions" + )); + } + + #[test] + fn test_parse_raw_vectordb_config_nested_index() { + // Test with nested map for index configuration + let index_map = make_map_par(vec![ + ("type".to_string(), make_string_par("hnsw")), + ("max_connections".to_string(), make_int_par(32)), + ("ef_construction".to_string(), make_int_par(400)), + ]); + + let par = make_map_par(vec![ + ("dimensions".to_string(), make_int_par(768)), + ("index".to_string(), index_map), + ]); + + let config = parse_raw_vectordb_config(&par).expect("parse failed"); + assert_eq!(config.dimensions, 768); + + let index = config.get("index").expect("index should exist"); + let index_map = index.as_map().expect("index should be map"); + assert_eq!(index_map.get("type").and_then(|v| v.as_str()), Some("hnsw")); + assert_eq!(index_map.get("max_connections").and_then(|v| 
v.as_int()), Some(32)); + assert_eq!(index_map.get("ef_construction").and_then(|v| v.as_int()), Some(400)); + } + + #[test] + fn test_raw_config_error_display() { + let err = RawConfigError::MissingRequired { parameter: "dimensions".to_string() }; + assert!(format!("{}", err).contains("dimensions")); + + let err = RawConfigError::InvalidType { + parameter: "threshold".to_string(), + expected: "float".to_string(), + actual: "string".to_string(), + }; + assert!(format!("{}", err).contains("threshold")); + } +} diff --git a/rholang/src/rust/interpreter/spaces/types/theory.rs b/rholang/src/rust/interpreter/spaces/types/theory.rs new file mode 100644 index 000000000..9ed9db903 --- /dev/null +++ b/rholang/src/rust/interpreter/spaces/types/theory.rs @@ -0,0 +1,439 @@ +//! Theory and Validatable Traits for MeTTaIL Integration +//! +//! This module defines the fundamental types for type/contract validation of data. +//! When integrated with MeTTaIL, this enables typed tuple spaces where only +//! well-typed data can be stored. + +use std::fmt; + +use models::rhoapi::{expr::ExprInstance, EList, Expr, ListParWithRandom, Par}; + +// ========================================================================== +// Theory Trait (for MeTTaIL integration) +// ========================================================================== + +/// Boxed theory for type erasure in SpaceConfig. +pub type BoxedTheory = Box; + +/// Theory trait for type/contract validation of data. +/// +/// This trait allows spaces to validate data against a type theory or contract +/// before accepting it. In the full implementation, this integrates with +/// MeTTaIL for rich type theory support including dependent types and contracts. +/// +/// # Spec Reference +/// From the Reifying RSpaces spec: "Each space can optionally be associated with +/// a MeTTaIL theory that validates data before it enters the space." 
///
/// # Example
/// ```ignore
/// // Future integration with MeTTaIL
/// use mettail::Theory as MettailTheory;
///
/// let theory = MettailTheory::parse("(: Nat Type)")?;
/// let config = SpaceConfig::default().with_theory(Box::new(theory));
/// ```
pub trait Theory: Send + Sync + fmt::Debug {
    /// Validate that the given term conforms to this theory.
    ///
    /// Returns `Ok(())` if the term is valid according to the theory,
    /// or `Err(description)` if validation fails.
    ///
    /// # Arguments
    /// * `term` - The term to validate, typically serialized as a string
    fn validate(&self, term: &str) -> Result<(), String>;

    /// Get the name/identifier of this theory.
    ///
    /// Used for debugging and error messages.
    fn name(&self) -> &str;

    /// Check if a type is defined in this theory.
    ///
    /// # Arguments
    /// * `type_name` - The name of the type to check
    fn has_type(&self, type_name: &str) -> bool {
        // Default implementation - override for actual type checking.
        // `let _` silences the unused-parameter warning for the default body.
        let _ = type_name;
        false
    }

    /// Get a human-readable description of the theory.
    fn description(&self) -> &str {
        "Unnamed theory"
    }

    /// Clone this theory into a boxed trait object.
    ///
    /// This is needed because trait objects cannot implement Clone directly.
    fn clone_box(&self) -> BoxedTheory;
}

/// A null theory that accepts all data (no validation).
///
/// This is the default when no theory is specified.
#[derive(Clone, Debug, Default)]
pub struct NullTheory;

impl Theory for NullTheory {
    fn validate(&self, _term: &str) -> Result<(), String> {
        Ok(()) // Accept everything
    }

    fn name(&self) -> &str {
        "NullTheory"
    }

    fn description(&self) -> &str {
        "Accepts all data without validation"
    }

    fn clone_box(&self) -> BoxedTheory {
        Box::new(self.clone())
    }
}

// ==========================================================================
// Validatable Trait
// ==========================================================================

/// Trait for data types that can be validated against a theory.
///
/// This trait allows data to be validated before being stored in a typed
/// tuple space. Types implementing this trait can be serialized to a string
/// representation that the theory can validate.
///
/// # Example
/// ```ignore
/// impl Validatable for MyData {
///     fn to_validatable_string(&self) -> String {
///         format!("MyData({})", self.value)
///     }
///
///     fn type_name(&self) -> &str {
///         "MyData"
///     }
/// }
/// ```
pub trait Validatable {
    /// Convert the data to a string representation for theory validation.
    ///
    /// This string is passed to `Theory::validate()` to check conformance.
    fn to_validatable_string(&self) -> String;

    /// Get the type name for this data.
    ///
    /// Used for error messages and theory type checking.
    fn type_name(&self) -> &str;

    /// Validate this data against a theory.
    ///
    /// Returns `Ok(())` if validation passes, or an error message if it fails.
    fn validate(&self, theory: &dyn Theory) -> Result<(), String> {
        theory.validate(&self.to_validatable_string())
    }
}

/// A validation result containing either success or a detailed error.
pub type ValidationResult = Result<(), super::super::errors::SpaceError>;

/// Extension trait for validating data before insertion into a typed space.
// NOTE(review): the generic parameter lists on this trait and its blanket impl
// were stripped by extraction (`pub trait TheoryValidator {` / `impl
// TheoryValidator for T {`); `<V: Validatable>` and `<T: Theory, V: Validatable>`
// are reconstructed from the body's use of `V` and `self.validate` — confirm
// against the original source.
pub trait TheoryValidator<V: Validatable> {
    /// Validate data against this theory, returning a SpaceError on failure.
    fn validate_data(&self, data: &V) -> ValidationResult;
}

/// Blanket impl: every `Theory` can validate any `Validatable` payload,
/// wrapping failures in `SpaceError::TheoryValidationError`.
impl<T: Theory, V: Validatable> TheoryValidator<V> for T {
    fn validate_data(&self, data: &V) -> ValidationResult {
        match self.validate(&data.to_validatable_string()) {
            Ok(()) => Ok(()),
            Err(validation_error) => Err(super::super::errors::SpaceError::TheoryValidationError {
                theory_name: self.name().to_string(),
                validation_error,
                term: data.to_validatable_string(),
            }),
        }
    }
}

// ==========================================================================
// Theory Implementations
// ==========================================================================

/// A simple type theory that validates against a list of allowed type names.
///
/// This is a placeholder for testing until full MeTTaIL integration.
#[derive(Clone, Debug)]
pub struct SimpleTypeTheory {
    name: String,
    // NOTE(review): element type reconstructed as Vec<String> (was `Vec,`).
    allowed_types: Vec<String>,
}

impl SimpleTypeTheory {
    /// Create a new SimpleTypeTheory with the given name and allowed types.
    pub fn new(name: impl Into<String>, allowed_types: Vec<String>) -> Self {
        SimpleTypeTheory {
            name: name.into(),
            allowed_types,
        }
    }
}

impl Theory for SimpleTypeTheory {
    fn validate(&self, term: &str) -> Result<(), String> {
        // Simple validation: check if term starts with an allowed type
        for allowed in &self.allowed_types {
            if term.starts_with(allowed) {
                return Ok(());
            }
        }
        Err(format!(
            "Term '{}' does not match any allowed type: {:?}",
            term, self.allowed_types
        ))
    }

    fn name(&self) -> &str {
        &self.name
    }

    fn has_type(&self, type_name: &str) -> bool {
        self.allowed_types.iter().any(|t| t == type_name)
    }

    fn description(&self) -> &str {
        "Simple type validation against allowed type names"
    }

    fn clone_box(&self) -> BoxedTheory {
        Box::new(self.clone())
    }
}

// ==========================================================================
// Validatable Implementation for ListParWithRandom
// ==========================================================================

impl Validatable for ListParWithRandom {
    fn to_validatable_string(&self) -> String {
        // Convert the ListParWithRandom to a type-prefixed string for validation.
        // This examines the actual data and produces strings like:
        // - "Nat(42)" for non-negative integers
        // - "Int(-5)" for negative integers
        // - "String(hello)" for strings
        // - "Bool(true)" for booleans
        // - "Unknown" for complex/unknown data
        //
        // For multiple pars, we concatenate their representations.
+ + if self.pars.is_empty() { + return "Unit".to_string(); + } + + let mut parts = Vec::new(); + for par in &self.pars { + parts.push(par_to_validatable_type(par)); + } + + if parts.len() == 1 { + parts.into_iter().next().expect("expected at least one part") + } else { + format!("Tuple({})", parts.join(", ")) + } + } + + fn type_name(&self) -> &str { + // Determine the primary type name based on contents + if self.pars.is_empty() { + return "Unit"; + } + if self.pars.len() > 1 { + return "Tuple"; + } + + // Single par - determine its type + par_type_name(&self.pars[0]) + } +} + +/// Convert a Par to a type-prefixed validation string. +fn par_to_validatable_type(par: &Par) -> String { + // Check expressions first (most common case for data) + for expr in &par.exprs { + if let Some(ref instance) = expr.expr_instance { + return expr_instance_to_validatable(instance); + } + } + + // Check other Par fields + if !par.sends.is_empty() { + return "Process(Send)".to_string(); + } + if !par.receives.is_empty() { + return "Process(Receive)".to_string(); + } + if !par.news.is_empty() { + return "Process(New)".to_string(); + } + if !par.matches.is_empty() { + return "Process(Match)".to_string(); + } + if !par.unforgeables.is_empty() { + return "Unforgeable".to_string(); + } + if !par.bundles.is_empty() { + return "Bundle".to_string(); + } + if !par.connectives.is_empty() { + return "Connective".to_string(); + } + + // Empty or unknown + "Unknown".to_string() +} + +/// Convert an ExprInstance to a type-prefixed validation string. 
+fn expr_instance_to_validatable(instance: &ExprInstance) -> String { + match instance { + ExprInstance::GInt(n) => { + if *n >= 0 { + format!("Nat({})", n) + } else { + format!("Int({})", n) + } + } + ExprInstance::GString(s) => format!("String({})", s), + ExprInstance::GBool(b) => format!("Bool({})", b), + ExprInstance::GUri(u) => format!("Uri({})", u), + ExprInstance::GByteArray(bytes) => format!("ByteArray(len={})", bytes.len()), + + // Collections + ExprInstance::EListBody(elist) => { + let items: Vec = elist.ps.iter().map(par_to_validatable_type).collect(); + format!("List({})", items.join(", ")) + } + ExprInstance::ETupleBody(etuple) => { + let items: Vec = etuple.ps.iter().map(par_to_validatable_type).collect(); + format!("Tuple({})", items.join(", ")) + } + ExprInstance::ESetBody(eset) => { + let items: Vec = eset.ps.iter().map(par_to_validatable_type).collect(); + format!("Set({})", items.join(", ")) + } + ExprInstance::EMapBody(emap) => { + format!("Map(len={})", emap.kvs.len()) + } + + // Variables + ExprInstance::EVarBody(_) => "Var".to_string(), + ExprInstance::EFreeBody(_) => "Free".to_string(), + + // Arithmetic operations + ExprInstance::EPlusBody(_) + | ExprInstance::EMinusBody(_) + | ExprInstance::EMultBody(_) + | ExprInstance::EDivBody(_) + | ExprInstance::EModBody(_) + | ExprInstance::ENegBody(_) => "ArithExpr".to_string(), + + // Comparison operations + ExprInstance::ELtBody(_) + | ExprInstance::ELteBody(_) + | ExprInstance::EGtBody(_) + | ExprInstance::EGteBody(_) + | ExprInstance::EEqBody(_) + | ExprInstance::ENeqBody(_) => "CompareExpr".to_string(), + + // Logical operations + ExprInstance::EAndBody(_) + | ExprInstance::EOrBody(_) + | ExprInstance::ENotBody(_) => "LogicalExpr".to_string(), + + // Collection operations + ExprInstance::EPlusPlusBody(_) | ExprInstance::EMinusMinusBody(_) => { + "CollectionExpr".to_string() + } + + // String operations + ExprInstance::EPercentPercentBody(_) => "StringExpr".to_string(), + + // Method call + 
ExprInstance::EMethodBody(_) => "MethodCall".to_string(), + + // Function call + ExprInstance::EFunctionBody(_) => "FunctionCall".to_string(), + + // Match expression + ExprInstance::EMatchesBody(_) => "MatchExpr".to_string(), + + // PathMap + ExprInstance::EPathmapBody(_) => "PathMap".to_string(), + + // Zipper + ExprInstance::EZipperBody(_) => "Zipper".to_string(), + } +} + +/// Get the primary type name for a Par. +fn par_type_name(par: &Par) -> &'static str { + for expr in &par.exprs { + if let Some(ref instance) = expr.expr_instance { + return match instance { + ExprInstance::GInt(n) => { + if *n >= 0 { + "Nat" + } else { + "Int" + } + } + ExprInstance::GString(_) => "String", + ExprInstance::GBool(_) => "Bool", + ExprInstance::GUri(_) => "Uri", + ExprInstance::GByteArray(_) => "ByteArray", + ExprInstance::EListBody(_) => "List", + ExprInstance::ETupleBody(_) => "Tuple", + ExprInstance::ESetBody(_) => "Set", + ExprInstance::EMapBody(_) => "Map", + _ => "Expr", + }; + } + } + "Unknown" +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_null_theory_accepts_everything() { + let theory = NullTheory; + assert!(theory.validate("anything").is_ok()); + assert!(theory.validate("").is_ok()); + assert!(theory.validate("(+ 1 2)").is_ok()); + assert_eq!(theory.name(), "NullTheory"); + } + + #[test] + fn test_simple_type_theory_validates() { + let theory = SimpleTypeTheory::new( + "NatOrBool", + vec!["Nat".to_string(), "Bool".to_string()], + ); + + // Valid terms + assert!(theory.validate("Nat 42").is_ok()); + assert!(theory.validate("Bool true").is_ok()); + + // Invalid terms + assert!(theory.validate("String hello").is_err()); + assert!(theory.validate("Float 3.14").is_err()); + + // Type checks + assert!(theory.has_type("Nat")); + assert!(theory.has_type("Bool")); + assert!(!theory.has_type("String")); + + assert_eq!(theory.name(), "NatOrBool"); + } +}