Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/claude-security-review.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Automated Claude-based security review for pull requests.
# Indentation restored: the diff rendering stripped all leading whitespace,
# which is structurally significant in YAML.
name: Claude Security Review

on:
  pull_request:
    # Re-run on every push to the PR branch and on reopen.
    types: [opened, synchronize, reopened]
  # Allow manual runs from the Actions tab.
  workflow_dispatch:

# Least-privilege token: read the code, write review comments to the PR.
permissions:
  contents: read
  pull-requests: write

jobs:
  security-review:
    # Skip draft PRs; manual dispatch always runs (for workflow_dispatch the
    # pull_request payload is absent, so the first clause is false).
    if: github.event.pull_request.draft == false || github.event_name == 'workflow_dispatch'
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history so the action can diff the PR head against its base.
          fetch-depth: 0

      - name: Run Claude Security Review
        uses: anthropics/claude-code-security-review@main
        with:
          claude-api-key: ${{ secrets.CLAUDE_API_KEY }}
          claude-model: claude-opus-4-6
          # Keep the per-run Claude timeout below the job timeout above.
          claudecode-timeout: 25
          run-every-commit: true
          exclude-directories: "docs,tests,benches,.github"
10 changes: 10 additions & 0 deletions crates/engine/tree/src/persistence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,11 @@ where
// and storage_db internally. For fault tolerance, stage checkpoints ensure
// idempotency - each stage's checkpoint is verified before writing, guaranteeing
// exactly-once execution even if the process crashes mid-block.
// Design Intent (GRETH-069): State and trie writes happen in separate
// threads with separate commits. If trie commits before state, another
// thread may transiently observe new trie nodes but old state data. This
// is acceptable because the cache overlay always provides consistent data
// for merklization, and readers only fall back to DB on cache miss.
thread::scope(|scope| -> Result<(), PersistenceError> {
let state_handle = scope.spawn(|| -> Result<(), PersistenceError> {
let start = Instant::now();
Expand Down Expand Up @@ -281,6 +286,11 @@ where
)
.record(start.elapsed());

// Design Intent (GRETH-072): Validator nodes skip history index
// writes because they are not RPC servers and do not serve
// historical queries. Note: historical RPC queries against a
// validator node may return empty/incorrect results without
// explicit error, since no guard rejects them.
if !get_gravity_config().validator_node_only {
let start = Instant::now();
let provider_rw = inner_provider.database_provider_rw()?;
Expand Down
8 changes: 8 additions & 0 deletions crates/engine/tree/src/recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@ impl<'a, N: ProviderNodeTypes> StorageRecoveryHelper<'a, N> {
/// - History indices correctly track all state changes
/// - All checkpoints are synchronized at `recover_block_number`
pub fn check_and_recover(&self) -> ProviderResult<()> {
// Design Intent (GRETH-008): Recovery trusts the canonical tip state root
// without re-verification. This is acceptable because the only way a corrupted
// checkpoint can exist is via hardware failure or external data corruption,
// which would require manual intervention regardless.
// TODO(GRETH-070): Recovery uses StageId::Execution checkpoint as ground truth.
// If this checkpoint is corrupted (e.g., by partial write or disk corruption),
// recovery operates at the wrong block number. Consider adding a cross-check
// against block headers stored in static files.
let provider_ro = self.factory.database_provider_ro()?;
let recover_block_number = provider_ro.recover_block_number()?;
let best_block_number = provider_ro.best_block_number()?;
Expand Down
8 changes: 7 additions & 1 deletion crates/engine/tree/src/tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -538,13 +538,19 @@ where
ForkchoiceStatus::Valid,
);

// TODO(GRETH-068): Panic on make_canonical failure. Any error (including
// transient RocksDB I/O errors) causes immediate node crash with no retry
// mechanism. Consider adding retry logic for transient errors.
self.make_canonical(block_hash).unwrap_or_else(|err| {
panic!(
"Failed to make canonical, block_number={block_number} block_hash={block_hash}: {err}",
)
});

// deterministic consensus means canonical block is immediately safe and finalized
// Design Intent (GRETH-009): In Gravity's BFT consensus model, every block
// that passes consensus is immediately considered safe and finalized. There is
// no concept of probabilistic finality or confirmation depth. This is by design
// and matches the deterministic consensus guarantees.
self.canonical_in_memory_state.set_safe(sealed_header.clone());
self.canonical_in_memory_state.set_finalized(sealed_header);
}
Expand Down
7 changes: 7 additions & 0 deletions crates/ethereum/evm/src/parallel_execute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ where
let state_clear_flag = self.chain_spec.is_spurious_dragon_active_at_block(block.number);
let state = self.state.as_mut().unwrap();
state.set_state_clear_flag(state_clear_flag);
// Design Intent (GRETH-058-old / WrapDatabaseRef state boundary):
// WrapDatabaseRef provides a read-only database interface to the EVM, but
// apply_pre_execution_changes() operates through system call hooks that write
// directly to the underlying ParallelState (e.g., beacon root contract calls).
// The state changes are not lost — they are committed via the ParallelState
// reference that WrapDatabaseRef wraps. This is consistent with upstream Reth's
// execution model where pre-execution changes are applied in-place.
let mut evm =
self.evm_config.evm_for_block(WrapDatabaseRef(state), block.header()).map_err(|e| {
BlockExecutionError::Internal(InternalBlockExecutionError::Other(Box::new(e)))
Expand Down
16 changes: 16 additions & 0 deletions crates/gravity-storage/src/block_view_storage/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ where
}

fn state_root(&self, hashed_state: &HashedPostState) -> ProviderResult<(B256, TrieUpdatesV2)> {
// Design Intent (GRETH-034, GRETH-035): The DB transaction here reflects the
// persist frontier (height P), while the cache overlay contains data up to the
// current execution height M > P. In normal operation, the cache always provides
// the most recent data. DB fallback only occurs on cache misses, which are rare
// for recently-touched accounts. The three-database sharding (state/account/storage)
// also lacks cross-DB snapshot coordination, but the cache overlay dominates reads.
let tx = self.client.database_provider_ro()?.into_tx();
let nested_hash = NestedStateRoot::new(&tx, Some(self.cache.clone()));
nested_hash.calculate(hashed_state)
Expand Down Expand Up @@ -147,6 +153,11 @@ impl<Tx: DbTx> DatabaseRef for RawBlockViewProvider<Tx> {
.unwrap_or_default())
}

// TODO(GRETH-031): BLOCKHASH opcode (0x40) is unimplemented. Any user transaction
// calling BLOCKHASH triggers a panic, which — combined with GRETH-029 (no barrier
// timeout) — causes permanent pipeline deadlock. Implement using the
// block_number_to_id BTreeMap maintained by BlockViewStorage::update_canonical(),
// which already stores the last BLOCK_HASH_HISTORY block hashes.
/// Returns the hash of the block with the given number.
///
/// Unimplemented: always panics via `unimplemented!`. Any EVM execution that
/// reaches the BLOCKHASH opcode through this provider will therefore panic —
/// see TODO(GRETH-031) in the surrounding review comments for the proposed
/// implementation using the `block_number_to_id` map.
fn block_hash_ref(&self, _number: u64) -> Result<B256, Self::Error> {
unimplemented!("not support block_hash_ref in BlockViewProvider")
}
Expand Down Expand Up @@ -207,6 +218,11 @@ impl DatabaseRef for BlockViewProvider {
self.db.storage_ref(address, index)
}

// TODO(GRETH-031): BLOCKHASH opcode (0x40) is unimplemented. Any user transaction
// calling BLOCKHASH triggers a panic, which — combined with GRETH-029 (no barrier
// timeout) — causes permanent pipeline deadlock. Implement using the
// block_number_to_id BTreeMap maintained by BlockViewStorage::update_canonical(),
// which already stores the last BLOCK_HASH_HISTORY block hashes.
/// Returns the hash of the block with the given number.
///
/// Unimplemented: always panics via `unimplemented!`. A user transaction
/// executing the BLOCKHASH opcode against `BlockViewProvider` reaches this
/// panic — see TODO(GRETH-031) in the surrounding review comments for the
/// proposed fix.
fn block_hash_ref(&self, _number: u64) -> Result<B256, Self::Error> {
unimplemented!("not support block_hash_ref in BlockViewProvider")
}
Expand Down
11 changes: 11 additions & 0 deletions crates/pipe-exec-layer-ext-v2/event-bus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,21 @@ use tokio::sync::{mpsc::UnboundedReceiver, oneshot};
use tracing::info;

/// A static instance of `PipeExecLayerEventBus` used for dispatching events.
// Design Intent (GRETH-037): The type-erased Box<dyn Any> is used because Rust's
// static globals cannot be generic. Type safety is enforced at node startup: the
// generic parameter N is consistent across the single binary entry point. A
// mismatched downcast would panic at initialization, before any blocks are processed.
pub static PIPE_EXEC_LAYER_EVENT_BUS: OnceCell<Box<dyn Any + Send + Sync>> = OnceCell::new();

/// Get a reference to the global `PipeExecLayerEventBus` instance.
pub fn get_pipe_exec_layer_event_bus<N: NodePrimitives>() -> &'static PipeExecLayerEventBus<N> {
// Design Intent (GRETH-038): The busy-wait loop with thread::sleep is used
// because this is called during node startup, before the async runtime is fully
// initialized. The event bus is always set during normal startup sequence, so this
// loop terminates quickly (typically 0-2 iterations). In abnormal conditions
// (initialization bug), the loop serves as a diagnostic via the log message.
// TODO: Consider adding a maximum iteration count to avoid hanging in truly broken
// deployments where the event bus is never initialized.
let mut wait_time = 0;
loop {
let event_bus = PIPE_EXEC_LAYER_EVENT_BUS
Expand Down
3 changes: 3 additions & 0 deletions crates/pipe-exec-layer-ext-v2/execute/src/bls_precompile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ const BLS_POP_LEN: usize = 96;
const EXPECTED_INPUT_LEN: usize = BLS_PUBKEY_LEN + BLS_POP_LEN;

/// Gas cost for PoP verification (2 bilinear pairings + hash-to-curve)
// TODO(GRETH-065): POP_VERIFY_GAS = 45,000 is ~23% of EIP-2537 equivalent
// (~193,000 gas). Benchmark actual CPU cost on target hardware and adjust
// to prevent computational DoS via repeated PoP verification calls.
const POP_VERIFY_GAS: u64 = 45_000;

/// Domain separation tag for BLS PoP verification
Expand Down
15 changes: 9 additions & 6 deletions crates/pipe-exec-layer-ext-v2/execute/src/channel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ impl<K: Eq + Clone + Debug + Hash, V> Channel<K, V> {
}

async fn wait_inner(&self, key: K, timeout: Option<Duration>) -> Option<V> {
// ATTN: We can guarantee that `.await` will not occur within the critical zone, which means
// `MutexGuard` will not be sent across threads.
// Design Intent (GRETH-047-old): The SendMutexGuard uses `unsafe impl Send`.
// Safety: We guarantee the MutexGuard is dropped before any `.await` point. The
// guard is only held during synchronous HashMap operations and released via
// `drop(inner)` on line 75, before the `rx.await` on line 80/94.
struct SendMutexGuard<'a, T>(MutexGuard<'a, T>);
unsafe impl<'a, T> Send for SendMutexGuard<'a, T> {}

Expand All @@ -79,10 +81,11 @@ impl<K: Eq + Clone + Debug + Hash, V> Channel<K, V> {
match tokio::time::timeout(duration, rx).await {
Ok(result) => result.ok(),
Err(_) => {
// Timeout occurred, clean up the waiting state only if still
// waiting. If the state is
// Notified, we should not remove it to avoid losing
// the notify signal.
// Timeout occurred. Clean up waiting state only if still waiting.
// If a notify arrived between timeout and lock acquisition
// (State::Notified), we must NOT remove it to avoid losing the
// signal. These orphaned Notified entries are bounded by the
// pipeline concurrency window and naturally consumed by retry.
let mut inner = self.inner.lock().unwrap();
if matches!(inner.states.get(&key), Some(State::Waiting(_))) {
inner.states.remove(&key);
Expand Down
46 changes: 45 additions & 1 deletion crates/pipe-exec-layer-ext-v2/execute/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,22 @@ struct Core<Storage: GravityStorage> {
custom_precompiles: Arc<Vec<(Address, DynPrecompile)>>,
event_tx: std::sync::mpsc::Sender<PipeExecLayerEvent<EthPrimitives>>,
execute_block_barrier: Channel<(u64, u64) /* epoch, block number */, ExecuteBlockContext>,
// TODO(GRETH-029): merklize/seal/make_canonical barriers lack timeouts. If a process()
// task panics, downstream barriers wait forever. Add wait_timeout to all barriers and
// implement a circuit breaker for graceful shutdown on task failure.
merklize_barrier: Channel<u64 /* block number */, ()>,
seal_barrier: Channel<u64 /* block number */, B256 /* block hash */>,
make_canonical_barrier: Channel<u64 /* block number */, Instant>,
// TODO(GRETH-036): All inter-layer channels are unbounded. Under sustained load or
// adversarial block conditions, blocks accumulate without bound. Replace with
// bounded_channel(32-64) to provide backpressure from execution to consensus.
discard_txs_tx: UnboundedSender<Vec<TxHash>>,
cache: PersistBlockCache,
// Design Intent (GRETH-057): `epoch` and `execute_height` are two independent AtomicU64
// values. Cross-variable ordering is guaranteed by the `execute_block_barrier` Channel
// (backed by Mutex<Inner>), which serializes block processing: block N cannot proceed
// past wait((epoch, N-1)) until block N-1 completes. Release/Acquire ordering on each
// individual atomic is sufficient; SeqCst is unnecessary.
epoch: AtomicU64,
execute_height: AtomicU64,
metrics: PipeExecLayerMetrics,
Expand Down Expand Up @@ -266,6 +277,11 @@ impl<Storage: GravityStorage> PipeExecService<Storage> {
);

let core = self.core.clone();
// TODO(GRETH-044): JoinHandle is discarded — panics in process() are silently
// swallowed by the tokio runtime. Monitor the JoinHandle to detect and propagate
// task failures, triggering graceful shutdown instead of silent deadlock.
// TODO(GRETH-045): No concurrency limit on spawned process() tasks. Each waiting
// task holds its ReceivedBlock in memory. Add a semaphore or task queue.
tokio::spawn(async move {
let start_time = Instant::now();
core.process(block).await;
Expand Down Expand Up @@ -398,7 +414,11 @@ impl<Storage: GravityStorage> Core<Storage> {
}
self.storage.insert_block_id(block_number, block_id);

// Wait for persist gap with a reasonable timeout (2 seconds)
// Design Intent (GRETH-038-old / wait_persist_gap timeout):
// The 2000ms timeout is intentional. We deliberately allow execution to proceed
// when persistence lags, preventing consensus halts. The resulting cache growth
// is managed by the eviction daemon (see cache.rs). Blocking indefinitely here
// would halt consensus whenever disk I/O is temporarily slow.
self.cache.wait_persist_gap(Some(2000));
let start_time = Instant::now();
let ExecuteOrderedBlockResult {
Expand Down Expand Up @@ -530,7 +550,13 @@ impl<Storage: GravityStorage> Core<Storage> {
let executed_block_hash = execution_result.block_hash;
self.execution_result_tx.send(execution_result).ok()?;
let block_hash = self.verified_block_hash_rx.wait(block_id).await?;
// TODO(GRETH-052): When block_hash is None, verification is skipped entirely.
// The None path exists for genesis/bootstrap but has no guard preventing it for
// post-genesis blocks. Add an explicit check that None is only valid at genesis.
if let Some(block_hash) = block_hash {
// Design Intent (GRETH-053): Deliberate panic on hash mismatch. This is the
// correct behavior for consensus safety — a mismatch means non-deterministic
// execution, which must halt the node rather than silently diverge.
assert_eq!(executed_block_hash, block_hash);
}
let elapsed = start_time.elapsed();
Expand Down Expand Up @@ -752,6 +778,10 @@ impl<Storage: GravityStorage> Core<Storage> {

// Merge state changes into accumulated changes
for (addr, account) in validator_state_changes {
// TODO(GRETH-051): `HashMap::insert` replaces the entire account entry.
// If a precompile modifies an account that is also touched by a system
// transaction, the updates from the system transaction are lost.
// Replace with deep-merge semantics (merge storage maps, take latest AccountInfo).
accumulated_state_changes.insert(addr, account);
}

Expand Down Expand Up @@ -803,6 +833,9 @@ impl<Storage: GravityStorage> Core<Storage> {
for (address, account) in precompile_bundle.state {
if let Some(info) = account.info {
use revm::state::{Account, AccountStatus, EvmStorageSlot};
// TODO(GRETH-051): Same as above, replacing `insert` with deep-merge
// to avoid losing precompile state updates if the account was already
// modified by a metadata or validator transaction.
accumulated_state_changes.insert(
address,
Account {
Expand Down Expand Up @@ -1213,6 +1246,12 @@ fn filter_invalid_txs<DB: ParallelDatabase>(
sender_idx.entry(sender).or_default().push(i);
}

// Design Intent (GRETH-074 / GRETH-016): This is a performance-only pre-filter,
// NOT a security boundary. It uses effective_gas_price (which is <= max_fee_per_gas)
// making the filter less strict than EVM validation. This is intentional: the filter
// may produce false negatives (letting some insufficient-balance txns through) but
// the EVM will catch them during actual execution. The purpose is to reduce wasted
// parallel EVM work on obviously-invalid transactions.
let is_tx_valid = |tx: &TransactionSigned, sender: &Address, account: &mut AccountInfo| {
if account.nonce != tx.nonce() {
warn!(target: "filter_invalid_txs",
Expand Down Expand Up @@ -1329,6 +1368,9 @@ impl<Storage: GravityStorage, EthApi> PipeExecLayerApi<Storage, EthApi> {

/// Wait for the block with the given block number to be persisted in the storage.
/// Returns `None` if the channel has been closed.
// TODO(GRETH-054): rx.await has no timeout. If persistence never completes (disk
// full, I/O error), the commit loop stalls permanently and consensus hangs. Add a
// timeout and trigger graceful shutdown on expiry.
pub async fn wait_for_block_persistence(&self, block_number: u64) -> Option<()> {
let (tx, rx) = oneshot::channel();
self.event_tx
Expand Down Expand Up @@ -1366,6 +1408,8 @@ where
EthApi: EthCall,
EthApi::NetworkTypes: RpcTypes<TransactionRequest = TransactionRequest>,
{
// TODO(GRETH-036): All channels below are unbounded. Replace with bounded channels
// (capacity 32-64) to provide backpressure from execution to consensus layer.
let (ordered_block_tx, ordered_block_rx) = tokio::sync::mpsc::unbounded_channel();
let (execution_result_tx, execution_result_rx) = tokio::sync::mpsc::unbounded_channel();
let verified_block_hash_ch = Arc::new(Channel::new());
Expand Down
Loading