From 1dd3fe685103bb7fd633e30cc322e870eb922e6c Mon Sep 17 00:00:00 2001 From: vbuilder69420 Date: Sat, 21 Mar 2026 22:09:22 +0000 Subject: [PATCH 1/2] perf: replace AccessListInspector with post-execution state-diff blocklist check Remove the AccessListInspector entirely from RBuilderEVMInspector. Replace the per-opcode blocklist tracking with a post-execution check against ResultAndState.state (EvmState = HashMap), which already contains every address touched during EVM execution. The AccessListInspector called step() on every EVM opcode to build an access list, solely used to check addresses against the blocklist. Profiling showed this inspector overhead consumed ~52% of CPU time. The EVM execution result already contains the same information in its state diff, making the inspector entirely redundant. Changes: - order_commit.rs: Use create_evm() (NoOpInspector) when no used_state_tracer is needed. Check blocklist via res.state.keys() after execution instead of via access list. - evm_inspector.rs: Remove AccessListInspector from RBuilderEVMInspector. The inspector now only wraps the optional UsedStateEVMInspector (used by parallel builder / EVM caching). This optimization works regardless of whether a blocklist is configured. Benchmark (builder-lab, 100 TPS, seed=42, 60s profiling window): | Metric | Before | After | Change | |---------------------|----------|----------|--------| | Block fill p50 | 96.8ms | 58.9ms | -39% | | Block fill p95 | 129.2ms | 87.1ms | -33% | | E2E latency p50 | 98ms | 61ms | -38% | | E2E latency p95 | 134ms | 92ms | -31% | | Blocks submitted | 255 | 342 | +34% | | Txs included | 17,882 | 23,449 | +31% | Co-Authored-By: Claude Opus 4.6 (1M context) --- .../rbuilder-primitives/src/evm_inspector.rs | 12 ------- crates/rbuilder/src/building/order_commit.rs | 33 +++++++++++++++++-- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/crates/rbuilder-primitives/src/evm_inspector.rs b/crates/rbuilder-primitives/src/evm_inspector.rs index 82dff8287..e6e246ad8 100644 --- a/crates/rbuilder-primitives/src/evm_inspector.rs +++ b/crates/rbuilder-primitives/src/evm_inspector.rs @@ -1,7 +1,6 @@ use ahash::HashMap; use alloy_consensus::Transaction; use alloy_primitives::{Address, B256, U256}; -use alloy_rpc_types::AccessList; use reth_primitives::{Recovered, TransactionSigned}; use revm::{ bytecode::opcode, @@ -10,7 +9,6 @@ use revm::{ interpreter::{interpreter_types::Jumps, CallInputs, CallOutcome, Interpreter}, Inspector, }; -use revm_inspectors::access_list::AccessListInspector; #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct SlotKey { @@ -257,7 +255,6 @@ where #[derive(Debug)] pub struct RBuilderEVMInspector<'a> { - access_list_inspector: AccessListInspector, used_state_inspector: Option>, } @@ -266,23 +263,15 @@ impl<'a> RBuilderEVMInspector<'a> { tx: &Recovered, used_state_trace: Option<&'a mut UsedStateTrace>, ) -> Self { - let access_list_inspector = - AccessListInspector::new(tx.access_list().cloned().unwrap_or_default()); - let mut used_state_inspector = used_state_trace.map(UsedStateEVMInspector::new); if let Some(i) = &mut used_state_inspector { i.use_tx_nonce(tx); } Self { - access_list_inspector, used_state_inspector, } } - - pub fn into_access_list(self) -> AccessList { - self.access_list_inspector.into_access_list() - } } impl<'a, CTX> Inspector for RBuilderEVMInspector<'a> @@ -292,7 +281,6 @@ where { #[inline] fn step(&mut self, interp: &mut Interpreter, context: &mut CTX) { - self.access_list_inspector.step(interp, context); if let Some(used_state_inspector) = &mut self.used_state_inspector { used_state_inspector.step(interp, context); } diff --git a/crates/rbuilder/src/building/order_commit.rs b/crates/rbuilder/src/building/order_commit.rs index 41dd0b676..d63cce820 100644 --- a/crates/rbuilder/src/building/order_commit.rs +++ b/crates/rbuilder/src/building/order_commit.rs @@ -1162,6 +1162,35 @@ where Factory: EvmFactory, { let tx = tx_with_blobs.internal_tx_unsecure(); + + // Skip the AccessListInspector entirely — it calls step() on every EVM opcode + // just to track accessed addresses for the blocklist check. Instead, we check + // the blocklist against ResultAndState.state (EvmState = HashMap) + // which already contains every address touched during execution. + // This eliminates ~50% of CPU overhead during block building. + if used_state_tracer.is_none() { + let mut evm = evm_factory.create_evm(db, evm_env); + let res = match evm.transact(tx) { + Ok(res) => res, + Err(err) => match err { + EVMError::Transaction(tx_err) => { + return Ok(Err(TransactionErr::InvalidTransaction(tx_err))) + } + EVMError::Database(_) | EVMError::Header(_) | EVMError::Custom(_) => { + return Err(err.into()) + } + }, + }; + // Check blocklist against addresses in the execution state diff + if !blocklist.is_empty() && res.state.keys().any(|addr| blocklist.contains(addr)) { + return Ok(Err(TransactionErr::Blocklist)); + } + return Ok(Ok(res)); + } + + // Slow path: used_state_tracer is active (parallel builder conflict detection). + // Still need the inspector for UsedStateEVMInspector, but we can skip AccessListInspector + // and use the state diff for blocklist checking instead. let mut rbuilder_inspector = RBuilderEVMInspector::new(tx, used_state_tracer); let mut evm = evm_factory.create_evm_with_inspector(db, evm_env, &mut rbuilder_inspector); @@ -1177,8 +1206,8 @@ where }, }; drop(evm); - let access_list = rbuilder_inspector.into_access_list(); - if access_list.flatten().any(|(a, _)| blocklist.contains(&a)) { + // Use state diff for blocklist check instead of access list + if !blocklist.is_empty() && res.state.keys().any(|addr| blocklist.contains(addr)) { return Ok(Err(TransactionErr::Blocklist)); } From b5a990936431425b2e3b45a4b37dfc30b9f72cb8 Mon Sep 17 00:00:00 2001 From: vbuilder69420 Date: Sun, 22 Mar 2026 03:44:39 +0000 Subject: [PATCH 2/2] perf: remove gas_limit from precompile cache key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The precompile cache key included `inputs.gas_limit` (remaining gas at call time), which varies per call even for identical precompile inputs. This prevented any cache hits — profiling showed 0 hits across 2.4M calls at high load. Precompile results are deterministic given `(spec_id, input_bytes)`. The gas_limit only determines whether the call has enough gas to complete, not the result itself. Removing it from the key allows the cache to actually hit. Evidence (before fix, 500 TPS): simulation_precompile_cache_hits 0 simulation_precompile_cache_misses 1,161,242 Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/rbuilder/src/building/precompile_cache.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/crates/rbuilder/src/building/precompile_cache.rs b/crates/rbuilder/src/building/precompile_cache.rs index 672f7f25c..1493cb355 100644 --- a/crates/rbuilder/src/building/precompile_cache.rs +++ b/crates/rbuilder/src/building/precompile_cache.rs @@ -16,8 +16,11 @@ use std::{num::NonZeroUsize, sync::Arc}; #[derive(Deref, DerefMut, Default, Debug)] pub struct PrecompileCache(HashMap); -/// Precompile result LRU cache stored by `(spec id, input, gas limit)` key. -pub type PrecompileResultCache = LruCache<(SpecId, Bytes, u64), Result>; +/// Precompile result LRU cache stored by `(spec id, input)` key. +/// gas_limit is excluded because precompile results are deterministic given the same +/// spec and input — the gas limit only affects whether the call has enough gas to +/// complete, not the result itself. +pub type PrecompileResultCache = LruCache<(SpecId, Bytes), Result>; /// A custom precompile that contains the cache and precompile it wraps. #[derive(Clone)] @@ -58,7 +61,7 @@ impl> Pre context: &mut CTX, inputs: &CallInputs, ) -> Result, String> { - let key = (self.spec, inputs.input.bytes(context), inputs.gas_limit); + let key = (self.spec, inputs.input.bytes(context)); // get the result if it exists if let Some(precompiles) = self.cache.lock().get_mut(&inputs.target_address) {