From 76a6a8f50ecbae57203c519cf78fa3b2c7e613b4 Mon Sep 17 00:00:00 2001 From: Roman Krasiuk Date: Thu, 1 Feb 2024 10:09:00 +0100 Subject: [PATCH] refactor(trie): hashed state (#6244) --- .../commands/debug_cmd/in_memory_merkle.rs | 13 +- .../src/bundle_state/hashed_state_changes.rs | 37 +- .../src/providers/state/historical.rs | 2 - crates/trie/src/hashed_cursor/post_state.rs | 135 ++++--- crates/trie/src/lib.rs | 2 +- crates/trie/src/state.rs | 337 +++++++----------- 6 files changed, 212 insertions(+), 314 deletions(-) diff --git a/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs b/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs index 858520ecb..44d464c38 100644 --- a/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs +++ b/bin/reth/src/commands/debug_cmd/in_memory_merkle.rs @@ -25,7 +25,7 @@ use reth_provider::{ StorageReader, }; use reth_tasks::TaskExecutor; -use reth_trie::{hashed_cursor::HashedPostStateCursorFactory, updates::TrieKey, StateRoot}; +use reth_trie::{updates::TrieKey, StateRoot}; use std::{ net::{SocketAddr, SocketAddrV4}, path::PathBuf, @@ -181,15 +181,8 @@ impl Command { let block_state = executor.take_output_state(); // Unpacked `BundleState::state_root_slow` function - let hashed_post_state = block_state.hash_state_slow().sorted(); - let (account_prefix_set, storage_prefix_set) = hashed_post_state.construct_prefix_sets(); - let tx = provider.tx_ref(); - let hashed_cursor_factory = HashedPostStateCursorFactory::new(tx, &hashed_post_state); - let (in_memory_state_root, in_memory_updates) = StateRoot::from_tx(tx) - .with_hashed_cursor_factory(hashed_cursor_factory) - .with_changed_account_prefixes(account_prefix_set) - .with_changed_storage_prefixes(storage_prefix_set) - .root_with_updates()?; + let (in_memory_state_root, in_memory_updates) = + block_state.hash_state_slow().state_root_with_updates(provider.tx_ref())?; if in_memory_state_root == block.state_root { info!(target: "reth::cli", state_root = ?in_memory_state_root, "Computed 
in-memory state root matches"); diff --git a/crates/storage/provider/src/bundle_state/hashed_state_changes.rs b/crates/storage/provider/src/bundle_state/hashed_state_changes.rs index 6ef152d12..0c14ec49f 100644 --- a/crates/storage/provider/src/bundle_state/hashed_state_changes.rs +++ b/crates/storage/provider/src/bundle_state/hashed_state_changes.rs @@ -1,12 +1,12 @@ +use itertools::Itertools; use reth_db::{ cursor::{DbCursorRO, DbCursorRW, DbDupCursorRO, DbDupCursorRW}, tables, transaction::{DbTx, DbTxMut}, DatabaseError, }; -use reth_primitives::{Account, StorageEntry, B256, U256}; +use reth_primitives::{StorageEntry, U256}; use reth_trie::HashedPostState; -use std::collections::BTreeMap; /// Changes to the hashed state. #[derive(Debug, Default)] @@ -15,15 +15,10 @@ pub struct HashedStateChanges(pub HashedPostState); impl HashedStateChanges { /// Write the bundle state to the database. pub fn write_to_db(self, tx: &TX) -> Result<(), DatabaseError> { - // Collect hashed account changes. - let mut hashed_accounts = BTreeMap::>::default(); - for (hashed_address, account) in self.0.accounts() { - hashed_accounts.insert(hashed_address, account); - } - // Write hashed account updates. + let sorted_accounts = self.0.accounts.into_iter().sorted_unstable_by_key(|(key, _)| *key); let mut hashed_accounts_cursor = tx.cursor_write::()?; - for (hashed_address, account) in hashed_accounts { + for (hashed_address, account) in sorted_accounts { if let Some(account) = account { hashed_accounts_cursor.upsert(hashed_address, account)?; } else if hashed_accounts_cursor.seek_exact(hashed_address)?.is_some() { @@ -31,24 +26,16 @@ impl HashedStateChanges { } } - // Collect hashed storage changes. 
- let mut hashed_storages = BTreeMap::)>::default(); - for (hashed_address, storage) in self.0.storages() { - let entry = hashed_storages.entry(*hashed_address).or_default(); - entry.0 |= storage.wiped(); - for (hashed_slot, value) in storage.storage_slots() { - entry.1.insert(hashed_slot, value); - } - } - // Write hashed storage changes. + let sorted_storages = self.0.storages.into_iter().sorted_by_key(|(key, _)| *key); let mut hashed_storage_cursor = tx.cursor_dup_write::()?; - for (hashed_address, (wiped, storage)) in hashed_storages { - if wiped && hashed_storage_cursor.seek_exact(hashed_address)?.is_some() { + for (hashed_address, storage) in sorted_storages { + if storage.wiped && hashed_storage_cursor.seek_exact(hashed_address)?.is_some() { hashed_storage_cursor.delete_current_duplicates()?; } - for (hashed_slot, value) in storage { + let sorted_storage = storage.storage.into_iter().sorted_by_key(|(key, _)| *key); + for (hashed_slot, value) in sorted_storage { let entry = StorageEntry { key: hashed_slot, value }; if let Some(db_entry) = hashed_storage_cursor.seek_by_key_subkey(hashed_address, entry.key)? 
@@ -72,7 +59,7 @@ impl HashedStateChanges { mod tests { use super::*; use crate::test_utils::create_test_provider_factory; - use reth_primitives::{keccak256, Address}; + use reth_primitives::{keccak256, Account, Address, B256}; use reth_trie::HashedStorage; #[test] @@ -104,8 +91,8 @@ mod tests { } let mut hashed_state = HashedPostState::default(); - hashed_state.insert_account(destroyed_address_hashed, None); - hashed_state.insert_hashed_storage(destroyed_address_hashed, HashedStorage::new(true)); + hashed_state.accounts.insert(destroyed_address_hashed, None); + hashed_state.storages.insert(destroyed_address_hashed, HashedStorage::new(true)); let provider_rw = provider_factory.provider_rw().unwrap(); assert_eq!(HashedStateChanges(hashed_state).write_to_db(provider_rw.tx_ref()), Ok(())); diff --git a/crates/storage/provider/src/providers/state/historical.rs b/crates/storage/provider/src/providers/state/historical.rs index f2892ee7d..8cc9bb347 100644 --- a/crates/storage/provider/src/providers/state/historical.rs +++ b/crates/storage/provider/src/providers/state/historical.rs @@ -228,7 +228,6 @@ impl<'b, TX: DbTx> StateRootProvider for HistoricalStateProviderRef<'b, TX> { fn state_root(&self, state: &BundleStateWithReceipts) -> ProviderResult { let mut revert_state = self.revert_state()?; revert_state.extend(state.hash_state_slow()); - revert_state.sort(); revert_state.state_root(self.tx).map_err(|err| ProviderError::Database(err.into())) } @@ -238,7 +237,6 @@ impl<'b, TX: DbTx> StateRootProvider for HistoricalStateProviderRef<'b, TX> { ) -> ProviderResult<(B256, TrieUpdates)> { let mut revert_state = self.revert_state()?; revert_state.extend(state.hash_state_slow()); - revert_state.sort(); revert_state .state_root_with_updates(self.tx) .map_err(|err| ProviderError::Database(err.into())) diff --git a/crates/trie/src/hashed_cursor/post_state.rs b/crates/trie/src/hashed_cursor/post_state.rs index 2baf72a2d..8f1ec137e 100644 --- 
a/crates/trie/src/hashed_cursor/post_state.rs +++ b/crates/trie/src/hashed_cursor/post_state.rs @@ -1,17 +1,17 @@ use super::{HashedAccountCursor, HashedCursorFactory, HashedStorageCursor}; -use crate::state::HashedPostState; +use crate::state::HashedPostStateSorted; use reth_primitives::{Account, StorageEntry, B256, U256}; /// The hashed cursor factory for the post state. #[derive(Debug, Clone)] pub struct HashedPostStateCursorFactory<'a, CF> { cursor_factory: CF, - post_state: &'a HashedPostState, + post_state: &'a HashedPostStateSorted, } impl<'a, CF> HashedPostStateCursorFactory<'a, CF> { /// Create a new factory. - pub fn new(cursor_factory: CF, post_state: &'a HashedPostState) -> Self { + pub fn new(cursor_factory: CF, post_state: &'a HashedPostStateSorted) -> Self { Self { cursor_factory, post_state } } } @@ -37,8 +37,8 @@ impl<'a, CF: HashedCursorFactory> HashedCursorFactory for HashedPostStateCursorF pub struct HashedPostStateAccountCursor<'b, C> { /// The database cursor. cursor: C, - /// The reference to the in-memory [HashedPostState]. - post_state: &'b HashedPostState, + /// The reference to the in-memory [HashedPostStateSorted]. + post_state: &'b HashedPostStateSorted, /// The post state account index where the cursor is currently at. post_state_account_index: usize, /// The last hashed account that was returned by the cursor. @@ -48,7 +48,7 @@ pub struct HashedPostStateAccountCursor<'b, C> { impl<'b, C> HashedPostStateAccountCursor<'b, C> { /// Create new instance of [HashedPostStateAccountCursor]. - pub fn new(cursor: C, post_state: &'b HashedPostState) -> Self { + pub fn new(cursor: C, post_state: &'b HashedPostStateSorted) -> Self { Self { cursor, post_state, last_account: None, post_state_account_index: 0 } } @@ -98,8 +98,6 @@ where /// The returned account key is memoized and the cursor remains positioned at that key until /// [HashedAccountCursor::seek] or [HashedAccountCursor::next] are called. 
fn seek(&mut self, key: B256) -> Result, reth_db::DatabaseError> { - debug_assert!(self.post_state.sorted, "`HashedPostState` must be pre-sorted"); - self.last_account = None; // Take the next account from the post state with the key greater than or equal to the @@ -144,8 +142,6 @@ where /// NOTE: This function will not return any entry unless [HashedAccountCursor::seek] has been /// called. fn next(&mut self) -> Result, reth_db::DatabaseError> { - debug_assert!(self.post_state.sorted, "`HashedPostState` must be pre-sorted"); - let last_account = match self.last_account.as_ref() { Some(account) => account, None => return Ok(None), // no previous entry was found @@ -182,7 +178,7 @@ pub struct HashedPostStateStorageCursor<'b, C> { /// The database cursor. cursor: C, /// The reference to the post state. - post_state: &'b HashedPostState, + post_state: &'b HashedPostStateSorted, /// The post state index where the cursor is currently at. post_state_storage_index: usize, /// The current hashed account key. @@ -194,7 +190,7 @@ pub struct HashedPostStateStorageCursor<'b, C> { impl<'b, C> HashedPostStateStorageCursor<'b, C> { /// Create new instance of [HashedPostStateStorageCursor]. - pub fn new(cursor: C, post_state: &'b HashedPostState) -> Self { + pub fn new(cursor: C, post_state: &'b HashedPostStateSorted) -> Self { Self { cursor, post_state, account: None, last_slot: None, post_state_storage_index: 0 } } @@ -279,8 +275,6 @@ where // Attempt to find the account's storage in post state. let mut post_state_entry = None; if let Some(storage) = self.post_state.storages.get(&account) { - debug_assert!(storage.sorted, "`HashedStorage` must be pre-sorted"); - post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index); while post_state_entry.map(|(slot, _)| slot < &subkey).unwrap_or_default() { @@ -358,8 +352,6 @@ where // Attempt to find the account's storage in post state. 
let mut post_state_entry = None; if let Some(storage) = self.post_state.storages.get(&account) { - debug_assert!(storage.sorted, "`HashedStorage` must be pre-sorted"); - post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index); while post_state_entry.map(|(slot, _)| slot <= last_slot).unwrap_or_default() { self.post_state_storage_index += 1; @@ -376,9 +368,8 @@ where #[cfg(test)] mod tests { - use crate::HashedStorage; - use super::*; + use crate::{HashedPostState, HashedStorage}; use proptest::prelude::*; use reth_db::{ database::Database, tables, test_utils::create_test_rw_db, transaction::DbTxMut, @@ -430,14 +421,14 @@ mod tests { let mut hashed_post_state = HashedPostState::default(); for (hashed_address, account) in &accounts { - hashed_post_state.insert_account(*hashed_address, Some(*account)); + hashed_post_state.accounts.insert(*hashed_address, Some(*account)); } - hashed_post_state.sort(); let db = create_test_rw_db(); - let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let sorted = hashed_post_state.into_sorted(); + let tx = db.tx().unwrap(); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -454,9 +445,9 @@ mod tests { }) .unwrap(); + let sorted_post_state = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); - let post_state = HashedPostState::default(); - let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted_post_state); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -476,12 +467,12 @@ mod tests { let mut hashed_post_state = HashedPostState::default(); for (hashed_address, account) in accounts.iter().filter(|x| x.0[31] % 2 != 0) { - hashed_post_state.insert_account(*hashed_address, Some(*account)); + hashed_post_state.accounts.insert(*hashed_address, Some(*account)); } - 
hashed_post_state.sort(); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -503,14 +494,15 @@ mod tests { let mut hashed_post_state = HashedPostState::default(); for (hashed_address, account) in accounts.iter().filter(|x| x.0[31] % 2 != 0) { - let account_info = - if removed_keys.contains(hashed_address) { None } else { Some(*account) }; - hashed_post_state.insert_account(*hashed_address, account_info) + hashed_post_state.accounts.insert( + *hashed_address, + if removed_keys.contains(hashed_address) { None } else { Some(*account) }, + ); } - hashed_post_state.sort(); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let expected = accounts.into_iter().filter(|x| !removed_keys.contains(&x.0)); assert_account_cursor_order(&factory, expected); } @@ -532,12 +524,12 @@ mod tests { let mut hashed_post_state = HashedPostState::default(); for (hashed_address, account) in &accounts { - hashed_post_state.insert_account(*hashed_address, Some(*account)); + hashed_post_state.accounts.insert(*hashed_address, Some(*account)); } - hashed_post_state.sort(); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); assert_account_cursor_order(&factory, accounts.into_iter()); } @@ -554,9 +546,8 @@ mod tests { let mut hashed_post_state = HashedPostState::default(); for (hashed_address, account) in &post_state_accounts { - hashed_post_state.insert_account(*hashed_address, *account); + hashed_post_state.accounts.insert(*hashed_address, 
*account); } - hashed_post_state.sort(); let mut expected = db_accounts; // overwrite or remove accounts from the expected result @@ -568,8 +559,9 @@ mod tests { } } + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); assert_account_cursor_order(&factory, expected.into_iter()); } ); @@ -582,9 +574,9 @@ mod tests { // empty from the get go { - let post_state = HashedPostState::default(); + let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let mut cursor = factory.hashed_storage_cursor().unwrap(); assert!(cursor.is_storage_empty(address).unwrap()); } @@ -605,9 +597,9 @@ mod tests { // not empty { - let post_state = HashedPostState::default(); + let sorted = HashedPostState::default().into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let mut cursor = factory.hashed_storage_cursor().unwrap(); assert!(!cursor.is_storage_empty(address).unwrap()); } @@ -618,10 +610,11 @@ mod tests { let hashed_storage = HashedStorage::new(wiped); let mut hashed_post_state = HashedPostState::default(); - hashed_post_state.insert_hashed_storage(address, hashed_storage); + hashed_post_state.storages.insert(address, hashed_storage); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let mut cursor = factory.hashed_storage_cursor().unwrap(); assert!(cursor.is_storage_empty(address).unwrap()); } @@ -630,13 +623,14 @@ mod tests { { let wiped = true; let mut hashed_storage = 
HashedStorage::new(wiped); - hashed_storage.insert_slot(B256::random(), U256::ZERO); + hashed_storage.storage.insert(B256::random(), U256::ZERO); let mut hashed_post_state = HashedPostState::default(); - hashed_post_state.insert_hashed_storage(address, hashed_storage); + hashed_post_state.storages.insert(address, hashed_storage); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let mut cursor = factory.hashed_storage_cursor().unwrap(); assert!(cursor.is_storage_empty(address).unwrap()); } @@ -645,13 +639,14 @@ mod tests { { let wiped = true; let mut hashed_storage = HashedStorage::new(wiped); - hashed_storage.insert_slot(B256::random(), U256::from(1)); + hashed_storage.storage.insert(B256::random(), U256::from(1)); let mut hashed_post_state = HashedPostState::default(); - hashed_post_state.insert_hashed_storage(address, hashed_storage); + hashed_post_state.storages.insert(address, hashed_storage); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let mut cursor = factory.hashed_storage_cursor().unwrap(); assert!(!cursor.is_storage_empty(address).unwrap()); } @@ -681,15 +676,15 @@ mod tests { let wiped = false; let mut hashed_storage = HashedStorage::new(wiped); for (slot, value) in post_state_storage.iter() { - hashed_storage.insert_slot(*slot, *value); + hashed_storage.storage.insert(*slot, *value); } let mut hashed_post_state = HashedPostState::default(); - hashed_post_state.insert_hashed_storage(address, hashed_storage); - hashed_post_state.sort(); + hashed_post_state.storages.insert(address, hashed_storage); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = 
HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let expected = [(address, db_storage.into_iter().chain(post_state_storage).collect())].into_iter(); assert_storage_cursor_order(&factory, expected); @@ -717,15 +712,15 @@ mod tests { let wiped = false; let mut hashed_storage = HashedStorage::new(wiped); for (slot, value) in post_state_storage.iter() { - hashed_storage.insert_slot(*slot, *value); + hashed_storage.storage.insert(*slot, *value); } let mut hashed_post_state = HashedPostState::default(); - hashed_post_state.insert_hashed_storage(address, hashed_storage); - hashed_post_state.sort(); + hashed_post_state.storages.insert(address, hashed_storage); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let expected = [( address, post_state_storage.into_iter().filter(|(_, value)| *value > U256::ZERO).collect(), @@ -755,15 +750,15 @@ mod tests { let wiped = true; let mut hashed_storage = HashedStorage::new(wiped); for (slot, value) in post_state_storage.iter() { - hashed_storage.insert_slot(*slot, *value); + hashed_storage.storage.insert(*slot, *value); } let mut hashed_post_state = HashedPostState::default(); - hashed_post_state.insert_hashed_storage(address, hashed_storage); - hashed_post_state.sort(); + hashed_post_state.storages.insert(address, hashed_storage); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let expected = [(address, post_state_storage)].into_iter(); assert_storage_cursor_order(&factory, expected); } @@ -790,15 +785,15 @@ mod tests { let wiped = false; let mut hashed_storage = HashedStorage::new(wiped); for (slot, value) in storage.iter() { - 
hashed_storage.insert_slot(*slot, *value); + hashed_storage.storage.insert(*slot, *value); } let mut hashed_post_state = HashedPostState::default(); - hashed_post_state.insert_hashed_storage(address, hashed_storage); - hashed_post_state.sort(); + hashed_post_state.storages.insert(address, hashed_storage); + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); let expected = [(address, storage)].into_iter(); assert_storage_cursor_order(&factory, expected); } @@ -827,12 +822,11 @@ mod tests { for (address, (wiped, storage)) in &post_state_storages { let mut hashed_storage = HashedStorage::new(*wiped); for (slot, value) in storage { - hashed_storage.insert_slot(*slot, *value); + hashed_storage.storage.insert(*slot, *value); } - hashed_post_state.insert_hashed_storage(*address, hashed_storage); + hashed_post_state.storages.insert(*address, hashed_storage); } - hashed_post_state.sort(); let mut expected = db_storages; // overwrite or remove accounts from the expected result @@ -844,8 +838,9 @@ mod tests { entry.extend(storage); } + let sorted = hashed_post_state.into_sorted(); let tx = db.tx().unwrap(); - let factory = HashedPostStateCursorFactory::new(&tx, &hashed_post_state); + let factory = HashedPostStateCursorFactory::new(&tx, &sorted); assert_storage_cursor_order(&factory, expected.into_iter()); }); } diff --git a/crates/trie/src/lib.rs b/crates/trie/src/lib.rs index 189bd8778..d81af7799 100644 --- a/crates/trie/src/lib.rs +++ b/crates/trie/src/lib.rs @@ -34,7 +34,7 @@ pub(crate) mod node_iter; /// In-memory hashed state. mod state; -pub use state::{HashedPostState, HashedStorage}; +pub use state::*; /// Merkle proof generation. 
pub mod proof; diff --git a/crates/trie/src/state.rs b/crates/trie/src/state.rs index 6d195cc5f..adab30800 100644 --- a/crates/trie/src/state.rs +++ b/crates/trie/src/state.rs @@ -22,28 +22,13 @@ use std::{ ops::RangeInclusive, }; -/// The post state with hashed addresses as keys. -#[derive(Debug, Clone, Eq, PartialEq)] +/// Representation of in-memory hashed state. +#[derive(PartialEq, Eq, Clone, Default, Debug)] pub struct HashedPostState { - /// Collection of hashed addresses and their account info. - pub(crate) accounts: Vec<(B256, Account)>, - /// Set of destroyed account keys. - pub(crate) destroyed_accounts: AHashSet, - /// Map of hashed addresses to hashed storage. - pub(crate) storages: AHashMap, - /// Flag indicating whether the account and storage entries were sorted. - pub(crate) sorted: bool, -} - -impl Default for HashedPostState { - fn default() -> Self { - Self { - accounts: Vec::new(), - destroyed_accounts: AHashSet::new(), - storages: AHashMap::new(), - sorted: true, // empty is sorted - } - } + /// Mapping of hashed address to account info, `None` if destroyed. + pub accounts: AHashMap>, + /// Mapping of hashed address to hashed storage. + pub storages: AHashMap, } impl HashedPostState { @@ -54,21 +39,19 @@ impl HashedPostState { state: impl IntoIterator, ) -> Self { let mut this = Self::default(); - for (address, account) in state { let hashed_address = keccak256(address); - this.insert_account(hashed_address, account.info.clone().map(into_reth_acc)); + this.accounts.insert(hashed_address, account.info.clone().map(into_reth_acc)); - // insert storage. 
- let mut hashed_storage = HashedStorage::new(account.status.was_destroyed()); - - for (key, value) in account.storage.iter() { - let hashed_key = keccak256(B256::new(key.to_be_bytes())); - hashed_storage.insert_slot(hashed_key, value.present_value); - } - this.insert_hashed_storage(hashed_address, hashed_storage) + let hashed_storage = HashedStorage::from_iter( + account.status.was_destroyed(), + account.storage.iter().map(|(key, value)| { + (keccak256(B256::new(key.to_be_bytes())), value.present_value) + }), + ); + this.storages.insert(hashed_address, hashed_storage); } - this.sorted() + this } /// Initialize [HashedPostState] from revert range. @@ -81,77 +64,48 @@ impl HashedPostState { tx: &TX, range: RangeInclusive, ) -> Result { - // A single map for aggregating state changes where each map value is a tuple - // `(maybe_account_change, storage_changes)`. - // If `maybe_account_change` is `None`, no account info change had occurred. - // If `maybe_account_change` is `Some(None)`, the account had previously been destroyed - // or non-existent. - // If `maybe_account_change` is `Some(Some(info))`, the contained value is the previous - // account state. - let mut state = - HashMap::>, HashMap)>::default(); + let mut this = Self::default(); - // Iterate over account changesets in reverse. + // Iterate over account changesets and record value before first occurring account change. let mut account_changesets_cursor = tx.cursor_read::()?; for entry in account_changesets_cursor.walk_range(range.clone())? { let (_, AccountBeforeTx { address, info }) = entry?; - let account_entry = state.entry(address).or_default(); - if account_entry.0.is_none() { - account_entry.0 = Some(info); + let hashed_address = keccak256(address); // TODO: cache hashes? + if let hash_map::Entry::Vacant(entry) = this.accounts.entry(hashed_address) { + entry.insert(info); } } - // Iterate over storage changesets in reverse. 
+ // Iterate over storage changesets and record value before first occurring storage change. + let mut storages = AHashMap::>::default(); let mut storage_changesets_cursor = tx.cursor_read::()?; for entry in storage_changesets_cursor.walk_range(BlockNumberAddress::range(range))? { let (BlockNumberAddress((_, address)), storage) = entry?; - let account_entry = state.entry(address).or_default(); - if let hash_map::Entry::Vacant(entry) = account_entry.1.entry(storage.key) { + let account_storage = storages.entry(address).or_default(); + if let hash_map::Entry::Vacant(entry) = account_storage.entry(storage.key) { entry.insert(storage.value); } } - let mut this = Self::default(); - for (address, (maybe_account_change, storage)) in state { - let hashed_address = keccak256(address); - - if let Some(account_change) = maybe_account_change { - this.insert_account(hashed_address, account_change); - } - - // The `wiped` flag indicates only whether previous storage entries should be looked + for (address, storage) in storages { + // The `wiped` flag indicates only whether previous storage entries should be looked // up in db or not. For reverts it's a noop since all wiped changes had been written as // storage reverts. - let mut hashed_storage = HashedStorage::new(false); - for (slot, value) in storage { - hashed_storage.insert_slot(keccak256(slot), value); - } - this.insert_hashed_storage(hashed_address, hashed_storage); + let hashed_storage = HashedStorage::from_iter( + false, + storage.into_iter().map(|(slot, value)| (keccak256(slot), value)), + ); + this.storages.insert(keccak256(address), hashed_storage); } - Ok(this.sorted()) + Ok(this) } /// Extend this hashed post state with contents of another. /// Entries in the second hashed post state take precedence. pub fn extend(&mut self, other: Self) { - // Merge accounts and insert them into extended state. - let mut accounts: HashMap> = HashMap::from_iter( - self.accounts - .drain(..) 
- .map(|(hashed_address, account)| (hashed_address, Some(account))) - .chain( - self.destroyed_accounts.drain().map(|hashed_address| (hashed_address, None)), - ), - ); for (hashed_address, account) in other.accounts { - accounts.insert(hashed_address, Some(account)); - } - for hashed_address in other.destroyed_accounts { - accounts.insert(hashed_address, None); - } - for (hashed_address, account) in accounts { - self.insert_account(hashed_address, account); + self.accounts.insert(hashed_address, account); } for (hashed_address, storage) in other.storages { @@ -166,55 +120,26 @@ impl HashedPostState { } } - /// Sort and return self. - pub fn sorted(mut self) -> Self { - self.sort(); - self - } - - /// Returns all accounts with their state. - pub fn accounts(&self) -> impl Iterator)> + '_ { - self.destroyed_accounts.iter().map(|hashed_address| (*hashed_address, None)).chain( - self.accounts.iter().map(|(hashed_address, account)| (*hashed_address, Some(*account))), - ) - } - - /// Returns all account storages. - pub fn storages(&self) -> impl Iterator { - self.storages.iter() - } - - /// Sort account and storage entries. - pub fn sort(&mut self) { - if !self.sorted { - for (_, storage) in self.storages.iter_mut() { - storage.sort_storage(); + /// Converts hashed post state into [HashedPostStateSorted]. + pub fn into_sorted(self) -> HashedPostStateSorted { + let mut accounts = Vec::new(); + let mut destroyed_accounts = AHashSet::default(); + for (hashed_address, info) in self.accounts { + if let Some(info) = info { + accounts.push((hashed_address, info)); + } else { + destroyed_accounts.insert(hashed_address); } - - self.accounts.sort_unstable_by_key(|(address, _)| *address); - self.sorted = true; } - } + accounts.sort_unstable_by_key(|(address, _)| *address); - /// Insert account. If `account` is `None`, the account had previously been destroyed. 
- pub fn insert_account(&mut self, hashed_address: B256, account: Option) { - if let Some(account) = account { - self.accounts.push((hashed_address, account)); - self.sorted = false; - } else { - self.destroyed_accounts.insert(hashed_address); - } - } + let storages = self + .storages + .into_iter() + .map(|(hashed_address, storage)| (hashed_address, storage.into_sorted())) + .collect(); - /// Insert hashed storage entry. - pub fn insert_hashed_storage(&mut self, hashed_address: B256, hashed_storage: HashedStorage) { - self.sorted &= hashed_storage.sorted; - self.storages.insert(hashed_address, hashed_storage); - } - - /// Returns all destroyed accounts. - pub fn destroyed_accounts(&self) -> AHashSet { - self.destroyed_accounts.clone() + HashedPostStateSorted { accounts, destroyed_accounts, storages } } /// Construct [PrefixSet] from hashed post state. @@ -229,19 +154,13 @@ impl HashedPostState { for (hashed_address, _) in &self.accounts { account_prefix_set.insert(Nibbles::unpack(hashed_address)); } - for hashed_address in &self.destroyed_accounts { - account_prefix_set.insert(Nibbles::unpack(hashed_address)); - } // Populate storage prefix sets. for (hashed_address, hashed_storage) in self.storages.iter() { account_prefix_set.insert(Nibbles::unpack(hashed_address)); let storage_prefix_set_entry = storage_prefix_set.entry(*hashed_address).or_default(); - for (hashed_slot, _) in &hashed_storage.non_zero_valued_slots { - storage_prefix_set_entry.insert(Nibbles::unpack(hashed_slot)); - } - for hashed_slot in &hashed_storage.zero_valued_slots { + for (hashed_slot, _) in &hashed_storage.storage { storage_prefix_set_entry.insert(Nibbles::unpack(hashed_slot)); } } @@ -252,21 +171,6 @@ impl HashedPostState { ) } - /// Returns [StateRoot] calculator based on database and in-memory state. 
- fn state_root_calculator<'a, TX: DbTx>( - &self, - tx: &'a TX, - ) -> StateRoot<&'a TX, HashedPostStateCursorFactory<'_, &'a TX>> { - assert!(self.sorted, "Hashed post state must be sorted for state root calculation"); - let (account_prefix_set, storage_prefix_set) = self.construct_prefix_sets(); - let hashed_cursor_factory = HashedPostStateCursorFactory::new(tx, self); - StateRoot::from_tx(tx) - .with_hashed_cursor_factory(hashed_cursor_factory) - .with_changed_account_prefixes(account_prefix_set) - .with_changed_storage_prefixes(storage_prefix_set) - .with_destroyed_accounts(self.destroyed_accounts()) - } - /// Calculate the state root for this [HashedPostState]. /// Internally, this method retrieves prefixsets and uses them /// to calculate incremental state root. @@ -283,11 +187,10 @@ impl HashedPostState { /// /// // Initialize hashed post state /// let mut hashed_state = HashedPostState::default(); - /// hashed_state.insert_account( + /// hashed_state.accounts.insert( /// [0x11; 32].into(), /// Some(Account { nonce: 1, balance: U256::from(10), bytecode_hash: None }), /// ); - /// hashed_state.sort(); /// /// // Calculate the state root /// let tx = db.tx().expect("failed to create transaction"); @@ -298,7 +201,13 @@ impl HashedPostState { /// /// The state root for this [HashedPostState]. pub fn state_root(&self, tx: &TX) -> Result { - self.state_root_calculator(tx).root() + let sorted = self.clone().into_sorted(); + let (account_prefix_set, storage_prefix_set) = self.construct_prefix_sets(); + sorted + .state_root_calculator(tx) + .with_changed_account_prefixes(account_prefix_set) + .with_changed_storage_prefixes(storage_prefix_set) + .root() } /// Calculates the state root for this [HashedPostState] and returns it alongside trie updates. 
@@ -307,82 +216,98 @@ impl HashedPostState { &self, tx: &TX, ) -> Result<(B256, TrieUpdates), StateRootError> { - self.state_root_calculator(tx).root_with_updates() + let sorted = self.clone().into_sorted(); + let (account_prefix_set, storage_prefix_set) = self.construct_prefix_sets(); + sorted + .state_root_calculator(tx) + .with_changed_account_prefixes(account_prefix_set) + .with_changed_storage_prefixes(storage_prefix_set) + .root_with_updates() } } -/// The post state account storage with hashed slots. -#[derive(Clone, Eq, PartialEq, Debug)] +/// Representation of in-memory hashed storage. +#[derive(PartialEq, Eq, Clone, Debug)] pub struct HashedStorage { - /// Hashed storage slots with non-zero. - pub(crate) non_zero_valued_slots: Vec<(B256, U256)>, - /// Slots that have been zero valued. - pub(crate) zero_valued_slots: AHashSet, - /// Whether the storage was wiped or not. - pub(crate) wiped: bool, - /// Whether the storage entries were sorted or not. - pub(crate) sorted: bool, + /// Flag indicating whether the storage was wiped or not. + pub wiped: bool, + /// Mapping of hashed storage slot to storage value. + pub storage: AHashMap, } impl HashedStorage { /// Create new instance of [HashedStorage]. pub fn new(wiped: bool) -> Self { - Self { - non_zero_valued_slots: Vec::new(), - zero_valued_slots: AHashSet::new(), - wiped, - sorted: true, // empty is sorted - } + Self { wiped, storage: AHashMap::default() } + } + + /// Create new hashed storage from iterator. + pub fn from_iter(wiped: bool, iter: impl IntoIterator) -> Self { + Self { wiped, storage: AHashMap::from_iter(iter) } } /// Extend hashed storage with contents of other. /// The entries in second hashed storage take precedence. 
pub fn extend(&mut self, other: Self) { - let mut entries: HashMap = - HashMap::from_iter(self.non_zero_valued_slots.drain(..).chain( - self.zero_valued_slots.drain().map(|hashed_slot| (hashed_slot, U256::ZERO)), - )); - for (hashed_slot, value) in other.non_zero_valued_slots { - entries.insert(hashed_slot, value); - } - for hashed_slot in other.zero_valued_slots { - entries.insert(hashed_slot, U256::ZERO); - } - for (hashed_slot, value) in entries { - self.insert_slot(hashed_slot, value); + for (hashed_slot, value) in other.storage { + self.storage.insert(hashed_slot, value); } self.wiped |= other.wiped; } - /// Returns `true` if the storage was wiped. - pub fn wiped(&self) -> bool { - self.wiped - } - - /// Returns all storage slots. - pub fn storage_slots(&self) -> impl Iterator + '_ { - self.zero_valued_slots - .iter() - .map(|slot| (*slot, U256::ZERO)) - .chain(self.non_zero_valued_slots.iter().cloned()) - } - - /// Sorts the non zero value storage entries. - pub fn sort_storage(&mut self) { - if !self.sorted { - self.non_zero_valued_slots.sort_unstable_by_key(|(slot, _)| *slot); - self.sorted = true; + /// Converts hashed storage into [HashedStorageSorted]. + pub fn into_sorted(self) -> HashedStorageSorted { + let mut non_zero_valued_slots = Vec::new(); + let mut zero_valued_slots = AHashSet::default(); + for (hashed_slot, value) in self.storage { + if value == U256::ZERO { + zero_valued_slots.insert(hashed_slot); + } else { + non_zero_valued_slots.push((hashed_slot, value)); + } } - } + non_zero_valued_slots.sort_unstable_by_key(|(key, _)| *key); - /// Insert storage entry. 
- #[inline] - pub fn insert_slot(&mut self, slot: B256, value: U256) { - if value.is_zero() { - self.zero_valued_slots.insert(slot); - } else { - self.non_zero_valued_slots.push((slot, value)); - self.sorted = false; - } + HashedStorageSorted { non_zero_valued_slots, zero_valued_slots, wiped: self.wiped } } } + +/// Sorted hashed post state optimized for iterating during state trie calculation. +#[derive(PartialEq, Eq, Clone, Debug)] +pub struct HashedPostStateSorted { + /// Sorted collection of hashed addresses and their account info. + pub(crate) accounts: Vec<(B256, Account)>, + /// Set of destroyed account keys. + pub(crate) destroyed_accounts: AHashSet, + /// Map of hashed addresses to hashed storage. + pub(crate) storages: AHashMap, +} + +impl HashedPostStateSorted { + /// Returns all destroyed accounts. + pub fn destroyed_accounts(&self) -> AHashSet { + self.destroyed_accounts.clone() + } + + /// Returns [StateRoot] calculator based on database and in-memory state. + fn state_root_calculator<'a, TX: DbTx>( + &self, + tx: &'a TX, + ) -> StateRoot<&'a TX, HashedPostStateCursorFactory<'_, &'a TX>> { + let hashed_cursor_factory = HashedPostStateCursorFactory::new(tx, self); + StateRoot::from_tx(tx) + .with_hashed_cursor_factory(hashed_cursor_factory) + .with_destroyed_accounts(self.destroyed_accounts()) + } +} + +/// Sorted hashed storage optimized for iterating during state trie calculation. +#[derive(Clone, Eq, PartialEq, Debug)] +pub struct HashedStorageSorted { + /// Sorted hashed storage slots with non-zero value. + pub(crate) non_zero_valued_slots: Vec<(B256, U256)>, + /// Slots that have been zero valued. + pub(crate) zero_valued_slots: AHashSet, + /// Flag indicating whether the storage was wiped or not. + pub(crate) wiped: bool, +}