feat(trie): parallel storage roots (#6903)

2025-12-06 10:59:55 +00:00 · 2024-03-08 14:23:27 +01:00
parent 820b122409
commit 9569692d47
23 changed files with 1216 additions and 123 deletions
--- a/crates/trie/Cargo.toml
+++ b/crates/trie/Cargo.toml
@ -56,7 +56,6 @@ similar-asserts.workspace = true
 criterion.workspace = true

 [features]
-default = ["metrics"]
 metrics = ["reth-metrics", "dep:metrics"]
 test-utils = ["triehash"]

--- a/crates/trie/src/node_iter.rs
+++ b/crates/trie/src/node_iter.rs
@ -1,9 +1,9 @@
 use crate::{
-    hashed_cursor::{HashedAccountCursor, HashedStorageCursor},
+    hashed_cursor::{HashedAccountCursor, HashedCursorFactory, HashedStorageCursor},
    trie_cursor::TrieCursor,
    walker::TrieWalker,
-    StateRootError, StorageRootError,
 };
+use reth_db::DatabaseError;
 use reth_primitives::{trie::Nibbles, Account, StorageEntry, B256, U256};

 /// Represents a branch node in the trie.
@ -71,6 +71,14 @@ impl<C, H> AccountNodeIter<C, H> {
        }
    }

+    /// Creates a new `AccountNodeIter`, obtaining the hashed account cursor from the factory.
+    pub fn from_factory<F: HashedCursorFactory<AccountCursor = H>>(
+        walker: TrieWalker<C>,
+        factory: F,
+    ) -> Result<Self, DatabaseError> {
+        Ok(Self::new(walker, factory.hashed_account_cursor()?))
+    }
+
    /// Sets the last iterated account key and returns the modified `AccountNodeIter`.
    /// This is used to resume iteration from the last checkpoint.
    pub fn with_last_account_key(mut self, previous_account_key: B256) -> Self {
@ -95,7 +103,7 @@ where
    /// 5. Repeat.
    ///
    /// NOTE: The iteration will start from the key of the previous hashed entry if it was supplied.
-    pub fn try_next(&mut self) -> Result<Option<AccountNode>, StateRootError> {
+    pub fn try_next(&mut self) -> Result<Option<AccountNode>, DatabaseError> {
        loop {
            // If the walker has a key...
            if let Some(key) = self.walker.key() {
@ -194,7 +202,7 @@ where
    /// 3. Reposition the hashed storage cursor on the next unprocessed key.
    /// 4. Return every hashed storage entry up to the key of the current intermediate branch node.
    /// 5. Repeat.
-    pub fn try_next(&mut self) -> Result<Option<StorageNode>, StorageRootError> {
+    pub fn try_next(&mut self) -> Result<Option<StorageNode>, DatabaseError> {
        loop {
            // Check if there's a key in the walker.
            if let Some(key) = self.walker.key() {
--- a/crates/trie/src/trie.rs
+++ b/crates/trie/src/trie.rs
@ -1,7 +1,7 @@
 use crate::{
    hashed_cursor::{HashedCursorFactory, HashedStorageCursor},
    node_iter::{AccountNode, AccountNodeIter, StorageNode, StorageNodeIter},
-    prefix_set::{PrefixSet, PrefixSetLoader, PrefixSetMut, TriePrefixSets},
+    prefix_set::{PrefixSet, PrefixSetLoader, TriePrefixSets},
    progress::{IntermediateStateRootState, StateRootProgress},
    stats::TrieTracker,
    trie_cursor::TrieCursorFactory,
@ -214,31 +214,32 @@ where
        let mut tracker = TrieTracker::default();
        let mut trie_updates = TrieUpdates::default();

-        let hashed_account_cursor = self.hashed_cursor_factory.hashed_account_cursor()?;
        let trie_cursor = self.trie_cursor_factory.account_trie_cursor()?;

        let (mut hash_builder, mut account_node_iter) = match self.previous_state {
            Some(state) => {
+                let hash_builder = state.hash_builder.with_updates(retain_updates);
                let walker = TrieWalker::from_stack(
                    trie_cursor,
                    state.walker_stack,
                    self.prefix_sets.account_prefix_set,
-                );
-                (
-                    state.hash_builder,
-                    AccountNodeIter::new(walker, hashed_account_cursor)
-                        .with_last_account_key(state.last_account_key),
                )
+                .with_updates(retain_updates);
+                let node_iter =
+                    AccountNodeIter::from_factory(walker, self.hashed_cursor_factory.clone())?
+                        .with_last_account_key(state.last_account_key);
+                (hash_builder, node_iter)
            }
            None => {
-                let walker = TrieWalker::new(trie_cursor, self.prefix_sets.account_prefix_set);
-                (HashBuilder::default(), AccountNodeIter::new(walker, hashed_account_cursor))
+                let hash_builder = HashBuilder::default().with_updates(retain_updates);
+                let walker = TrieWalker::new(trie_cursor, self.prefix_sets.account_prefix_set)
+                    .with_updates(retain_updates);
+                let node_iter =
+                    AccountNodeIter::from_factory(walker, self.hashed_cursor_factory.clone())?;
+                (hash_builder, node_iter)
            }
        };

-        account_node_iter.walker.set_updates(retain_updates);
-        hash_builder.set_updates(retain_updates);
-
        let mut account_rlp = Vec::with_capacity(128);
        let mut hashed_entries_walked = 0;
        while let Some(node) = account_node_iter.try_next()? {
@ -283,11 +284,9 @@ where
                        storage_root_calculator.root()?
                    };

-                    let account = TrieAccount::from((account, storage_root));
-
                    account_rlp.clear();
+                    let account = TrieAccount::from((account, storage_root));
                    account.encode(&mut account_rlp as &mut dyn BufMut);
-
                    hash_builder.add_leaf(Nibbles::unpack(hashed_address), &account_rlp);

                    // Decide if we need to return intermediate progress.
@ -319,13 +318,10 @@ where

        let root = hash_builder.root();

-        let (_, walker_updates) = account_node_iter.walker.split();
-        let (_, hash_builder_updates) = hash_builder.split();
-
-        trie_updates.extend(walker_updates);
-        trie_updates.extend_with_account_updates(hash_builder_updates);
-        trie_updates.extend_with_deletes(
-            self.prefix_sets.destroyed_accounts.into_iter().map(TrieKey::StorageTrie),
+        trie_updates.finalize_state_updates(
+            account_node_iter.walker,
+            hash_builder,
+            self.prefix_sets.destroyed_accounts,
        );

        let stats = tracker.finish();
@ -357,8 +353,8 @@ pub struct StorageRoot<T, H> {
    pub hashed_address: B256,
    /// The set of storage slot prefixes that have changed.
    pub prefix_set: PrefixSet,
-    #[cfg(feature = "metrics")]
    /// Storage root metrics.
+    #[cfg(feature = "metrics")]
    metrics: TrieRootMetrics,
 }

@ -390,7 +386,7 @@ impl<T, H> StorageRoot<T, H> {
            trie_cursor_factory,
            hashed_cursor_factory,
            hashed_address,
-            prefix_set: PrefixSetMut::default().freeze(),
+            prefix_set: PrefixSet::default(),
            #[cfg(feature = "metrics")]
            metrics,
        }
@ -475,7 +471,13 @@ where
        Ok(root)
    }

-    fn calculate(
+    /// Walks the hashed storage table entries for a given address and calculates the storage root.
+    ///
+    /// # Returns
+    ///
+    /// The storage root, number of walked entries and trie updates
+    /// for a given address if requested.
+    pub fn calculate(
        self,
        retain_updates: bool,
    ) -> Result<(B256, usize, TrieUpdates), StorageRootError> {
@ -518,12 +520,12 @@ where

        let root = hash_builder.root();

-        let (_, hash_builder_updates) = hash_builder.split();
-        let (_, walker_updates) = storage_node_iter.walker.split();
-
        let mut trie_updates = TrieUpdates::default();
-        trie_updates.extend(walker_updates);
-        trie_updates.extend_with_storage_updates(self.hashed_address, hash_builder_updates);
+        trie_updates.finalize_storage_updates(
+            self.hashed_address,
+            storage_node_iter.walker,
+            hash_builder,
+        );

        let stats = tracker.finish();

@ -548,8 +550,9 @@ where
 #[cfg(test)]
 mod tests {
    use super::*;
-    use crate::test_utils::{
-        state_root, state_root_prehashed, storage_root, storage_root_prehashed,
+    use crate::{
+        prefix_set::PrefixSetMut,
+        test_utils::{state_root, state_root_prehashed, storage_root, storage_root_prehashed},
    };
    use proptest::{prelude::ProptestConfig, proptest};
    use reth_db::{
@ -788,7 +791,7 @@ mod tests {
                tx.commit().unwrap();
                let tx =  factory.provider_rw().unwrap();

-                let expected = state_root(state.into_iter());
+                let expected = state_root(state);

                let threshold = 10;
                let mut got = None;
--- a/crates/trie/src/updates.rs
+++ b/crates/trie/src/updates.rs
@ -6,12 +6,14 @@ use reth_db::{
 };
 use reth_primitives::{
    trie::{
-        BranchNodeCompact, Nibbles, StorageTrieEntry, StoredBranchNode, StoredNibbles,
+        BranchNodeCompact, HashBuilder, Nibbles, StorageTrieEntry, StoredBranchNode, StoredNibbles,
        StoredNibblesSubKey,
    },
    B256,
 };
-use std::collections::{hash_map::IntoIter, HashMap};
+use std::collections::{hash_map::IntoIter, HashMap, HashSet};
+
+use crate::walker::TrieWalker;

 /// The key of a trie node.
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -88,20 +90,43 @@ impl TrieUpdates {
        );
    }

-    /// Extend the updates with storage trie updates.
-    pub fn extend_with_storage_updates(
+    /// Finalize state trie updates.
+    pub fn finalize_state_updates<C>(
        &mut self,
-        hashed_address: B256,
-        updates: HashMap<Nibbles, BranchNodeCompact>,
+        walker: TrieWalker<C>,
+        hash_builder: HashBuilder,
+        destroyed_accounts: HashSet<B256>,
    ) {
-        self.extend(updates.into_iter().map(|(nibbles, node)| {
-            (TrieKey::StorageNode(hashed_address, nibbles.into()), TrieOp::Update(node))
-        }));
+        // Add updates from trie walker.
+        let (_, walker_updates) = walker.split();
+        self.extend(walker_updates);
+
+        // Add account node updates from hash builder.
+        let (_, hash_builder_updates) = hash_builder.split();
+        self.extend_with_account_updates(hash_builder_updates);
+
+        // Add deleted storage tries for destroyed accounts.
+        self.extend(
+            destroyed_accounts.into_iter().map(|key| (TrieKey::StorageTrie(key), TrieOp::Delete)),
+        );
    }

-    /// Extend the updates with deletes.
-    pub fn extend_with_deletes(&mut self, keys: impl IntoIterator<Item = TrieKey>) {
-        self.extend(keys.into_iter().map(|key| (key, TrieOp::Delete)));
+    /// Finalize storage trie updates for a given address.
+    pub fn finalize_storage_updates<C>(
+        &mut self,
+        hashed_address: B256,
+        walker: TrieWalker<C>,
+        hash_builder: HashBuilder,
+    ) {
+        // Add updates from trie walker.
+        let (_, walker_updates) = walker.split();
+        self.extend(walker_updates);
+
+        // Add storage node updates from hash builder.
+        let (_, hash_builder_updates) = hash_builder.split();
+        self.extend(hash_builder_updates.into_iter().map(|(nibbles, node)| {
+            (TrieKey::StorageNode(hashed_address, nibbles.into()), TrieOp::Update(node))
+        }));
    }

    /// Flush all aggregated updates to the database.
--- a/crates/trie/src/walker.rs
+++ b/crates/trie/src/walker.rs
@ -28,28 +28,7 @@ pub struct TrieWalker<C> {
    trie_updates: Option<TrieUpdates>,
 }

-impl<C: TrieCursor> TrieWalker<C> {
-    /// Constructs a new TrieWalker, setting up the initial state of the stack and cursor.
-    pub fn new(cursor: C, changes: PrefixSet) -> Self {
-        // Initialize the walker with a single empty stack element.
-        let mut this = Self {
-            cursor,
-            changes,
-            stack: vec![CursorSubNode::default()],
-            can_skip_current_node: false,
-            trie_updates: None,
-        };
-
-        // Set up the root node of the trie in the stack, if it exists.
-        if let Some((key, value)) = this.node(true).unwrap() {
-            this.stack[0] = CursorSubNode::new(key, Some(value));
-        }
-
-        // Update the skip state for the root node.
-        this.update_skip_node();
-        this
-    }
-
+impl<C> TrieWalker<C> {
    /// Constructs a new TrieWalker from existing stack and a cursor.
    pub fn from_stack(cursor: C, stack: Vec<CursorSubNode>, changes: PrefixSet) -> Self {
        let mut this =
@ -91,6 +70,68 @@ impl<C: TrieCursor> TrieWalker<C> {
        self.trie_updates.as_ref().map(|u| u.len()).unwrap_or(0)
    }

+    /// Returns the current key in the trie.
+    pub fn key(&self) -> Option<&Nibbles> {
+        self.stack.last().map(|n| n.full_key())
+    }
+
+    /// Returns the current hash in the trie if any.
+    pub fn hash(&self) -> Option<B256> {
+        self.stack.last().and_then(|n| n.hash())
+    }
+
+    /// Indicates whether the children of the current node are present in the trie.
+    pub fn children_are_in_trie(&self) -> bool {
+        self.stack.last().map_or(false, |n| n.tree_flag())
+    }
+
+    /// Returns the next unprocessed key in the trie.
+    pub fn next_unprocessed_key(&self) -> Option<B256> {
+        self.key()
+            .and_then(|key| {
+                if self.can_skip_current_node {
+                    key.increment().map(|inc| inc.pack())
+                } else {
+                    Some(key.pack())
+                }
+            })
+            .map(|mut key| {
+                key.resize(32, 0);
+                B256::from_slice(key.as_slice())
+            })
+    }
+
+    /// Updates the skip node flag based on the walker's current state.
+    fn update_skip_node(&mut self) {
+        self.can_skip_current_node = self
+            .stack
+            .last()
+            .map_or(false, |node| !self.changes.contains(node.full_key()) && node.hash_flag());
+    }
+}
+
+impl<C: TrieCursor> TrieWalker<C> {
+    /// Constructs a new TrieWalker, setting up the initial state of the stack and cursor.
+    pub fn new(cursor: C, changes: PrefixSet) -> Self {
+        // Initialize the walker with a single empty stack element.
+        let mut this = Self {
+            cursor,
+            changes,
+            stack: vec![CursorSubNode::default()],
+            can_skip_current_node: false,
+            trie_updates: None,
+        };
+
+        // Set up the root node of the trie in the stack, if it exists.
+        if let Some((key, value)) = this.node(true).unwrap() {
+            this.stack[0] = CursorSubNode::new(key, Some(value));
+        }
+
+        // Update the skip state for the root node.
+        this.update_skip_node();
+        this
+    }
+
    /// Advances the walker to the next trie node and updates the skip node flag.
    ///
    /// # Returns
@ -200,45 +241,6 @@ impl<C: TrieCursor> TrieWalker<C> {

        Ok(())
    }
-
-    /// Returns the current key in the trie.
-    pub fn key(&self) -> Option<&Nibbles> {
-        self.stack.last().map(|n| n.full_key())
-    }
-
-    /// Returns the current hash in the trie if any.
-    pub fn hash(&self) -> Option<B256> {
-        self.stack.last().and_then(|n| n.hash())
-    }
-
-    /// Indicates whether the children of the current node are present in the trie.
-    pub fn children_are_in_trie(&self) -> bool {
-        self.stack.last().map_or(false, |n| n.tree_flag())
-    }
-
-    /// Returns the next unprocessed key in the trie.
-    pub fn next_unprocessed_key(&self) -> Option<B256> {
-        self.key()
-            .and_then(|key| {
-                if self.can_skip_current_node {
-                    key.increment().map(|inc| inc.pack())
-                } else {
-                    Some(key.pack())
-                }
-            })
-            .map(|mut key| {
-                key.resize(32, 0);
-                B256::from_slice(key.as_slice())
-            })
-    }
-
-    /// Updates the skip node flag based on the walker's current state.
-    fn update_skip_node(&mut self) {
-        self.can_skip_current_node = self
-            .stack
-            .last()
-            .map_or(false, |node| !self.changes.contains(node.full_key()) && node.hash_flag());
-    }
 }

 #[cfg(test)]