chore(trie): dedup node iters (#8381)

This commit is contained in:
Roman Krasiuk
2024-05-27 15:07:48 +02:00
committed by GitHub
parent b4d7d368a4
commit cfc1344454
6 changed files with 89 additions and 195 deletions

View File

@ -10,8 +10,8 @@ use reth_primitives::{
use reth_provider::{providers::ConsistentDbView, DatabaseProviderFactory, ProviderError};
use reth_tasks::pool::BlockingTaskPool;
use reth_trie::{
hashed_cursor::HashedPostStateCursorFactory,
node_iter::{AccountNode, AccountNodeIter},
hashed_cursor::{HashedCursorFactory, HashedPostStateCursorFactory},
node_iter::{TrieElement, TrieNodeIter},
trie_cursor::TrieCursorFactory,
updates::TrieUpdates,
walker::TrieWalker,
@ -131,23 +131,24 @@ where
let hashed_cursor_factory = HashedPostStateCursorFactory::new(tx, &hashed_state_sorted);
let trie_cursor_factory = tx;
let trie_cursor =
trie_cursor_factory.account_trie_cursor().map_err(ProviderError::Database)?;
let walker = TrieWalker::new(
trie_cursor_factory.account_trie_cursor().map_err(ProviderError::Database)?,
prefix_sets.account_prefix_set,
)
.with_updates(retain_updates);
let mut account_node_iter = TrieNodeIter::new(
walker,
hashed_cursor_factory.hashed_account_cursor().map_err(ProviderError::Database)?,
);
let mut hash_builder = HashBuilder::default().with_updates(retain_updates);
let walker = TrieWalker::new(trie_cursor, prefix_sets.account_prefix_set)
.with_updates(retain_updates);
let mut account_node_iter =
AccountNodeIter::from_factory(walker, hashed_cursor_factory.clone())
.map_err(ProviderError::Database)?;
let mut account_rlp = Vec::with_capacity(128);
while let Some(node) = account_node_iter.try_next().map_err(ProviderError::Database)? {
match node {
AccountNode::Branch(node) => {
TrieElement::Branch(node) => {
hash_builder.add_branch(node.key, node.value, node.children_are_in_trie);
}
AccountNode::Leaf(hashed_address, account) => {
TrieElement::Leaf(hashed_address, account) => {
let (storage_root, _, updates) = match storage_roots.remove(&hashed_address) {
Some(rx) => rx.await.map_err(|_| {
AsyncStateRootError::StorageRootChannelClosed { hashed_address }

View File

@ -10,7 +10,7 @@ use reth_primitives::{
use reth_provider::{providers::ConsistentDbView, DatabaseProviderFactory, ProviderError};
use reth_trie::{
hashed_cursor::{HashedCursorFactory, HashedPostStateCursorFactory},
node_iter::{AccountNode, AccountNodeIter},
node_iter::{TrieElement, TrieNodeIter},
trie_cursor::TrieCursorFactory,
updates::TrieUpdates,
walker::TrieWalker,
@ -115,23 +115,24 @@ where
HashedPostStateCursorFactory::new(provider_ro.tx_ref(), &hashed_state_sorted);
let trie_cursor_factory = provider_ro.tx_ref();
let hashed_account_cursor =
hashed_cursor_factory.hashed_account_cursor().map_err(ProviderError::Database)?;
let trie_cursor =
trie_cursor_factory.account_trie_cursor().map_err(ProviderError::Database)?;
let walker = TrieWalker::new(
trie_cursor_factory.account_trie_cursor().map_err(ProviderError::Database)?,
prefix_sets.account_prefix_set,
)
.with_updates(retain_updates);
let mut account_node_iter = TrieNodeIter::new(
walker,
hashed_cursor_factory.hashed_account_cursor().map_err(ProviderError::Database)?,
);
let walker = TrieWalker::new(trie_cursor, prefix_sets.account_prefix_set)
.with_updates(retain_updates);
let mut account_node_iter = AccountNodeIter::new(walker, hashed_account_cursor);
let mut hash_builder = HashBuilder::default().with_updates(retain_updates);
let mut account_rlp = Vec::with_capacity(128);
while let Some(node) = account_node_iter.try_next().map_err(ProviderError::Database)? {
match node {
AccountNode::Branch(node) => {
TrieElement::Branch(node) => {
hash_builder.add_branch(node.key, node.value, node.children_are_in_trie);
}
AccountNode::Leaf(hashed_address, account) => {
TrieElement::Leaf(hashed_address, account) => {
let (storage_root, _, updates) = match storage_roots.remove(&hashed_address) {
Some(result) => result,
// Since we do not store all intermediate nodes in the database, there might

View File

@ -28,7 +28,7 @@ pub trait HashedCursorFactory {
/// The cursor for iterating over hashed entries.
pub trait HashedCursor {
/// Value returned by the cursor.
type Value;
type Value: std::fmt::Debug;
/// Seek an entry greater or equal to the given key and position the cursor there.
/// Returns the first entry with the key greater or equal to the sought key.

View File

@ -1,10 +1,6 @@
use crate::{
hashed_cursor::{HashedCursor, HashedCursorFactory, HashedStorageCursor},
trie_cursor::TrieCursor,
walker::TrieWalker,
};
use crate::{hashed_cursor::HashedCursor, trie_cursor::TrieCursor, walker::TrieWalker};
use reth_db::DatabaseError;
use reth_primitives::{trie::Nibbles, Account, B256, U256};
use reth_primitives::{trie::Nibbles, B256};
/// Represents a branch node in the trie.
#[derive(Debug)]
@ -24,95 +20,80 @@ impl TrieBranchNode {
}
}
/// Represents a variant of an account node.
/// Represents variants of trie nodes returned by the iteration.
#[derive(Debug)]
pub enum AccountNode {
pub enum TrieElement<Value> {
/// Branch node.
Branch(TrieBranchNode),
/// Leaf node.
Leaf(B256, Account),
}
/// Represents a variant of a storage node.
#[derive(Debug)]
pub enum StorageNode {
/// Branch node.
Branch(TrieBranchNode),
/// Leaf node.
Leaf(B256, U256),
Leaf(B256, Value),
}
/// An iterator over existing intermediate branch nodes and updated leaf nodes.
#[derive(Debug)]
pub struct AccountNodeIter<C, H> {
/// Underlying walker over intermediate nodes.
pub struct TrieNodeIter<C, H: HashedCursor> {
/// The walker over intermediate nodes.
pub walker: TrieWalker<C>,
/// The cursor for the hashed account entries.
pub hashed_account_cursor: H,
/// The previous account key. If the iteration was previously interrupted, this value can be
/// The cursor for the hashed entries.
pub hashed_cursor: H,
/// The previous hashed key. If the iteration was previously interrupted, this value can be
/// used to resume iterating from the last returned leaf node.
previous_account_key: Option<B256>,
previous_hashed_key: Option<B256>,
/// Current hashed account entry.
current_hashed_entry: Option<(B256, Account)>,
/// Current hashed entry.
current_hashed_entry: Option<(B256, <H as HashedCursor>::Value)>,
/// Flag indicating whether we should check the current walker key.
current_walker_key_checked: bool,
}
impl<C, H> AccountNodeIter<C, H> {
/// Creates a new `AccountNodeIter`.
pub fn new(walker: TrieWalker<C>, hashed_account_cursor: H) -> Self {
impl<C, H: HashedCursor> TrieNodeIter<C, H> {
/// Creates a new [TrieNodeIter].
pub fn new(walker: TrieWalker<C>, hashed_cursor: H) -> Self {
Self {
walker,
hashed_account_cursor,
previous_account_key: None,
hashed_cursor,
previous_hashed_key: None,
current_hashed_entry: None,
current_walker_key_checked: false,
}
}
/// Create new `AccountNodeIter` by creating hashed account cursor from factory.
pub fn from_factory<F: HashedCursorFactory<AccountCursor = H>>(
walker: TrieWalker<C>,
factory: F,
) -> Result<Self, DatabaseError> {
Ok(Self::new(walker, factory.hashed_account_cursor()?))
}
/// Sets the last iterated account key and returns the modified `AccountNodeIter`.
/// Sets the last iterated hashed key and returns the modified [TrieNodeIter].
/// This is used to resume iteration from the last checkpoint.
pub fn with_last_account_key(mut self, previous_account_key: B256) -> Self {
self.previous_account_key = Some(previous_account_key);
pub fn with_last_hashed_key(mut self, previous_hashed_key: B256) -> Self {
self.previous_hashed_key = Some(previous_hashed_key);
self
}
}
impl<C, H> AccountNodeIter<C, H>
impl<C, H> TrieNodeIter<C, H>
where
C: TrieCursor,
H: HashedCursor<Value = Account>,
H: HashedCursor,
{
/// Return the next account trie node to be added to the hash builder.
/// Return the next trie node to be added to the hash builder.
///
/// Returns the nodes using this algorithm:
/// 1. Return the current intermediate branch node if it hasn't been updated.
/// 2. Advance the trie walker to the next intermediate branch node and retrieve next
/// unprocessed key.
/// 3. Reposition the hashed account cursor on the next unprocessed key.
/// 4. Return every hashed account entry up to the key of the current intermediate branch node.
/// 3. Reposition the hashed cursor on the next unprocessed key.
/// 4. Return every hashed entry up to the key of the current intermediate branch node.
/// 5. Repeat.
///
/// NOTE: The iteration will start from the key of the previous hashed entry if it was supplied.
pub fn try_next(&mut self) -> Result<Option<AccountNode>, DatabaseError> {
pub fn try_next(
&mut self,
) -> Result<Option<TrieElement<<H as HashedCursor>::Value>>, DatabaseError> {
loop {
// If the walker has a key...
if let Some(key) = self.walker.key() {
// Check if the current walker key is unchecked and there's no previous account key
if !self.current_walker_key_checked && self.previous_account_key.is_none() {
// Check if the current walker key is unchecked and there's no previous hashed key
if !self.current_walker_key_checked && self.previous_hashed_key.is_none() {
self.current_walker_key_checked = true;
// If it's possible to skip the current node in the walker, return a branch node
if self.walker.can_skip_current_node {
return Ok(Some(AccountNode::Branch(TrieBranchNode::new(
return Ok(Some(TrieElement::Branch(TrieBranchNode::new(
key.clone(),
self.walker.hash().unwrap(),
self.walker.children_are_in_trie(),
@ -121,26 +102,26 @@ where
}
}
// If there's a hashed address and account...
if let Some((hashed_address, account)) = self.current_hashed_entry.take() {
// If the walker's key is less than the unpacked hashed address, reset the checked
// status and continue
if self.walker.key().map_or(false, |key| key < &Nibbles::unpack(hashed_address)) {
// If there's a hashed entry...
if let Some((hashed_key, value)) = self.current_hashed_entry.take() {
// If the walker's key is less than the unpacked hashed key,
// reset the checked status and continue
if self.walker.key().map_or(false, |key| key < &Nibbles::unpack(hashed_key)) {
self.current_walker_key_checked = false;
continue
}
// Set the next hashed entry as a leaf node and return
self.current_hashed_entry = self.hashed_account_cursor.next()?;
return Ok(Some(AccountNode::Leaf(hashed_address, account)))
self.current_hashed_entry = self.hashed_cursor.next()?;
return Ok(Some(TrieElement::Leaf(hashed_key, value)))
}
// Handle seeking and advancing based on the previous account key
match self.previous_account_key.take() {
Some(account_key) => {
// Seek to the previous account key and get the next hashed entry
self.hashed_account_cursor.seek(account_key)?;
self.current_hashed_entry = self.hashed_account_cursor.next()?;
// Handle seeking and advancing based on the previous hashed key
match self.previous_hashed_key.take() {
Some(hashed_key) => {
// Seek to the previous hashed key and get the next hashed entry
self.hashed_cursor.seek(hashed_key)?;
self.current_hashed_entry = self.hashed_cursor.next()?;
}
None => {
// Get the seek key and set the current hashed entry based on walker's next
@ -149,7 +130,7 @@ where
Some(key) => key,
None => break, // no more keys
};
self.current_hashed_entry = self.hashed_account_cursor.seek(seek_key)?;
self.current_hashed_entry = self.hashed_cursor.seek(seek_key)?;
self.walker.advance()?;
}
}
@ -158,91 +139,3 @@ where
Ok(None)
}
}
/// An iterator over existing intermediate storage branch nodes and updated leaf nodes.
#[derive(Debug)]
pub struct StorageNodeIter<C, H> {
/// Underlying walker over intermediate nodes.
pub walker: TrieWalker<C>,
/// The cursor for the hashed storage entries.
pub hashed_storage_cursor: H,
/// Current hashed storage entry.
current_hashed_entry: Option<(B256, U256)>,
/// Flag indicating whether we should check the current walker key.
current_walker_key_checked: bool,
}
impl<C, H> StorageNodeIter<C, H> {
/// Creates a new instance of StorageNodeIter.
pub fn new(walker: TrieWalker<C>, hashed_storage_cursor: H) -> Self {
Self {
walker,
hashed_storage_cursor,
current_walker_key_checked: false,
current_hashed_entry: None,
}
}
}
impl<C, H> StorageNodeIter<C, H>
where
C: TrieCursor,
H: HashedStorageCursor<Value = U256>,
{
/// Return the next storage trie node to be added to the hash builder.
///
/// Returns the nodes using this algorithm:
/// 1. Return the current intermediate branch node if it hasn't been updated.
/// 2. Advance the trie walker to the next intermediate branch node and retrieve next
/// unprocessed key.
/// 3. Reposition the hashed storage cursor on the next unprocessed key.
/// 4. Return every hashed storage entry up to the key of the current intermediate branch node.
/// 5. Repeat.
pub fn try_next(&mut self) -> Result<Option<StorageNode>, DatabaseError> {
loop {
// Check if there's a key in the walker.
if let Some(key) = self.walker.key() {
// Check if the walker key hasn't been checked yet.
if !self.current_walker_key_checked {
self.current_walker_key_checked = true;
// Check if the current node can be skipped in the walker.
if self.walker.can_skip_current_node {
// Return a branch node based on the walker's properties.
return Ok(Some(StorageNode::Branch(TrieBranchNode::new(
key.clone(),
self.walker.hash().unwrap(),
self.walker.children_are_in_trie(),
))))
}
}
}
// Check for a current hashed storage entry.
if let Some((hashed_key, value)) = self.current_hashed_entry.take() {
// Compare keys and proceed accordingly.
if self.walker.key().map_or(false, |key| key < &Nibbles::unpack(hashed_key)) {
self.current_walker_key_checked = false;
continue
}
// Move to the next hashed storage entry and return the corresponding leaf node.
self.current_hashed_entry = self.hashed_storage_cursor.next()?;
return Ok(Some(StorageNode::Leaf(hashed_key, value)))
}
// Attempt to get the next unprocessed key from the walker.
match self.walker.next_unprocessed_key() {
Some(seek_key) => {
// Seek and update the current hashed entry based on the new seek key.
self.current_hashed_entry = self.hashed_storage_cursor.seek(seek_key)?;
self.walker.advance()?;
}
// No more keys to process, break the loop.
None => break,
};
}
Ok(None) // Return None if no more nodes are available.
}
}

View File

@ -1,6 +1,6 @@
use crate::{
hashed_cursor::{HashedCursorFactory, HashedStorageCursor},
node_iter::{AccountNode, AccountNodeIter, StorageNode, StorageNodeIter},
node_iter::{TrieElement, TrieNodeIter},
prefix_set::PrefixSetMut,
trie_cursor::{DatabaseAccountTrieCursor, DatabaseStorageTrieCursor},
walker::TrieWalker,
@ -64,13 +64,13 @@ where
let mut hash_builder = HashBuilder::default().with_proof_retainer(retainer);
let mut account_rlp = Vec::with_capacity(128);
let mut account_node_iter = AccountNodeIter::new(walker, hashed_account_cursor);
let mut account_node_iter = TrieNodeIter::new(walker, hashed_account_cursor);
while let Some(account_node) = account_node_iter.try_next()? {
match account_node {
AccountNode::Branch(node) => {
TrieElement::Branch(node) => {
hash_builder.add_branch(node.key, node.value, node.children_are_in_trie);
}
AccountNode::Leaf(hashed_address, account) => {
TrieElement::Leaf(hashed_address, account) => {
let storage_root = if hashed_address == target_hashed_address {
let (storage_root, storage_proofs) =
self.storage_root_with_proofs(hashed_address, slots)?;
@ -129,13 +129,13 @@ where
let retainer = ProofRetainer::from_iter(target_nibbles);
let mut hash_builder = HashBuilder::default().with_proof_retainer(retainer);
let mut storage_node_iter = StorageNodeIter::new(walker, hashed_storage_cursor);
let mut storage_node_iter = TrieNodeIter::new(walker, hashed_storage_cursor);
while let Some(node) = storage_node_iter.try_next()? {
match node {
StorageNode::Branch(node) => {
TrieElement::Branch(node) => {
hash_builder.add_branch(node.key, node.value, node.children_are_in_trie);
}
StorageNode::Leaf(hashed_slot, value) => {
TrieElement::Leaf(hashed_slot, value) => {
let nibbles = Nibbles::unpack(hashed_slot);
if let Some(proof) = proofs.iter_mut().find(|proof| proof.nibbles == nibbles) {
proof.set_value(value);

View File

@ -1,6 +1,6 @@
use crate::{
hashed_cursor::{HashedCursorFactory, HashedStorageCursor},
node_iter::{AccountNode, AccountNodeIter, StorageNode, StorageNodeIter},
node_iter::{TrieElement, TrieNodeIter},
prefix_set::{PrefixSet, PrefixSetLoader, TriePrefixSets},
progress::{IntermediateStateRootState, StateRootProgress},
stats::TrieTracker,
@ -216,6 +216,7 @@ where
let trie_cursor = self.trie_cursor_factory.account_trie_cursor()?;
let hashed_account_cursor = self.hashed_cursor_factory.hashed_account_cursor()?;
let (mut hash_builder, mut account_node_iter) = match self.previous_state {
Some(state) => {
let hash_builder = state.hash_builder.with_updates(retain_updates);
@ -225,17 +226,15 @@ where
self.prefix_sets.account_prefix_set,
)
.with_updates(retain_updates);
let node_iter =
AccountNodeIter::from_factory(walker, self.hashed_cursor_factory.clone())?
.with_last_account_key(state.last_account_key);
let node_iter = TrieNodeIter::new(walker, hashed_account_cursor)
.with_last_hashed_key(state.last_account_key);
(hash_builder, node_iter)
}
None => {
let hash_builder = HashBuilder::default().with_updates(retain_updates);
let walker = TrieWalker::new(trie_cursor, self.prefix_sets.account_prefix_set)
.with_updates(retain_updates);
let node_iter =
AccountNodeIter::from_factory(walker, self.hashed_cursor_factory.clone())?;
let node_iter = TrieNodeIter::new(walker, hashed_account_cursor);
(hash_builder, node_iter)
}
};
@ -244,11 +243,11 @@ where
let mut hashed_entries_walked = 0;
while let Some(node) = account_node_iter.try_next()? {
match node {
AccountNode::Branch(node) => {
TrieElement::Branch(node) => {
tracker.inc_branch();
hash_builder.add_branch(node.key, node.value, node.children_are_in_trie);
}
AccountNode::Leaf(hashed_address, account) => {
TrieElement::Leaf(hashed_address, account) => {
tracker.inc_leaf();
hashed_entries_walked += 1;
@ -501,14 +500,14 @@ where
let mut hash_builder = HashBuilder::default().with_updates(retain_updates);
let mut storage_node_iter = StorageNodeIter::new(walker, hashed_storage_cursor);
let mut storage_node_iter = TrieNodeIter::new(walker, hashed_storage_cursor);
while let Some(node) = storage_node_iter.try_next()? {
match node {
StorageNode::Branch(node) => {
TrieElement::Branch(node) => {
tracker.inc_branch();
hash_builder.add_branch(node.key, node.value, node.children_are_in_trie);
}
StorageNode::Leaf(hashed_slot, value) => {
TrieElement::Leaf(hashed_slot, value) => {
tracker.inc_leaf();
hash_builder.add_leaf(
Nibbles::unpack(hashed_slot),