feat(trie): forward-only in-memory cursor (#9079)

This commit is contained in:
Roman Krasiuk
2024-06-27 02:43:29 -07:00
committed by GitHub
parent 18eef6a991
commit 7a82f4eaec
3 changed files with 223 additions and 212 deletions

View File

@ -0,0 +1,51 @@
/// A forward-only, in-memory cursor over a pre-sorted collection of key-value entries.
///
/// The cursor never sorts or validates its input. Every lookup is a linear scan that starts at
/// the current position and assumes the entries are ordered by key in ascending order. The
/// position only ever moves towards the end of the collection, so looking up a key smaller than
/// one that was already passed does not rewind the cursor.
#[derive(Debug)]
pub struct ForwardInMemoryCursor<'a, K, V> {
    /// The pre-sorted entries the cursor walks over.
    entries: &'a [(K, V)],
    /// The index of the entry the cursor is currently positioned at.
    /// It equals `entries.len()` once the collection has been exhausted.
    index: usize,
}

impl<'a, K, V> ForwardInMemoryCursor<'a, K, V> {
    /// Create a new forward cursor positioned at the beginning of the collection.
    ///
    /// The cursor expects all of the entries to have been sorted by key in advance.
    /// Any borrowed slice is accepted; a `&Vec<(K, V)>` argument coerces to it.
    pub const fn new(entries: &'a [(K, V)]) -> Self {
        Self { entries, index: 0 }
    }

    /// Returns `true` if the underlying collection holds no entries at all.
    ///
    /// The result does not depend on the cursor position: an exhausted cursor over a
    /// non-empty collection still reports `false`.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }
}

impl<'a, K, V> ForwardInMemoryCursor<'a, K, V>
where
    K: PartialOrd + Copy,
    V: Copy,
{
    /// Advances the cursor forward while `predicate` returns `true` for the key at the current
    /// position, or until the collection is exhausted.
    ///
    /// Returns a copy of the first entry for which `predicate` returns `false`, or `None` if
    /// every remaining entry was skipped. The cursor stays positioned at the returned entry.
    fn advance_while(&mut self, predicate: impl Fn(&K) -> bool) -> Option<(K, V)> {
        while let Some((key, _)) = self.entries.get(self.index) {
            if !predicate(key) {
                break
            }
            // Only step past entries that exist, so `index` never exceeds `entries.len()`.
            self.index += 1;
        }
        self.entries.get(self.index).copied()
    }

    /// Returns the first entry from the current cursor position that's greater than or equal to
    /// the provided key. This method advances the cursor forward.
    pub fn seek(&mut self, key: &K) -> Option<(K, V)> {
        self.advance_while(|k| k < key)
    }

    /// Returns the first entry from the current cursor position that's strictly greater than the
    /// provided key. This method advances the cursor forward.
    pub fn first_after(&mut self, key: &K) -> Option<(K, V)> {
        self.advance_while(|k| k <= key)
    }
}

View File

@ -1,6 +1,11 @@
use super::{HashedCursor, HashedCursorFactory, HashedStorageCursor};
use crate::{HashedAccountsSorted, HashedPostStateSorted, HashedStorageSorted};
use crate::{
forward_cursor::ForwardInMemoryCursor, HashedAccountsSorted, HashedPostStateSorted,
HashedStorageSorted,
};
use reth_db::DatabaseError;
use reth_primitives::{Account, B256, U256};
use std::collections::HashSet;
/// The hashed cursor factory for the post state.
#[derive(Debug, Clone)]
@ -20,7 +25,7 @@ impl<'a, CF: HashedCursorFactory> HashedCursorFactory for HashedPostStateCursorF
type AccountCursor = HashedPostStateAccountCursor<'a, CF::AccountCursor>;
type StorageCursor = HashedPostStateStorageCursor<'a, CF::StorageCursor>;
fn hashed_account_cursor(&self) -> Result<Self::AccountCursor, reth_db::DatabaseError> {
fn hashed_account_cursor(&self) -> Result<Self::AccountCursor, DatabaseError> {
let cursor = self.cursor_factory.hashed_account_cursor()?;
Ok(HashedPostStateAccountCursor::new(cursor, &self.post_state.accounts))
}
@ -28,7 +33,7 @@ impl<'a, CF: HashedCursorFactory> HashedCursorFactory for HashedPostStateCursorF
fn hashed_storage_cursor(
&self,
hashed_address: B256,
) -> Result<Self::StorageCursor, reth_db::DatabaseError> {
) -> Result<Self::StorageCursor, DatabaseError> {
let cursor = self.cursor_factory.hashed_storage_cursor(hashed_address)?;
Ok(HashedPostStateStorageCursor::new(cursor, self.post_state.storages.get(&hashed_address)))
}
@ -36,23 +41,28 @@ impl<'a, CF: HashedCursorFactory> HashedCursorFactory for HashedPostStateCursorF
/// The cursor to iterate over post state hashed accounts and corresponding database entries.
/// It will always give precedence to the data from the hashed post state.
#[derive(Debug, Clone)]
#[derive(Debug)]
pub struct HashedPostStateAccountCursor<'a, C> {
/// The database cursor.
cursor: C,
/// The reference to the in-memory [`HashedAccountsSorted`].
post_state_accounts: &'a HashedAccountsSorted,
/// The post state account index where the cursor is currently at.
post_state_account_index: usize,
/// Forward-only in-memory cursor over accounts.
post_state_cursor: ForwardInMemoryCursor<'a, B256, Account>,
/// Reference to the collection of account keys that were destroyed.
destroyed_accounts: &'a HashSet<B256>,
/// The last hashed account that was returned by the cursor.
/// De facto, this is a current cursor position.
last_account: Option<B256>,
}
impl<'a, C> HashedPostStateAccountCursor<'a, C> {
impl<'a, C> HashedPostStateAccountCursor<'a, C>
where
C: HashedCursor<Value = Account>,
{
/// Create new instance of [`HashedPostStateAccountCursor`].
pub const fn new(cursor: C, post_state_accounts: &'a HashedAccountsSorted) -> Self {
Self { cursor, post_state_accounts, last_account: None, post_state_account_index: 0 }
let post_state_cursor = ForwardInMemoryCursor::new(&post_state_accounts.accounts);
let destroyed_accounts = &post_state_accounts.destroyed_accounts;
Self { cursor, post_state_cursor, destroyed_accounts, last_account: None }
}
/// Returns `true` if the account has been destroyed.
@ -61,29 +71,62 @@ impl<'a, C> HashedPostStateAccountCursor<'a, C> {
/// This function only checks the post state, not the database, because the latter does not
/// store destroyed accounts.
fn is_account_cleared(&self, account: &B256) -> bool {
self.post_state_accounts.destroyed_accounts.contains(account)
self.destroyed_accounts.contains(account)
}
/// Finds the first account with a key greater than or equal to `key`, looking at both the
/// post state and the database. An exact post state match short-circuits the database lookup.
fn seek_inner(&mut self, key: B256) -> Result<Option<(B256, Account)>, DatabaseError> {
    // Advance the in-memory cursor to its first account whose key is not below `key`.
    let in_memory = self.post_state_cursor.seek(&key);

    // An exact hit in the post state is returned as is; the database is not consulted.
    if let Some((address, _)) = in_memory {
        if address == key {
            return Ok(in_memory)
        }
    }

    // No exact hit: position the database cursor at `key` and step over every account that
    // the post state marks as destroyed.
    let mut from_db = self.cursor.seek(key)?;
    while let Some((address, _)) = from_db.as_ref() {
        if !self.is_account_cleared(address) {
            break
        }
        from_db = self.cursor.next()?;
    }

    // Pick the lower of the two candidates.
    Ok(Self::compare_entries(in_memory, from_db))
}
/// Finds the first account with a key strictly greater than `last_account`, looking at both
/// the post state and the database.
fn next_inner(&mut self, last_account: B256) -> Result<Option<(B256, Account)>, DatabaseError> {
    // Advance the in-memory cursor past the last returned key.
    let in_memory = self.post_state_cursor.first_after(&last_account);

    // Re-position the database cursor at the last returned key, then step over entries that
    // were already returned (key not above `last_account`) or that were destroyed.
    let mut from_db = self.cursor.seek(last_account)?;
    loop {
        let must_skip = match &from_db {
            Some((address, _)) => {
                *address <= last_account || self.is_account_cleared(address)
            }
            None => false,
        };
        if !must_skip {
            break
        }
        from_db = self.cursor.next()?;
    }

    // Pick the lower of the two candidates.
    Ok(Self::compare_entries(in_memory, from_db))
}
/// Return the account with the lowest hashed account key.
///
/// Given the next post state and database entries, return the smallest of the two.
/// If the account keys are the same, the post state entry is given precedence.
fn next_account(
post_state_item: Option<&(B256, Account)>,
fn compare_entries(
post_state_item: Option<(B256, Account)>,
db_item: Option<(B256, Account)>,
) -> Option<(B256, Account)> {
match (post_state_item, db_item) {
if let Some((post_state_entry, db_entry)) = post_state_item.zip(db_item) {
// If both are not empty, return the smallest of the two
// Post state is given precedence if keys are equal
(Some((post_state_address, post_state_account)), Some((db_address, db_account))) => {
if post_state_address <= &db_address {
Some((*post_state_address, *post_state_account))
} else {
Some((db_address, db_account))
}
}
Some(if post_state_entry.0 <= db_entry.0 { post_state_entry } else { db_entry })
} else {
// Return either non-empty entry
_ => post_state_item.copied().or(db_item),
db_item.or(post_state_item)
}
}
}
@ -102,42 +145,11 @@ where
///
/// The returned account key is memoized and the cursor remains positioned at that key until
/// [`HashedCursor::seek`] or [`HashedCursor::next`] are called.
fn seek(&mut self, key: B256) -> Result<Option<(B256, Self::Value)>, reth_db::DatabaseError> {
self.last_account = None;
// Take the next account from the post state with the key greater than or equal to the
// sought key.
let mut post_state_entry =
self.post_state_accounts.accounts.get(self.post_state_account_index);
while post_state_entry.map(|(k, _)| k < &key).unwrap_or_default() {
self.post_state_account_index += 1;
post_state_entry = self.post_state_accounts.accounts.get(self.post_state_account_index);
}
// It's an exact match, return the account from post state without looking up in the
// database.
if let Some((address, account)) = post_state_entry {
if address == &key {
self.last_account = Some(*address);
return Ok(Some((*address, *account)))
}
}
// It's not an exact match, reposition to the first greater or equal account that wasn't
// cleared.
let mut db_entry = self.cursor.seek(key)?;
while db_entry
.as_ref()
.map(|(address, _)| self.is_account_cleared(address))
.unwrap_or_default()
{
db_entry = self.cursor.next()?;
}
// Compare two entries and return the lowest.
let result = Self::next_account(post_state_entry, db_entry);
self.last_account = result.as_ref().map(|(address, _)| *address);
Ok(result)
fn seek(&mut self, key: B256) -> Result<Option<(B256, Self::Value)>, DatabaseError> {
    // Find the closest account at or after `key`.
    let found = self.seek_inner(key)?;
    // Memoize the returned key as the cursor position; it is reset when nothing was found.
    self.last_account = found.as_ref().map(|(address, _)| *address);
    Ok(found)
}
/// Retrieve the next entry from the cursor.
@ -147,100 +159,118 @@ where
///
/// NOTE: This function will not return any entry unless [`HashedCursor::seek`] has been
/// called.
fn next(&mut self) -> Result<Option<(B256, Self::Value)>, reth_db::DatabaseError> {
let last_account = match self.last_account.as_ref() {
Some(account) => account,
None => return Ok(None), // no previous entry was found
fn next(&mut self) -> Result<Option<(B256, Self::Value)>, DatabaseError> {
let next = match self.last_account {
Some(account) => {
let entry = self.next_inner(account)?;
self.last_account = entry.as_ref().map(|entry| entry.0);
entry
}
// no previous entry was found
None => None,
};
// If post state was given precedence, move the cursor forward.
let mut db_entry = self.cursor.seek(*last_account)?;
while db_entry
.as_ref()
.map(|(address, _)| address <= last_account || self.is_account_cleared(address))
.unwrap_or_default()
{
db_entry = self.cursor.next()?;
}
// Take the next account from the post state with the key greater than the last sought key.
let mut post_state_entry =
self.post_state_accounts.accounts.get(self.post_state_account_index);
while post_state_entry.map(|(k, _)| k <= last_account).unwrap_or_default() {
self.post_state_account_index += 1;
post_state_entry = self.post_state_accounts.accounts.get(self.post_state_account_index);
}
// Compare two entries and return the lowest.
let result = Self::next_account(post_state_entry, db_entry);
self.last_account = result.as_ref().map(|(address, _)| *address);
Ok(result)
Ok(next)
}
}
/// The cursor to iterate over post state hashed storages and corresponding database entries.
/// It will always give precedence to the data from the post state.
#[derive(Debug, Clone)]
#[derive(Debug)]
pub struct HashedPostStateStorageCursor<'a, C> {
/// The database cursor.
cursor: C,
/// The reference to post state storage.
post_state_storage: Option<&'a HashedStorageSorted>,
/// The post state index where the cursor is currently at.
post_state_storage_index: usize,
/// Forward-only in-memory cursor over non zero-valued account storage slots.
post_state_cursor: Option<ForwardInMemoryCursor<'a, B256, U256>>,
/// Reference to the collection of storage slot keys that were cleared.
cleared_slots: Option<&'a HashSet<B256>>,
/// Flag indicating whether database storage was wiped.
storage_wiped: bool,
/// The last slot that has been returned by the cursor.
/// De facto, this is the cursor's position for the given account key.
last_slot: Option<B256>,
}
impl<'a, C> HashedPostStateStorageCursor<'a, C> {
impl<'a, C> HashedPostStateStorageCursor<'a, C>
where
C: HashedStorageCursor<Value = U256>,
{
/// Create new instance of [`HashedPostStateStorageCursor`] for the given hashed address.
pub const fn new(cursor: C, post_state: Option<&'a HashedStorageSorted>) -> Self {
Self {
cursor,
post_state_storage: post_state,
last_slot: None,
post_state_storage_index: 0,
}
}
/// Returns `true` if the storage for the given
/// The database is not checked since it already has no wiped storage entries.
const fn is_db_storage_wiped(&self) -> bool {
match self.post_state_storage {
Some(storage) => storage.wiped,
None => false,
}
pub fn new(cursor: C, post_state_storage: Option<&'a HashedStorageSorted>) -> Self {
    match post_state_storage {
        // Post state is available for this account: borrow its pieces individually.
        Some(storage) => Self {
            cursor,
            post_state_cursor: Some(ForwardInMemoryCursor::new(&storage.non_zero_valued_slots)),
            cleared_slots: Some(&storage.zero_valued_slots),
            storage_wiped: storage.wiped,
            last_slot: None,
        },
        // No post state for this account: only the database cursor will be consulted.
        None => Self {
            cursor,
            post_state_cursor: None,
            cleared_slots: None,
            storage_wiped: false,
            last_slot: None,
        },
    }
}
/// Check if the slot was zeroed out in the post state.
/// The database is not checked since it already has no zero-valued slots.
fn is_slot_zero_valued(&self, slot: &B256) -> bool {
self.post_state_storage
.map(|storage| storage.zero_valued_slots.contains(slot))
.unwrap_or_default()
self.cleared_slots.map_or(false, |s| s.contains(slot))
}
/// Find the first storage entry, in post state or database, whose slot key is greater than or
/// equal to the provided subkey.
fn seek_inner(&mut self, subkey: B256) -> Result<Option<(B256, U256)>, DatabaseError> {
    // Advance the in-memory cursor, if there is one, to its first slot not below `subkey`.
    let in_memory = match self.post_state_cursor.as_mut() {
        Some(cursor) => cursor.seek(&subkey),
        None => None,
    };

    // When the database storage was wiped, or the post state holds exactly the requested
    // slot, the post state entry is the answer and the database is not consulted.
    let exact_match = matches!(in_memory, Some((slot, _)) if slot == subkey);
    if self.storage_wiped || exact_match {
        return Ok(in_memory)
    }

    // Otherwise position the database cursor at `subkey` and step over every slot that the
    // post state zeroed out.
    let mut from_db = self.cursor.seek(subkey)?;
    while let Some((slot, _)) = from_db.as_ref() {
        if !self.is_slot_zero_valued(slot) {
            break
        }
        from_db = self.cursor.next()?;
    }

    // Pick the lower of the two candidates.
    Ok(Self::compare_entries(in_memory, from_db))
}
/// Find the storage entry that comes right after the current cursor position, given the last
/// slot key that was returned.
fn next_inner(&mut self, last_slot: B256) -> Result<Option<(B256, U256)>, DatabaseError> {
    // Advance the in-memory cursor, if there is one, past the last returned slot.
    let in_memory = match self.post_state_cursor.as_mut() {
        Some(cursor) => cursor.first_after(&last_slot),
        None => None,
    };

    // A wiped database storage contributes nothing; the post state entry is the answer.
    if self.storage_wiped {
        return Ok(in_memory)
    }

    // Re-position the database cursor at the last returned slot, then step over the slot that
    // was already returned and over any slot that the post state zeroed out.
    let mut from_db = self.cursor.seek(last_slot)?;
    loop {
        let must_skip = match &from_db {
            Some((slot, _)) => *slot == last_slot || self.is_slot_zero_valued(slot),
            None => false,
        };
        if !must_skip {
            break
        }
        from_db = self.cursor.next()?;
    }

    // Pick the lower of the two candidates.
    Ok(Self::compare_entries(in_memory, from_db))
}
/// Return the storage entry with the lowest hashed storage key (hashed slot).
///
/// Given the next post state and database entries, return the smallest of the two.
/// If the storage keys are the same, the post state entry is given precedence.
fn next_slot(
post_state_item: Option<&(B256, U256)>,
fn compare_entries(
post_state_item: Option<(B256, U256)>,
db_item: Option<(B256, U256)>,
) -> Option<(B256, U256)> {
match (post_state_item, db_item) {
if let Some((post_state_entry, db_entry)) = post_state_item.zip(db_item) {
// If both are not empty, return the smallest of the two
// Post state is given precedence if keys are equal
(Some((post_state_slot, post_state_value)), Some((db_slot, db_value))) => {
if post_state_slot <= &db_slot {
Some((*post_state_slot, *post_state_value))
} else {
Some((db_slot, db_value))
}
}
Some(if post_state_entry.0 <= db_entry.0 { post_state_entry } else { db_entry })
} else {
// Return either non-empty entry
_ => db_item.or_else(|| post_state_item.copied()),
db_item.or(post_state_item)
}
}
}
@ -252,97 +282,24 @@ where
type Value = U256;
/// Seek the next account storage entry for a given hashed key pair.
fn seek(
&mut self,
subkey: B256,
) -> Result<Option<(B256, Self::Value)>, reth_db::DatabaseError> {
// Attempt to find the account's storage in post state.
let mut post_state_entry = None;
if let Some(storage) = self.post_state_storage {
post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index);
while post_state_entry.map(|(slot, _)| slot < &subkey).unwrap_or_default() {
self.post_state_storage_index += 1;
post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index);
}
}
// It's an exact match, return the storage slot from post state without looking up in
// the database.
if let Some((slot, value)) = post_state_entry {
if slot == &subkey {
self.last_slot = Some(*slot);
return Ok(Some((*slot, *value)))
}
}
// It's not an exact match, reposition to the first greater or equal account.
let db_entry = if self.is_db_storage_wiped() {
None
} else {
let mut db_entry = self.cursor.seek(subkey)?;
while db_entry
.as_ref()
.map(|entry| self.is_slot_zero_valued(&entry.0))
.unwrap_or_default()
{
db_entry = self.cursor.next()?;
}
db_entry
};
// Compare two entries and return the lowest.
let result = Self::next_slot(post_state_entry, db_entry);
self.last_slot = result.as_ref().map(|entry| entry.0);
Ok(result)
fn seek(&mut self, subkey: B256) -> Result<Option<(B256, Self::Value)>, DatabaseError> {
    // Find the closest storage entry at or after `subkey`.
    let found = self.seek_inner(subkey)?;
    // Memoize the returned slot as the cursor position; it is reset when nothing was found.
    self.last_slot = found.as_ref().map(|(slot, _)| *slot);
    Ok(found)
}
/// Return the next account storage entry for the current account key.
///
/// # Panics
///
/// If the account key is not set. [`HashedCursor::seek`] must be called first in order to
/// position the cursor.
fn next(&mut self) -> Result<Option<(B256, Self::Value)>, reth_db::DatabaseError> {
let last_slot = match self.last_slot.as_ref() {
Some(slot) => slot,
None => return Ok(None), // no previous entry was found
};
let db_entry = if self.is_db_storage_wiped() {
None
} else {
// If post state was given precedence, move the cursor forward.
let mut db_entry = self.cursor.seek(*last_slot)?;
// If the entry was already returned or is zero-values, move to the next.
while db_entry
.as_ref()
.map(|entry| &entry.0 == last_slot || self.is_slot_zero_valued(&entry.0))
.unwrap_or_default()
{
db_entry = self.cursor.next()?;
fn next(&mut self) -> Result<Option<(B256, Self::Value)>, DatabaseError> {
let next = match self.last_slot {
Some(last_slot) => {
let entry = self.next_inner(last_slot)?;
self.last_slot = entry.as_ref().map(|entry| entry.0);
entry
}
db_entry
// no previous entry was found
None => None,
};
// Attempt to find the account's storage in post state.
let mut post_state_entry = None;
if let Some(storage) = self.post_state_storage {
post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index);
while post_state_entry.map(|(slot, _)| slot <= last_slot).unwrap_or_default() {
self.post_state_storage_index += 1;
post_state_entry = storage.non_zero_valued_slots.get(self.post_state_storage_index);
}
}
// Compare two entries and return the lowest.
let result = Self::next_slot(post_state_entry, db_entry);
self.last_slot = result.as_ref().map(|entry| entry.0);
Ok(result)
Ok(next)
}
}
@ -354,13 +311,13 @@ where
///
/// This function should be called before attempting to call [`HashedCursor::seek`] or
/// [`HashedCursor::next`].
fn is_storage_empty(&mut self) -> Result<bool, reth_db::DatabaseError> {
let is_empty = match self.post_state_storage {
Some(storage) => {
fn is_storage_empty(&mut self) -> Result<bool, DatabaseError> {
let is_empty = match &self.post_state_cursor {
Some(cursor) => {
// If the storage has been wiped at any point
storage.wiped &&
self.storage_wiped &&
// and the current storage does not contain any non-zero values
storage.non_zero_valued_slots.is_empty()
cursor.is_empty()
}
None => self.cursor.is_storage_empty()?,
};

View File

@ -17,6 +17,9 @@
/// The container indicates when the trie has been modified.
pub mod prefix_set;
/// The implementation of forward-only in-memory cursor.
pub mod forward_cursor;
/// The cursor implementations for navigating account and storage tries.
pub mod trie_cursor;