From 3eddaf31d0bb13721ca321c36fc074697af0526e Mon Sep 17 00:00:00 2001 From: Abner Zheng Date: Thu, 23 May 2024 05:47:20 +0800 Subject: [PATCH] feat: implement table range checksums for reth db checksum (#7623) Co-authored-by: Dan Cline <6798349+Rjected@users.noreply.github.com> --- bin/reth/src/commands/db/checksum.rs | 79 +++++++++++++++++++++++++--- bin/reth/src/commands/db/get.rs | 4 +- 2 files changed, 74 insertions(+), 9 deletions(-) diff --git a/bin/reth/src/commands/db/checksum.rs b/bin/reth/src/commands/db/checksum.rs index 689b6ca5a..9562c9839 100644 --- a/bin/reth/src/commands/db/checksum.rs +++ b/bin/reth/src/commands/db/checksum.rs @@ -1,12 +1,15 @@ -use crate::utils::DbTool; -use ahash::AHasher; +use crate::{ + commands::db::get::{maybe_json_value_parser, table_key}, + utils::DbTool, +}; +use ahash::RandomState; use clap::Parser; use reth_db::{ cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, DatabaseEnv, RawKey, RawTable, RawValue, TableViewer, Tables, }; use std::{ - hash::Hasher, + hash::{BuildHasher, Hasher}, time::{Duration, Instant}, }; use tracing::{info, warn}; @@ -16,35 +19,81 @@ use tracing::{info, warn}; pub struct Command { /// The table name table: Tables, + + /// The start of the range to checksum. + #[arg(long, value_parser = maybe_json_value_parser)] + start_key: Option, + + /// The end of the range to checksum. + #[arg(long, value_parser = maybe_json_value_parser)] + end_key: Option, + + /// The maximum number of records that are queried and used to compute the + /// checksum. + #[arg(long)] + limit: Option, } impl Command { /// Execute `db checksum` command pub fn execute(self, tool: &DbTool) -> eyre::Result<()> { warn!("This command should be run without the node running!"); - self.table.view(&ChecksumViewer { tool }) + self.table.view(&ChecksumViewer { + tool, + start_key: self.start_key, + end_key: self.end_key, + limit: self.limit, + }) } } pub(crate) struct ChecksumViewer<'a, DB: Database> { tool: &'a DbTool, + start_key: Option, + end_key: Option, + limit: Option, } impl ChecksumViewer<'_, DB> { pub(crate) fn new(tool: &'_ DbTool) -> ChecksumViewer<'_, DB> { - ChecksumViewer { tool } + ChecksumViewer { tool, start_key: None, end_key: None, limit: None } } pub(crate) fn get_checksum(&self) -> Result<(u64, Duration), eyre::Report> { let provider = self.tool.provider_factory.provider()?.disable_long_read_transaction_safety(); let tx = provider.tx_ref(); + info!( + "Start computing checksum, start={:?}, end={:?}, limit={:?}", + self.start_key, self.end_key, self.limit + ); let mut cursor = tx.cursor_read::>()?; - let walker = cursor.walk(None)?; + let walker = match (self.start_key.as_deref(), self.end_key.as_deref()) { + (Some(start), Some(end)) => { + let start_key = table_key::(start).map(RawKey::::new)?; + let end_key = table_key::(end).map(RawKey::::new)?; + cursor.walk_range(start_key..=end_key)? + } + (None, Some(end)) => { + let end_key = table_key::(end).map(RawKey::::new)?; + + cursor.walk_range(..=end_key)? + } + (Some(start), None) => { + let start_key = table_key::(start).map(RawKey::::new)?; + cursor.walk_range(start_key..)? + } + (None, None) => cursor.walk_range(..)?, + }; let start_time = Instant::now(); - let mut hasher = AHasher::default(); + let mut hasher = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); + let mut total = 0; + + let limit = self.limit.unwrap_or(usize::MAX); + let mut enumerate_start_key = None; + let mut enumerate_end_key = None; for (index, entry) in walker.enumerate() { let (k, v): (RawKey, RawValue) = entry?; @@ -54,6 +103,22 @@ impl ChecksumViewer<'_, DB> { hasher.write(k.raw_key()); hasher.write(v.raw_value()); + + if enumerate_start_key.is_none() { + enumerate_start_key = Some(k.clone()); + } + enumerate_end_key = Some(k); + + total = index + 1; + if total >= limit { + break + } + } + + info!("Hashed {total} entries."); + if let (Some(s), Some(e)) = (enumerate_start_key, enumerate_end_key) { + info!("start-key: {}", serde_json::to_string(&s.key()?).unwrap_or_default()); + info!("end-key: {}", serde_json::to_string(&e.key()?).unwrap_or_default()); } let checksum = hasher.finish(); diff --git a/bin/reth/src/commands/db/get.rs b/bin/reth/src/commands/db/get.rs index 80e3ae393..f1f6b963c 100644 --- a/bin/reth/src/commands/db/get.rs +++ b/bin/reth/src/commands/db/get.rs @@ -125,7 +125,7 @@ impl Command { } /// Get an instance of key for given table -fn table_key(key: &str) -> Result { +pub(crate) fn table_key(key: &str) -> Result { serde_json::from_str::(key).map_err(|e| eyre::eyre!(e)) } @@ -188,7 +188,7 @@ impl TableViewer<()> for GetValueViewer<'_, DB> { } /// Map the user input value to json -fn maybe_json_value_parser(value: &str) -> Result { +pub(crate) fn maybe_json_value_parser(value: &str) -> Result { if serde_json::from_str::(value).is_ok() { Ok(value.to_string()) } else {