feat: add reth db snapshot transactions | receipts commands (#5007)

Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com>
This commit is contained in:
joshieDo
2023-10-26 13:01:29 +01:00
committed by GitHub
parent c1a6e42d19
commit 0116b80414
23 changed files with 924 additions and 179 deletions

View File

@ -4,7 +4,7 @@ use reth_primitives::{
ChainSpec, SnapshotSegment,
};
use reth_provider::{DatabaseProviderRO, ProviderFactory};
use std::{sync::Arc, time::Instant};
use std::{fmt::Debug, sync::Arc, time::Instant};
#[derive(Debug)]
pub(crate) enum BenchKind {
@ -14,7 +14,7 @@ pub(crate) enum BenchKind {
RandomHash,
}
pub(crate) fn bench<F1, F2>(
pub(crate) fn bench<F1, F2, R>(
bench_kind: BenchKind,
db: (DatabaseEnvRO, Arc<ChainSpec>),
segment: SnapshotSegment,
@ -24,28 +24,34 @@ pub(crate) fn bench<F1, F2>(
database_method: F2,
) -> eyre::Result<()>
where
F1: FnMut() -> eyre::Result<()>,
F2: Fn(DatabaseProviderRO<'_, DatabaseEnvRO>) -> eyre::Result<()>,
F1: FnMut() -> eyre::Result<R>,
F2: Fn(DatabaseProviderRO<'_, DatabaseEnvRO>) -> eyre::Result<R>,
R: Debug + PartialEq,
{
let (db, chain) = db;
println!();
println!("############");
println!("## [{segment:?}] [{compression:?}] [{filters:?}] [{bench_kind:?}]");
{
let snap_result = {
let start = Instant::now();
snapshot_method()?;
let result = snapshot_method()?;
let end = start.elapsed().as_micros();
println!("# snapshot {bench_kind:?} | {end} μs");
}
{
result
};
let db_result = {
let factory = ProviderFactory::new(db, chain);
let provider = factory.provider()?;
let start = Instant::now();
database_method(provider)?;
let result = database_method(provider)?;
let end = start.elapsed().as_micros();
println!("# database {bench_kind:?} | {end} μs");
}
result
};
assert_eq!(snap_result, db_result);
Ok(())
}

View File

@ -2,23 +2,22 @@ use super::{
bench::{bench, BenchKind},
Command,
};
use crate::utils::DbTool;
use rand::{seq::SliceRandom, Rng};
use reth_db::{database::Database, open_db_read_only, table::Decompress, DatabaseEnvRO};
use reth_db::{database::Database, open_db_read_only, table::Decompress};
use reth_interfaces::db::LogLevel;
use reth_nippy_jar::NippyJar;
use reth_primitives::{
snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction},
ChainSpec, Header, SnapshotSegment,
};
use reth_provider::{HeaderProvider, ProviderError, ProviderFactory};
use reth_provider::{DatabaseProviderRO, HeaderProvider, ProviderError, ProviderFactory};
use reth_snapshot::segments::{get_snapshot_segment_file_name, Headers, Segment};
use std::{path::Path, sync::Arc};
impl Command {
pub(crate) fn generate_headers_snapshot(
pub(crate) fn generate_headers_snapshot<DB: Database>(
&self,
tool: &DbTool<'_, DatabaseEnvRO>,
provider: &DatabaseProviderRO<'_, DB>,
compression: Compression,
inclusion_filter: InclusionFilter,
phf: PerfectHashingFunction,
@ -31,7 +30,7 @@ impl Command {
Filters::WithoutFilters
},
);
segment.snapshot(&tool.db.tx()?, self.from..=(self.from + self.block_interval - 1))?;
segment.snapshot::<DB>(provider, self.from..=(self.from + self.block_interval - 1))?;
Ok(())
}
@ -56,7 +55,7 @@ impl Command {
let mut row_indexes = range.clone().collect::<Vec<_>>();
let mut rng = rand::thread_rng();
let mut dictionaries = None;
let mut jar = NippyJar::load_without_header(&get_snapshot_segment_file_name(
let mut jar = NippyJar::load(&get_snapshot_segment_file_name(
SnapshotSegment::Headers,
filters,
compression,
@ -114,18 +113,16 @@ impl Command {
filters,
compression,
|| {
Header::decompress(
Ok(Header::decompress(
cursor
.row_by_number_with_cols::<0b01, 2>((num - self.from) as usize)?
.ok_or(ProviderError::HeaderNotFound((num as u64).into()))?[0],
)?;
Ok(())
)?)
},
|provider| {
provider
Ok(provider
.header_by_number(num as u64)?
.ok_or(ProviderError::HeaderNotFound((num as u64).into()))?;
Ok(())
.ok_or(ProviderError::HeaderNotFound((num as u64).into()))?)
},
)?;
}
@ -154,13 +151,12 @@ impl Command {
// Might be a false positive, so in the real world we have to validate it
assert_eq!(header.hash_slow(), header_hash);
Ok(())
Ok(header)
},
|provider| {
provider
Ok(provider
.header(&header_hash)?
.ok_or(ProviderError::HeaderNotFound(header_hash.into()))?;
Ok(())
.ok_or(ProviderError::HeaderNotFound(header_hash.into()))?)
},
)?;
}

View File

@ -1,44 +1,26 @@
use crate::{db::genesis_value_parser, utils::DbTool};
use clap::Parser;
use itertools::Itertools;
use reth_db::open_db_read_only;
use reth_db::{open_db_read_only, DatabaseEnvRO};
use reth_interfaces::db::LogLevel;
use reth_nippy_jar::{
compression::{DecoderDictionary, Decompressor},
NippyJar,
};
use reth_primitives::{
snapshot::{Compression, InclusionFilter, PerfectHashingFunction},
snapshot::{Compression, InclusionFilter, PerfectHashingFunction, SegmentHeader},
BlockNumber, ChainSpec, SnapshotSegment,
};
use reth_provider::providers::SnapshotProvider;
use reth_provider::{providers::SnapshotProvider, ProviderFactory};
use std::{path::Path, sync::Arc};
mod bench;
mod headers;
mod receipts;
mod transactions;
#[derive(Parser, Debug)]
/// Arguments for the `reth db snapshot` command.
pub struct Command {
/// The chain this node is running.
///
/// Possible values are either a built-in chain or the path to a chain specification file.
///
/// Built-in chains:
/// - mainnet
/// - goerli
/// - sepolia
/// - holesky
#[arg(
long,
value_name = "CHAIN_OR_PATH",
verbatim_doc_comment,
default_value = "mainnet",
value_parser = genesis_value_parser,
global = true,
)]
chain: Arc<ChainSpec>,
/// Snapshot segments to generate.
segments: Vec<SnapshotSegment>,
@ -87,19 +69,33 @@ impl Command {
{
let db = open_db_read_only(db_path, None)?;
let tool = DbTool::new(&db, chain.clone())?;
let factory = ProviderFactory::new(db, chain.clone());
let provider = factory.provider()?;
if !self.only_bench {
for ((mode, compression), phf) in all_combinations.clone() {
match mode {
SnapshotSegment::Headers => self.generate_headers_snapshot(
&tool,
*compression,
InclusionFilter::Cuckoo,
*phf,
)?,
SnapshotSegment::Transactions => todo!(),
SnapshotSegment::Receipts => todo!(),
SnapshotSegment::Headers => self
.generate_headers_snapshot::<DatabaseEnvRO>(
&provider,
*compression,
InclusionFilter::Cuckoo,
*phf,
)?,
SnapshotSegment::Transactions => self
.generate_transactions_snapshot::<DatabaseEnvRO>(
&provider,
*compression,
InclusionFilter::Cuckoo,
*phf,
)?,
SnapshotSegment::Receipts => self
.generate_receipts_snapshot::<DatabaseEnvRO>(
&provider,
*compression,
InclusionFilter::Cuckoo,
*phf,
)?,
}
}
}
@ -116,8 +112,22 @@ impl Command {
InclusionFilter::Cuckoo,
*phf,
)?,
SnapshotSegment::Transactions => todo!(),
SnapshotSegment::Receipts => todo!(),
SnapshotSegment::Transactions => self.bench_transactions_snapshot(
db_path,
log_level,
chain.clone(),
*compression,
InclusionFilter::Cuckoo,
*phf,
)?,
SnapshotSegment::Receipts => self.bench_receipts_snapshot(
db_path,
log_level,
chain.clone(),
*compression,
InclusionFilter::Cuckoo,
*phf,
)?,
}
}
}
@ -129,7 +139,7 @@ impl Command {
/// [`DecoderDictionary`] and [`Decompressor`] if necessary.
fn prepare_jar_provider<'a>(
&self,
jar: &'a mut NippyJar,
jar: &'a mut NippyJar<SegmentHeader>,
dictionaries: &'a mut Option<Vec<DecoderDictionary<'_>>>,
) -> eyre::Result<(SnapshotProvider<'a>, Vec<Decompressor<'a>>)> {
let mut decompressors: Vec<Decompressor<'_>> = vec![];
@ -140,6 +150,6 @@ impl Command {
}
}
Ok((SnapshotProvider { jar: &*jar, jar_start_block: self.from }, decompressors))
Ok((SnapshotProvider { jar: &*jar }, decompressors))
}
}

View File

@ -0,0 +1,176 @@
use super::{
bench::{bench, BenchKind},
Command, Compression, PerfectHashingFunction,
};
use rand::{seq::SliceRandom, Rng};
use reth_db::{database::Database, open_db_read_only, table::Decompress};
use reth_interfaces::db::LogLevel;
use reth_nippy_jar::NippyJar;
use reth_primitives::{
snapshot::{Filters, InclusionFilter},
ChainSpec, Receipt, SnapshotSegment,
};
use reth_provider::{
DatabaseProviderRO, ProviderError, ProviderFactory, ReceiptProvider, TransactionsProvider,
TransactionsProviderExt,
};
use reth_snapshot::{
segments,
segments::{get_snapshot_segment_file_name, Segment},
};
use std::{path::Path, sync::Arc};
impl Command {
    /// Generates a `Receipts` snapshot for the configured block interval
    /// (`self.from ..= self.from + self.block_interval - 1`) using the given
    /// read-only database provider.
    ///
    /// Inclusion filters / perfect hashing are only applied when
    /// `self.with_filters` is set; otherwise the segment is built without filters.
    pub(crate) fn generate_receipts_snapshot<DB: Database>(
        &self,
        provider: &DatabaseProviderRO<'_, DB>,
        compression: Compression,
        inclusion_filter: InclusionFilter,
        phf: PerfectHashingFunction,
    ) -> eyre::Result<()> {
        let segment = segments::Receipts::new(
            compression,
            if self.with_filters {
                Filters::WithFilters(inclusion_filter, phf)
            } else {
                Filters::WithoutFilters
            },
        );
        // Snapshot the configured block interval (inclusive on both ends).
        segment.snapshot::<DB>(provider, self.from..=(self.from + self.block_interval - 1))?;
        Ok(())
    }

    /// Benchmarks reading receipts from an existing snapshot file against
    /// reading the same receipts from the database, across four access patterns:
    /// a sequential walk, a shuffled (random-order) walk, a single random
    /// receipt by transaction number, and a single random receipt by
    /// transaction hash.
    ///
    /// Expects the snapshot file for this segment/filters/compression/block-range
    /// combination to already exist on disk (see [`Self::generate_receipts_snapshot`]).
    pub(crate) fn bench_receipts_snapshot(
        &self,
        db_path: &Path,
        log_level: Option<LogLevel>,
        chain: Arc<ChainSpec>,
        compression: Compression,
        inclusion_filter: InclusionFilter,
        phf: PerfectHashingFunction,
    ) -> eyre::Result<()> {
        let filters = if self.with_filters {
            Filters::WithFilters(inclusion_filter, phf)
        } else {
            Filters::WithoutFilters
        };

        let block_range = self.from..=(self.from + self.block_interval - 1);

        let mut rng = rand::thread_rng();
        let mut dictionaries = None;
        // Load the snapshot jar whose file name encodes segment, filters,
        // compression and block range.
        let mut jar = NippyJar::load(&get_snapshot_segment_file_name(
            SnapshotSegment::Receipts,
            filters,
            compression,
            &block_range,
        ))?;

        // Receipts are addressed by transaction number, so translate the block
        // range into the transaction-number range it covers.
        let tx_range = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone())
            .provider()?
            .transaction_range_by_block_range(block_range)?;
        let mut row_indexes = tx_range.clone().collect::<Vec<_>>();

        let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?;
        // Use a dictionary-aware cursor only if the jar produced decompressors.
        let mut cursor = if !decompressors.is_empty() {
            provider.cursor_with_decompressors(decompressors)
        } else {
            provider.cursor()
        };

        // First pass walks rows in order; second pass walks the same rows after
        // shuffling, to measure random access over the full set.
        for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] {
            bench(
                bench_kind,
                (open_db_read_only(db_path, log_level)?, chain.clone()),
                SnapshotSegment::Receipts,
                filters,
                compression,
                || {
                    for num in row_indexes.iter() {
                        // Snapshot rows are indexed by offset from the start of
                        // the transaction range.
                        Receipt::decompress(
                            cursor
                                .row_by_number_with_cols::<0b1, 1>(
                                    (num - tx_range.start()) as usize,
                                )?
                                .ok_or(ProviderError::ReceiptNotFound((*num).into()))?[0],
                        )?;
                        // TODO: replace with below when eventually SnapshotProvider re-uses cursor
                        // provider.receipt(num as
                        // u64)?.ok_or(ProviderError::ReceiptNotFound((*num).into()))?;
                    }
                    Ok(())
                },
                |provider| {
                    for num in row_indexes.iter() {
                        provider
                            .receipt(*num)?
                            .ok_or(ProviderError::ReceiptNotFound((*num).into()))?;
                    }
                    Ok(())
                },
            )?;

            // For random walk
            row_indexes.shuffle(&mut rng);
        }

        // BENCHMARK QUERYING A RANDOM RECEIPT BY NUMBER
        {
            let num = row_indexes[rng.gen_range(0..row_indexes.len())];
            bench(
                BenchKind::RandomOne,
                (open_db_read_only(db_path, log_level)?, chain.clone()),
                SnapshotSegment::Receipts,
                filters,
                compression,
                || {
                    Ok(Receipt::decompress(
                        cursor
                            .row_by_number_with_cols::<0b1, 1>((num - tx_range.start()) as usize)?
                            .ok_or(ProviderError::ReceiptNotFound((num as u64).into()))?[0],
                    )?)
                },
                |provider| {
                    Ok(provider
                        .receipt(num as u64)?
                        .ok_or(ProviderError::ReceiptNotFound((num as u64).into()))?)
                },
            )?;
        }

        // BENCHMARK QUERYING A RANDOM RECEIPT BY HASH
        {
            let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64;
            // Resolve the transaction hash for the sampled transaction number.
            // NOTE(review): a missing *transaction* here is reported as
            // `ReceiptNotFound` — consider `TransactionNotFound`; confirm intent.
            let tx_hash =
                ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone())
                    .transaction_by_id(num)?
                    .ok_or(ProviderError::ReceiptNotFound(num.into()))?
                    .hash();

            bench(
                BenchKind::RandomHash,
                (open_db_read_only(db_path, log_level)?, chain.clone()),
                SnapshotSegment::Receipts,
                filters,
                compression,
                || {
                    // Keyed lookup goes through the jar's hash index rather
                    // than a row offset.
                    let receipt = Receipt::decompress(
                        cursor
                            .row_by_key_with_cols::<0b1, 1>(tx_hash.as_slice())?
                            .ok_or(ProviderError::ReceiptNotFound(tx_hash.into()))?[0],
                    )?;
                    Ok(receipt)
                },
                |provider| {
                    Ok(provider
                        .receipt_by_hash(tx_hash)?
                        .ok_or(ProviderError::ReceiptNotFound(tx_hash.into()))?)
                },
            )?;
        }
        Ok(())
    }
}

View File

@ -0,0 +1,179 @@
use super::{
bench::{bench, BenchKind},
Command, Compression, PerfectHashingFunction,
};
use rand::{seq::SliceRandom, Rng};
use reth_db::{database::Database, open_db_read_only, table::Decompress};
use reth_interfaces::db::LogLevel;
use reth_nippy_jar::NippyJar;
use reth_primitives::{
snapshot::{Filters, InclusionFilter},
ChainSpec, SnapshotSegment, TransactionSignedNoHash,
};
use reth_provider::{
DatabaseProviderRO, ProviderError, ProviderFactory, TransactionsProvider,
TransactionsProviderExt,
};
use reth_snapshot::{
segments,
segments::{get_snapshot_segment_file_name, Segment},
};
use std::{path::Path, sync::Arc};
impl Command {
    /// Generates a `Transactions` snapshot for the configured block interval
    /// (`self.from ..= self.from + self.block_interval - 1`) using the given
    /// read-only database provider.
    ///
    /// Inclusion filters / perfect hashing are only applied when
    /// `self.with_filters` is set; otherwise the segment is built without filters.
    pub(crate) fn generate_transactions_snapshot<DB: Database>(
        &self,
        provider: &DatabaseProviderRO<'_, DB>,
        compression: Compression,
        inclusion_filter: InclusionFilter,
        phf: PerfectHashingFunction,
    ) -> eyre::Result<()> {
        let segment = segments::Transactions::new(
            compression,
            if self.with_filters {
                Filters::WithFilters(inclusion_filter, phf)
            } else {
                Filters::WithoutFilters
            },
        );
        // Snapshot the configured block interval (inclusive on both ends).
        segment.snapshot::<DB>(provider, self.from..=(self.from + self.block_interval - 1))?;
        Ok(())
    }

    /// Benchmarks reading transactions from an existing snapshot file against
    /// reading the same transactions from the database, across four access
    /// patterns: a sequential walk, a shuffled (random-order) walk, a single
    /// random transaction by number, and a single random transaction by hash.
    ///
    /// Snapshot reads decompress a [`TransactionSignedNoHash`] and call
    /// `.with_hash()` so both sides produce a hashed transaction.
    pub(crate) fn bench_transactions_snapshot(
        &self,
        db_path: &Path,
        log_level: Option<LogLevel>,
        chain: Arc<ChainSpec>,
        compression: Compression,
        inclusion_filter: InclusionFilter,
        phf: PerfectHashingFunction,
    ) -> eyre::Result<()> {
        let filters = if self.with_filters {
            Filters::WithFilters(inclusion_filter, phf)
        } else {
            Filters::WithoutFilters
        };

        let block_range = self.from..=(self.from + self.block_interval - 1);

        let mut rng = rand::thread_rng();
        let mut dictionaries = None;
        // Load the snapshot jar whose file name encodes segment, filters,
        // compression and block range.
        let mut jar = NippyJar::load(&get_snapshot_segment_file_name(
            SnapshotSegment::Transactions,
            filters,
            compression,
            &block_range,
        ))?;

        // Transactions are addressed by transaction number, so translate the
        // block range into the transaction-number range it covers.
        let tx_range = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone())
            .provider()?
            .transaction_range_by_block_range(block_range)?;
        let mut row_indexes = tx_range.clone().collect::<Vec<_>>();

        let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?;
        // Use a dictionary-aware cursor only if the jar produced decompressors.
        let mut cursor = if !decompressors.is_empty() {
            provider.cursor_with_decompressors(decompressors)
        } else {
            provider.cursor()
        };

        // First pass walks rows in order; second pass walks the same rows after
        // shuffling, to measure random access over the full set.
        for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] {
            bench(
                bench_kind,
                (open_db_read_only(db_path, log_level)?, chain.clone()),
                SnapshotSegment::Transactions,
                filters,
                compression,
                || {
                    for num in row_indexes.iter() {
                        // Snapshot rows are indexed by offset from the start of
                        // the transaction range; the hash is recomputed so the
                        // snapshot side does comparable work to the db side.
                        TransactionSignedNoHash::decompress(
                            cursor
                                .row_by_number_with_cols::<0b1, 1>(
                                    (num - tx_range.start()) as usize,
                                )?
                                .ok_or(ProviderError::TransactionNotFound((*num).into()))?[0],
                        )?
                        .with_hash();
                        // TODO: replace with below when eventually SnapshotProvider re-uses cursor
                        // provider.transaction_by_id(num as
                        // u64)?.ok_or(ProviderError::TransactionNotFound((*num).into()))?;
                    }
                    Ok(())
                },
                |provider| {
                    for num in row_indexes.iter() {
                        provider
                            .transaction_by_id(*num)?
                            .ok_or(ProviderError::TransactionNotFound((*num).into()))?;
                    }
                    Ok(())
                },
            )?;

            // For random walk
            row_indexes.shuffle(&mut rng);
        }

        // BENCHMARK QUERYING A RANDOM TRANSACTION BY NUMBER
        {
            let num = row_indexes[rng.gen_range(0..row_indexes.len())];
            bench(
                BenchKind::RandomOne,
                (open_db_read_only(db_path, log_level)?, chain.clone()),
                SnapshotSegment::Transactions,
                filters,
                compression,
                || {
                    Ok(TransactionSignedNoHash::decompress(
                        cursor
                            .row_by_number_with_cols::<0b1, 1>((num - tx_range.start()) as usize)?
                            .ok_or(ProviderError::TransactionNotFound((num as u64).into()))?[0],
                    )?
                    .with_hash())
                },
                |provider| {
                    Ok(provider
                        .transaction_by_id(num as u64)?
                        .ok_or(ProviderError::TransactionNotFound((num as u64).into()))?)
                },
            )?;
        }

        // BENCHMARK QUERYING A RANDOM TRANSACTION BY HASH
        {
            let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64;
            // Resolve the hash for the sampled transaction number from the db.
            let transaction_hash =
                ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone())
                    .transaction_by_id(num)?
                    .ok_or(ProviderError::TransactionNotFound(num.into()))?
                    .hash();

            bench(
                BenchKind::RandomHash,
                (open_db_read_only(db_path, log_level)?, chain.clone()),
                SnapshotSegment::Transactions,
                filters,
                compression,
                || {
                    // Keyed lookup goes through the jar's hash index rather
                    // than a row offset.
                    let transaction = TransactionSignedNoHash::decompress(
                        cursor
                            .row_by_key_with_cols::<0b1, 1>(transaction_hash.as_slice())?
                            .ok_or(ProviderError::TransactionNotFound(transaction_hash.into()))?[0],
                    )?;

                    // Might be a false positive, so in the real world we have to validate it
                    Ok(transaction.with_hash())
                },
                |provider| {
                    Ok(provider
                        .transaction_by_hash(transaction_hash)?
                        .ok_or(ProviderError::TransactionNotFound(transaction_hash.into()))?)
                },
            )?;
        }
        Ok(())
    }
}