feat: add reth db snapshot <TYPE> command (#4889)
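For orientation, the new subcommand would be invoked along these lines: `reth db snapshot headers --from 0 --block-interval 500000 --compression lz4 --bench` (a hypothetical sketch; `headers` and `lz4` assume clap's default kebab-case rendering of the `Snapshots` and `Compression` value enums defined in this diff).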
bin/reth/Cargo.toml

@@ -50,6 +50,7 @@ reth-discv4 = { path = "../../crates/net/discv4" }
 reth-prune = { path = "../../crates/prune" }
 reth-snapshot = { path = "../../crates/snapshot" }
 reth-trie = { path = "../../crates/trie" }
+reth-nippy-jar = { path = "../../crates/storage/nippy-jar" }

 # crypto
 alloy-rlp.workspace = true
@@ -76,6 +77,7 @@ metrics.workspace = true

 # test vectors generation
 proptest.workspace = true
+rand.workspace = true

 # tui
 comfy-table = "7.0"
@@ -102,6 +104,7 @@ pretty_assertions = "1.3.0"
 humantime = "2.1.0"
 const-str = "0.5.6"
 boyer-moore-magiclen = "0.2.16"
+itertools.workspace = true

 [target.'cfg(not(windows))'.dependencies]
 jemallocator = { version = "0.5.0", optional = true }
bin/reth/src/db/mod.rs

@@ -24,6 +24,7 @@ mod clear;
 mod diff;
 mod get;
 mod list;
+mod snapshots;
 /// DB List TUI
 mod tui;

@@ -85,6 +86,8 @@ pub enum Subcommands {
     },
     /// Deletes all table entries
     Clear(clear::Command),
+    /// Snapshots tables from database
+    Snapshot(snapshots::Command),
     /// Lists current and local database versions
     Version,
     /// Returns the full database path
@@ -210,6 +213,9 @@ impl Command {
                 let db = open_db(&db_path, self.db.log_level)?;
                 command.execute(&db)?;
             }
+            Subcommands::Snapshot(command) => {
+                command.execute(&db_path, self.db.log_level, self.chain.clone())?;
+            }
             Subcommands::Version => {
                 let local_db_version = match get_db_version(&db_path) {
                     Ok(version) => Some(version),
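Note that, unlike its siblings, the `Snapshot` arm above hands over the database path, log level, and chain spec rather than an already-open handle: as the new `snapshots` module below shows, `snapshots::Command::execute` opens its own read-only handles, including a fresh one per benchmark run.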
bin/reth/src/db/snapshots/bench.rs (new file, 48 lines)

@@ -0,0 +1,48 @@
use super::JarConfig;
use reth_db::DatabaseEnvRO;
use reth_primitives::ChainSpec;
use reth_provider::{DatabaseProviderRO, ProviderFactory};
use std::{sync::Arc, time::Instant};

#[derive(Debug)]
pub(crate) enum BenchKind {
    Walk,
    RandomAll,
    RandomOne,
    RandomHash,
}

pub(crate) fn bench<F1, F2>(
    bench_kind: BenchKind,
    db: (DatabaseEnvRO, Arc<ChainSpec>),
    jar_config: JarConfig,
    mut snapshot_method: F1,
    database_method: F2,
) -> eyre::Result<()>
where
    F1: FnMut() -> eyre::Result<()>,
    F2: Fn(DatabaseProviderRO<'_, DatabaseEnvRO>) -> eyre::Result<()>,
{
    let (mode, compression, phf) = jar_config;
    let (db, chain) = db;

    println!();
    println!("############");
    println!("## [{mode:?}] [{compression:?}] [{phf:?}] [{bench_kind:?}]");
    {
        let start = Instant::now();
        snapshot_method()?;
        let end = start.elapsed().as_micros();
        println!("# snapshot {bench_kind:?} | {end} μs");
    }
    {
        let factory = ProviderFactory::new(db, chain);
        let provider = factory.provider()?;
        let start = Instant::now();
        database_method(provider)?;
        let end = start.elapsed().as_micros();
        println!("# database {bench_kind:?} | {end} μs");
    }

    Ok(())
}
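To make the generic signature of `bench` concrete, here is a minimal sketch of a call site, mirroring how `headers.rs` below drives it (it assumes `db_path: &Path` and `chain: Arc<ChainSpec>` are in scope and that the closure bodies do the actual reads):

    // Time one pass over the snapshot against the equivalent database pass.
    bench(
        BenchKind::Walk,
        // Fresh read-only handle plus chain spec for the database side.
        (open_db_read_only(db_path, None)?, chain.clone()),
        (Snapshots::Headers, Compression::Lz4, PerfectHashingFunction::Mphf),
        || {
            // Snapshot side: read the rows through the NippyJar cursor here.
            Ok(())
        },
        |provider| {
            // Database side: read the same rows through `provider` here.
            Ok(())
        },
    )?;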
bin/reth/src/db/snapshots/headers.rs (new file, 192 lines)

@@ -0,0 +1,192 @@
use super::{
    bench::{bench, BenchKind},
    Command, Compression, PerfectHashingFunction, Rows, Snapshots,
};
use crate::utils::DbTool;
use rand::{seq::SliceRandom, Rng};
use reth_db::{
    cursor::DbCursorRO, database::Database, open_db_read_only, snapshot::create_snapshot_T1_T2,
    table::Decompress, tables, transaction::DbTx, DatabaseEnvRO,
};
use reth_interfaces::db::LogLevel;
use reth_nippy_jar::NippyJar;
use reth_primitives::{BlockNumber, ChainSpec, Header};
use reth_provider::{HeaderProvider, ProviderError, ProviderFactory};
use std::{path::Path, sync::Arc};
use tables::*;

impl Command {
    pub(crate) fn generate_headers_snapshot(
        &self,
        tool: &DbTool<'_, DatabaseEnvRO>,
        compression: Compression,
        phf: PerfectHashingFunction,
    ) -> eyre::Result<()> {
        let mut jar = self.prepare_jar(2, (Snapshots::Headers, compression, phf), tool, || {
            // Generates the dataset to train a zstd dictionary if necessary, with the most recent
            // rows (at most 1000).
            let dataset = tool.db.view(|tx| {
                let mut cursor = tx.cursor_read::<reth_db::RawTable<reth_db::Headers>>()?;
                let v1 = cursor
                    .walk_back(Some(RawKey::from((self.from + self.block_interval - 1) as u64)))?
                    .take(self.block_interval.min(1000))
                    .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist"))
                    .collect::<Vec<_>>();
                let mut cursor = tx.cursor_read::<reth_db::RawTable<reth_db::HeaderTD>>()?;
                let v2 = cursor
                    .walk_back(Some(RawKey::from((self.from + self.block_interval - 1) as u64)))?
                    .take(self.block_interval.min(1000))
                    .map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist"))
                    .collect::<Vec<_>>();
                Ok::<Rows, eyre::Error>(vec![v1, v2])
            })??;
            Ok(dataset)
        })?;

        tool.db.view(|tx| {
            // Hacky type inference. TODO fix
            let mut none_vec = Some(vec![vec![vec![0u8]].into_iter()]);
            let _ = none_vec.take();

            // Generate list of hashes for filters & PHF
            let mut cursor = tx.cursor_read::<RawTable<CanonicalHeaders>>()?;
            let mut hashes = None;
            if self.with_filters {
                hashes = Some(
                    cursor
                        .walk(Some(RawKey::from(self.from as u64)))?
                        .take(self.block_interval)
                        .map(|row| {
                            row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())
                        }),
                );
            }

            create_snapshot_T1_T2::<Headers, HeaderTD, BlockNumber>(
                tx,
                self.from as u64..=(self.from as u64 + self.block_interval as u64),
                None,
                // We already prepared the dictionary beforehand
                none_vec,
                hashes,
                self.block_interval,
                &mut jar,
            )
        })??;

        Ok(())
    }

    pub(crate) fn bench_headers_snapshot(
        &self,
        db_path: &Path,
        log_level: Option<LogLevel>,
        chain: Arc<ChainSpec>,
        compression: Compression,
        phf: PerfectHashingFunction,
    ) -> eyre::Result<()> {
        let mode = Snapshots::Headers;
        let jar_config = (mode, compression, phf);
        let mut row_indexes = (self.from..(self.from + self.block_interval)).collect::<Vec<_>>();
        let mut rng = rand::thread_rng();
        let mut dictionaries = None;
        let mut jar = NippyJar::load_without_header(&self.get_file_path(jar_config))?;

        let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?;
        let mut cursor = if !decompressors.is_empty() {
            provider.cursor_with_decompressors(decompressors)
        } else {
            provider.cursor()
        };

        for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] {
            bench(
                bench_kind,
                (open_db_read_only(db_path, log_level)?, chain.clone()),
                jar_config,
                || {
                    for num in row_indexes.iter() {
                        Header::decompress(
                            cursor
                                .row_by_number_with_cols::<0b01, 2>(num - self.from)?
                                .ok_or(ProviderError::HeaderNotFound((*num as u64).into()))?[0],
                        )?;
                        // TODO: replace with below when eventually SnapshotProvider re-uses cursor
                        // provider.header_by_number(num as
                        // u64)?.ok_or(ProviderError::HeaderNotFound((*num as u64).into()))?;
                    }
                    Ok(())
                },
                |provider| {
                    for num in row_indexes.iter() {
                        provider
                            .header_by_number(*num as u64)?
                            .ok_or(ProviderError::HeaderNotFound((*num as u64).into()))?;
                    }
                    Ok(())
                },
            )?;

            // For random walk
            row_indexes.shuffle(&mut rng);
        }

        // BENCHMARK QUERYING A RANDOM HEADER BY NUMBER
        {
            let num = row_indexes[rng.gen_range(0..row_indexes.len())];
            bench(
                BenchKind::RandomOne,
                (open_db_read_only(db_path, log_level)?, chain.clone()),
                jar_config,
                || {
                    Header::decompress(
                        cursor
                            .row_by_number_with_cols::<0b01, 2>((num - self.from) as usize)?
                            .ok_or(ProviderError::HeaderNotFound((num as u64).into()))?[0],
                    )?;
                    Ok(())
                },
                |provider| {
                    provider
                        .header_by_number(num as u64)?
                        .ok_or(ProviderError::HeaderNotFound((num as u64).into()))?;
                    Ok(())
                },
            )?;
        }

        // BENCHMARK QUERYING A RANDOM HEADER BY HASH
        {
            let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64;
            let header_hash =
                ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone())
                    .header_by_number(num)?
                    .ok_or(ProviderError::HeaderNotFound(num.into()))?
                    .hash_slow();

            bench(
                BenchKind::RandomHash,
                (open_db_read_only(db_path, log_level)?, chain.clone()),
                jar_config,
                || {
                    let header = Header::decompress(
                        cursor
                            .row_by_key_with_cols::<0b01, 2>(header_hash.as_slice())?
                            .ok_or(ProviderError::HeaderNotFound(header_hash.into()))?[0],
                    )?;

                    // Might be a false positive, so in the real world we have to validate it
                    assert!(header.hash_slow() == header_hash);
                    Ok(())
                },
                |provider| {
                    provider
                        .header(&header_hash)?
                        .ok_or(ProviderError::HeaderNotFound(header_hash.into()))?;
                    Ok(())
                },
            )?;
        }
        Ok(())
    }
}
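A detail worth flagging in the cursor reads above: the const generics in `row_by_number_with_cols::<0b01, 2>` read as "2 columns per row, bitmask 0b01". Inferring from the usage in this file (not from documented API behavior), the jar stores each row as (Header, HeaderTD) and the mask selects only the first column, which is why the Header bytes come back at index `[0]`:

    // Sketch of the selection above: mask 0b01 of 2 columns keeps only
    // the Header column, so the returned row has a single entry.
    let row = cursor
        .row_by_number_with_cols::<0b01, 2>(num - self.from)?
        .ok_or(ProviderError::HeaderNotFound((*num as u64).into()))?;
    let header = Header::decompress(row[0])?;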
bin/reth/src/db/snapshots/mod.rs (new file, 216 lines)

@@ -0,0 +1,216 @@
use crate::utils::DbTool;
use clap::{clap_derive::ValueEnum, Parser};
use eyre::WrapErr;
use itertools::Itertools;
use reth_db::{database::Database, open_db_read_only, table::Table, tables, DatabaseEnvRO};
use reth_interfaces::db::LogLevel;
use reth_nippy_jar::{
    compression::{DecoderDictionary, Decompressor},
    NippyJar,
};
use reth_primitives::ChainSpec;
use reth_provider::providers::SnapshotProvider;
use std::{
    path::{Path, PathBuf},
    sync::Arc,
};

mod bench;
mod headers;

pub(crate) type Rows = Vec<Vec<Vec<u8>>>;
pub(crate) type JarConfig = (Snapshots, Compression, PerfectHashingFunction);

#[derive(Parser, Debug)]
/// Arguments for the `reth db snapshot` command.
pub struct Command {
    /// Snapshot categories to generate.
    modes: Vec<Snapshots>,

    /// Starting block for the snapshot.
    #[arg(long, short, default_value = "0")]
    from: usize,

    /// Number of blocks in the snapshot.
    #[arg(long, short, default_value = "500000")]
    block_interval: usize,

    /// Flag to enable database-to-snapshot benchmarking.
    #[arg(long, default_value = "false")]
    bench: bool,

    /// Flag to skip snapshot creation and only run benchmarks on existing snapshots.
    #[arg(long, default_value = "false")]
    only_bench: bool,

    /// Compression algorithms to use.
    #[arg(long, short, value_delimiter = ',', default_value = "lz4")]
    compression: Vec<Compression>,

    /// Flag to enable inclusion list filters and PHFs.
    #[arg(long, default_value = "true")]
    with_filters: bool,

    /// Specifies the perfect hashing function to use.
    #[arg(long, value_delimiter = ',', default_value_if("with_filters", "true", "mphf"))]
    phf: Vec<PerfectHashingFunction>,
}

impl Command {
    /// Execute `db snapshot` command
    pub fn execute(
        self,
        db_path: &Path,
        log_level: Option<LogLevel>,
        chain: Arc<ChainSpec>,
    ) -> eyre::Result<()> {
        let all_combinations = self
            .modes
            .iter()
            .cartesian_product(self.compression.iter())
            .cartesian_product(self.phf.iter());

        {
            let db = open_db_read_only(db_path, None)?;
            let tool = DbTool::new(&db, chain.clone())?;

            if !self.only_bench {
                for ((mode, compression), phf) in all_combinations.clone() {
                    match mode {
                        Snapshots::Headers => {
                            self.generate_headers_snapshot(&tool, *compression, *phf)?
                        }
                        Snapshots::Transactions => todo!(),
                        Snapshots::Receipts => todo!(),
                    }
                }
            }
        }

        if self.only_bench || self.bench {
            for ((mode, compression), phf) in all_combinations {
                match mode {
                    Snapshots::Headers => self.bench_headers_snapshot(
                        db_path,
                        log_level,
                        chain.clone(),
                        *compression,
                        *phf,
                    )?,
                    Snapshots::Transactions => todo!(),
                    Snapshots::Receipts => todo!(),
                }
            }
        }

        Ok(())
    }

    /// Returns a [`SnapshotProvider`] of the provided [`NippyJar`], alongside a list of
    /// [`DecoderDictionary`] and [`Decompressor`] if necessary.
    fn prepare_jar_provider<'a>(
        &self,
        jar: &'a mut NippyJar,
        dictionaries: &'a mut Option<Vec<DecoderDictionary<'_>>>,
    ) -> eyre::Result<(SnapshotProvider<'a>, Vec<Decompressor<'a>>)> {
        let mut decompressors: Vec<Decompressor<'_>> = vec![];
        if let Some(reth_nippy_jar::compression::Compressors::Zstd(zstd)) = jar.compressor_mut() {
            if zstd.use_dict {
                *dictionaries = zstd.generate_decompress_dictionaries();
                decompressors = zstd.generate_decompressors(dictionaries.as_ref().expect("qed"))?;
            }
        }

        Ok((SnapshotProvider { jar: &*jar, jar_start_block: self.from as u64 }, decompressors))
    }

    /// Returns a [`NippyJar`] according to the desired configuration.
    fn prepare_jar<F: Fn() -> eyre::Result<Rows>>(
        &self,
        num_columns: usize,
        jar_config: JarConfig,
        tool: &DbTool<'_, DatabaseEnvRO>,
        prepare_compression: F,
    ) -> eyre::Result<NippyJar> {
        let (mode, compression, phf) = jar_config;
        let snap_file = self.get_file_path(jar_config);
        let table_name = match mode {
            Snapshots::Headers => tables::Headers::NAME,
            Snapshots::Transactions | Snapshots::Receipts => tables::Transactions::NAME,
        };

        let total_rows = tool.db.view(|tx| {
            let table_db = tx.inner.open_db(Some(table_name)).wrap_err("Could not open db.")?;
            let stats = tx
                .inner
                .db_stat(&table_db)
                .wrap_err(format!("Could not find table: {}", table_name))?;

            Ok::<usize, eyre::Error>((stats.entries() - self.from).min(self.block_interval))
        })??;

        assert!(
            total_rows >= self.block_interval,
            "Not enough rows on database {} < {}.",
            total_rows,
            self.block_interval
        );

        let mut nippy_jar = NippyJar::new_without_header(num_columns, snap_file.as_path());
        nippy_jar = match compression {
            Compression::Lz4 => nippy_jar.with_lz4(),
            Compression::Zstd => nippy_jar.with_zstd(false, 0),
            Compression::ZstdWithDictionary => {
                let dataset = prepare_compression()?;

                nippy_jar = nippy_jar.with_zstd(true, 5_000_000);
                nippy_jar.prepare_compression(dataset)?;
                nippy_jar
            }
            Compression::Uncompressed => nippy_jar,
        };

        if self.with_filters {
            nippy_jar = nippy_jar.with_cuckoo_filter(self.block_interval);
            nippy_jar = match phf {
                PerfectHashingFunction::Mphf => nippy_jar.with_mphf(),
                PerfectHashingFunction::GoMphf => nippy_jar.with_gomphf(),
            };
        }

        Ok(nippy_jar)
    }

    /// Generates a filename according to the desired configuration.
    fn get_file_path(&self, jar_config: JarConfig) -> PathBuf {
        let (mode, compression, phf) = jar_config;
        format!(
            "snapshot_{mode:?}_{}_{}_{compression:?}_{phf:?}",
            self.from,
            self.from + self.block_interval
        )
        .into()
    }
}

#[derive(Debug, Copy, Clone, ValueEnum)]
pub(crate) enum Snapshots {
    Headers,
    Transactions,
    Receipts,
}

#[derive(Debug, Copy, Clone, ValueEnum, Default)]
pub(crate) enum Compression {
    Lz4,
    Zstd,
    ZstdWithDictionary,
    #[default]
    Uncompressed,
}

#[derive(Debug, Copy, Clone, ValueEnum)]
pub(crate) enum PerfectHashingFunction {
    Mphf,
    GoMphf,
}
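As a concrete reading of `get_file_path` above: with the defaults (`from = 0`, `block_interval = 500000`) and the configuration `(Snapshots::Headers, Compression::Lz4, PerfectHashingFunction::Mphf)`, the `Debug`-formatted pieces yield the relative filename

    snapshot_Headers_0_500000_Lz4_Mphf

which is the same name `NippyJar::load_without_header` later resolves in `bench_headers_snapshot`.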