feat: add reth db snapshot <TYPE> command (#4889)

joshieDo authored on 2023-10-06 17:33:56 +01:00, committed by GitHub
parent 529635f8d4
commit 9c8eca6a49
19 changed files with 954 additions and 159 deletions

View File

@@ -50,6 +50,7 @@ reth-discv4 = { path = "../../crates/net/discv4" }
reth-prune = { path = "../../crates/prune" }
reth-snapshot = { path = "../../crates/snapshot" }
reth-trie = { path = "../../crates/trie" }
reth-nippy-jar = { path = "../../crates/storage/nippy-jar" }
# crypto
alloy-rlp.workspace = true
@@ -76,6 +77,7 @@ metrics.workspace = true
# test vectors generation
proptest.workspace = true
rand.workspace = true
# tui
comfy-table = "7.0"
@@ -102,6 +104,7 @@ pretty_assertions = "1.3.0"
humantime = "2.1.0"
const-str = "0.5.6"
boyer-moore-magiclen = "0.2.16"
itertools.workspace = true
[target.'cfg(not(windows))'.dependencies]
jemallocator = { version = "0.5.0", optional = true }

View File

@@ -24,6 +24,7 @@ mod clear;
mod diff;
mod get;
mod list;
mod snapshots;
/// DB List TUI
mod tui;
@@ -85,6 +86,8 @@ pub enum Subcommands {
},
/// Deletes all table entries
Clear(clear::Command),
/// Snapshots tables from the database
Snapshot(snapshots::Command),
/// Lists current and local database versions
Version,
/// Returns the full database path
@@ -210,6 +213,9 @@ impl Command {
let db = open_db(&db_path, self.db.log_level)?;
command.execute(&db)?;
}
Subcommands::Snapshot(command) => {
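// Unlike the other subcommands, this passes the path itself: the snapshot
// command opens its own read-only connections via `open_db_read_only`.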
command.execute(&db_path, self.db.log_level, self.chain.clone())?;
}
Subcommands::Version => {
let local_db_version = match get_db_version(&db_path) {
Ok(version) => Some(version),

View File

@@ -0,0 +1,48 @@
use super::JarConfig;
use reth_db::DatabaseEnvRO;
use reth_primitives::ChainSpec;
use reth_provider::{DatabaseProviderRO, ProviderFactory};
use std::{sync::Arc, time::Instant};
#[derive(Debug)]
pub(crate) enum BenchKind {
Walk,
RandomAll,
RandomOne,
RandomHash,
}
pub(crate) fn bench<F1, F2>(
bench_kind: BenchKind,
db: (DatabaseEnvRO, Arc<ChainSpec>),
jar_config: JarConfig,
mut snapshot_method: F1,
database_method: F2,
) -> eyre::Result<()>
where
F1: FnMut() -> eyre::Result<()>,
F2: Fn(DatabaseProviderRO<'_, DatabaseEnvRO>) -> eyre::Result<()>,
{
let (mode, compression, phf) = jar_config;
let (db, chain) = db;
println!();
println!("############");
println!("## [{mode:?}] [{compression:?}] [{phf:?}] [{bench_kind:?}]");
{
let start = Instant::now();
snapshot_method()?;
let end = start.elapsed().as_micros();
println!("# snapshot {bench_kind:?} | {end} μs");
}
{
let factory = ProviderFactory::new(db, chain);
let provider = factory.provider()?;
let start = Instant::now();
database_method(provider)?;
let end = start.elapsed().as_micros();
println!("# database {bench_kind:?} | {end} μs");
}
Ok(())
}
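For reference, a minimal sketch of the timing pattern `bench` applies to each closure; `time_it` is a hypothetical helper for illustration, not part of this commit:
use std::time::Instant;
// Run a closure once and report its wall-clock time in microseconds,
// mirroring how `bench` times `snapshot_method` and `database_method`.
fn time_it<F: FnMut() -> eyre::Result<()>>(label: &str, mut f: F) -> eyre::Result<()> {
let start = Instant::now();
f()?;
println!("# {label} | {} μs", start.elapsed().as_micros());
Ok(())
}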

View File

@@ -0,0 +1,192 @@
use super::{
bench::{bench, BenchKind},
Command, Compression, PerfectHashingFunction, Rows, Snapshots,
};
use crate::utils::DbTool;
use rand::{seq::SliceRandom, Rng};
use reth_db::{
cursor::DbCursorRO, database::Database, open_db_read_only, snapshot::create_snapshot_T1_T2,
table::Decompress, tables, transaction::DbTx, DatabaseEnvRO,
};
use reth_interfaces::db::LogLevel;
use reth_nippy_jar::NippyJar;
use reth_primitives::{BlockNumber, ChainSpec, Header};
use reth_provider::{HeaderProvider, ProviderError, ProviderFactory};
use std::{path::Path, sync::Arc};
use tables::*;
impl Command {
pub(crate) fn generate_headers_snapshot(
&self,
tool: &DbTool<'_, DatabaseEnvRO>,
compression: Compression,
phf: PerfectHashingFunction,
) -> eyre::Result<()> {
let mut jar = self.prepare_jar(2, (Snapshots::Headers, compression, phf), tool, || {
// Generates the dataset to train a zstd dictionary if necessary, with the most recent
// rows (at most 1000).
let dataset = tool.db.view(|tx| {
let mut cursor = tx.cursor_read::<reth_db::RawTable<reth_db::Headers>>()?;
let v1 = cursor
.walk_back(Some(RawKey::from((self.from + self.block_interval - 1) as u64)))?
.take(self.block_interval.min(1000))
.map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist"))
.collect::<Vec<_>>();
let mut cursor = tx.cursor_read::<reth_db::RawTable<reth_db::HeaderTD>>()?;
let v2 = cursor
.walk_back(Some(RawKey::from((self.from + self.block_interval - 1) as u64)))?
.take(self.block_interval.min(1000))
.map(|row| row.map(|(_key, value)| value.into_value()).expect("should exist"))
.collect::<Vec<_>>();
Ok::<Rows, eyre::Error>(vec![v1, v2])
})??;
Ok(dataset)
})?;
tool.db.view(|tx| {
// Hacky type inference: build `Some` of a concrete iterator type, then
// empty it, so the `None` passed below is fully typed. TODO: fix
let mut none_vec = Some(vec![vec![vec![0u8]].into_iter()]);
let _ = none_vec.take();
// Generate list of hashes for filters & PHF
let mut cursor = tx.cursor_read::<RawTable<CanonicalHeaders>>()?;
let mut hashes = None;
if self.with_filters {
hashes = Some(
cursor
.walk(Some(RawKey::from(self.from as u64)))?
.take(self.block_interval)
.map(|row| {
row.map(|(_key, value)| value.into_value()).map_err(|e| e.into())
}),
);
}
create_snapshot_T1_T2::<Headers, HeaderTD, BlockNumber>(
tx,
self.from as u64..=(self.from as u64 + self.block_interval as u64),
None,
// We already prepared the dictionary beforehand
none_vec,
hashes,
self.block_interval,
&mut jar,
)
})??;
Ok(())
}
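The `none_vec` dance above exists because a bare `None` would leave the generic iterator parameter of `create_snapshot_T1_T2` unconstrained. A minimal self-contained sketch of the same trick (illustrative signature, not the real one):
// A generic parameter like this cannot be called with a bare `None`:
fn takes_datasets<I: Iterator<Item = Vec<u8>>>(_datasets: Option<Vec<I>>) {}
fn main() {
// Build `Some` with a concrete iterator type, then take the value out,
// leaving a typed `None` that satisfies the bound.
let mut none_vec = Some(vec![vec![vec![0u8]].into_iter()]);
let _ = none_vec.take();
takes_datasets(none_vec);
}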
pub(crate) fn bench_headers_snapshot(
&self,
db_path: &Path,
log_level: Option<LogLevel>,
chain: Arc<ChainSpec>,
compression: Compression,
phf: PerfectHashingFunction,
) -> eyre::Result<()> {
let mode = Snapshots::Headers;
let jar_config = (mode, compression, phf);
let mut row_indexes = (self.from..(self.from + self.block_interval)).collect::<Vec<_>>();
let mut rng = rand::thread_rng();
let mut dictionaries = None;
let mut jar = NippyJar::load_without_header(&self.get_file_path(jar_config))?;
let (provider, decompressors) = self.prepare_jar_provider(&mut jar, &mut dictionaries)?;
let mut cursor = if !decompressors.is_empty() {
provider.cursor_with_decompressors(decompressors)
} else {
provider.cursor()
};
for bench_kind in [BenchKind::Walk, BenchKind::RandomAll] {
bench(
bench_kind,
(open_db_read_only(db_path, log_level)?, chain.clone()),
jar_config,
|| {
for num in row_indexes.iter() {
Header::decompress(
cursor
.row_by_number_with_cols::<0b01, 2>(num - self.from)?
.ok_or(ProviderError::HeaderNotFound((*num as u64).into()))?[0],
)?;
// TODO: replace with the code below once SnapshotProvider re-uses the cursor
// provider.header_by_number(num as
// u64)?.ok_or(ProviderError::HeaderNotFound((*num as u64).into()))?;
}
Ok(())
},
|provider| {
for num in row_indexes.iter() {
provider
.header_by_number(*num as u64)?
.ok_or(ProviderError::HeaderNotFound((*num as u64).into()))?;
}
Ok(())
},
)?;
// Shuffle so the next pass (RandomAll) reads the rows in random order
row_indexes.shuffle(&mut rng);
}
// BENCHMARK QUERYING A RANDOM HEADER BY NUMBER
{
let num = row_indexes[rng.gen_range(0..row_indexes.len())];
bench(
BenchKind::RandomOne,
(open_db_read_only(db_path, log_level)?, chain.clone()),
jar_config,
|| {
Header::decompress(
cursor
.row_by_number_with_cols::<0b01, 2>((num - self.from) as usize)?
.ok_or(ProviderError::HeaderNotFound((num as u64).into()))?[0],
)?;
Ok(())
},
|provider| {
provider
.header_by_number(num as u64)?
.ok_or(ProviderError::HeaderNotFound((num as u64).into()))?;
Ok(())
},
)?;
}
// BENCHMARK QUERYING A RANDOM HEADER BY HASH
{
let num = row_indexes[rng.gen_range(0..row_indexes.len())] as u64;
let header_hash =
ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone())
.header_by_number(num)?
.ok_or(ProviderError::HeaderNotFound(num.into()))?
.hash_slow();
bench(
BenchKind::RandomHash,
(open_db_read_only(db_path, log_level)?, chain.clone()),
jar_config,
|| {
let header = Header::decompress(
cursor
.row_by_key_with_cols::<0b01, 2>(header_hash.as_slice())?
.ok_or(ProviderError::HeaderNotFound(header_hash.into()))?[0],
)?;
// The filter/PHF lookup might be a false positive, so real usage must validate the hash
assert!(header.hash_slow() == header_hash);
Ok(())
},
|provider| {
provider
.header(&header_hash)?
.ok_or(ProviderError::HeaderNotFound(header_hash.into()))?;
Ok(())
},
)?;
}
Ok(())
}
}
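A note on the `0b01` const generic used above: it is read as a bitmask over the jar's two columns (headers and total difficulty), selecting only the header column. A sketch of that interpretation, with illustrative names rather than the actual cursor API:
// Decode a column bitmask: bit 0 = Headers, bit 1 = HeaderTD.
fn selected_columns(mask: usize, num_columns: usize) -> Vec<usize> {
(0..num_columns).filter(|col| mask & (1 << col) != 0).collect()
}
fn main() {
assert_eq!(selected_columns(0b01, 2), vec![0]); // header column only
assert_eq!(selected_columns(0b11, 2), vec![0, 1]); // both columns
}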

View File

@@ -0,0 +1,216 @@
use crate::utils::DbTool;
use clap::{clap_derive::ValueEnum, Parser};
use eyre::WrapErr;
use itertools::Itertools;
use reth_db::{database::Database, open_db_read_only, table::Table, tables, DatabaseEnvRO};
use reth_interfaces::db::LogLevel;
use reth_nippy_jar::{
compression::{DecoderDictionary, Decompressor},
NippyJar,
};
use reth_primitives::ChainSpec;
use reth_provider::providers::SnapshotProvider;
use std::{
path::{Path, PathBuf},
sync::Arc,
};
mod bench;
mod headers;
pub(crate) type Rows = Vec<Vec<Vec<u8>>>;
pub(crate) type JarConfig = (Snapshots, Compression, PerfectHashingFunction);
/// Arguments for the `reth db snapshot` command.
#[derive(Parser, Debug)]
pub struct Command {
/// Snapshot categories to generate.
modes: Vec<Snapshots>,
/// Starting block for the snapshot.
#[arg(long, short, default_value = "0")]
from: usize,
/// Number of blocks in the snapshot.
#[arg(long, short, default_value = "500000")]
block_interval: usize,
/// Flag to enable database-to-snapshot benchmarking.
#[arg(long, default_value = "false")]
bench: bool,
/// Flag to skip snapshot creation and only run benchmarks on existing snapshots.
#[arg(long, default_value = "false")]
only_bench: bool,
/// Compression algorithms to use.
#[arg(long, short, value_delimiter = ',', default_value = "lz4")]
compression: Vec<Compression>,
/// Flag to enable inclusion list filters and PHFs.
#[arg(long, default_value = "true")]
with_filters: bool,
/// Specifies the perfect hashing function to use.
#[arg(long, value_delimiter = ',', default_value_if("with_filters", "true", "mphf"))]
phf: Vec<PerfectHashingFunction>,
}
impl Command {
/// Execute `db snapshot` command
pub fn execute(
self,
db_path: &Path,
log_level: Option<LogLevel>,
chain: Arc<ChainSpec>,
) -> eyre::Result<()> {
let all_combinations = self
.modes
.iter()
.cartesian_product(self.compression.iter())
.cartesian_product(self.phf.iter());
{
let db = open_db_read_only(db_path, None)?;
let tool = DbTool::new(&db, chain.clone())?;
if !self.only_bench {
for ((mode, compression), phf) in all_combinations.clone() {
match mode {
Snapshots::Headers => {
self.generate_headers_snapshot(&tool, *compression, *phf)?
}
Snapshots::Transactions => todo!(),
Snapshots::Receipts => todo!(),
}
}
}
}
if self.only_bench || self.bench {
for ((mode, compression), phf) in all_combinations {
match mode {
Snapshots::Headers => self.bench_headers_snapshot(
db_path,
log_level,
chain.clone(),
*compression,
*phf,
)?,
Snapshots::Transactions => todo!(),
Snapshots::Receipts => todo!(),
}
}
}
Ok(())
}
/// Returns a [`SnapshotProvider`] over the provided [`NippyJar`], alongside a list of
/// [`Decompressor`] if necessary, filling `dictionaries` with the required
/// [`DecoderDictionary`] values.
fn prepare_jar_provider<'a>(
&self,
jar: &'a mut NippyJar,
dictionaries: &'a mut Option<Vec<DecoderDictionary<'_>>>,
) -> eyre::Result<(SnapshotProvider<'a>, Vec<Decompressor<'a>>)> {
let mut decompressors: Vec<Decompressor<'_>> = vec![];
if let Some(reth_nippy_jar::compression::Compressors::Zstd(zstd)) = jar.compressor_mut() {
if zstd.use_dict {
*dictionaries = zstd.generate_decompress_dictionaries();
decompressors = zstd.generate_decompressors(dictionaries.as_ref().expect("qed"))?;
}
}
Ok((SnapshotProvider { jar: &*jar, jar_start_block: self.from as u64 }, decompressors))
}
/// Returns a [`NippyJar`] according to the desired configuration.
fn prepare_jar<F: Fn() -> eyre::Result<Rows>>(
&self,
num_columns: usize,
jar_config: JarConfig,
tool: &DbTool<'_, DatabaseEnvRO>,
prepare_compression: F,
) -> eyre::Result<NippyJar> {
let (mode, compression, phf) = jar_config;
let snap_file = self.get_file_path(jar_config);
let table_name = match mode {
Snapshots::Headers => tables::Headers::NAME,
Snapshots::Transactions | Snapshots::Receipts => tables::Transactions::NAME,
};
let total_rows = tool.db.view(|tx| {
let table_db = tx.inner.open_db(Some(table_name)).wrap_err("Could not open db.")?;
let stats = tx
.inner
.db_stat(&table_db)
.wrap_err(format!("Could not find table: {}", table_name))?;
Ok::<usize, eyre::Error>((stats.entries() - self.from).min(self.block_interval))
})??;
assert!(
total_rows >= self.block_interval,
"Not enough rows in the database: {} < {}.",
total_rows,
self.block_interval
);
let mut nippy_jar = NippyJar::new_without_header(num_columns, snap_file.as_path());
nippy_jar = match compression {
Compression::Lz4 => nippy_jar.with_lz4(),
Compression::Zstd => nippy_jar.with_zstd(false, 0),
Compression::ZstdWithDictionary => {
let dataset = prepare_compression()?;
nippy_jar = nippy_jar.with_zstd(true, 5_000_000);
nippy_jar.prepare_compression(dataset)?;
nippy_jar
}
Compression::Uncompressed => nippy_jar,
};
if self.with_filters {
nippy_jar = nippy_jar.with_cuckoo_filter(self.block_interval);
nippy_jar = match phf {
PerfectHashingFunction::Mphf => nippy_jar.with_mphf(),
PerfectHashingFunction::GoMphf => nippy_jar.with_gomphf(),
};
}
Ok(nippy_jar)
}
/// Generates a filename according to the desired configuration.
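/// For example, headers from block 0 with a 500000-block interval, LZ4 and
/// `Mphf` yield `snapshot_Headers_0_500000_Lz4_Mphf`.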
fn get_file_path(&self, jar_config: JarConfig) -> PathBuf {
let (mode, compression, phf) = jar_config;
format!(
"snapshot_{mode:?}_{}_{}_{compression:?}_{phf:?}",
self.from,
self.from + self.block_interval
)
.into()
}
}
#[derive(Debug, Copy, Clone, ValueEnum)]
pub(crate) enum Snapshots {
Headers,
Transactions,
Receipts,
}
#[derive(Debug, Copy, Clone, ValueEnum, Default)]
pub(crate) enum Compression {
Lz4,
Zstd,
ZstdWithDictionary,
#[default]
Uncompressed,
}
#[derive(Debug, Copy, Clone, ValueEnum)]
pub(crate) enum PerfectHashingFunction {
Mphf,
GoMphf,
}
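Assuming clap's default kebab-case naming for the `ValueEnum` variants above, a typical invocation could look like `reth db snapshot headers --from 0 --block-interval 500000 --compression zstd-with-dictionary --phf mphf --bench`.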