chore(db): microbenchmarking for table serialization and db insertion (#513)

Co-authored-by: Georgios Konstantopoulos <me@gakonst.com>
This commit is contained in:
joshieDo
2023-01-31 12:08:51 +08:00
committed by GitHub
parent 6ef48829bd
commit cc43b72835
19 changed files with 722 additions and 64 deletions

View File

@ -8,7 +8,7 @@ readme = "README.md"
[dependencies]
# reth
reth-primitives = { path = "../../crates/primitives" }
reth-primitives = { path = "../../crates/primitives", features = ["arbitrary"] }
reth-db = {path = "../../crates/storage/db", features = ["mdbx", "test-utils"] }
# TODO: Temporary use of the test-utils feature
reth-provider = { path = "../../crates/storage/provider", features = ["test-utils"] }
@ -44,6 +44,9 @@ metrics = "0.20.1"
metrics-exporter-prometheus = { version = "0.11.0", features = ["http-listener"] }
metrics-util = "0.14.0"
# test vectors generation
proptest = "1.0"
# misc
eyre = "0.6.8"
clap = { version = "4.0", features = ["derive", "cargo"] }

View File

@ -2,7 +2,7 @@
use crate::{
db,
dirs::{LogsDir, PlatformPath},
node, p2p, stage, test_eth_chain,
node, p2p, stage, test_eth_chain, test_vectors,
};
use clap::{ArgAction, Args, Parser, Subcommand};
use reth_tracing::{
@ -25,6 +25,7 @@ pub async fn run() -> eyre::Result<()> {
Commands::Db(command) => command.execute().await,
Commands::Stage(command) => command.execute().await,
Commands::P2P(command) => command.execute().await,
Commands::TestVectors(command) => command.execute().await,
}
}
@ -51,6 +52,9 @@ pub enum Commands {
/// Run Ethereum blockchain tests
#[command(name = "test-chain")]
TestEthChain(test_eth_chain::Command),
/// Generate Test Vectors
#[command(name = "test-vectors")]
TestVectors(test_vectors::Command),
}
#[derive(Parser)]

View File

@ -170,6 +170,7 @@ impl Command {
BlockBodies,
BlockOmmers,
TxHashNumber,
PlainStorageState,
PlainAccountState,
BlockTransitionIndex,
TxTransitionIndex,

View File

@ -14,6 +14,7 @@ pub mod p2p;
pub mod prometheus_exporter;
pub mod stage;
pub mod test_eth_chain;
pub mod test_vectors;
pub use reth_staged_sync::utils;
use clap::Args;

View File

@ -0,0 +1,33 @@
//! Command for generating test vectors.
use clap::{Parser, Subcommand};
mod tables;
/// Generate test-vectors for different data types.
#[derive(Debug, Parser)]
pub struct Command {
#[clap(subcommand)]
command: Subcommands,
}
/// `reth test-vectors` subcommands
#[derive(Debug, Subcommand)]
pub enum Subcommands {
    /// Generates test vectors for specified tables. If no table is specified, generate for all.
    Tables {
        /// List of table names. Case-sensitive.
        names: Vec<String>,
    },
}
impl Command {
    /// Runs the selected `test-vectors` subcommand.
    ///
    /// Any error reported by the underlying generator is returned to the caller unchanged.
    pub async fn execute(self) -> eyre::Result<()> {
        match self.command {
            // Hand the requested table names over to the vector generator.
            Subcommands::Tables { names } => tables::generate_vectors(names),
        }
    }
}

View File

@ -0,0 +1,170 @@
use std::collections::HashSet;
use eyre::Result;
use proptest::{
arbitrary::Arbitrary,
prelude::{any_with, ProptestConfig},
strategy::{Strategy, ValueTree},
test_runner::TestRunner,
};
use reth_db::{
table::{DupSort, Table},
tables,
};
use tracing::error;
/// Directory (relative path) that receives one `<table name>.json` file per generated table.
const VECTORS_FOLDER: &str = "testdata/micro/db";
/// Row count requested for tables that do not specify their own count in `generate_vectors`.
const PER_TABLE: usize = 1000;
/// Generates test vectors for specified `tables`. If list is empty, then generate for all tables.
pub(crate) fn generate_vectors(mut tables: Vec<String>) -> Result<()> {
let mut runner = TestRunner::new(ProptestConfig::default());
std::fs::create_dir_all(VECTORS_FOLDER)?;
macro_rules! generate_vector {
($table_type:ident, $per_table:expr, TABLE) => {
generate_table_vector::<tables::$table_type>(&mut runner, $per_table)?;
};
($table_type:ident, $per_table:expr, DUPSORT) => {
generate_dupsort_vector::<tables::$table_type>(&mut runner, $per_table)?;
};
}
macro_rules! generate {
([$(($table_type:ident, $per_table:expr, $table_or_dup:tt)),*]) => {
let all_tables = vec![$(stringify!($table_type).to_string(),)*];
if tables.is_empty() {
tables = all_tables;
}
for table in tables {
match table.as_str() {
$(
stringify!($table_type) => {
println!("Generating test vectors for {} <{}>.", stringify!($table_or_dup), tables::$table_type::NAME);
generate_vector!($table_type, $per_table, $table_or_dup);
},
)*
_ => {
error!(target: "reth::cli", "Unknown table: {}", table);
}
}
}
}
}
generate!([
(CanonicalHeaders, PER_TABLE, TABLE),
(HeaderTD, PER_TABLE, TABLE),
(HeaderNumbers, PER_TABLE, TABLE),
(Headers, PER_TABLE, TABLE),
(BlockBodies, PER_TABLE, TABLE),
(BlockOmmers, 100, TABLE),
(TxHashNumber, PER_TABLE, TABLE),
(BlockTransitionIndex, PER_TABLE, TABLE),
(TxTransitionIndex, PER_TABLE, TABLE),
(Transactions, 100, TABLE),
(PlainStorageState, PER_TABLE, DUPSORT),
(PlainAccountState, PER_TABLE, TABLE)
]);
Ok(())
}
/// Generates test-vectors for normal tables. Keys are sorted and not repeated.
///
/// Produces exactly `per_table` `(key, value)` rows for table `T`, sorts them by key and writes
/// them out through `save_to_file`.
///
/// Rows are drawn from `runner` in batches of `per_table`. Rows whose key was already seen are
/// dropped, and further batches are drawn until enough unique keys have been collected. Note
/// that the loop only ends once `per_table` distinct keys were produced, so it does not
/// terminate for a key type that cannot yield that many distinct values.
///
/// # Errors
///
/// Fails if the strategy cannot produce a value tree or if the rows cannot be saved.
fn generate_table_vector<T: Table>(runner: &mut TestRunner, per_table: usize) -> Result<()>
where
    T::Key: Arbitrary + serde::Serialize + Ord + std::hash::Hash,
    T::Value: Arbitrary + serde::Serialize,
{
    let mut rows = Vec::with_capacity(per_table);
    let mut seen_keys = HashSet::new();

    // Every draw from this strategy yields a full batch of `per_table` candidate rows.
    let strat = proptest::collection::vec(
        any_with::<(T::Key, T::Value)>((
            <T::Key as Arbitrary>::Parameters::default(),
            <T::Value as Arbitrary>::Parameters::default(),
        )),
        per_table,
    )
    .no_shrink()
    .boxed();

    while rows.len() < per_table {
        // Only take what is still missing: a top-up batch is `per_table` rows long as well, so
        // appending it unbounded would overshoot the requested row count.
        let missing = per_table - rows.len();
        let batch = strat.new_tree(runner).map_err(|e| eyre::eyre!("{e}"))?.current();
        rows.extend(
            batch.into_iter().filter(|row| seen_keys.insert(row.0.clone())).take(missing),
        );
    }

    // Sort them by `Key`
    rows.sort_by(|a, b| a.0.cmp(&b.0));

    save_to_file::<T>(rows)
}
/// Generates test-vectors for DUPSORT tables.
///
/// Every generated key is unique and is paired with a sorted batch of between 100 and 299
/// values. Keys keep being added until at least `per_table` rows exist, so the final row count
/// may exceed `per_table`. The rows are then ordered by key and written out through
/// `save_to_file`.
///
/// # Errors
///
/// Fails if a strategy cannot produce a value tree or if the rows cannot be saved.
fn generate_dupsort_vector<T: Table>(runner: &mut TestRunner, per_table: usize) -> Result<()>
where
    T: DupSort,
    T::Key: Arbitrary + serde::Serialize + Ord + std::hash::Hash,
    T::Value: Arbitrary + serde::Serialize + Ord,
{
    let key_strategy =
        any_with::<T::Key>(<T::Key as Arbitrary>::Parameters::default()).no_shrink().boxed();
    let values_strategy = proptest::collection::vec(
        any_with::<T::Value>(<T::Value as Arbitrary>::Parameters::default()),
        100..300,
    )
    .no_shrink()
    .boxed();

    // Keys are tracked so that the same key is never emitted twice.
    let mut used_keys = HashSet::new();
    let mut entries: Vec<(T::Key, T::Value)> = Vec::new();

    while entries.len() < per_table {
        let candidate: T::Key =
            key_strategy.new_tree(runner).map_err(|e| eyre::eyre!("{e}"))?.current();

        // Values are only drawn for keys that have not been used yet.
        if used_keys.insert(candidate.clone()) {
            let mut batch: Vec<T::Value> =
                values_strategy.new_tree(runner).map_err(|e| eyre::eyre!("{e}"))?.current();
            batch.sort();
            entries.extend(batch.into_iter().map(|value| (candidate.clone(), value)));
        }
    }

    // Stable sort by key: the sorted order of the values under each key is kept.
    entries.sort_by(|(left, _), (right, _)| left.cmp(right));

    save_to_file::<T>(entries)
}
/// Save rows to file.
///
/// Serializes `rows` as pretty-printed JSON into `{VECTORS_FOLDER}/{T::NAME}.json`, creating the
/// file or truncating an existing one.
///
/// # Errors
///
/// Returns an error if the file cannot be created, if serialization fails, or if the buffered
/// output cannot be flushed.
fn save_to_file<T: Table>(rows: Vec<(T::Key, T::Value)>) -> eyre::Result<()>
where
    T::Key: serde::Serialize,
    T::Value: serde::Serialize,
{
    let path = format!("{VECTORS_FOLDER}/{}.json", T::NAME);

    // Report an unwritable target as an error instead of panicking.
    let mut writer = std::io::BufWriter::new(std::fs::File::create(path)?);
    serde_json::to_writer_pretty(&mut writer, &rows)?;

    // Dropping a `BufWriter` ignores flush failures, so flush explicitly to surface them.
    std::io::Write::flush(&mut writer)?;

    Ok(())
}