From cc43b7283513c89f27965bd428f49f95260867a8 Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Tue, 31 Jan 2023 12:08:51 +0800 Subject: [PATCH] chore(db): microbenchmarking for table serialization and db insertion (#513) Co-authored-by: Georgios Konstantopoulos --- .github/scripts/compare_iai.sh | 6 + .github/workflows/ci.yml | 40 +++ .gitignore | 3 + Cargo.lock | 3 + bin/reth/Cargo.toml | 5 +- bin/reth/src/cli.rs | 6 +- bin/reth/src/db/mod.rs | 1 + bin/reth/src/lib.rs | 1 + bin/reth/src/test_vectors/mod.rs | 33 +++ bin/reth/src/test_vectors/tables.rs | 170 +++++++++++ crates/primitives/src/storage.rs | 7 +- crates/storage/db/Cargo.toml | 12 +- crates/storage/db/benches/criterion.rs | 280 ++++++++++++++++++ crates/storage/db/benches/encoding_crit.rs | 33 --- crates/storage/db/benches/encoding_iai.rs | 21 -- crates/storage/db/benches/iai.rs | 79 +++++ crates/storage/db/benches/utils.rs | 70 +++++ crates/storage/db/src/tables/models/blocks.rs | 2 +- crates/storage/db/src/tables/utils.rs | 14 + 19 files changed, 722 insertions(+), 64 deletions(-) create mode 100755 .github/scripts/compare_iai.sh create mode 100644 bin/reth/src/test_vectors/mod.rs create mode 100644 bin/reth/src/test_vectors/tables.rs create mode 100644 crates/storage/db/benches/criterion.rs delete mode 100644 crates/storage/db/benches/encoding_crit.rs delete mode 100644 crates/storage/db/benches/encoding_iai.rs create mode 100644 crates/storage/db/benches/iai.rs create mode 100644 crates/storage/db/benches/utils.rs diff --git a/.github/scripts/compare_iai.sh b/.github/scripts/compare_iai.sh new file mode 100755 index 000000000..3f1ce1d90 --- /dev/null +++ b/.github/scripts/compare_iai.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# This script should be run on the main branch, after running the iai benchmarks on the target branch. + +# If the main branch has a better iai performance, exits in error. 
It ignores L2 differences, since they seem hard to stabilize across runs. +cargo bench --package reth-db --bench iai | tee /dev/tty | awk '/((L1)|(Ins)|(RAM)|(Est))+.*\(\+[1-9]+[0-9]*\..*%\)/{f=1} END{exit f}' \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ad9c21538..77cba335b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -182,3 +182,43 @@ jobs: - uses: Swatinem/rust-cache@v2 - name: Check if documentation builds run: RUSTDOCFLAGS="-D warnings" cargo doc --all --no-deps --all-features --document-private-items + + benchmarks: + # Pin to `20.04` instead of `ubuntu-latest`, until ubuntu-latest migration is complete + # See also + runs-on: ubuntu-20.04 + steps: + - name: Install Valgrind + run: | + sudo apt install -y valgrind + + - name: Checkout main sources + uses: actions/checkout@v3 + with: + ref: main + + - uses: Swatinem/rust-cache@v1 + with: + cache-on-failure: true + + - name: Generate test-vectors + uses: actions-rs/cargo@v1 + with: + command: run + args: --bin reth -- test-vectors tables + + - name: Set main baseline + uses: actions-rs/cargo@v1 + with: + command: bench + args: --package reth-db --bench iai + + - name: Checkout PR sources + uses: actions/checkout@v3 + with: + clean: false + + - name: Compare PR benchmark + shell: 'script -q -e -c "bash {0}"' # required to workaround /dev/tty not being available + run: | + ./.github/scripts/compare_iai.sh diff --git a/.gitignore b/.gitignore index 3433ce391..c2eff62e8 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ target/ # Generated by MacOS .DS_Store + +# Generated test-vectors for DB +testdata/micro/db \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 2348f4f6b..c05c100a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3970,6 +3970,7 @@ dependencies = [ "metrics", "metrics-exporter-prometheus", "metrics-util", + "proptest", "reth-consensus", "reth-db", "reth-discv4", @@ -4052,6 +4053,7 @@ dependencies = 
[ "modular-bitfield", "page_size", "parity-scale-codec", + "paste", "postcard", "proptest", "proptest-derive", @@ -4063,6 +4065,7 @@ dependencies = [ "reth-primitives", "secp256k1 0.24.2", "serde", + "serde_json", "tempfile", "test-fuzz", "thiserror", diff --git a/bin/reth/Cargo.toml b/bin/reth/Cargo.toml index 9aaee3d1e..b28f441c7 100644 --- a/bin/reth/Cargo.toml +++ b/bin/reth/Cargo.toml @@ -8,7 +8,7 @@ readme = "README.md" [dependencies] # reth -reth-primitives = { path = "../../crates/primitives" } +reth-primitives = { path = "../../crates/primitives", features = ["arbitrary"] } reth-db = {path = "../../crates/storage/db", features = ["mdbx", "test-utils"] } # TODO: Temporary use of the test-utils feature reth-provider = { path = "../../crates/storage/provider", features = ["test-utils"] } @@ -44,6 +44,9 @@ metrics = "0.20.1" metrics-exporter-prometheus = { version = "0.11.0", features = ["http-listener"] } metrics-util = "0.14.0" +# test vectors generation +proptest = "1.0" + # misc eyre = "0.6.8" clap = { version = "4.0", features = ["derive", "cargo"] } diff --git a/bin/reth/src/cli.rs b/bin/reth/src/cli.rs index 936d46e02..9d297ae60 100644 --- a/bin/reth/src/cli.rs +++ b/bin/reth/src/cli.rs @@ -2,7 +2,7 @@ use crate::{ db, dirs::{LogsDir, PlatformPath}, - node, p2p, stage, test_eth_chain, + node, p2p, stage, test_eth_chain, test_vectors, }; use clap::{ArgAction, Args, Parser, Subcommand}; use reth_tracing::{ @@ -25,6 +25,7 @@ pub async fn run() -> eyre::Result<()> { Commands::Db(command) => command.execute().await, Commands::Stage(command) => command.execute().await, Commands::P2P(command) => command.execute().await, + Commands::TestVectors(command) => command.execute().await, } } @@ -51,6 +52,9 @@ pub enum Commands { /// Run Ethereum blockchain tests #[command(name = "test-chain")] TestEthChain(test_eth_chain::Command), + /// Generate Test Vectors + #[command(name = "test-vectors")] + TestVectors(test_vectors::Command), } #[derive(Parser)] diff --git 
a/bin/reth/src/db/mod.rs b/bin/reth/src/db/mod.rs index 693a3aafc..e72f164dc 100644 --- a/bin/reth/src/db/mod.rs +++ b/bin/reth/src/db/mod.rs @@ -170,6 +170,7 @@ impl Command { BlockBodies, BlockOmmers, TxHashNumber, + PlainStorageState, PlainAccountState, BlockTransitionIndex, TxTransitionIndex, diff --git a/bin/reth/src/lib.rs b/bin/reth/src/lib.rs index 04631f8be..1bfaf3741 100644 --- a/bin/reth/src/lib.rs +++ b/bin/reth/src/lib.rs @@ -14,6 +14,7 @@ pub mod p2p; pub mod prometheus_exporter; pub mod stage; pub mod test_eth_chain; +pub mod test_vectors; pub use reth_staged_sync::utils; use clap::Args; diff --git a/bin/reth/src/test_vectors/mod.rs b/bin/reth/src/test_vectors/mod.rs new file mode 100644 index 000000000..4f81d400d --- /dev/null +++ b/bin/reth/src/test_vectors/mod.rs @@ -0,0 +1,33 @@ +//! Command for generating test vectors. +use clap::{Parser, Subcommand}; + +mod tables; + +/// Generate test-vectors for different data types. +#[derive(Debug, Parser)] +pub struct Command { + #[clap(subcommand)] + command: Subcommands, +} + +#[derive(Subcommand, Debug)] +/// `reth test-vectors` subcommands +pub enum Subcommands { + /// Generates test vectors for specified tables. If no table is specified, generate for all. + Tables { + /// List of table names. Case-sensitive. 
+ names: Vec, + }, +} + +impl Command { + /// Execute the command + pub async fn execute(self) -> eyre::Result<()> { + match self.command { + Subcommands::Tables { names } => { + tables::generate_vectors(names)?; + } + } + Ok(()) + } +} diff --git a/bin/reth/src/test_vectors/tables.rs b/bin/reth/src/test_vectors/tables.rs new file mode 100644 index 000000000..f2eae45ba --- /dev/null +++ b/bin/reth/src/test_vectors/tables.rs @@ -0,0 +1,170 @@ +use std::collections::HashSet; + +use eyre::Result; +use proptest::{ + arbitrary::Arbitrary, + prelude::{any_with, ProptestConfig}, + strategy::{Strategy, ValueTree}, + test_runner::TestRunner, +}; +use reth_db::{ + table::{DupSort, Table}, + tables, +}; +use tracing::error; + +const VECTORS_FOLDER: &str = "testdata/micro/db"; +const PER_TABLE: usize = 1000; + +/// Generates test vectors for specified `tables`. If list is empty, then generate for all tables. +pub(crate) fn generate_vectors(mut tables: Vec) -> Result<()> { + let mut runner = TestRunner::new(ProptestConfig::default()); + std::fs::create_dir_all(VECTORS_FOLDER)?; + + macro_rules! generate_vector { + ($table_type:ident, $per_table:expr, TABLE) => { + generate_table_vector::(&mut runner, $per_table)?; + }; + ($table_type:ident, $per_table:expr, DUPSORT) => { + generate_dupsort_vector::(&mut runner, $per_table)?; + }; + } + + macro_rules! 
generate { + ([$(($table_type:ident, $per_table:expr, $table_or_dup:tt)),*]) => { + let all_tables = vec![$(stringify!($table_type).to_string(),)*]; + + if tables.is_empty() { + tables = all_tables; + } + + for table in tables { + match table.as_str() { + $( + stringify!($table_type) => { + println!("Generating test vectors for {} <{}>.", stringify!($table_or_dup), tables::$table_type::NAME); + + generate_vector!($table_type, $per_table, $table_or_dup); + }, + )* + _ => { + error!(target: "reth::cli", "Unknown table: {}", table); + } + } + } + } + } + + generate!([ + (CanonicalHeaders, PER_TABLE, TABLE), + (HeaderTD, PER_TABLE, TABLE), + (HeaderNumbers, PER_TABLE, TABLE), + (Headers, PER_TABLE, TABLE), + (BlockBodies, PER_TABLE, TABLE), + (BlockOmmers, 100, TABLE), + (TxHashNumber, PER_TABLE, TABLE), + (BlockTransitionIndex, PER_TABLE, TABLE), + (TxTransitionIndex, PER_TABLE, TABLE), + (Transactions, 100, TABLE), + (PlainStorageState, PER_TABLE, DUPSORT), + (PlainAccountState, PER_TABLE, TABLE) + ]); + + Ok(()) +} + +/// Generates test-vectors for normal tables. Keys are sorted and not repeated. +fn generate_table_vector(runner: &mut TestRunner, per_table: usize) -> Result<()> +where + T::Key: Arbitrary + serde::Serialize + Ord + std::hash::Hash, + T::Value: Arbitrary + serde::Serialize, +{ + let mut rows = vec![]; + let mut seen_keys = HashSet::new(); + let strat = proptest::collection::vec( + any_with::<(T::Key, T::Value)>(( + ::Parameters::default(), + ::Parameters::default(), + )), + per_table - rows.len(), + ) + .no_shrink() + .boxed(); + + while rows.len() < per_table { + // Generate all `per_table` rows: (Key, Value) + rows.extend( + &mut strat + .new_tree(runner) + .map_err(|e| eyre::eyre!("{e}"))? + .current() + .into_iter() + .filter(|e| seen_keys.insert(e.0.clone())), + ); + } + // Sort them by `Key` + rows.sort_by(|a, b| a.0.cmp(&b.0)); + + save_to_file::(rows) +} + +/// Generates test-vectors for DUPSORT tables. Each key has multiple (subkey, value). 
Keys and +/// subkeys are sorted. +fn generate_dupsort_vector<T>(runner: &mut TestRunner, per_table: usize) -> Result<()> +where + T: DupSort, + T::Key: Arbitrary + serde::Serialize + Ord + std::hash::Hash, + T::Value: Arbitrary + serde::Serialize + Ord, +{ + let mut rows = vec![]; + + // We want to control our repeated keys + let mut seen_keys = HashSet::new(); + + let strat_values = proptest::collection::vec( + any_with::<T::Value>(<T::Value as Arbitrary>::Parameters::default()), + 100..300, + ) + .no_shrink() + .boxed(); + + let strat_keys = + any_with::<T::Key>(<T::Key as Arbitrary>::Parameters::default()).no_shrink().boxed(); + + while rows.len() < per_table { + let key: T::Key = strat_keys.new_tree(runner).map_err(|e| eyre::eyre!("{e}"))?.current(); + + if !seen_keys.insert(key.clone()) { + continue + } + + let mut values: Vec<T::Value> = + strat_values.new_tree(runner).map_err(|e| eyre::eyre!("{e}"))?.current(); + + values.sort(); + + for value in values { + rows.push((key.clone(), value)); + } + } + + // Sort them by `Key` + rows.sort_by(|a, b| a.0.cmp(&b.0)); + + save_to_file::<T>(rows) +} + +/// Save rows to file. +fn save_to_file<T: Table>(rows: Vec<(T::Key, T::Value)>) -> eyre::Result<()> +where + T::Key: serde::Serialize, + T::Value: serde::Serialize, +{ + serde_json::to_writer_pretty( + std::io::BufWriter::new( + std::fs::File::create(format!("{VECTORS_FOLDER}/{}.json", T::NAME))?, + ), + &rows, + ) + .map_err(|e| eyre::eyre!({ e })) +} diff --git a/crates/primitives/src/storage.rs b/crates/primitives/src/storage.rs index 2ce36381f..254499fce 100644 --- a/crates/primitives/src/storage.rs +++ b/crates/primitives/src/storage.rs @@ -1,9 +1,10 @@ use super::{H256, U256}; -use reth_codecs::Compact; -use serde::Serialize; +use reth_codecs::{derive_arbitrary, Compact}; +use serde::{Deserialize, Serialize}; /// Account storage entry. 
-#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize)] +#[derive_arbitrary(compact)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Ord)] pub struct StorageEntry { /// Storage key. pub key: H256, diff --git a/crates/storage/db/Cargo.toml b/crates/storage/db/Cargo.toml index 59559c012..5638755c3 100644 --- a/crates/storage/db/Cargo.toml +++ b/crates/storage/db/Cargo.toml @@ -63,10 +63,14 @@ arbitrary = { version = "1.1.7", features = ["derive"] } proptest = { version = "1.0" } proptest-derive = "0.3" +serde_json = "1.0" + +paste = "1.0" + [features] default = ["mdbx"] -test-utils = ["tempfile"] +test-utils = ["tempfile", "arbitrary"] bench-postcard = ["bench"] mdbx = ["reth-libmdbx"] bench = [] @@ -79,9 +83,9 @@ arbitrary = [ ] [[bench]] -name = "encoding_crit" +name = "criterion" harness = false [[bench]] -name = "encoding_iai" -harness = false \ No newline at end of file +name = "iai" +harness = false diff --git a/crates/storage/db/benches/criterion.rs b/crates/storage/db/benches/criterion.rs new file mode 100644 index 000000000..d1df4a0d2 --- /dev/null +++ b/crates/storage/db/benches/criterion.rs @@ -0,0 +1,280 @@ +#![allow(dead_code, unused_imports, non_snake_case)] + +use criterion::{ + black_box, criterion_group, criterion_main, measurement::WallTime, BenchmarkGroup, Criterion, +}; +use reth_db::cursor::{DbDupCursorRO, DbDupCursorRW}; +use std::time::Instant; + +criterion_group!(benches, db, serialization); +criterion_main!(benches); + +pub fn db(c: &mut Criterion) { + let mut group = c.benchmark_group("tables_db"); + group.measurement_time(std::time::Duration::from_millis(200)); + group.warm_up_time(std::time::Duration::from_millis(200)); + + measure_table_db::(&mut group); + measure_table_db::(&mut group); + measure_table_db::(&mut group); + measure_table_db::(&mut group); + measure_table_db::(&mut group); + measure_table_db::(&mut group); + measure_table_db::(&mut group); + measure_table_db::(&mut 
group); + measure_table_db::(&mut group); + measure_table_db::(&mut group); + measure_dupsort_db::(&mut group); + measure_table_db::(&mut group); +} + +pub fn serialization(c: &mut Criterion) { + let mut group = c.benchmark_group("tables_serialization"); + group.measurement_time(std::time::Duration::from_millis(200)); + group.warm_up_time(std::time::Duration::from_millis(200)); + + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); + measure_table_serialization::(&mut group); +} + +/// Measures `Encode`, `Decode`, `Compress` and `Decompress`. +fn measure_table_serialization(group: &mut BenchmarkGroup) +where + T: Table + Default, + T::Key: Default + Clone + for<'de> serde::Deserialize<'de>, + T::Value: Default + Clone + for<'de> serde::Deserialize<'de>, +{ + let input = &load_vectors::(); + group.bench_function(format!("{}.KeyEncode", T::NAME), move |b| { + b.iter_with_setup( + || input.clone(), + |input| { + black_box({ + for (k, _, _, _) in input { + k.encode(); + } + }); + }, + ) + }); + + group.bench_function(format!("{}.KeyDecode", T::NAME), |b| { + b.iter_with_setup( + || input.clone(), + |input| { + black_box({ + for (_, k, _, _) in input { + let _ = ::Key::decode(k); + } + }); + }, + ) + }); + + group.bench_function(format!("{}.ValueCompress", T::NAME), move |b| { + b.iter_with_setup( + || input.clone(), + |input| { + black_box({ + for (_, _, v, _) in input { + v.compress(); + } + }); + }, + ) + }); + + group.bench_function(format!("{}.ValueDecompress", T::NAME), |b| { + b.iter_with_setup( + || input.clone(), + |input| { + 
black_box({ + for (_, _, _, v) in input { + let _ = ::Value::decompress(v); + } + }); + }, + ) + }); +} + +/// Measures `SeqWrite`, `RandomWrite`, `SeqRead` and `RandomRead` using `cursor` and `tx.put`. +fn measure_table_db(group: &mut BenchmarkGroup) +where + T: Table + Default, + T::Key: Default + Clone + for<'de> serde::Deserialize<'de>, + T::Value: Default + Clone + for<'de> serde::Deserialize<'de>, +{ + let input = &load_vectors::(); + let bench_db_path = Path::new(BENCH_DB_PATH); + + group.bench_function(format!("{}.SeqWrite", T::NAME), |b| { + b.iter_with_setup( + || { + // Reset DB + let _ = std::fs::remove_dir_all(bench_db_path); + (input.clone(), create_test_db_with_path::(EnvKind::RW, bench_db_path)) + }, + |(input, db)| { + // Create TX + let tx = db.tx_mut().expect("tx"); + let mut crsr = tx.cursor_write::().expect("cursor"); + + black_box({ + for (k, _, v, _) in input { + crsr.append(k, v).expect("submit"); + } + + tx.inner.commit().unwrap(); + }); + }, + ) + }); + + group.bench_function(format!("{}.RandomWrite", T::NAME), |b| { + b.iter_with_setup( + || { + // Reset DB + let _ = std::fs::remove_dir_all(bench_db_path); + (input, create_test_db_with_path::(EnvKind::RW, bench_db_path)) + }, + |(input, db)| { + // Create TX + let tx = db.tx_mut().expect("tx"); + let mut crsr = tx.cursor_write::().expect("cursor"); + + black_box({ + for index in RANDOM_INDEXES { + let (k, _, v, _) = input.get(index).unwrap().clone(); + crsr.insert(k, v).expect("submit"); + } + + tx.inner.commit().unwrap(); + }); + }, + ) + }); + + group.bench_function(format!("{}.SeqRead", T::NAME), |b| { + let db = set_up_db::(bench_db_path, input); + + b.iter(|| { + // Create TX + let tx = db.tx().expect("tx"); + + black_box({ + let mut cursor = tx.cursor_read::().expect("cursor"); + let walker = cursor.walk(input.first().unwrap().0.clone()).unwrap(); + for element in walker { + element.unwrap(); + } + }); + }) + }); + + group.bench_function(format!("{}.RandomRead", T::NAME), |b| { + 
let db = set_up_db::(bench_db_path, input); + + b.iter(|| { + // Create TX + let tx = db.tx().expect("tx"); + + black_box({ + for index in RANDOM_INDEXES { + let mut cursor = tx.cursor_read::().expect("cursor"); + cursor.seek_exact(input.get(index).unwrap().0.clone()).unwrap(); + } + }); + }) + }); +} + +/// Measures `SeqWrite`, `RandomWrite` and `SeqRead` using `cursor_dup` and `tx.put`. +fn measure_dupsort_db(group: &mut BenchmarkGroup) +where + T: Table + Default + DupSort, + T::Key: Default + Clone + for<'de> serde::Deserialize<'de>, + T::Value: Default + Clone + for<'de> serde::Deserialize<'de>, + T::SubKey: Default + Clone + for<'de> serde::Deserialize<'de>, +{ + let input = &load_vectors::(); + let bench_db_path = Path::new(BENCH_DB_PATH); + + group.bench_function(format!("{}.SeqWrite", T::NAME), |b| { + b.iter_with_setup( + || { + // Reset DB + let _ = std::fs::remove_dir_all(bench_db_path); + (input.clone(), create_test_db_with_path::(EnvKind::RW, bench_db_path)) + }, + |(input, db)| { + // Create TX + let tx = db.tx_mut().expect("tx"); + let mut crsr = tx.cursor_dup_write::().expect("cursor"); + + black_box({ + for (k, _, v, _) in input { + crsr.append_dup(k, v).expect("submit"); + } + + tx.inner.commit().unwrap(); + }); + }, + ) + }); + + group.bench_function(format!("{}.RandomWrite", T::NAME), |b| { + b.iter_with_setup( + || { + // Reset DB + let _ = std::fs::remove_dir_all(bench_db_path); + + (input, create_test_db_with_path::(EnvKind::RW, bench_db_path)) + }, + |(input, db)| { + // Create TX + let tx = db.tx_mut().expect("tx"); + + for index in RANDOM_INDEXES { + let (k, _, v, _) = input.get(index).unwrap().clone(); + tx.put::(k, v).unwrap(); + } + + tx.inner.commit().unwrap(); + }, + ) + }); + + group.bench_function(format!("{}.SeqRead", T::NAME), |b| { + let db = set_up_db::(bench_db_path, input); + + b.iter(|| { + // Create TX + let tx = db.tx().expect("tx"); + + black_box({ + let mut cursor = tx.cursor_dup_read::().expect("cursor"); + let walker = 
cursor + .walk_dup(input.first().unwrap().0.clone(), T::SubKey::default()) + .unwrap(); + for element in walker { + element.unwrap(); + } + }); + }) + }); + + // group.bench_function(format!("{}.RandomRead", T::NAME), |b| {}); +} + +include!("./utils.rs"); diff --git a/crates/storage/db/benches/encoding_crit.rs b/crates/storage/db/benches/encoding_crit.rs deleted file mode 100644 index a3a8e653d..000000000 --- a/crates/storage/db/benches/encoding_crit.rs +++ /dev/null @@ -1,33 +0,0 @@ -use criterion::{black_box, criterion_group, criterion_main, Criterion}; - -/// Benchmarks the encoding and decoding of `IntegerList` using criterion. -macro_rules! impl_criterion_encoding_benchmark { - ($name:tt) => { - pub fn criterion_benchmark(c: &mut Criterion) { - let mut size = 0; - c.bench_function(stringify!($name), |b| { - b.iter(|| { - let encoded_size = - reth_db::tables::codecs::fuzz::IntegerList::encode_and_decode(black_box( - reth_primitives::IntegerList::default(), - )) - .0; - - if size == 0 { - size = encoded_size; - } - }) - }); - println!("Size (bytes): `{size}`"); - } - - criterion_group!(benches, criterion_benchmark); - criterion_main!(benches); - }; -} - -#[cfg(not(feature = "bench-postcard"))] -impl_criterion_encoding_benchmark!(scale); - -#[cfg(feature = "bench-postcard")] -impl_criterion_encoding_benchmark!(postcard); diff --git a/crates/storage/db/benches/encoding_iai.rs b/crates/storage/db/benches/encoding_iai.rs deleted file mode 100644 index 163061cef..000000000 --- a/crates/storage/db/benches/encoding_iai.rs +++ /dev/null @@ -1,21 +0,0 @@ -use iai::{black_box, main}; -use reth_db::tables::codecs; - -/// Benchmarks the encoding and decoding of `Header` using iai. -macro_rules! 
impl_iai_encoding_benchmark { - ($name:tt) => { - fn $name() { - codecs::fuzz::IntegerList::encode_and_decode(black_box( - reth_primitives::IntegerList::default(), - )); - } - - main!($name); - }; -} - -#[cfg(not(feature = "bench-postcard"))] -impl_iai_encoding_benchmark!(scale); - -#[cfg(feature = "bench-postcard")] -impl_iai_encoding_benchmark!(postcard); diff --git a/crates/storage/db/benches/iai.rs b/crates/storage/db/benches/iai.rs new file mode 100644 index 000000000..77bc67e73 --- /dev/null +++ b/crates/storage/db/benches/iai.rs @@ -0,0 +1,79 @@ +#![allow(dead_code, unused_imports, non_snake_case)] + +use iai::main; +use paste::paste; + +macro_rules! impl_iai_inner { + ( + $(($name:tt, $mod:tt, $compress:tt, $decompress:tt, $encode:tt, $decode:tt, $seqread:tt, $randread:tt, $seqwrite:tt, $randwrite:tt))+ + ) => { + $( + mod $mod { + use iai::{black_box}; + include!("./utils.rs"); + + pub fn $compress() { + for (_, _, v, _) in black_box(load_vectors::()) { + black_box(v.compress()); + } + } + pub fn $decompress() { + for (_, _, _, comp) in black_box(load_vectors::()) { + let _ = black_box(::Value::decompress(comp)); + } + } + pub fn $encode() { + for (k, _, _, _) in black_box(load_vectors::()) { + black_box(k.encode()); + } + } + pub fn $decode() { + for (_, enc, _, _) in black_box(load_vectors::()) { + let _ = black_box(::Key::decode(enc)); + } + } + pub fn $seqread() {} + pub fn $randread() {} + pub fn $seqwrite() {} + pub fn $randwrite() {} + } + use $mod::*; + )+ + + main!( + $( + $compress, + $decompress, + $encode, + $decode, + )+ + ); + }; +} + +macro_rules! impl_iai { + ($($name:tt),+) => { + paste! 
{ + impl_iai_inner!( + $( + ( $name, [<$name _mod>], [<$name _ValueCompress>], [<$name _ValueDecompress>], [<$name _KeyEncode>], [<$name _KeyDecode>], [<$name _SeqRead>], [<$name _RandomRead>], [<$name _SeqWrite>], [<$name _RandomWrite>]) + )+ + ); + } + }; +} + +impl_iai!( + CanonicalHeaders, + HeaderTD, + HeaderNumbers, + Headers, + BlockBodies, + BlockOmmers, + TxHashNumber, + BlockTransitionIndex, + TxTransitionIndex, + Transactions, + PlainStorageState, + PlainAccountState +); diff --git a/crates/storage/db/benches/utils.rs b/crates/storage/db/benches/utils.rs new file mode 100644 index 000000000..d82eb55ae --- /dev/null +++ b/crates/storage/db/benches/utils.rs @@ -0,0 +1,70 @@ +use reth_db::{ + cursor::{DbCursorRO, DbCursorRW}, + database::Database, + mdbx::{test_utils::create_test_db_with_path, EnvKind, WriteMap}, + table::*, + tables::*, + transaction::{DbTx, DbTxMut}, +}; +use std::path::Path; + +/// Path where the DB is initialized for benchmarks. +const BENCH_DB_PATH: &str = "/tmp/reth-benches"; + +/// Used for RandomRead and RandomWrite benchmarks. +const RANDOM_INDEXES: [usize; 10] = [23, 2, 42, 5, 3, 99, 54, 0, 33, 64]; + +/// Returns bench vectors in the format: `Vec<(Key, EncodedKey, Value, CompressedValue)>`. +fn load_vectors<T: Table>() -> Vec<(T::Key, bytes::Bytes, T::Value, bytes::Bytes)> +where + T: Default, + T::Key: Default + Clone + for<'de> serde::Deserialize<'de>, + T::Value: Default + Clone + for<'de> serde::Deserialize<'de>, +{ + let list: Vec<(T::Key, T::Value)> = serde_json::from_reader(std::io::BufReader::new( + std::fs::File::open(format!( + "{}/../../../testdata/micro/db/{}.json", + env!("CARGO_MANIFEST_DIR"), + T::NAME + )) + .expect("Test vectors not found. 
They can be generated from the workspace by calling `cargo run --bin reth -- test-vectors tables`."), + )) + .unwrap(); + + list.into_iter() + .map(|(k, v)| { + ( + k.clone(), + bytes::Bytes::copy_from_slice(k.encode().as_ref()), + v.clone(), + bytes::Bytes::copy_from_slice(v.compress().as_ref()), + ) + }) + .collect::>() +} + +/// Sets up a clear database at `bench_db_path`. +fn set_up_db( + bench_db_path: &Path, + pair: &Vec<(::Key, bytes::Bytes, ::Value, bytes::Bytes)>, +) -> reth_db::mdbx::Env +where + T: Table + Default, + T::Key: Default + Clone, + T::Value: Default + Clone, +{ + // Reset DB + let _ = std::fs::remove_dir_all(bench_db_path); + let db = create_test_db_with_path::(EnvKind::RW, bench_db_path); + + { + // Prepare data to be read + let tx = db.tx_mut().expect("tx"); + for (k, _, v, _) in pair.clone() { + tx.put::(k, v).expect("submit"); + } + tx.inner.commit().unwrap(); + } + + db +} diff --git a/crates/storage/db/src/tables/models/blocks.rs b/crates/storage/db/src/tables/models/blocks.rs index 4fd92fda1..8fa20190f 100644 --- a/crates/storage/db/src/tables/models/blocks.rs +++ b/crates/storage/db/src/tables/models/blocks.rs @@ -65,7 +65,7 @@ pub type HeaderHash = H256; /// element as BlockNumber, helps out with querying/sorting. /// /// Since it's used as a key, the `BlockNumber` is not compressed when encoding it. -#[derive(Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, Ord, PartialOrd)] +#[derive(Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default, Ord, PartialOrd, Hash)] pub struct BlockNumHash(pub (BlockNumber, BlockHash)); impl std::fmt::Debug for BlockNumHash { diff --git a/crates/storage/db/src/tables/utils.rs b/crates/storage/db/src/tables/utils.rs index 8cd68d88e..1e20a809d 100644 --- a/crates/storage/db/src/tables/utils.rs +++ b/crates/storage/db/src/tables/utils.rs @@ -23,6 +23,20 @@ macro_rules! 
impl_fixed_arbitrary { Decode::decode(buffer).map_err(|_| arbitrary::Error::IncorrectFormat) } } + + #[cfg(any(test, feature = "arbitrary"))] + use proptest::strategy::Strategy; + #[cfg(any(test, feature = "arbitrary"))] + impl proptest::prelude::Arbitrary for $name { + type Parameters = (); + type Strategy = proptest::prelude::BoxedStrategy<$name>; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + proptest::collection::vec(proptest::arbitrary::any_with::<u8>(args), $size) + .prop_map(move |vec| Decode::decode(vec).unwrap()) + .boxed() + } + } }; }