From f545877bb82724f60092ec87dbd71b668ab0132e Mon Sep 17 00:00:00 2001 From: Matthias Seitz Date: Tue, 29 Oct 2024 14:23:54 +0100 Subject: [PATCH] Revert "Revert "feat: add geometry to database args"" (#12165) --- book/cli/reth/db.md | 6 + book/cli/reth/db/diff.md | 6 + book/cli/reth/debug/build-block.md | 6 + book/cli/reth/debug/execution.md | 6 + book/cli/reth/debug/in-memory-merkle.md | 6 + book/cli/reth/debug/merkle.md | 6 + book/cli/reth/debug/replay-engine.md | 6 + book/cli/reth/import.md | 6 + book/cli/reth/init-state.md | 6 + book/cli/reth/init.md | 6 + book/cli/reth/node.md | 6 + book/cli/reth/p2p.md | 6 + book/cli/reth/prune.md | 6 + book/cli/reth/recover/storage-tries.md | 6 + book/cli/reth/stage/drop.md | 6 + book/cli/reth/stage/dump.md | 6 + book/cli/reth/stage/run.md | 6 + book/cli/reth/stage/unwind.md | 6 + crates/node/core/src/args/database.rs | 184 +++++++++++++++++- .../storage/db/src/implementation/mdbx/mod.rs | 48 +++-- crates/storage/libmdbx-rs/src/environment.rs | 2 + 21 files changed, 326 insertions(+), 16 deletions(-) diff --git a/book/cli/reth/db.md b/book/cli/reth/db.md index f9a8a158a..17a6de4e6 100644 --- a/book/cli/reth/db.md +++ b/book/cli/reth/db.md @@ -81,6 +81,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/db/diff.md b/book/cli/reth/db/diff.md index f57c6ac36..efb9e7d32 100644 --- a/book/cli/reth/db/diff.md +++ b/book/cli/reth/db/diff.md @@ -45,6 +45,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/debug/build-block.md b/book/cli/reth/debug/build-block.md index 
2e6d637d5..7bceb62b9 100644 --- a/book/cli/reth/debug/build-block.md +++ b/book/cli/reth/debug/build-block.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/debug/execution.md b/book/cli/reth/debug/execution.md index 9ca74897c..b8e1ce05d 100644 --- a/book/cli/reth/debug/execution.md +++ b/book/cli/reth/debug/execution.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/debug/in-memory-merkle.md b/book/cli/reth/debug/in-memory-merkle.md index 3e322a691..a183db997 100644 --- a/book/cli/reth/debug/in-memory-merkle.md +++ b/book/cli/reth/debug/in-memory-merkle.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/debug/merkle.md b/book/cli/reth/debug/merkle.md index d701803b8..d9a72794e 100644 --- a/book/cli/reth/debug/merkle.md +++ b/book/cli/reth/debug/merkle.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/debug/replay-engine.md b/book/cli/reth/debug/replay-engine.md index dd587620a..b7a1266d3 100644 --- a/book/cli/reth/debug/replay-engine.md +++ b/book/cli/reth/debug/replay-engine.md @@ -69,6 +69,12 @@ 
Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/import.md b/book/cli/reth/import.md index 28e085bda..82a521ac0 100644 --- a/book/cli/reth/import.md +++ b/book/cli/reth/import.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/init-state.md b/book/cli/reth/init-state.md index 3e0735167..533c0f8f8 100644 --- a/book/cli/reth/init-state.md +++ b/book/cli/reth/init-state.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/init.md b/book/cli/reth/init.md index cd01accc0..ebe2a8386 100644 --- a/book/cli/reth/init.md +++ b/book/cli/reth/init.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/node.md b/book/cli/reth/node.md index a3ff8f6a5..52f597279 100644 --- a/book/cli/reth/node.md +++ b/book/cli/reth/node.md @@ -590,6 +590,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/p2p.md b/book/cli/reth/p2p.md 
index 603b451d9..33639042a 100644 --- a/book/cli/reth/p2p.md +++ b/book/cli/reth/p2p.md @@ -247,6 +247,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/prune.md b/book/cli/reth/prune.md index ed16197a7..41684ecd9 100644 --- a/book/cli/reth/prune.md +++ b/book/cli/reth/prune.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/recover/storage-tries.md b/book/cli/reth/recover/storage-tries.md index ecdaabe77..1afe94f55 100644 --- a/book/cli/reth/recover/storage-tries.md +++ b/book/cli/reth/recover/storage-tries.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/stage/drop.md b/book/cli/reth/stage/drop.md index 399b3818c..c22d6be66 100644 --- a/book/cli/reth/stage/drop.md +++ b/book/cli/reth/stage/drop.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/stage/dump.md b/book/cli/reth/stage/dump.md index 4b3de3fb1..e3df5bf2d 100644 --- a/book/cli/reth/stage/dump.md +++ b/book/cli/reth/stage/dump.md @@ -76,6 +76,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + 
--db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/stage/run.md b/book/cli/reth/stage/run.md index 9da3ce0de..204efc968 100644 --- a/book/cli/reth/stage/run.md +++ b/book/cli/reth/stage/run.md @@ -69,6 +69,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/book/cli/reth/stage/unwind.md b/book/cli/reth/stage/unwind.md index 700ab3d7e..cb72b9313 100644 --- a/book/cli/reth/stage/unwind.md +++ b/book/cli/reth/stage/unwind.md @@ -74,6 +74,12 @@ Database: [possible values: true, false] + --db.max-size + Maximum database size (e.g., 4TB, 8MB) + + --db.growth-step + Database growth step (e.g., 4GB, 4KB) + --db.read-transaction-timeout Read transaction timeout in seconds, 0 means no timeout diff --git a/crates/node/core/src/args/database.rs b/crates/node/core/src/args/database.rs index 0eec6639a..5b9d6ae61 100644 --- a/crates/node/core/src/args/database.rs +++ b/crates/node/core/src/args/database.rs @@ -1,6 +1,6 @@ //! clap [Args](clap::Args) for database configuration -use std::time::Duration; +use std::{fmt, str::FromStr, time::Duration}; use crate::version::default_client_version; use clap::{ @@ -22,6 +22,12 @@ pub struct DatabaseArgs { /// NFS volume. #[arg(long = "db.exclusive")] pub exclusive: Option<bool>, + /// Maximum database size (e.g., 4TB, 8MB) + #[arg(long = "db.max-size", value_parser = parse_byte_size)] + pub max_size: Option<usize>, + /// Database growth step (e.g., 4GB, 4KB) + #[arg(long = "db.growth-step", value_parser = parse_byte_size)] + pub growth_step: Option<usize>, /// Read transaction timeout in seconds, 0 means no timeout.
#[arg(long = "db.read-transaction-timeout")] pub read_transaction_timeout: Option<u64>, @@ -33,8 +39,9 @@ impl DatabaseArgs { self.get_database_args(default_client_version()) } - /// Returns the database arguments with configured log level and given client version. - pub const fn get_database_args( + /// Returns the database arguments with configured log level, client version, + /// max read transaction duration, and geometry. + pub fn get_database_args( &self, client_version: ClientVersion, ) -> reth_db::mdbx::DatabaseArguments { @@ -48,6 +55,8 @@ impl DatabaseArgs { .with_log_level(self.log_level) .with_exclusive(self.exclusive) .with_max_read_transaction_duration(max_read_transaction_duration) + .with_geometry_max_size(self.max_size) + .with_growth_step(self.growth_step) } } @@ -89,10 +98,84 @@ impl TypedValueParser for LogLevelValueParser { Some(Box::new(values)) } } + +/// Size in bytes. +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct ByteSize(pub usize); + +impl From<ByteSize> for usize { + fn from(s: ByteSize) -> Self { + s.0 + } +} + +impl FromStr for ByteSize { + type Err = String; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let s = s.trim().to_uppercase(); + let parts: Vec<&str> = s.split_whitespace().collect(); + + let (num_str, unit) = match parts.len() { + 1 => { + let (num, unit) = + s.split_at(s.find(|c: char| c.is_alphabetic()).unwrap_or(s.len())); + (num, unit) + } + 2 => (parts[0], parts[1]), + _ => { + return Err("Invalid format. Use '<number><unit>' or '<number> <unit>'.".to_string()) + } + }; + + let num: usize = num_str.parse().map_err(|_| "Invalid number".to_string())?; + + let multiplier = match unit { + "B" | "" => 1, // Assume bytes if no unit is specified + "KB" => 1024, + "MB" => 1024 * 1024, + "GB" => 1024 * 1024 * 1024, + "TB" => 1024 * 1024 * 1024 * 1024, + _ => return Err(format!("Invalid unit: {}.
Use B, KB, MB, GB, or TB.", unit)), + }; + + Ok(Self(num * multiplier)) + } +} + +impl fmt::Display for ByteSize { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + const KB: usize = 1024; + const MB: usize = KB * 1024; + const GB: usize = MB * 1024; + const TB: usize = GB * 1024; + + let (size, unit) = if self.0 >= TB { + (self.0 as f64 / TB as f64, "TB") + } else if self.0 >= GB { + (self.0 as f64 / GB as f64, "GB") + } else if self.0 >= MB { + (self.0 as f64 / MB as f64, "MB") + } else if self.0 >= KB { + (self.0 as f64 / KB as f64, "KB") + } else { + (self.0 as f64, "B") + }; + + write!(f, "{:.2}{}", size, unit) + } +} + +/// Value parser function that supports various formats. +fn parse_byte_size(s: &str) -> Result<usize, String> { + s.parse::<ByteSize>().map(Into::into) +} + #[cfg(test)] mod tests { use super::*; use clap::Parser; + use reth_db::mdbx::{GIGABYTE, KILOBYTE, MEGABYTE, TERABYTE}; /// A helper type to parse Args more easily #[derive(Parser)] @@ -108,6 +191,101 @@ mod tests { assert_eq!(args, default_args); } + #[test] + fn test_command_parser_with_valid_max_size() { + let cmd = CommandParser::<DatabaseArgs>::try_parse_from([ + "reth", + "--db.max-size", + "4398046511104", + ]) + .unwrap(); + assert_eq!(cmd.args.max_size, Some(TERABYTE * 4)); + } + + #[test] + fn test_command_parser_with_invalid_max_size() { + let result = + CommandParser::<DatabaseArgs>::try_parse_from(["reth", "--db.max-size", "invalid"]); + assert!(result.is_err()); + } + + #[test] + fn test_command_parser_with_valid_growth_step() { + let cmd = CommandParser::<DatabaseArgs>::try_parse_from([ + "reth", + "--db.growth-step", + "4294967296", + ]) + .unwrap(); + assert_eq!(cmd.args.growth_step, Some(GIGABYTE * 4)); + } + + #[test] + fn test_command_parser_with_invalid_growth_step() { + let result = + CommandParser::<DatabaseArgs>::try_parse_from(["reth", "--db.growth-step", "invalid"]); + assert!(result.is_err()); + } + + #[test] + fn test_command_parser_with_valid_max_size_and_growth_step_from_str() { + let cmd = CommandParser::<DatabaseArgs>::try_parse_from([ +
"reth", + "--db.max-size", + "2TB", + "--db.growth-step", + "1GB", + ]) + .unwrap(); + assert_eq!(cmd.args.max_size, Some(TERABYTE * 2)); + assert_eq!(cmd.args.growth_step, Some(GIGABYTE)); + + let cmd = CommandParser::<DatabaseArgs>::try_parse_from([ + "reth", + "--db.max-size", + "12MB", + "--db.growth-step", + "2KB", + ]) + .unwrap(); + assert_eq!(cmd.args.max_size, Some(MEGABYTE * 12)); + assert_eq!(cmd.args.growth_step, Some(KILOBYTE * 2)); + + // with spaces + let cmd = CommandParser::<DatabaseArgs>::try_parse_from([ + "reth", + "--db.max-size", + "12 MB", + "--db.growth-step", + "2 KB", + ]) + .unwrap(); + assert_eq!(cmd.args.max_size, Some(MEGABYTE * 12)); + assert_eq!(cmd.args.growth_step, Some(KILOBYTE * 2)); + + let cmd = CommandParser::<DatabaseArgs>::try_parse_from([ + "reth", + "--db.max-size", + "1073741824", + "--db.growth-step", + "1048576", + ]) + .unwrap(); + assert_eq!(cmd.args.max_size, Some(GIGABYTE)); + assert_eq!(cmd.args.growth_step, Some(MEGABYTE)); + } + + #[test] + fn test_command_parser_max_size_and_growth_step_from_str_invalid_unit() { + let result = + CommandParser::<DatabaseArgs>::try_parse_from(["reth", "--db.growth-step", "1 PB"]); + assert!(result.is_err()); + + let result = + CommandParser::<DatabaseArgs>::try_parse_from(["reth", "--db.max-size", "2PB"]); + assert!(result.is_err()); + } + #[test] fn test_possible_values() { // Initialize the LogLevelValueParser diff --git a/crates/storage/db/src/implementation/mdbx/mod.rs b/crates/storage/db/src/implementation/mdbx/mod.rs index 65b804e6a..78a3f7971 100644 --- a/crates/storage/db/src/implementation/mdbx/mod.rs +++ b/crates/storage/db/src/implementation/mdbx/mod.rs @@ -23,7 +23,7 @@ use reth_libmdbx::{ use reth_storage_errors::db::LogLevel; use reth_tracing::tracing::error; use std::{ - ops::Deref, + ops::{Deref, Range}, path::Path, sync::Arc, time::{SystemTime, UNIX_EPOCH}, @@ -33,8 +33,14 @@ use tx::Tx; pub mod cursor; pub mod tx; -const GIGABYTE: usize = 1024 * 1024 * 1024; -const TERABYTE: usize = GIGABYTE * 1024; +/// 1 KB in bytes +pub const
KILOBYTE: usize = 1024; +/// 1 MB in bytes +pub const MEGABYTE: usize = KILOBYTE * 1024; +/// 1 GB in bytes +pub const GIGABYTE: usize = MEGABYTE * 1024; +/// 1 TB in bytes +pub const TERABYTE: usize = GIGABYTE * 1024; /// MDBX allows up to 32767 readers (`MDBX_READERS_LIMIT`), but we limit it to slightly below that const DEFAULT_MAX_READERS: u64 = 32_000; @@ -64,6 +70,8 @@ impl DatabaseEnvKind { pub struct DatabaseArguments { /// Client version that accesses the database. client_version: ClientVersion, + /// Database geometry settings. + geometry: Geometry<Range<usize>>, /// Database log level. If [None], the default value is used. log_level: Option<LogLevel>, /// Maximum duration of a read transaction. If [None], the default value is used. @@ -93,15 +101,37 @@ pub struct DatabaseArguments { impl DatabaseArguments { /// Create new database arguments with given client version. - pub const fn new(client_version: ClientVersion) -> Self { + pub fn new(client_version: ClientVersion) -> Self { Self { client_version, + geometry: Geometry { + size: Some(0..(4 * TERABYTE)), + growth_step: Some(4 * GIGABYTE as isize), + shrink_threshold: Some(0), + page_size: Some(PageSize::Set(default_page_size())), + }, log_level: None, max_read_transaction_duration: None, exclusive: None, } } + /// Sets the upper size limit of the db environment, the maximum database size in bytes. + pub const fn with_geometry_max_size(mut self, max_size: Option<usize>) -> Self { + if let Some(max_size) = max_size { + self.geometry.size = Some(0..max_size); + } + self + } + + /// Configures the database growth step in bytes. + pub const fn with_growth_step(mut self, growth_step: Option<usize>) -> Self { + if let Some(growth_step) = growth_step { + self.geometry.growth_step = Some(growth_step as isize); + } + self + } + /// Set the log level. pub const fn with_log_level(mut self, log_level: Option<LogLevel>) -> Self { self.log_level = log_level; @@ -278,15 +308,7 @@ impl DatabaseEnv { // environment creation.
debug_assert!(Tables::ALL.len() <= 256, "number of tables exceed max dbs"); inner_env.set_max_dbs(256); - inner_env.set_geometry(Geometry { - // Maximum database size of 4 terabytes - size: Some(0..(4 * TERABYTE)), - // We grow the database in increments of 4 gigabytes - growth_step: Some(4 * GIGABYTE as isize), - // The database never shrinks - shrink_threshold: Some(0), - page_size: Some(PageSize::Set(default_page_size())), - }); + inner_env.set_geometry(args.geometry); fn is_current_process(id: u32) -> bool { #[cfg(unix)] diff --git a/crates/storage/libmdbx-rs/src/environment.rs b/crates/storage/libmdbx-rs/src/environment.rs index 480f5aaab..6a0b21040 100644 --- a/crates/storage/libmdbx-rs/src/environment.rs +++ b/crates/storage/libmdbx-rs/src/environment.rs @@ -489,8 +489,10 @@ pub struct PageOps { pub mincore: u64, } +/// Represents the geometry settings for the database environment #[derive(Clone, Debug, PartialEq, Eq)] pub struct Geometry<R> { + /// The size range in bytes. pub size: Option<R>, pub growth_step: Option<isize>, pub shrink_threshold: Option<isize>,