feat(cli): reth prune (#9055)

Author: Alexey Shekhirin
Date: 2024-06-26 15:54:06 +01:00
Committed by: GitHub
Parent: bdabe66426
Commit: 8775a93d33
20 changed files with 245 additions and 35 deletions

Cargo.lock (generated)

@@ -6288,7 +6288,7 @@ dependencies = [
 "reth-payload-validator",
 "reth-primitives",
 "reth-provider",
-"reth-prune-types",
+"reth-prune",
 "reth-revm",
 "reth-rpc",
 "reth-rpc-api",
@@ -8239,6 +8239,7 @@ dependencies = [
 "reth-provider",
 "reth-prune-types",
 "reth-stages",
+"reth-stages-types",
 "reth-static-file-types",
 "reth-storage-errors",
 "reth-testing-utils",


@@ -66,7 +66,7 @@ reth-node-builder.workspace = true
 reth-node-events.workspace = true
 reth-consensus.workspace = true
 reth-optimism-primitives.workspace = true
-reth-prune-types.workspace = true
+reth-prune.workspace = true

 # crypto
 alloy-rlp.workspace = true


@@ -8,7 +8,7 @@ use crate::{
     commands::{
         config_cmd, db, debug_cmd, dump_genesis, import, init_cmd, init_state,
         node::{self, NoArgs},
-        p2p, recover, stage, test_vectors,
+        p2p, prune, recover, stage, test_vectors,
     },
     version::{LONG_VERSION, SHORT_VERSION},
 };
@@ -164,6 +164,7 @@ impl<Ext: clap::Args + fmt::Debug> Cli<Ext> {
             Commands::Config(command) => runner.run_until_ctrl_c(command.execute()),
             Commands::Debug(command) => runner.run_command_until_exit(|ctx| command.execute(ctx)),
             Commands::Recover(command) => runner.run_command_until_exit(|ctx| command.execute(ctx)),
+            Commands::Prune(command) => runner.run_until_ctrl_c(command.execute()),
         }
     }
@@ -223,6 +224,9 @@ pub enum Commands<Ext: clap::Args + fmt::Debug = NoArgs> {
     /// Scripts for node recovery
     #[command(name = "recover")]
     Recover(recover::Command),
+    /// Prune according to the configuration without any limits
+    #[command(name = "prune")]
+    Prune(prune::PruneCommand),
 }

 #[cfg(test)]


@@ -26,7 +26,7 @@ use reth_primitives::{BlockHashOrNumber, BlockNumber, B256};
 use reth_provider::{
     BlockExecutionWriter, ChainSpecProvider, ProviderFactory, StageCheckpointReader,
 };
-use reth_prune_types::PruneModes;
+use reth_prune::PruneModes;
 use reth_stages::{
     sets::DefaultStages,
     stages::{ExecutionStage, ExecutionStageThresholds},


@@ -23,7 +23,7 @@ use reth_provider::{
     BlockNumReader, BlockWriter, ChainSpecProvider, HeaderProvider, LatestStateProviderRef,
     OriginalValuesKnown, ProviderError, ProviderFactory, StateWriter,
 };
-use reth_prune_types::PruneModes;
+use reth_prune::PruneModes;
 use reth_revm::database::StateProviderDatabase;
 use reth_stages::{
     stages::{AccountHashingStage, MerkleStage, StorageHashingStage},


@@ -22,7 +22,7 @@ use reth_payload_builder::{PayloadBuilderHandle, PayloadBuilderService};
 use reth_provider::{
     providers::BlockchainProvider, CanonStateSubscriptions, ChainSpecProvider, ProviderFactory,
 };
-use reth_prune_types::PruneModes;
+use reth_prune::PruneModes;
 use reth_stages::Pipeline;
 use reth_static_file::StaticFileProducer;
 use reth_tasks::TaskExecutor;


@@ -27,7 +27,7 @@ use reth_provider::{
     BlockNumReader, ChainSpecProvider, HeaderProvider, ProviderError, ProviderFactory,
     StageCheckpointReader,
 };
-use reth_prune_types::PruneModes;
+use reth_prune::PruneModes;
 use reth_stages::{prelude::*, Pipeline, StageId, StageSet};
 use reth_static_file::StaticFileProducer;
 use std::{path::PathBuf, sync::Arc};


@@ -17,7 +17,7 @@ use reth_downloaders::file_client::{
 };
 use reth_optimism_primitives::bedrock_import::is_dup_tx;
 use reth_provider::StageCheckpointReader;
-use reth_prune_types::PruneModes;
+use reth_prune::PruneModes;
 use reth_stages::StageId;
 use reth_static_file::StaticFileProducer;
 use std::{path::PathBuf, sync::Arc};


@@ -7,12 +7,11 @@ pub mod dump_genesis;
 pub mod import;
 pub mod import_op;
 pub mod import_receipts_op;
 pub mod init_cmd;
 pub mod init_state;
 pub mod node;
 pub mod p2p;
+pub mod prune;
 pub mod recover;
 pub mod stage;
 pub mod test_vectors;


@@ -0,0 +1,43 @@
//! Command that runs pruning without any limits.

use crate::commands::common::{AccessRights, Environment, EnvironmentArgs};
use clap::Parser;
use reth_prune::PrunerBuilder;
use reth_static_file::StaticFileProducer;
use tracing::info;

/// Prunes according to the configuration without any limits
#[derive(Debug, Parser)]
pub struct PruneCommand {
    #[command(flatten)]
    env: EnvironmentArgs,
}

impl PruneCommand {
    /// Execute the `prune` command
    pub async fn execute(self) -> eyre::Result<()> {
        let Environment { config, provider_factory, .. } = self.env.init(AccessRights::RW)?;
        let prune_config = config.prune.unwrap_or_default();

        // Copy data from database to static files
        info!(target: "reth::cli", "Copying data from database to static files...");
        let static_file_producer =
            StaticFileProducer::new(provider_factory.clone(), prune_config.segments.clone());
        let lowest_static_file_height = static_file_producer.lock().copy_to_static_files()?.min();
        info!(target: "reth::cli", ?lowest_static_file_height, "Copied data from database to static files");

        // Delete data which has been copied to static files.
        if let Some(prune_tip) = lowest_static_file_height {
            info!(target: "reth::cli", ?prune_tip, ?prune_config, "Pruning data from database...");

            // Run the pruner according to the configuration, and don't enforce any limits on it
            let mut pruner = PrunerBuilder::new(prune_config)
                .prune_delete_limit(usize::MAX)
                .build(provider_factory);
            pruner.run(prune_tip)?;

            info!(target: "reth::cli", "Pruned data from database");
        }

        Ok(())
    }
}
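One detail worth calling out: `prune_delete_limit(usize::MAX)` lifts the deletion cap that normally bounds how much a pruner deletes in a single run. A one-shot CLI invocation has no live sync to stall, so the whole backlog can be pruned at once. A minimal sketch of the contrast, reusing the variables from `execute` above (the bounded limit value is purely illustrative, not taken from this diff):

```rust
// One-shot CLI prune, as in this commit: no deletion cap at all.
let mut one_shot = PrunerBuilder::new(prune_config.clone())
    .prune_delete_limit(usize::MAX)
    .build(provider_factory.clone());

// Hypothetical bounded configuration for comparison: limits deletions per
// run so pruning can interleave with live block processing.
let mut bounded = PrunerBuilder::new(prune_config)
    .prune_delete_limit(10_000)
    .build(provider_factory);
```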


@@ -9,7 +9,7 @@ use reth_exex::ExExManagerHandle;
 use reth_node_core::dirs::{ChainPath, DataDirPath};
 use reth_primitives::BlockNumber;
 use reth_provider::{providers::StaticFileProvider, ProviderFactory};
-use reth_prune_types::PruneModes;
+use reth_prune::PruneModes;
 use reth_stages::{
     stages::{
         AccountHashingStage, ExecutionStage, ExecutionStageThresholds, MerkleStage,


@@ -13,7 +13,7 @@ use reth_provider::{
     BlockExecutionWriter, BlockNumReader, ChainSpecProvider, FinalizedBlockReader,
     FinalizedBlockWriter, ProviderFactory, StaticFileProviderFactory,
 };
-use reth_prune_types::PruneModes;
+use reth_prune::PruneModes;
 use reth_stages::{
     sets::{DefaultStages, OfflineStages},
     stages::{ExecutionStage, ExecutionStageThresholds},


@@ -72,6 +72,7 @@
 - [`reth debug replay-engine`](./cli/reth/debug/replay-engine.md)
 - [`reth recover`](./cli/reth/recover.md)
 - [`reth recover storage-tries`](./cli/reth/recover/storage-tries.md)
+- [`reth prune`](./cli/reth/prune.md)
 - [Developers](./developers/developers.md) <!-- CLI_REFERENCE END -->
 - [Execution Extensions](./developers/exex/exex.md)
 - [How do ExExes work?](./developers/exex/how-it-works.md)

book/cli/SUMMARY.md (vendored)

@@ -43,4 +43,5 @@
 - [`reth debug replay-engine`](./reth/debug/replay-engine.md)
 - [`reth recover`](./reth/recover.md)
 - [`reth recover storage-tries`](./reth/recover/storage-tries.md)
+- [`reth prune`](./reth/prune.md)

book/cli/reth.md (vendored)

@@ -19,6 +19,7 @@ Commands:
   config   Write config to stdout
   debug    Various debug routines
   recover  Scripts for node recovery
+  prune    Prune according to the configuration without any limits
   help     Print this message or the help of the given subcommand(s)

 Options:

book/cli/reth/prune.md (vendored, new file)

@@ -0,0 +1,146 @@
# reth prune

Prune according to the configuration without any limits

```bash
$ reth prune --help
Usage: reth prune [OPTIONS]

Options:
--instance <INSTANCE>
Add a new instance of a node.
Configures the ports of the node to avoid conflicts with the defaults. This is useful for running multiple nodes on the same machine.
Max number of instances is 200. It is chosen in a way so that it's not possible to have port numbers that conflict with each other.
Changes to the following port numbers:
- `DISCOVERY_PORT`: default + `instance` - 1
- `AUTH_PORT`: default + `instance` * 100 - 100
- `HTTP_RPC_PORT`: default - `instance` + 1
- `WS_RPC_PORT`: default + `instance` * 2 - 2
[default: 1]
-h, --help
Print help (see a summary with '-h')
Datadir:
--datadir <DATA_DIR>
The path to the data dir for all reth files and subdirectories.
Defaults to the OS-specific data directory:
- Linux: `$XDG_DATA_HOME/reth/` or `$HOME/.local/share/reth/`
- Windows: `{FOLDERID_RoamingAppData}/reth/`
- macOS: `$HOME/Library/Application Support/reth/`
[default: default]
--datadir.static_files <PATH>
The absolute path to store static files in.
--config <FILE>
The path to the configuration file to use
--chain <CHAIN_OR_PATH>
The chain this node is running.
Possible values are either a built-in chain or the path to a chain specification file.
Built-in chains:
mainnet, sepolia, goerli, holesky, dev
[default: mainnet]
Database:
--db.log-level <LOG_LEVEL>
Database logging level. Levels higher than "notice" require a debug build
Possible values:
- fatal: Enables logging for critical conditions, i.e. assertion failures
- error: Enables logging for error conditions
- warn: Enables logging for warning conditions
- notice: Enables logging for normal but significant condition
- verbose: Enables logging for verbose informational
- debug: Enables logging for debug-level messages
- trace: Enables logging for trace debug-level messages
- extra: Enables logging for extra debug-level messages
--db.exclusive <EXCLUSIVE>
Open environment in exclusive/monopolistic mode. Makes it possible to open a database on an NFS volume
[possible values: true, false]
Logging:
--log.stdout.format <FORMAT>
The format to use for logs written to stdout
[default: terminal]
Possible values:
- json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging
- log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications
- terminal: Represents terminal-friendly formatting for logs
--log.stdout.filter <FILTER>
The filter to use for logs written to stdout
[default: ]
--log.file.format <FORMAT>
The format to use for logs written to the log file
[default: terminal]
Possible values:
- json: Represents JSON formatting for logs. This format outputs log records as JSON objects, making it suitable for structured logging
- log-fmt: Represents logfmt (key=value) formatting for logs. This format is concise and human-readable, typically used in command-line applications
- terminal: Represents terminal-friendly formatting for logs
--log.file.filter <FILTER>
The filter to use for logs written to the log file
[default: debug]
--log.file.directory <PATH>
The path to put log files in
[default: <CACHE_DIR>/logs]
--log.file.max-size <SIZE>
The maximum size (in MB) of one log file
[default: 200]
--log.file.max-files <COUNT>
The maximum amount of log files that will be stored. If set to 0, background file logging is disabled
[default: 5]
--log.journald
Write logs to journald
--log.journald.filter <FILTER>
The filter to use for logs written to journald
[default: error]
--color <COLOR>
Sets whether or not the formatter emits ANSI terminal escape codes for colors and other text formatting
[default: always]
Possible values:
- always: Colors on
- auto: Colors on
- never: Colors off
Display:
-v, --verbosity...
Set the minimum log level.
-v Errors
-vv Warnings
-vvv Info
-vvvv Debug
-vvvvv Traces (warning: very verbose!)
-q, --quiet
Silence all log output
```
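The `--instance` arithmetic quoted above is easy to misread, so here is a worked example. The baseline port values (discovery 30303, auth 8551, HTTP RPC 8545, WS RPC 8546) are assumptions for illustration; they are not restated in this help text, so verify them against your build:

```rust
/// Worked example of the `--instance` port arithmetic from the help text.
/// The default ports below are assumed values, not part of this diff.
fn ports_for_instance(instance: u16) -> (u16, u16, u16, u16) {
    let discovery = 30303 + instance - 1;   // DISCOVERY_PORT: default + `instance` - 1
    let auth = 8551 + instance * 100 - 100; // AUTH_PORT: default + `instance` * 100 - 100
    let http = 8545 - instance + 1;         // HTTP_RPC_PORT: default - `instance` + 1
    let ws = 8546 + instance * 2 - 2;       // WS_RPC_PORT: default + `instance` * 2 - 2
    (discovery, auth, http, ws)
}

fn main() {
    // Instance 1 keeps the defaults; instance 2 shifts every port so two
    // nodes on the same machine cannot collide.
    assert_eq!(ports_for_instance(1), (30303, 8551, 8545, 8546));
    assert_eq!(ports_for_instance(2), (30304, 8651, 8544, 8548));
}
```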


@@ -13,7 +13,6 @@ use reth_provider::{
 };
 use reth_prune::PrunerBuilder;
 use reth_static_file::StaticFileProducer;
-use reth_static_file_types::HighestStaticFiles;
 use reth_tokio_util::{EventSender, EventStream};
 use std::pin::Pin;
 use tokio::sync::watch;
@@ -248,26 +247,9 @@ where
     /// CAUTION: This method locks the static file producer Mutex, hence can block the thread if the
     /// lock is occupied.
     pub fn move_to_static_files(&self) -> RethResult<()> {
-        let static_file_producer = self.static_file_producer.lock();
-
         // Copies data from database to static files
-        let lowest_static_file_height = {
-            let provider = self.provider_factory.provider()?;
-            let stages_checkpoints = [StageId::Headers, StageId::Execution, StageId::Bodies]
-                .into_iter()
-                .map(|stage| {
-                    provider.get_stage_checkpoint(stage).map(|c| c.map(|c| c.block_number))
-                })
-                .collect::<Result<Vec<_>, _>>()?;
-
-            let targets = static_file_producer.get_static_file_targets(HighestStaticFiles {
-                headers: stages_checkpoints[0],
-                receipts: stages_checkpoints[1],
-                transactions: stages_checkpoints[2],
-            })?;
-            static_file_producer.run(targets)?;
-
-            stages_checkpoints.into_iter().min().expect("exists")
-        };
+        let lowest_static_file_height =
+            self.static_file_producer.lock().copy_to_static_files()?.min();

         // Deletes data which has been copied to static files.
         if let Some(prune_tip) = lowest_static_file_height {


@@ -21,6 +21,7 @@ reth-nippy-jar.workspace = true
 reth-tokio-util.workspace = true
 reth-prune-types.workspace = true
 reth-static-file-types.workspace = true
+reth-stages-types.workspace = true

 alloy-primitives.workspace = true


@@ -5,8 +5,12 @@ use alloy_primitives::BlockNumber;
 use parking_lot::Mutex;
 use rayon::prelude::*;
 use reth_db_api::database::Database;
-use reth_provider::{providers::StaticFileWriter, ProviderFactory, StaticFileProviderFactory};
+use reth_provider::{
+    providers::StaticFileWriter, ProviderFactory, StageCheckpointReader as _,
+    StaticFileProviderFactory,
+};
 use reth_prune_types::PruneModes;
+use reth_stages_types::StageId;
 use reth_static_file_types::HighestStaticFiles;
 use reth_storage_errors::provider::ProviderResult;
 use reth_tokio_util::{EventSender, EventStream};
@@ -56,7 +60,7 @@ pub struct StaticFileProducerInner<DB> {
     event_sender: EventSender<StaticFileProducerEvent>,
 }

-/// Static File targets, per data part, measured in [`BlockNumber`].
+/// Static File targets, per data segment, measured in [`BlockNumber`].
 #[derive(Debug, Clone, Eq, PartialEq)]
 pub struct StaticFileTargets {
     headers: Option<RangeInclusive<BlockNumber>>,
@@ -167,6 +171,28 @@ impl<DB: Database> StaticFileProducerInner<DB> {
         Ok(targets)
     }

+    /// Copies data from database to static files according to
+    /// [stage checkpoints](reth_stages_types::StageCheckpoint).
+    ///
+    /// Returns highest block numbers for all static file segments.
+    pub fn copy_to_static_files(&self) -> ProviderResult<HighestStaticFiles> {
+        let provider = self.provider_factory.provider()?;
+        let stages_checkpoints = [StageId::Headers, StageId::Execution, StageId::Bodies]
+            .into_iter()
+            .map(|stage| provider.get_stage_checkpoint(stage).map(|c| c.map(|c| c.block_number)))
+            .collect::<Result<Vec<_>, _>>()?;
+
+        let highest_static_files = HighestStaticFiles {
+            headers: stages_checkpoints[0],
+            receipts: stages_checkpoints[1],
+            transactions: stages_checkpoints[2],
+        };
+        let targets = self.get_static_file_targets(highest_static_files)?;
+        self.run(targets)?;
+
+        Ok(highest_static_files)
+    }
+
     /// Returns static file targets at the provided finalized block numbers per segment.
     /// The target is determined by checking against the highest `static_files` using
     /// [`reth_provider::providers::StaticFileProvider::get_highest_static_files`].
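Note the checkpoint-to-segment mapping in the new method: the `Headers` checkpoint bounds the headers segment, the `Execution` checkpoint bounds receipts (receipts are produced by execution), and the `Bodies` checkpoint bounds transactions (transactions live in block bodies). Both call sites in this commit, the engine's `move_to_static_files` and the new `prune` command, then reduce to the same two-step pattern (a sketch reusing names from the hunks above, not standalone code):

```rust
// 1) Copy database data into static files; returns per-segment highest blocks.
let highest = static_file_producer.lock().copy_to_static_files()?;

// 2) Delete database data only up to the lowest fully copied height, so no
//    segment can lose rows it has not yet written out as static files.
if let Some(prune_tip) = highest.min() {
    pruner.run(prune_tip)?;
}
```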


@@ -20,7 +20,7 @@ pub use segment::{SegmentConfig, SegmentHeader, SegmentRangeInclusive, StaticFil
 /// Default static file block count.
 pub const BLOCKS_PER_STATIC_FILE: u64 = 500_000;

-/// Highest static file block numbers, per data part.
+/// Highest static file block numbers, per data segment.
 #[derive(Debug, Clone, Copy, Default, Eq, PartialEq)]
 pub struct HighestStaticFiles {
     /// Highest static file block of headers, inclusive.

@@ -53,6 +53,11 @@ impl HighestStaticFiles {
         }
     }

+    /// Returns the minimum block of all segments.
+    pub fn min(&self) -> Option<u64> {
+        [self.headers, self.transactions, self.receipts].iter().filter_map(|&option| option).min()
+    }
+
     /// Returns the maximum block of all segments.
     pub fn max(&self) -> Option<u64> {
         [self.headers, self.transactions, self.receipts].iter().filter_map(|&option| option).max()
     }
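A quick illustration of the new `min` (and the existing `max`) semantics, with hypothetical heights; segments that have produced no static files (`None`) are skipped rather than treated as zero:

```rust
let heights = HighestStaticFiles {
    headers: Some(1_000),
    receipts: Some(800),
    transactions: None, // this segment has produced no static files yet
};
assert_eq!(heights.min(), Some(800));   // minimum over populated segments
assert_eq!(heights.max(), Some(1_000));
assert_eq!(HighestStaticFiles::default().min(), None); // nothing copied yet
```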