mirror of
https://github.com/hl-archive-node/nanoreth.git
synced 2025-12-06 10:59:55 +00:00
Make ETL file size configurable (#6927)
Co-authored-by: Alexey Shekhirin <a.shekhirin@gmail.com> Co-authored-by: joshieDo <ranriver@protonmail.com>
This commit is contained in:
@ -127,6 +127,7 @@ impl Command {
|
|||||||
header_downloader,
|
header_downloader,
|
||||||
body_downloader,
|
body_downloader,
|
||||||
factory.clone(),
|
factory.clone(),
|
||||||
|
stage_conf.etl.etl_file_size,
|
||||||
)
|
)
|
||||||
.set(SenderRecoveryStage {
|
.set(SenderRecoveryStage {
|
||||||
commit_threshold: stage_conf.sender_recovery.commit_threshold,
|
commit_threshold: stage_conf.sender_recovery.commit_threshold,
|
||||||
|
|||||||
@ -178,6 +178,9 @@ impl ImportCommand {
|
|||||||
reth_revm::EvmProcessorFactory::new(self.chain.clone(), EthEvmConfig::default());
|
reth_revm::EvmProcessorFactory::new(self.chain.clone(), EthEvmConfig::default());
|
||||||
|
|
||||||
let max_block = file_client.max_block().unwrap_or(0);
|
let max_block = file_client.max_block().unwrap_or(0);
|
||||||
|
|
||||||
|
let etl_file_size = config.stages.etl.etl_file_size;
|
||||||
|
|
||||||
let mut pipeline = Pipeline::builder()
|
let mut pipeline = Pipeline::builder()
|
||||||
.with_tip_sender(tip_tx)
|
.with_tip_sender(tip_tx)
|
||||||
// we want to sync all blocks the file client provides or 0 if empty
|
// we want to sync all blocks the file client provides or 0 if empty
|
||||||
@ -190,6 +193,7 @@ impl ImportCommand {
|
|||||||
header_downloader,
|
header_downloader,
|
||||||
body_downloader,
|
body_downloader,
|
||||||
factory.clone(),
|
factory.clone(),
|
||||||
|
etl_file_size,
|
||||||
)
|
)
|
||||||
.set(SenderRecoveryStage {
|
.set(SenderRecoveryStage {
|
||||||
commit_threshold: config.stages.sender_recovery.commit_threshold,
|
commit_threshold: config.stages.sender_recovery.commit_threshold,
|
||||||
|
|||||||
@ -82,6 +82,10 @@ pub struct Command {
|
|||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
batch_size: Option<u64>,
|
batch_size: Option<u64>,
|
||||||
|
|
||||||
|
/// The maximum size in bytes of data held in memory before being flushed to disk as a file.
|
||||||
|
#[arg(long)]
|
||||||
|
etl_file_size: Option<usize>,
|
||||||
|
|
||||||
/// Normally, running the stage requires unwinding for stages that already
|
/// Normally, running the stage requires unwinding for stages that already
|
||||||
/// have been run, in order to not rewrite to the same database slots.
|
/// have been run, in order to not rewrite to the same database slots.
|
||||||
///
|
///
|
||||||
@ -151,6 +155,8 @@ impl Command {
|
|||||||
|
|
||||||
let batch_size = self.batch_size.unwrap_or(self.to - self.from + 1);
|
let batch_size = self.batch_size.unwrap_or(self.to - self.from + 1);
|
||||||
|
|
||||||
|
let etl_file_size = self.etl_file_size.unwrap_or(500 * 1024 * 1024);
|
||||||
|
|
||||||
let (mut exec_stage, mut unwind_stage): (Box<dyn Stage<_>>, Option<Box<dyn Stage<_>>>) =
|
let (mut exec_stage, mut unwind_stage): (Box<dyn Stage<_>>, Option<Box<dyn Stage<_>>>) =
|
||||||
match self.stage {
|
match self.stage {
|
||||||
StageEnum::Bodies => {
|
StageEnum::Bodies => {
|
||||||
@ -229,7 +235,7 @@ impl Command {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
StageEnum::TxLookup => {
|
StageEnum::TxLookup => {
|
||||||
(Box::new(TransactionLookupStage::new(batch_size, None)), None)
|
(Box::new(TransactionLookupStage::new(batch_size, etl_file_size, None)), None)
|
||||||
}
|
}
|
||||||
StageEnum::AccountHashing => {
|
StageEnum::AccountHashing => {
|
||||||
(Box::new(AccountHashingStage::new(1, batch_size)), None)
|
(Box::new(AccountHashingStage::new(1, batch_size)), None)
|
||||||
|
|||||||
2
book/cli/reth/node.md
vendored
2
book/cli/reth/node.md
vendored
@ -238,7 +238,7 @@ RPC:
|
|||||||
--rpc-max-tracing-requests <COUNT>
|
--rpc-max-tracing-requests <COUNT>
|
||||||
Maximum number of concurrent tracing requests
|
Maximum number of concurrent tracing requests
|
||||||
|
|
||||||
[default: 8]
|
[default: 10]
|
||||||
|
|
||||||
--rpc-max-blocks-per-filter <COUNT>
|
--rpc-max-blocks-per-filter <COUNT>
|
||||||
Maximum number of blocks that could be scanned per filter request. (0 = entire chain)
|
Maximum number of blocks that could be scanned per filter request. (0 = entire chain)
|
||||||
|
|||||||
3
book/cli/reth/stage/run.md
vendored
3
book/cli/reth/stage/run.md
vendored
@ -61,6 +61,9 @@ Options:
|
|||||||
--batch-size <BATCH_SIZE>
|
--batch-size <BATCH_SIZE>
|
||||||
Batch size for stage execution and unwind
|
Batch size for stage execution and unwind
|
||||||
|
|
||||||
|
--etl-file-size <ETL_FILE_SIZE>
|
||||||
|
Size for temporary file during ETL stages
|
||||||
|
|
||||||
-s, --skip-unwind
|
-s, --skip-unwind
|
||||||
Normally, running the stage requires unwinding for stages that already have been run, in order to not rewrite to the same database slots.
|
Normally, running the stage requires unwinding for stages that already have been run, in order to not rewrite to the same database slots.
|
||||||
|
|
||||||
|
|||||||
@ -221,6 +221,19 @@ The storage history indexing stage builds an index of what blocks a particular s
|
|||||||
commit_threshold = 100000
|
commit_threshold = 100000
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### `etl`
|
||||||
|
|
||||||
|
An ETL (extract, transform, load) data collector. Used mainly to insert data into `MDBX` in a sorted manner.
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[stages.etl]
|
||||||
|
# The maximum size in bytes of data held in memory before being flushed to disk as a file.
|
||||||
|
#
|
||||||
|
# Lower threshold corresponds to more frequent flushes,
|
||||||
|
# but lowers temporary storage usage
|
||||||
|
file_size = 524_288_000 # 500 * 1024 * 1024
|
||||||
|
```
|
||||||
|
|
||||||
## The `[peers]` section
|
## The `[peers]` section
|
||||||
|
|
||||||
The peers section is used to configure how the networking component of reth establishes and maintains connections to peers.
|
The peers section is used to configure how the networking component of reth establishes and maintains connections to peers.
|
||||||
|
|||||||
@ -70,6 +70,8 @@ pub struct StageConfig {
|
|||||||
pub index_account_history: IndexHistoryConfig,
|
pub index_account_history: IndexHistoryConfig,
|
||||||
/// Index Storage History stage configuration.
|
/// Index Storage History stage configuration.
|
||||||
pub index_storage_history: IndexHistoryConfig,
|
pub index_storage_history: IndexHistoryConfig,
|
||||||
|
/// Common ETL related configuration.
|
||||||
|
pub etl: EtlConfig,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Header stage configuration.
|
/// Header stage configuration.
|
||||||
@ -235,7 +237,21 @@ impl Default for TransactionLookupConfig {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// History History stage configuration.
|
/// Common ETL related configuration.
|
||||||
|
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)]
|
||||||
|
#[serde(default)]
|
||||||
|
pub struct EtlConfig {
|
||||||
|
/// The maximum size in bytes of data held in memory before being flushed to disk as a file.
|
||||||
|
pub etl_file_size: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for EtlConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self { etl_file_size: 500 * (1024 * 1024) }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// History stage configuration.
|
||||||
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)]
|
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Serialize)]
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub struct IndexHistoryConfig {
|
pub struct IndexHistoryConfig {
|
||||||
|
|||||||
@ -406,6 +406,7 @@ where
|
|||||||
header_downloader,
|
header_downloader,
|
||||||
body_downloader,
|
body_downloader,
|
||||||
executor_factory.clone(),
|
executor_factory.clone(),
|
||||||
|
500 * (1024 * 1024),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@ -837,6 +837,7 @@ impl NodeConfig {
|
|||||||
header_downloader,
|
header_downloader,
|
||||||
body_downloader,
|
body_downloader,
|
||||||
factory.clone(),
|
factory.clone(),
|
||||||
|
stage_config.etl.etl_file_size,
|
||||||
)
|
)
|
||||||
.set(SenderRecoveryStage {
|
.set(SenderRecoveryStage {
|
||||||
commit_threshold: stage_config.sender_recovery.commit_threshold,
|
commit_threshold: stage_config.sender_recovery.commit_threshold,
|
||||||
@ -870,6 +871,7 @@ impl NodeConfig {
|
|||||||
.set(MerkleStage::new_execution(stage_config.merkle.clean_threshold))
|
.set(MerkleStage::new_execution(stage_config.merkle.clean_threshold))
|
||||||
.set(TransactionLookupStage::new(
|
.set(TransactionLookupStage::new(
|
||||||
stage_config.transaction_lookup.chunk_size,
|
stage_config.transaction_lookup.chunk_size,
|
||||||
|
stage_config.etl.etl_file_size,
|
||||||
prune_modes.transaction_lookup,
|
prune_modes.transaction_lookup,
|
||||||
))
|
))
|
||||||
.set(IndexAccountHistoryStage::new(
|
.set(IndexAccountHistoryStage::new(
|
||||||
|
|||||||
@ -57,7 +57,7 @@ fn transaction_lookup(c: &mut Criterion) {
|
|||||||
let mut group = c.benchmark_group("Stages");
|
let mut group = c.benchmark_group("Stages");
|
||||||
// don't need to run each stage for that many times
|
// don't need to run each stage for that many times
|
||||||
group.sample_size(10);
|
group.sample_size(10);
|
||||||
let stage = TransactionLookupStage::new(DEFAULT_NUM_BLOCKS, None);
|
let stage = TransactionLookupStage::new(DEFAULT_NUM_BLOCKS, 500 * 1024 * 1024, None);
|
||||||
|
|
||||||
let db = setup::txs_testdata(DEFAULT_NUM_BLOCKS);
|
let db = setup::txs_testdata(DEFAULT_NUM_BLOCKS);
|
||||||
|
|
||||||
|
|||||||
@ -59,6 +59,7 @@
|
|||||||
//! headers_downloader,
|
//! headers_downloader,
|
||||||
//! bodies_downloader,
|
//! bodies_downloader,
|
||||||
//! executor_factory,
|
//! executor_factory,
|
||||||
|
//! 500*1024*1024,
|
||||||
//! )
|
//! )
|
||||||
//! )
|
//! )
|
||||||
//! .build(provider_factory, static_file_producer);
|
//! .build(provider_factory, static_file_producer);
|
||||||
|
|||||||
@ -100,6 +100,7 @@ impl<Provider, H, B, EF> DefaultStages<Provider, H, B, EF> {
|
|||||||
header_downloader: H,
|
header_downloader: H,
|
||||||
body_downloader: B,
|
body_downloader: B,
|
||||||
executor_factory: EF,
|
executor_factory: EF,
|
||||||
|
etl_file_size: usize,
|
||||||
) -> Self
|
) -> Self
|
||||||
where
|
where
|
||||||
EF: ExecutorFactory,
|
EF: ExecutorFactory,
|
||||||
@ -111,6 +112,7 @@ impl<Provider, H, B, EF> DefaultStages<Provider, H, B, EF> {
|
|||||||
consensus,
|
consensus,
|
||||||
header_downloader,
|
header_downloader,
|
||||||
body_downloader,
|
body_downloader,
|
||||||
|
etl_file_size,
|
||||||
),
|
),
|
||||||
executor_factory,
|
executor_factory,
|
||||||
}
|
}
|
||||||
@ -162,6 +164,8 @@ pub struct OnlineStages<Provider, H, B> {
|
|||||||
header_downloader: H,
|
header_downloader: H,
|
||||||
/// The block body downloader
|
/// The block body downloader
|
||||||
body_downloader: B,
|
body_downloader: B,
|
||||||
|
/// The size of temporary files in bytes for ETL data collector.
|
||||||
|
etl_file_size: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<Provider, H, B> OnlineStages<Provider, H, B> {
|
impl<Provider, H, B> OnlineStages<Provider, H, B> {
|
||||||
@ -172,8 +176,9 @@ impl<Provider, H, B> OnlineStages<Provider, H, B> {
|
|||||||
consensus: Arc<dyn Consensus>,
|
consensus: Arc<dyn Consensus>,
|
||||||
header_downloader: H,
|
header_downloader: H,
|
||||||
body_downloader: B,
|
body_downloader: B,
|
||||||
|
etl_file_size: usize,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self { provider, header_mode, consensus, header_downloader, body_downloader }
|
Self { provider, header_mode, consensus, header_downloader, body_downloader, etl_file_size }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,9 +203,16 @@ where
|
|||||||
mode: HeaderSyncMode,
|
mode: HeaderSyncMode,
|
||||||
header_downloader: H,
|
header_downloader: H,
|
||||||
consensus: Arc<dyn Consensus>,
|
consensus: Arc<dyn Consensus>,
|
||||||
|
etl_file_size: usize,
|
||||||
) -> StageSetBuilder<DB> {
|
) -> StageSetBuilder<DB> {
|
||||||
StageSetBuilder::default()
|
StageSetBuilder::default()
|
||||||
.add_stage(HeaderStage::new(provider, header_downloader, mode, consensus.clone()))
|
.add_stage(HeaderStage::new(
|
||||||
|
provider,
|
||||||
|
header_downloader,
|
||||||
|
mode,
|
||||||
|
consensus.clone(),
|
||||||
|
etl_file_size,
|
||||||
|
))
|
||||||
.add_stage(bodies)
|
.add_stage(bodies)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -219,6 +231,7 @@ where
|
|||||||
self.header_downloader,
|
self.header_downloader,
|
||||||
self.header_mode,
|
self.header_mode,
|
||||||
self.consensus.clone(),
|
self.consensus.clone(),
|
||||||
|
self.etl_file_size,
|
||||||
))
|
))
|
||||||
.add_stage(BodyStage::new(self.body_downloader))
|
.add_stage(BodyStage::new(self.body_downloader))
|
||||||
}
|
}
|
||||||
|
|||||||
@ -74,6 +74,7 @@ where
|
|||||||
downloader: Downloader,
|
downloader: Downloader,
|
||||||
mode: HeaderSyncMode,
|
mode: HeaderSyncMode,
|
||||||
consensus: Arc<dyn Consensus>,
|
consensus: Arc<dyn Consensus>,
|
||||||
|
etl_file_size: usize,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
provider: database,
|
provider: database,
|
||||||
@ -81,8 +82,8 @@ where
|
|||||||
mode,
|
mode,
|
||||||
consensus,
|
consensus,
|
||||||
sync_gap: None,
|
sync_gap: None,
|
||||||
hash_collector: Collector::new(100 * (1024 * 1024)),
|
hash_collector: Collector::new(etl_file_size / 2),
|
||||||
header_collector: Collector::new(100 * (1024 * 1024)),
|
header_collector: Collector::new(etl_file_size / 2),
|
||||||
is_etl_ready: false,
|
is_etl_ready: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -419,6 +420,7 @@ mod tests {
|
|||||||
(*self.downloader_factory)(),
|
(*self.downloader_factory)(),
|
||||||
HeaderSyncMode::Tip(self.channel.1.clone()),
|
HeaderSyncMode::Tip(self.channel.1.clone()),
|
||||||
self.consensus.clone(),
|
self.consensus.clone(),
|
||||||
|
500 * (1024 * 1024),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -32,19 +32,20 @@ pub struct TransactionLookupStage {
|
|||||||
/// The maximum number of lookup entries to hold in memory before pushing them to
|
/// The maximum number of lookup entries to hold in memory before pushing them to
|
||||||
/// [`reth_etl::Collector`].
|
/// [`reth_etl::Collector`].
|
||||||
chunk_size: u64,
|
chunk_size: u64,
|
||||||
|
etl_file_size: usize,
|
||||||
prune_mode: Option<PruneMode>,
|
prune_mode: Option<PruneMode>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for TransactionLookupStage {
|
impl Default for TransactionLookupStage {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { chunk_size: 5_000_000, prune_mode: None }
|
Self { chunk_size: 5_000_000, etl_file_size: 500 * 1024 * 1024, prune_mode: None }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TransactionLookupStage {
|
impl TransactionLookupStage {
|
||||||
/// Create new instance of [TransactionLookupStage].
|
/// Create new instance of [TransactionLookupStage].
|
||||||
pub fn new(chunk_size: u64, prune_mode: Option<PruneMode>) -> Self {
|
pub fn new(chunk_size: u64, etl_file_size: usize, prune_mode: Option<PruneMode>) -> Self {
|
||||||
Self { chunk_size, prune_mode }
|
Self { chunk_size, etl_file_size, prune_mode }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -99,7 +100,7 @@ impl<DB: Database> Stage<DB> for TransactionLookupStage {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 500MB temporary files
|
// 500MB temporary files
|
||||||
let mut hash_collector: Collector<TxHash, TxNumber> = Collector::new(500 * (1024 * 1024));
|
let mut hash_collector: Collector<TxHash, TxNumber> = Collector::new(self.etl_file_size);
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
target: "sync::stages::transaction_lookup",
|
target: "sync::stages::transaction_lookup",
|
||||||
@ -397,12 +398,18 @@ mod tests {
|
|||||||
struct TransactionLookupTestRunner {
|
struct TransactionLookupTestRunner {
|
||||||
db: TestStageDB,
|
db: TestStageDB,
|
||||||
chunk_size: u64,
|
chunk_size: u64,
|
||||||
|
etl_file_size: usize,
|
||||||
prune_mode: Option<PruneMode>,
|
prune_mode: Option<PruneMode>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for TransactionLookupTestRunner {
|
impl Default for TransactionLookupTestRunner {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { db: TestStageDB::default(), chunk_size: 1000, prune_mode: None }
|
Self {
|
||||||
|
db: TestStageDB::default(),
|
||||||
|
chunk_size: 1000,
|
||||||
|
etl_file_size: 500 * 1024 * 1024,
|
||||||
|
prune_mode: None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -449,7 +456,11 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn stage(&self) -> Self::S {
|
fn stage(&self) -> Self::S {
|
||||||
TransactionLookupStage { chunk_size: self.chunk_size, prune_mode: self.prune_mode }
|
TransactionLookupStage {
|
||||||
|
chunk_size: self.chunk_size,
|
||||||
|
etl_file_size: self.etl_file_size,
|
||||||
|
prune_mode: self.prune_mode,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user