refactor(stages): split Pipeline::run (#30)

This commit is contained in:
Bjerg
2022-10-10 15:52:23 +02:00
committed by GitHub
parent 15479457ab
commit a759201b40
4 changed files with 286 additions and 137 deletions

View File

@ -1,7 +1,7 @@
use crate::{ use crate::{
error::*, error::*,
util::opt::{self, MaybeSender}, util::{db::TxContainer, opt::MaybeSender},
ExecInput, ExecOutput, Stage, StageError, UnwindInput, UnwindOutput, ExecInput, ExecOutput, Stage, StageError, StageId, UnwindInput, UnwindOutput,
}; };
use reth_db::mdbx; use reth_db::mdbx;
use reth_primitives::BlockNumber; use reth_primitives::BlockNumber;
@ -9,21 +9,13 @@ use std::fmt::{Debug, Formatter};
use tokio::sync::mpsc::Sender; use tokio::sync::mpsc::Sender;
use tracing::*; use tracing::*;
mod ctrl;
mod event; mod event;
pub use event::*; mod state;
struct QueuedStage<'db, E> use ctrl::*;
where pub use event::*;
E: mdbx::EnvironmentKind, use state::*;
{
/// The actual stage to execute.
stage: Box<dyn Stage<'db, E>>,
/// The unwind priority of the stage.
unwind_priority: usize,
/// Whether or not this stage can only execute when we reach what we believe to be the tip of
/// the chain.
require_tip: bool,
}
/// A staged sync pipeline. /// A staged sync pipeline.
/// ///
@ -126,111 +118,70 @@ where
/// Run the pipeline. /// Run the pipeline.
pub async fn run(&mut self, db: &'db mdbx::Environment<E>) -> Result<(), PipelineError> { pub async fn run(&mut self, db: &'db mdbx::Environment<E>) -> Result<(), PipelineError> {
let mut previous_stage = None; let mut state = PipelineState {
let mut minimum_progress: Option<BlockNumber> = None; events_sender: self.events_sender.clone(),
let mut maximum_progress: Option<BlockNumber> = None; max_block: self.max_block,
let mut reached_tip_flag = true; maximum_progress: None,
minimum_progress: None,
reached_tip: true,
};
'run: loop { loop {
let mut tx = db.begin_rw_txn()?; let mut tx = TxContainer::new(db)?;
for (_, queued_stage) in self.stages.iter_mut().enumerate() { match self.run_loop(&mut state, &mut tx).await? {
let stage_id = queued_stage.stage.id(); ControlFlow::Continue => {
let block_reached = loop { tx.commit()?;
let prev_progress = stage_id.get_progress(&tx)?;
self.events_sender
.send(PipelineEvent::Running { stage_id, stage_progress: prev_progress })
.await?;
// Whether any stage has reached the maximum block, which also counts as having // Check if we've reached our desired target block
// reached the tip for stages that have reached the tip if state
let reached_max_block = maximum_progress .minimum_progress
.zip(self.max_block) .zip(self.max_block)
.map_or(false, |(progress, target)| progress >= target); .map_or(false, |(progress, target)| progress >= target)
{
// Whether this stage reached the max block return Ok(())
let stage_reached_max_block = prev_progress
.zip(self.max_block)
.map_or(false, |(prev_progress, target)| prev_progress >= target);
// Execute stage
let output = Self::execute_stage(
&mut tx,
queued_stage,
ExecInput { previous_stage, stage_progress: prev_progress },
reached_tip_flag || reached_max_block,
stage_reached_max_block,
)
.instrument(info_span!("Running", stage = %stage_id))
.await;
if output.is_err() {
self.events_sender
.send(PipelineEvent::Ran { stage_id, result: None })
.await?;
} }
}
match output { ControlFlow::Unwind { .. } => (),
Ok(out @ ExecOutput { stage_progress, done, reached_tip }) => {
debug!(stage = %stage_id, %stage_progress, %done, "Stage made progress");
stage_id.save_progress(&tx, stage_progress)?;
self.events_sender
.send(PipelineEvent::Ran { stage_id, result: Some(out.clone()) })
.await?;
// TODO: Make the commit interval configurable
tx.commit()?;
tx = db.begin_rw_txn()?;
// Update our minimum and maximum stage progress
minimum_progress = opt::min(minimum_progress, stage_progress);
maximum_progress = opt::max(maximum_progress, stage_progress);
if done {
reached_tip_flag = reached_tip;
break stage_progress
}
}
Err(StageError::Validation { block }) => {
debug!(stage = %stage_id, bad_block = %block, "Stage encountered a validation error.");
// We unwind because of a validation error. If the unwind itself fails,
// we bail entirely, otherwise we restart the execution loop from the
// beginning.
//
// Note on the drop: The transaction needs to be dropped in order for
// unwind to create a new one. Dropping the
// transaction will abort it; there is no
// other way currently to abort the transaction. It will be re-created
// if the loop restarts.
drop(tx);
match self
.unwind(db, prev_progress.unwrap_or_default(), Some(block))
.await
{
Ok(()) => continue 'run,
Err(e) => return Err(e),
}
}
Err(e) => return Err(PipelineError::Stage(e)),
}
};
// Set previous stage and continue on to next stage.
previous_stage = Some((stage_id, block_reached));
}
tx.commit()?;
// Check if we've reached our desired target block
if minimum_progress
.zip(self.max_block)
.map_or(false, |(progress, target)| progress >= target)
{
return Ok(())
} }
} }
} }
/// Run a single stage loop.
///
/// A stage loop will run each stage serially.
async fn run_loop<'tx>(
&mut self,
state: &mut PipelineState,
tx: &mut TxContainer<'db, 'tx, E>,
) -> Result<ControlFlow, PipelineError>
where
'db: 'tx,
{
let mut previous_stage = None;
for (_, queued_stage) in self.stages.iter_mut().enumerate() {
let stage_id = queued_stage.stage.id();
match queued_stage
.execute(state, previous_stage, tx)
.instrument(info_span!("Running", stage = %stage_id))
.await?
{
ControlFlow::Continue => {
previous_stage =
Some((stage_id, stage_id.get_progress(tx.get())?.unwrap_or_default()));
}
ControlFlow::Unwind { target, bad_block } => {
// TODO: Note on close
tx.close();
self.unwind(tx.db, target, bad_block).await?;
tx.open()?;
return Ok(ControlFlow::Unwind { target, bad_block })
}
}
}
Ok(ControlFlow::Continue)
}
/// Unwind the stages to the target block. /// Unwind the stages to the target block.
/// ///
/// If the unwind is due to a bad block the number of that block should be specified. /// If the unwind is due to a bad block the number of that block should be specified.
@ -311,40 +262,103 @@ where
} }
} }
impl<'db, E> Pipeline<'db, E> /// A container for a queued stage.
struct QueuedStage<'db, E>
where where
E: mdbx::EnvironmentKind, E: mdbx::EnvironmentKind,
{ {
async fn execute_stage<'tx>( /// The actual stage to execute.
tx: &mut mdbx::Transaction<'tx, mdbx::RW, E>, stage: Box<dyn Stage<'db, E>>,
QueuedStage { stage, require_tip, .. }: &mut QueuedStage<'db, E>, /// The unwind priority of the stage.
input: ExecInput, unwind_priority: usize,
reached_tip: bool, /// Whether or not this stage can only execute when we reach what we believe to be the tip of
stage_reached_max_block: bool, /// the chain.
) -> Result<ExecOutput, StageError> require_tip: bool,
}
impl<'db, E> QueuedStage<'db, E>
where
E: mdbx::EnvironmentKind,
{
/// Execute the stage.
async fn execute<'tx>(
&mut self,
state: &mut PipelineState,
previous_stage: Option<(StageId, BlockNumber)>,
tx: &mut TxContainer<'db, 'tx, E>,
) -> Result<ControlFlow, PipelineError>
where where
'db: 'tx, 'db: 'tx,
{ {
if !reached_tip && *require_tip { let stage_id = self.stage.id();
if self.require_tip && !state.reached_tip() {
info!("Tip not reached, skipping."); info!("Tip not reached, skipping.");
// Stage requires us to reach the tip of the chain first, but we have // Stage requires us to reach the tip of the chain first, but we have
// not. // not.
Ok(ExecOutput { return Ok(ControlFlow::Continue)
stage_progress: input.stage_progress.unwrap_or_default(), }
done: true,
reached_tip: false, loop {
}) let prev_progress = stage_id.get_progress(tx.get())?;
} else if stage_reached_max_block { state
info!("Stage reached maximum block, skipping."); .events_sender
// We reached the maximum block, so we skip the stage .send(PipelineEvent::Running { stage_id, stage_progress: prev_progress })
Ok(ExecOutput { .await?;
stage_progress: input.stage_progress.unwrap_or_default(),
done: true, let stage_reached_max_block = prev_progress
reached_tip: true, .zip(state.max_block)
}) .map_or(false, |(prev_progress, target)| prev_progress >= target);
} else { if stage_reached_max_block {
stage.execute(tx, input).await info!("Stage reached maximum block, skipping.");
// We reached the maximum block, so we skip the stage
state.set_reached_tip(true);
return Ok(ControlFlow::Continue)
}
match self
.stage
.execute(tx.get_mut(), ExecInput { previous_stage, stage_progress: prev_progress })
.await
{
Ok(out @ ExecOutput { stage_progress, done, reached_tip }) => {
debug!(stage = %stage_id, %stage_progress, %done, "Stage made progress");
stage_id.save_progress(tx.get_mut(), stage_progress)?;
state
.events_sender
.send(PipelineEvent::Ran { stage_id, result: Some(out.clone()) })
.await?;
// TODO: Make the commit interval configurable
tx.commit()?;
state.record_progress_outliers(stage_progress);
state.set_reached_tip(reached_tip);
if done {
return Ok(ControlFlow::Continue)
}
}
Err(err) => {
state.events_sender.send(PipelineEvent::Ran { stage_id, result: None }).await?;
return if let StageError::Validation { block } = err {
debug!(stage = %stage_id, bad_block = %block, "Stage encountered a validation error.");
// We unwind because of a validation error. If the unwind itself fails,
// we bail entirely, otherwise we restart the execution loop from the
// beginning.
Ok(ControlFlow::Unwind {
target: prev_progress.unwrap_or_default(),
bad_block: Some(block),
})
} else {
Err(err.into())
}
}
}
} }
} }
} }

View File

@ -0,0 +1,14 @@
use reth_primitives::BlockNumber;
/// Determines the control flow during pipeline execution.
pub(crate) enum ControlFlow {
/// An unwind was requested and must be performed before continuing.
Unwind {
/// The block to unwind to.
target: BlockNumber,
/// The block that caused the unwind.
bad_block: Option<BlockNumber>,
},
/// The pipeline is allowed to continue executing stages.
Continue,
}

View File

@ -0,0 +1,43 @@
use crate::{
pipeline::event::PipelineEvent,
util::{opt, opt::MaybeSender},
};
use reth_primitives::BlockNumber;
/// The state of the pipeline during execution.
pub(crate) struct PipelineState {
pub(crate) events_sender: MaybeSender<PipelineEvent>,
pub(crate) max_block: Option<BlockNumber>,
/// The maximum progress achieved by any stage during the execution of the pipeline.
pub(crate) maximum_progress: Option<BlockNumber>,
/// The minimum progress achieved by any stage during the execution of the pipeline.
pub(crate) minimum_progress: Option<BlockNumber>,
/// Whether or not the previous stage reached the tip of the chain.
///
/// **Do not use this** under normal circumstances. Instead, opt for
/// [PipelineState::reached_tip] and [PipelineState::set_reached_tip].
pub(crate) reached_tip: bool,
}
impl PipelineState {
/// Record the progress of a stage, setting the maximum and minimum progress achieved by any
/// stage during the execution of the pipeline.
pub(crate) fn record_progress_outliers(&mut self, stage_progress: BlockNumber) {
// Update our minimum and maximum stage progress
self.minimum_progress = opt::min(self.minimum_progress, stage_progress);
self.maximum_progress = opt::max(self.maximum_progress, stage_progress);
}
/// Whether or not the pipeline reached the tip of the chain.
pub(crate) fn reached_tip(&self) -> bool {
self.reached_tip ||
self.max_block
.zip(self.minimum_progress)
.map_or(false, |(target, progress)| progress >= target)
}
/// Set whether or not the pipeline has reached the tip of the chain.
pub(crate) fn set_reached_tip(&mut self, flag: bool) {
self.reached_tip = flag;
}
}

View File

@ -16,7 +16,7 @@ pub(crate) mod opt {
} }
/// The producing side of a [tokio::mpsc] channel that may or may not be set. /// The producing side of a [tokio::mpsc] channel that may or may not be set.
#[derive(Default)] #[derive(Default, Clone)]
pub(crate) struct MaybeSender<T> { pub(crate) struct MaybeSender<T> {
inner: Option<Sender<T>>, inner: Option<Sender<T>>,
} }
@ -61,3 +61,81 @@ pub(crate) mod opt {
} }
} }
} }
pub(crate) mod db {
use reth_db::mdbx;
/// A container for a MDBX transaction that will open a new inner transaction when the current
/// one is committed.
// NOTE: This container is needed since `Transaction::commit` takes `mut self`, so methods in
// the pipeline that just take a reference will not be able to commit their transaction and let
// the pipeline continue. Is there a better way to do this?
pub(crate) struct TxContainer<'db, 'tx, E>
where
'db: 'tx,
E: mdbx::EnvironmentKind,
{
/// A handle to the MDBX database.
pub(crate) db: &'db mdbx::Environment<E>,
tx: Option<mdbx::Transaction<'tx, mdbx::RW, E>>,
}
impl<'db, 'tx, E> TxContainer<'db, 'tx, E>
where
'db: 'tx,
E: mdbx::EnvironmentKind,
{
/// Create a new container with the given database handle.
///
/// A new inner transaction will be opened.
pub(crate) fn new(db: &'db mdbx::Environment<E>) -> Result<Self, mdbx::Error> {
Ok(Self { db, tx: Some(db.begin_rw_txn()?) })
}
/// Commit the current inner transaction and open a new one.
///
/// # Panics
///
/// Panics if an inner transaction does not exist. This should never be the case unless
/// [TxContainer::close] was called without following up with a call to [TxContainer::open].
pub(crate) fn commit(&mut self) -> Result<bool, mdbx::Error> {
let success =
self.tx.take().expect("Tried committing a non-existent transaction").commit()?;
self.tx = Some(self.db.begin_rw_txn()?);
Ok(success)
}
/// Get the inner transaction.
///
/// # Panics
///
/// Panics if an inner transaction does not exist. This should never be the case unless
/// [TxContainer::close] was called without following up with a call to [TxContainer::open].
pub(crate) fn get(&self) -> &mdbx::Transaction<'tx, mdbx::RW, E> {
self.tx.as_ref().expect("Tried getting a reference to a non-existent transaction")
}
/// Get a mutable reference to the inner transaction.
///
/// # Panics
///
/// Panics if an inner transaction does not exist. This should never be the case unless
/// [TxContainer::close] was called without following up with a call to [TxContainer::open].
pub(crate) fn get_mut(&mut self) -> &mut mdbx::Transaction<'tx, mdbx::RW, E> {
self.tx
.as_mut()
.expect("Tried getting a mutable reference to a non-existent transaction")
}
/// Open a new inner transaction.
pub(crate) fn open(&mut self) -> Result<(), mdbx::Error> {
self.tx = Some(self.db.begin_rw_txn()?);
Ok(())
}
/// Close the current inner transaction.
pub(crate) fn close(&mut self) {
self.tx.take();
}
}
}