fix: use --syncmode=execution-layer from op-node for optimistic pipeline sync (#7552)

Co-authored-by: Matthias Seitz <matthias.seitz@outlook.de>
Author: joshieDo
Date: 2024-05-07 21:16:04 +01:00
Committed by: GitHub
parent 7c4d37b270
commit 9bd74fda9e
19 changed files with 365 additions and 111 deletions
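At its core, the change replaces the engine's raw `B256` pipeline sync target with a `PipelineTarget` from `reth_primitives::stage`, so the same channel can request either a forward sync to a hash or an unwind back to a block number after an optimistic target is reverted. The hunks below rely on roughly the following shape, inferred from the call sites in this diff (`.into()` from a hash, `sync_target()`, `PipelineTarget::Unwind`); it is a sketch, not the exact upstream definition.

use reth_primitives::{BlockNumber, B256};

// Sketch of the `PipelineTarget` shape assumed by this diff (the real type lives in
// `reth_primitives::stage` and may differ in detail).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PipelineTarget {
    /// Sync the pipeline forward until this block hash is reached.
    Sync(B256),
    /// Unwind the pipeline back to this block number (used when an optimistic target
    /// is reverted).
    Unwind(BlockNumber),
}

impl PipelineTarget {
    /// The forward-sync hash, if any; `Unwind` targets have none.
    pub fn sync_target(&self) -> Option<B256> {
        match self {
            Self::Sync(hash) => Some(*hash),
            Self::Unwind(_) => None,
        }
    }
}

/// Lets existing call sites keep passing a plain hash via `.into()`.
impl From<B256> for PipelineTarget {
    fn from(hash: B256) -> Self {
        Self::Sync(hash)
    }
}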


@ -130,10 +130,16 @@ impl EngineHooksController {
args: EngineHookContext,
db_write_active: bool,
) -> Poll<Result<PolledHook, EngineHookError>> {
// Hook with DB write access level is not allowed to run due to already running hook with DB
// write access level or active DB write according to passed argument
// A hook with DB write access level is not allowed to run for any of the following
// reasons:
// - Another hook with DB write access level is already running
// - A DB write is active, according to the passed argument
// - The finalized block number is missing. We might be in an optimistic sync scenario,
// where we cannot skip the FCU with the finalized hash, otherwise the CL might misbehave.
if hook.db_access_level().is_read_write() &&
(self.active_db_write_hook.is_some() || db_write_active)
(self.active_db_write_hook.is_some() ||
db_write_active ||
args.finalized_block_number.is_none())
{
return Poll::Pending
}
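For readers skimming this hunk: the new rule boils down to a single predicate over the three bullets above. The helper below is a hypothetical restatement (not part of the diff), using plain types.

/// Hypothetical restatement of the gating rule: a read-write hook may only be polled
/// when no other DB-write hook is running, no DB write is active, and a finalized
/// block number is known (otherwise we may be in an optimistic sync).
fn can_poll_db_write_hook(
    other_db_write_hook_running: bool,
    db_write_active: bool,
    finalized_block_number: Option<u64>,
) -> bool {
    !other_db_write_hook_running && !db_write_active && finalized_block_number.is_some()
}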


@ -15,8 +15,9 @@ use reth_interfaces::{
use reth_payload_builder::PayloadBuilderHandle;
use reth_payload_validator::ExecutionPayloadValidator;
use reth_primitives::{
constants::EPOCH_SLOTS, stage::StageId, BlockNumHash, BlockNumber, Head, Header, SealedBlock,
SealedHeader, B256,
constants::EPOCH_SLOTS,
stage::{PipelineTarget, StageId},
BlockNumHash, BlockNumber, Head, Header, SealedBlock, SealedHeader, B256,
};
use reth_provider::{
BlockIdReader, BlockReader, BlockSource, CanonChainTracker, ChainSpecProvider, ProviderError,
@ -316,7 +317,7 @@ where
};
if let Some(target) = maybe_pipeline_target {
this.sync.set_pipeline_sync_target(target);
this.sync.set_pipeline_sync_target(target.into());
}
Ok((this, handle))
@ -668,6 +669,21 @@ where
// threshold
return Some(state.finalized_block_hash)
}
// OPTIMISTIC SYNCING
//
// This can happen when the node is doing an optimistic sync, where the CL has no
// knowledge of the finalized hash, but expects the EL to sync as high as possible
// before finalizing.
//
// This usually doesn't happen on ETH mainnet, since CLs use the more secure
// checkpoint syncing.
//
// Optimism chains, however, do rely on this behavior; the risk of a reorg there is low.
debug!(target: "consensus::engine", hash=?state.head_block_hash, "Setting head hash as an optimistic pipeline target.");
return Some(state.head_block_hash)
}
Ok(Some(_)) => {
// we're fully synced to the finalized block
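Condensing the comment above: when the CL sends a forkchoice update without a finalized hash, the engine falls back to the head hash as an optimistic pipeline target. The function below is a hypothetical, simplified restatement of that fallback (the real code also weighs distance thresholds and whether the blocks are already known), assuming the `PipelineTarget` sketch near the top.

use reth_primitives::{stage::PipelineTarget, B256};

fn optimistic_pipeline_target(
    finalized_block_hash: B256,
    head_block_hash: B256,
) -> Option<PipelineTarget> {
    if finalized_block_hash.is_zero() {
        // Optimistic sync: the CL has no finalized hash yet and expects the EL to sync
        // as close to the head as possible before it starts finalizing.
        return Some(PipelineTarget::Sync(head_block_hash))
    }
    // Otherwise sync towards the finalized hash, as before.
    Some(PipelineTarget::Sync(finalized_block_hash))
}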
@ -981,6 +997,10 @@ where
// so we should not warn the user, since this will result in us attempting to sync
// to a new target and is considered normal operation during sync
}
CanonicalError::OptimisticTargetRevert(block_number) => {
self.sync.set_pipeline_sync_target(PipelineTarget::Unwind(*block_number));
return PayloadStatus::from_status(PayloadStatusEnum::Syncing)
}
_ => {
warn!(target: "consensus::engine", %error, ?state, "Failed to canonicalize the head hash");
// TODO(mattsse) better error handling before attempting to sync (FCU could be
@ -1011,7 +1031,7 @@ where
if self.pipeline_run_threshold == 0 {
// use the pipeline to sync to the target
trace!(target: "consensus::engine", %target, "Triggering pipeline run to sync missing ancestors of the new head");
self.sync.set_pipeline_sync_target(target);
self.sync.set_pipeline_sync_target(target.into());
} else {
// trigger a full block download for missing hash, or the parent of its lowest buffered
// ancestor
@ -1361,7 +1381,7 @@ where
) {
// we don't have the block yet and the distance exceeds the allowed
// threshold
self.sync.set_pipeline_sync_target(target);
self.sync.set_pipeline_sync_target(target.into());
// we can exit early here because the pipeline will take care of syncing
return
}
@ -1445,6 +1465,8 @@ where
// TODO: do not ignore this
let _ = self.blockchain.make_canonical(*target_hash.as_ref());
}
} else if let Some(block_number) = err.optimistic_revert_block_number() {
self.sync.set_pipeline_sync_target(PipelineTarget::Unwind(block_number));
}
Err((target.head_block_hash, err))
@ -1506,13 +1528,7 @@ where
// update the canon chain if continuous is enabled
if self.sync.run_pipeline_continuously() {
let max_block = ctrl.block_number().unwrap_or_default();
let max_header = self.blockchain.sealed_header(max_block)
.inspect_err(|error| {
error!(target: "consensus::engine", %error, "Error getting canonical header for continuous sync");
})?
.ok_or_else(|| ProviderError::HeaderNotFound(max_block.into()))?;
self.blockchain.set_canonical_head(max_header);
self.set_canonical_head(ctrl.block_number().unwrap_or_default())?;
}
let sync_target_state = match self.forkchoice_state_tracker.sync_target_state() {
@ -1525,6 +1541,14 @@ where
}
};
if sync_target_state.finalized_block_hash.is_zero() {
self.set_canonical_head(ctrl.block_number().unwrap_or_default())?;
self.blockchain.update_block_hashes_and_clear_buffered()?;
self.blockchain.connect_buffered_blocks_to_canonical_hashes()?;
// We are in an optimistic syncing process; it is better to wait for the next FCU to handle this
return Ok(())
}
// Next, we check if we need to schedule another pipeline run or transition
// to live sync via tree.
// This can arise if we buffer the forkchoice head, and if the head is an
@ -1580,7 +1604,7 @@ where
// the tree update from executing too many blocks and blocking.
if let Some(target) = pipeline_target {
// run the pipeline to the target since the distance is sufficient
self.sync.set_pipeline_sync_target(target);
self.sync.set_pipeline_sync_target(target.into());
} else if let Some(number) =
self.blockchain.block_number(sync_target_state.finalized_block_hash)?
{
@ -1592,12 +1616,23 @@ where
} else {
// We don't have the finalized block in the database, so we need to
// trigger another pipeline run.
self.sync.set_pipeline_sync_target(sync_target_state.finalized_block_hash);
self.sync.set_pipeline_sync_target(sync_target_state.finalized_block_hash.into());
}
Ok(())
}
fn set_canonical_head(&self, max_block: BlockNumber) -> RethResult<()> {
let max_header = self.blockchain.sealed_header(max_block)
.inspect_err(|error| {
error!(target: "consensus::engine", %error, "Error getting canonical header for continuous sync");
})?
.ok_or_else(|| ProviderError::HeaderNotFound(max_block.into()))?;
self.blockchain.set_canonical_head(max_header);
Ok(())
}
fn on_hook_result(&self, polled_hook: PolledHook) -> Result<(), BeaconConsensusEngineError> {
if let EngineHookEvent::Finished(Err(error)) = &polled_hook.event {
error!(
@ -1746,16 +1781,20 @@ where
Err(BeaconOnNewPayloadError::Internal(Box::new(error.clone())));
let _ = tx.send(response);
return Err(RethError::Canonical(error))
} else if error.optimistic_revert_block_number().is_some() {
// engine already set the pipeline unwind target on
// `try_make_sync_target_canonical`
PayloadStatus::from_status(PayloadStatusEnum::Syncing)
} else {
// If we could not make the sync target block canonical,
// we should return the error as an invalid payload status.
PayloadStatus::new(
PayloadStatusEnum::Invalid { validation_error: error.to_string() },
// TODO: return a proper latest valid hash
// See: <https://github.com/paradigmxyz/reth/issues/7146>
self.forkchoice_state_tracker.last_valid_head(),
)
}
// If we could not make the sync target block canonical,
// we should return the error as an invalid payload status.
PayloadStatus::new(
PayloadStatusEnum::Invalid { validation_error: error.to_string() },
// TODO: return a proper latest valid hash
// See: <https://github.com/paradigmxyz/reth/issues/7146>
self.forkchoice_state_tracker.last_valid_head(),
)
}
};


@ -11,7 +11,7 @@ use reth_interfaces::p2p::{
full_block::{FetchFullBlockFuture, FetchFullBlockRangeFuture, FullBlockClient},
headers::client::HeadersClient,
};
use reth_primitives::{BlockNumber, ChainSpec, SealedBlock, B256};
use reth_primitives::{stage::PipelineTarget, BlockNumber, ChainSpec, SealedBlock, B256};
use reth_stages_api::{ControlFlow, Pipeline, PipelineError, PipelineWithResult};
use reth_tasks::TaskSpawner;
use reth_tokio_util::EventListeners;
@ -44,7 +44,7 @@ where
/// The pipeline is used for large ranges.
pipeline_state: PipelineState<DB>,
/// Pending target block for the pipeline to sync
pending_pipeline_target: Option<B256>,
pending_pipeline_target: Option<PipelineTarget>,
/// In-flight full block requests in progress.
inflight_full_block_requests: Vec<FetchFullBlockFuture<Client>>,
/// In-flight full block _range_ requests in progress.
@ -216,8 +216,12 @@ where
/// Sets a new target to sync the pipeline to.
///
/// Ensures the target is not the zero hash.
pub(crate) fn set_pipeline_sync_target(&mut self, target: B256) {
if target.is_zero() {
pub(crate) fn set_pipeline_sync_target(&mut self, target: PipelineTarget) {
if target.sync_target().is_some_and(|target| target.is_zero()) {
trace!(
target: "consensus::engine::sync",
"Pipeline target cannot be zero hash."
);
// precaution to never sync to the zero hash
return
}
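The zero-hash guard above works through `sync_target()`: only forward-sync targets carry a hash, so `Unwind` targets always pass. A small self-contained sketch of that behaviour, with a hypothetical `is_valid_pipeline_target` helper and assuming the enum shape sketched near the top:

use reth_primitives::{stage::PipelineTarget, B256};

/// Hypothetical helper mirroring the guard in `set_pipeline_sync_target`.
fn is_valid_pipeline_target(target: &PipelineTarget) -> bool {
    // `sync_target()` is `None` for `Unwind(_)`, so unwind targets are never rejected.
    !target.sync_target().is_some_and(|hash| hash.is_zero())
}

fn main() {
    assert!(!is_valid_pipeline_target(&PipelineTarget::Sync(B256::ZERO)));
    assert!(is_valid_pipeline_target(&PipelineTarget::Sync(B256::repeat_byte(1))));
    assert!(is_valid_pipeline_target(&PipelineTarget::Unwind(100)));
}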
@ -384,7 +388,7 @@ pub(crate) enum EngineSyncEvent {
/// Pipeline started syncing
///
/// This is none if the pipeline is triggered without a specific target.
PipelineStarted(Option<B256>),
PipelineStarted(Option<PipelineTarget>),
/// Pipeline finished
///
/// If this is returned, the pipeline is idle.
@ -590,7 +594,7 @@ mod tests {
.build(pipeline, chain_spec);
let tip = client.highest_block().expect("there should be blocks here");
sync_controller.set_pipeline_sync_target(tip.hash());
sync_controller.set_pipeline_sync_target(tip.hash().into());
let sync_future = poll_fn(|cx| sync_controller.poll(cx));
let next_event = poll!(sync_future);
@ -598,7 +602,7 @@ mod tests {
// can assert that the first event here is PipelineStarted because we set the sync target,
// and we should get Ready because the pipeline should be spawned immediately
assert_matches!(next_event, Poll::Ready(EngineSyncEvent::PipelineStarted(Some(target))) => {
assert_eq!(target, tip.hash());
assert_eq!(target.sync_target().unwrap(), tip.hash());
});
// the next event should be the pipeline finishing in a good state