feat: hl-node ingestion

Ported from https://github.com/hl-archive-node/nanoreth/pull/7. Since this repo is experimental, it will be merged into the nanoreth repo in the future.
This commit is contained in:
sprites0
2025-07-01 03:09:03 +00:00
parent 39ae8ae9f8
commit 1633b3d2e1
5 changed files with 254 additions and 8 deletions

View File

@ -13,4 +13,4 @@ Heavily inspired by [reth-bsc](https://github.com/loocapro/reth-bsc).
- [x] Downloader - [x] Downloader
- [x] S3 format (file) - [x] S3 format (file)
- [x] S3 format (AWS API) - [x] S3 format (AWS API)
- [ ] hl-node format - [x] hl-node format

View File

@ -12,6 +12,9 @@ pub struct BlockSourceArgs {
#[arg(long)] #[arg(long)]
block_source: Option<String>, block_source: Option<String>,
#[arg(long)]
block_source_from_node: Option<String>,
/// Shorthand of --block-source=s3://hl-mainnet-evm-blocks /// Shorthand of --block-source=s3://hl-mainnet-evm-blocks
#[arg(long = "s3", default_value_t = false)] #[arg(long = "s3", default_value_t = false)]
s3: bool, s3: bool,
@ -29,12 +32,16 @@ impl BlockSourceArgs {
)); ));
}; };
let config = if let Some(bucket) = value.strip_prefix("s3://") { let mut config = if let Some(bucket) = value.strip_prefix("s3://") {
BlockSourceConfig::s3(bucket.to_string()).await BlockSourceConfig::s3(bucket.to_string()).await
} else { } else {
BlockSourceConfig::local(value.to_string()) BlockSourceConfig::local(value.to_string())
}; };
if let Some(block_source_from_node) = self.block_source_from_node.as_ref() {
config = config.with_block_source_from_node(block_source_from_node.to_string());
}
Ok(config) Ok(config)
} }
} }

View File

@ -1,14 +1,18 @@
use aws_config::BehaviorVersion; use aws_config::BehaviorVersion;
use super::sources::HlNodeBlockSource;
use super::{ use super::{
consts::DEFAULT_S3_BUCKET, consts::DEFAULT_S3_BUCKET,
sources::{BlockSourceBoxed, LocalBlockSource, S3BlockSource}, sources::{BlockSourceBoxed, LocalBlockSource, S3BlockSource},
}; };
use std::path::PathBuf;
use std::sync::Arc; use std::sync::Arc;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct BlockSourceConfig { pub struct BlockSourceConfig {
pub source_type: BlockSourceType, pub source_type: BlockSourceType,
pub block_source_from_node: Option<String>,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -19,19 +23,27 @@ pub enum BlockSourceType {
impl BlockSourceConfig { impl BlockSourceConfig {
pub async fn s3_default() -> Self { pub async fn s3_default() -> Self {
Self { source_type: BlockSourceType::S3 { bucket: DEFAULT_S3_BUCKET.to_string() } } Self {
source_type: BlockSourceType::S3 { bucket: DEFAULT_S3_BUCKET.to_string() },
block_source_from_node: None,
}
} }
pub async fn s3(bucket: String) -> Self { pub async fn s3(bucket: String) -> Self {
Self { source_type: BlockSourceType::S3 { bucket } } Self { source_type: BlockSourceType::S3 { bucket }, block_source_from_node: None }
} }
pub fn local(path: String) -> Self { pub fn local(path: String) -> Self {
Self { source_type: BlockSourceType::Local { path } } Self { source_type: BlockSourceType::Local { path }, block_source_from_node: None }
}
pub fn with_block_source_from_node(mut self, block_source_from_node: String) -> Self {
self.block_source_from_node = Some(block_source_from_node);
self
} }
pub async fn create_block_source(&self) -> BlockSourceBoxed { pub async fn create_block_source(&self) -> BlockSourceBoxed {
match &self.source_type { let block_source: BlockSourceBoxed = match &self.source_type {
BlockSourceType::S3 { bucket } => { BlockSourceType::S3 { bucket } => {
let client = aws_sdk_s3::Client::new( let client = aws_sdk_s3::Client::new(
&aws_config::defaults(BehaviorVersion::latest()) &aws_config::defaults(BehaviorVersion::latest())
@ -46,7 +58,18 @@ impl BlockSourceConfig {
let block_source = LocalBlockSource::new(path.clone()); let block_source = LocalBlockSource::new(path.clone());
Arc::new(Box::new(block_source)) Arc::new(Box::new(block_source))
} }
} };
let Some(block_source_from_node) = self.block_source_from_node.as_ref() else {
return block_source;
};
let block_source = HlNodeBlockSource::new(
block_source.clone(),
PathBuf::from(block_source_from_node.clone()),
)
.await;
Arc::new(Box::new(block_source))
} }
pub async fn create_cached_block_source(&self) -> BlockSourceBoxed { pub async fn create_cached_block_source(&self) -> BlockSourceBoxed {

View File

@ -0,0 +1,213 @@
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use eyre::ContextCompat;
use futures::future::BoxFuture;
use reth_network::cache::LruMap;
use serde::Deserialize;
use time::{format_description, Duration, OffsetDateTime};
use tokio::sync::Mutex;
use tracing::info;
use crate::node::types::{BlockAndReceipts, EvmBlock};
use super::{BlockSource, BlockSourceBoxed};
/// Poll interval when tailing an *open* hourly file.
///
/// The ingest loop sleeps for this long between scans of the hourly file it is
/// currently following.
const TAIL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(25);
/// Subdirectory that contains day folders (inside `local_ingest_dir`).
///
/// Hourly files are looked up at `<local_ingest_dir>/hourly/<date>/<hour>`.
const HOURLY_SUBDIR: &str = "hourly";
/// Maximum number of blocks read from hl-node that are kept in the local cache.
///
/// Under normal operation only 0-1 blocks are expected to be cached at a time.
const CACHE_SIZE: u32 = 1000;
/// Block source that tails the local ingest directory for the HL node.
///
/// Blocks found in the local hourly files are kept in an in-memory cache and
/// served from there; every other request is delegated to `fallback`.
///
/// Originally written at https://github.com/hl-archive-node/nanoreth/pull/7
#[derive(Debug, Clone)]
pub struct HlNodeBlockSource {
/// Source used when a block is not in the local cache, and for the latest
/// block number and the recommended chunk size.
pub fallback: BlockSourceBoxed,
/// Root directory written by hl-node; hourly files live under its
/// `hourly` subdirectory.
pub local_ingest_dir: PathBuf,
/// Blocks read from the local files, keyed by block height. An entry is
/// removed from the cache when it is handed out.
pub local_blocks_cache: Arc<Mutex<LruMap<u64, BlockAndReceipts>>>, // height → block
}
/// One line of an hourly file, deserialized as a two-element tuple.
///
/// The first element is a string that the scanner ignores (presumably the
/// block timestamp; confirm against the hl-node file format). The second is
/// the block together with its receipts.
#[derive(Deserialize)]
struct LocalBlockAndReceipts(String, BlockAndReceipts);
/// Outcome of one pass over an hourly file.
struct ScanResult {
/// One more than the highest block height accepted by the scan. The ingest
/// loop only reads this when `new_blocks` is non-empty.
next_expected_height: u64,
/// Blocks whose height is at or above the scan's start height, in file order.
new_blocks: Vec<BlockAndReceipts>,
}
/// Scans an hourly block file and collects every block at or above `start_height`.
///
/// The file may still be open for writing by hl-node, so this function never
/// panics on I/O or parse problems; it stops early and lets the caller retry on
/// its next poll.
///
/// # Arguments
/// * `path` - hourly file to read.
/// * `last_line` - resume cursor owned by the caller. On entry it is the number
///   of leading lines already consumed from this file (0 for a fresh file). On
///   return it has been advanced past every line that was fully processed, so
///   the next call does not parse those lines again.
/// * `start_height` - lowest block height the caller is still interested in.
///
/// # Returns
/// The accepted blocks in file order, plus the highest accepted height plus one
/// (`start_height + 1` when nothing was accepted; callers only use the value
/// when `new_blocks` is non-empty).
fn scan_hour_file(path: &Path, last_line: &mut usize, start_height: u64) -> ScanResult {
    let mut new_blocks = Vec::<BlockAndReceipts>::new();
    let mut last_height = start_height;

    // The file can disappear or be unreadable between the caller's existence
    // check and this open. Report "nothing new" so the caller polls again.
    let Ok(file) = std::fs::File::open(path) else {
        return ScanResult { next_expected_height: last_height + 1, new_blocks };
    };

    // Stream the file instead of collecting every line up front; lines that
    // were consumed by an earlier scan are skipped without being parsed.
    let pending_lines = BufReader::new(file).lines().enumerate().skip(*last_line);
    for (line_idx, line) in pending_lines {
        // A read error (I/O failure, or invalid UTF-8 from a line that is
        // still being written) ends this pass; the cursor stays on this line.
        let Ok(line) = line else { break };

        if !line.trim().is_empty() {
            // A line that does not parse is most likely the tail of the file
            // caught mid-write. Stop here without advancing the cursor so it is
            // retried once complete. A line that never becomes valid pauses
            // local ingestion for this file only; blocks are then served by
            // the fallback source.
            let Ok(LocalBlockAndReceipts(_, parsed_block)) =
                serde_json::from_str::<LocalBlockAndReceipts>(&line)
            else {
                break;
            };

            let height = match &parsed_block.block {
                EvmBlock::Reth115(b) => b.header.header.number,
            };

            // Blocks below `start_height` are dropped, but the cursor still
            // moves past them below: the caller's start height only grows, so
            // they can never become relevant again.
            if height >= start_height {
                last_height = last_height.max(height);
                new_blocks.push(parsed_block);
            }
        }

        // This line is fully handled; resume after it next time.
        *last_line = line_idx + 1;
    }

    ScanResult { next_expected_height: last_height + 1, new_blocks }
}
/// Converts a Unix timestamp given in whole seconds into an `OffsetDateTime`.
///
/// # Panics
/// Panics with "timestamp out of range" if the value cannot be represented.
fn datetime_from_timestamp(ts_sec: u64) -> OffsetDateTime {
    // Seconds -> nanoseconds; i128 is wide enough that this cannot overflow.
    let ts_nanos = i128::from(ts_sec) * 1_000_000_000;
    OffsetDateTime::from_unix_timestamp_nanos(ts_nanos).expect("timestamp out of range")
}
/// Formats `dt` with the `[year][month][day]` format description.
///
/// The result is used as the name of the day folder under the hourly directory.
///
/// # Panics
/// Panics if the format description fails to parse or formatting fails.
fn date_from_datetime(dt: OffsetDateTime) -> String {
    let day_layout = format_description::parse("[year][month][day]").unwrap();
    dt.format(&day_layout).unwrap()
}
impl BlockSource for HlNodeBlockSource {
    /// Returns the block at `height`, preferring the locally ingested copy and
    /// delegating to the fallback source when it is not cached.
    fn collect_block(&self, height: u64) -> BoxFuture<eyre::Result<BlockAndReceipts>> {
        Box::pin(async move {
            // Spelled out as a match (rather than a combinator chain) so the
            // local hit can be logged.
            match self.try_collect_local_block(height).await {
                Some(block) => {
                    info!("Returning locally synced block for @ Height [{height}]");
                    Ok(block)
                }
                None => self.fallback.collect_block(height).await,
            }
        })
    }

    /// Delegates to the fallback source.
    fn find_latest_block_number(&self) -> futures::future::BoxFuture<Option<u64>> {
        self.fallback.find_latest_block_number()
    }

    /// Delegates to the fallback source.
    fn recommended_chunk_size(&self) -> u64 {
        self.fallback.recommended_chunk_size()
    }
}
/// Truncates `dt` to the start of its hour by zeroing the minute, second and
/// nanosecond components.
fn to_hourly(dt: OffsetDateTime) -> Result<OffsetDateTime, time::error::ComponentRange> {
    let without_minutes = dt.replace_minute(0)?;
    let without_seconds = without_minutes.replace_second(0)?;
    without_seconds.replace_nanosecond(0)
}
impl HlNodeBlockSource {
async fn try_collect_local_block(&self, height: u64) -> Option<BlockAndReceipts> {
let mut u_cache = self.local_blocks_cache.lock().await;
u_cache.remove(&height)
}
async fn start_local_ingest_loop(&self, current_head: u64, current_ts: u64) {
let root = self.local_ingest_dir.to_owned();
let cache = self.local_blocks_cache.clone();
tokio::spawn(async move {
let mut next_height = current_head;
let mut dt = to_hourly(datetime_from_timestamp(current_ts)).unwrap();
let mut hour = dt.hour();
let mut day_str = date_from_datetime(dt);
let mut last_line = 0;
loop {
let hour_file = root.join(HOURLY_SUBDIR).join(&day_str).join(format!("{hour}"));
if hour_file.exists() {
let ScanResult { next_expected_height, new_blocks } =
scan_hour_file(&hour_file, &mut last_line, next_height);
if !new_blocks.is_empty() {
let mut u_cache = cache.lock().await;
for blk in new_blocks {
let h = match &blk.block {
EvmBlock::Reth115(b) => {
let block_number = b.header.header.number;
block_number
}
};
u_cache.insert(h, blk);
}
next_height = next_expected_height;
}
}
// Decide whether the *current* hour file is closed (past) or
// still live. If its in the past by > 1 h, move to next hour;
// otherwise, keep tailing the same file.
let now = OffsetDateTime::now_utc();
// println!("Date Current {:?}", dt);
// println!("Now Current {:?}", now);
if dt + Duration::HOUR < now {
dt += Duration::HOUR;
hour = dt.hour();
day_str = date_from_datetime(dt);
last_line = 0;
info!(
"Moving to a new file. {:?}",
root.join(HOURLY_SUBDIR).join(&day_str).join(format!("{hour}"))
);
continue;
}
tokio::time::sleep(TAIL_INTERVAL).await;
}
});
}
pub(crate) async fn run(&self) -> eyre::Result<()> {
let latest_block_number = self
.fallback
.find_latest_block_number()
.await
.context("Failed to find latest block number")?;
let EvmBlock::Reth115(latest_block) =
self.fallback.collect_block(latest_block_number).await?.block;
let latest_block_ts = latest_block.header.header.timestamp;
self.start_local_ingest_loop(latest_block_number, latest_block_ts).await;
Ok(())
}
pub async fn new(fallback: BlockSourceBoxed, local_ingest_dir: PathBuf) -> Self {
let block_source = HlNodeBlockSource {
fallback,
local_ingest_dir,
local_blocks_cache: Arc::new(Mutex::new(LruMap::new(CACHE_SIZE))),
};
block_source.run().await.unwrap();
block_source
}
}

View File

@ -9,6 +9,9 @@ use std::{
}; };
use tracing::info; use tracing::info;
mod hl_node;
pub use hl_node::HlNodeBlockSource;
pub trait BlockSource: Send + Sync + std::fmt::Debug + Unpin + 'static { pub trait BlockSource: Send + Sync + std::fmt::Debug + Unpin + 'static {
fn collect_block(&self, height: u64) -> BoxFuture<eyre::Result<BlockAndReceipts>>; fn collect_block(&self, height: u64) -> BoxFuture<eyre::Result<BlockAndReceipts>>;
fn find_latest_block_number(&self) -> BoxFuture<Option<u64>>; fn find_latest_block_number(&self) -> BoxFuture<Option<u64>>;
@ -129,7 +132,7 @@ impl BlockSource for S3BlockSource {
} }
fn recommended_chunk_size(&self) -> u64 { fn recommended_chunk_size(&self) -> u64 {
100 1000
} }
} }