mirror of https://github.com/hl-archive-node/nanoreth.git (synced 2025-12-06 10:59:55 +00:00)
feat: create a NippyJar snapshot from multiple Table (#4716)
@@ -9,6 +9,7 @@ repository.workspace = true
 [dependencies]
 reth-codecs = { path = "../storage/codecs" }
+reth-nippy-jar = { path = "../storage/nippy-jar" }
 reth-primitives.workspace = true
 reth-rpc-types.workspace = true
 reth-network-api.workspace = true

@@ -29,3 +29,9 @@ pub enum RethError {
     #[error("{0}")]
     Custom(String),
 }
+
+impl From<reth_nippy_jar::NippyJarError> for RethError {
+    fn from(err: reth_nippy_jar::NippyJarError) -> Self {
+        RethError::Custom(err.to_string())
+    }
+}
@@ -14,6 +14,7 @@ reth-primitives.workspace = true
 reth-interfaces.workspace = true
 reth-codecs = { path = "../codecs" }
 reth-libmdbx = { path = "../libmdbx-rs", optional = true, features = ["return-borrowed"] }
+reth-nippy-jar = { path = "../nippy-jar" }

 # codecs
 serde = { workspace = true, default-features = false }
@@ -42,6 +43,7 @@ tempfile = { version = "3.3.0", optional = true }
 parking_lot.workspace = true
 derive_more = "0.99"
 eyre = "0.6.8"
+paste = "1.0"

 # arbitrary utils
 arbitrary = { workspace = true, features = ["derive"], optional = true }
@@ -35,6 +35,11 @@ pub trait Compress: Send + Sync + Sized + Debug {
 pub trait Decompress: Send + Sync + Sized + Debug {
     /// Decompresses data coming from the database.
     fn decompress<B: AsRef<[u8]>>(value: B) -> Result<Self, DatabaseError>;
+
+    /// Decompresses owned data coming from the database.
+    fn decompress_owned(value: Vec<u8>) -> Result<Self, DatabaseError> {
+        Self::decompress(value)
+    }
 }

 /// Trait that will transform the data to be saved in the DB.
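The point of the new `decompress_owned` hook: callers that already own the byte buffer (the `Cow::Owned` branches later in this diff) can hand it over instead of re-borrowing, and implementations that store raw bytes can then skip a copy. A minimal standalone sketch of the pattern, with stand-in `DatabaseError` and `RawBytes` types (illustrative names, not from the codebase):

```rust
use std::fmt::Debug;

#[derive(Debug)]
struct DatabaseError; // stand-in for reth's error type

trait Decompress: Send + Sync + Sized + Debug {
    fn decompress<B: AsRef<[u8]>>(value: B) -> Result<Self, DatabaseError>;

    // Default just forwards, so existing impls stay valid unchanged.
    fn decompress_owned(value: Vec<u8>) -> Result<Self, DatabaseError> {
        Self::decompress(value)
    }
}

#[derive(Debug)]
struct RawBytes(Vec<u8>); // stand-in for a pass-through type like RawValue

impl Decompress for RawBytes {
    fn decompress<B: AsRef<[u8]>>(value: B) -> Result<Self, DatabaseError> {
        Ok(RawBytes(value.as_ref().to_vec())) // borrowed path: must copy
    }

    fn decompress_owned(value: Vec<u8>) -> Result<Self, DatabaseError> {
        Ok(RawBytes(value)) // owned path: reuse the allocation, no copy
    }
}

fn main() {
    let owned = vec![1u8, 2, 3];
    let v = RawBytes::decompress_owned(owned).unwrap();
    assert_eq!(v.0, vec![1, 2, 3]);
}
```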
@@ -68,6 +68,7 @@
 pub mod abstraction;

 mod implementation;
+pub mod snapshot;
 pub mod tables;
 mod utils;
 pub mod version;
crates/storage/db/src/snapshot.rs (new file, 87 lines)
@@ -0,0 +1,87 @@
//! reth's snapshot creation from database tables

use crate::{
    abstraction::cursor::DbCursorRO,
    table::{Key, Table},
    transaction::DbTx,
    RawKey, RawTable,
};
use reth_interfaces::RethResult;
use reth_nippy_jar::{ColumnResult, NippyJar, PHFKey};
use std::{error::Error as StdError, ops::RangeInclusive};

/// Macro that generates snapshot creation functions that take an arbitrary number of [`Table`] and
/// creates a [`NippyJar`] file out of their [`Table::Value`]. Each list of [`Table::Value`] from a
/// table is a column of values.
///
/// Has membership filter set and compression dictionary support.
macro_rules! generate_snapshot_func {
    ($(($($tbl:ident),+)),+ $(,)? ) => {
        $(
            paste::item! {
                /// Creates a snapshot from specified tables. Each table's `Value` iterator represents a column.
                ///
                /// **Ensure the range contains the same number of rows.**
                ///
                /// * `tx`: Database transaction.
                /// * `range`: Data range for columns in tables.
                /// * `keys`: Iterator of keys (eg. `TxHash` or `BlockHash`) with length equal to `row_count` and ordered by future column insertion from `range`.
                /// * `dict_compression_set`: Sets of column data for compression dictionaries. Max size is 2GB. Row count is independent.
                /// * `row_count`: Total rows to add to `NippyJar`. Must match row count in `range`.
                /// * `nippy_jar`: Snapshot object responsible for file generation.
                #[allow(non_snake_case)]
                pub fn [<create_snapshot$(_ $tbl)+>]<'tx,
                    $($tbl: Table<Key=K>,)+
                    K
                >
                (
                    tx: &impl DbTx<'tx>,
                    range: RangeInclusive<K>,
                    dict_compression_set: Option<Vec<impl Iterator<Item = Vec<u8>>>>,
                    keys: Option<impl Iterator<Item = ColumnResult<impl PHFKey>>>,
                    row_count: usize,
                    nippy_jar: &mut NippyJar
                ) -> RethResult<()>
                    where K: Key + Copy
                {
                    let range: RangeInclusive<RawKey<K>> = RawKey::new(*range.start())..=RawKey::new(*range.end());

                    // Create PHF and Filter if required
                    if let Some(keys) = keys {
                        nippy_jar.prepare_index(keys, row_count)?;
                    }

                    // Create compression dictionaries if required
                    if let Some(data_sets) = dict_compression_set {
                        nippy_jar.prepare_compression(data_sets)?;
                    }

                    // Creates the cursors for the columns
                    $(
                        let mut [< $tbl _cursor>] = tx.cursor_read::<RawTable<$tbl>>()?;
                        let [< $tbl _iter>] = [< $tbl _cursor>]
                            .walk_range(range.clone())?
                            .into_iter()
                            .map(|row|
                                row
                                    .map(|(_key, val)| val.into_value())
                                    .map_err(|e| Box::new(e) as Box<dyn StdError + Send + Sync>)
                            );
                    )+

                    // Create the snapshot from the data
                    let col_iterators: Vec<Box<dyn Iterator<Item = Result<Vec<u8>,_>>>> = vec![
                        $(Box::new([< $tbl _iter>]),)+
                    ];

                    nippy_jar.freeze(col_iterators, row_count as u64)?;

                    Ok(())
                }
            }
        )+
    };
}

generate_snapshot_func!((T1), (T1, T2), (T1, T2, T3), (T1, T2, T3, T4),);
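The invocation above generates `create_snapshot_T1`, `create_snapshot_T1_T2`, `create_snapshot_T1_T2_T3`, and `create_snapshot_T1_T2_T3_T4`, one per tuple. The function names come from the `paste` crate, which concatenates identifiers at macro-expansion time. A minimal, runnable sketch of that naming trick (this toy macro is illustrative, not part of the codebase; the diff itself uses the older `paste::item!` spelling, while `paste::paste!` is the current 1.0 entry point):

```rust
use paste::paste;

// Toy version of the ident-concatenation used by `generate_snapshot_func!`:
// `[<create $(_ $name)+>]` pastes the table idents into the function name.
macro_rules! gen_fn {
    ($($name:ident),+) => {
        paste! {
            fn [<create $(_ $name)+>]() -> &'static str {
                stringify!([<create $(_ $name)+>])
            }
        }
    };
}

gen_fn!(T1, T2);

fn main() {
    // The macro call above produced a function literally named `create_T1_T2`.
    assert_eq!(create_T1_T2(), "create_T1_T2");
}
```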
@@ -54,14 +54,21 @@ impl<K: Key> RawKey<K> {
     pub fn new(key: K) -> Self {
         Self { key: K::encode(key).as_ref().to_vec(), _phantom: std::marker::PhantomData }
     }

     /// Returns the decoded value.
     pub fn key(&self) -> Result<K, DatabaseError> {
         K::decode(&self.key)
     }

+    /// Returns the raw key as seen on the database.
+    pub fn raw_key(&self) -> &Vec<u8> {
+        &self.key
+    }
+
+    /// Consumes [`Self`] and returns the inner raw key.
+    pub fn into_key(self) -> Vec<u8> {
+        self.key
+    }
 }

 impl<K: Key> From<K> for RawKey<K> {
@@ -105,14 +112,21 @@ impl<V: Value> RawValue<V> {
     pub fn new(value: V) -> Self {
         Self { value: V::compress(value).as_ref().to_vec(), _phantom: std::marker::PhantomData }
     }

     /// Returns the decompressed value.
     pub fn value(&self) -> Result<V, DatabaseError> {
         V::decompress(&self.value)
     }

     /// Returns the raw value as seen on the database.
-    pub fn raw_value(&self) -> &Vec<u8> {
+    pub fn raw_value(&self) -> &[u8] {
         &self.value
     }

+    /// Consumes [`Self`] and returns the inner raw value.
+    pub fn into_value(self) -> Vec<u8> {
+        self.value
+    }
 }

 impl AsRef<[u8]> for RawValue<Vec<u8>> {
@@ -142,4 +156,8 @@ impl<V: Value> Decompress for RawValue<V> {
     fn decompress<B: AsRef<[u8]>>(value: B) -> Result<Self, DatabaseError> {
         Ok(Self { value: value.as_ref().to_vec(), _phantom: std::marker::PhantomData })
     }
+
+    fn decompress_owned(value: Vec<u8>) -> Result<Self, DatabaseError> {
+        Ok(Self { value, _phantom: std::marker::PhantomData })
+    }
 }

@@ -54,7 +54,7 @@ where
     };
     let value = match kv.1 {
         Cow::Borrowed(v) => Decompress::decompress(v)?,
-        Cow::Owned(v) => Decompress::decompress(v)?,
+        Cow::Owned(v) => Decompress::decompress_owned(v)?,
     };
     Ok((key, value))
 }
@@ -68,7 +68,7 @@ where
 {
     Ok(match kv.1 {
         Cow::Borrowed(v) => Decompress::decompress(v)?,
-        Cow::Owned(v) => Decompress::decompress(v)?,
+        Cow::Owned(v) => Decompress::decompress_owned(v)?,
     })
 }

@@ -79,6 +79,6 @@ where
 {
     Ok(match value {
         Cow::Borrowed(v) => Decompress::decompress(v)?,
-        Cow::Owned(v) => Decompress::decompress(v)?,
+        Cow::Owned(v) => Decompress::decompress_owned(v)?,
     })
 }

@@ -9,7 +9,7 @@ use sucds::int_vectors::Access;
 use zstd::bulk::Decompressor;

 /// Simple cursor implementation to retrieve data from [`NippyJar`].
-pub struct NippyJarCursor<'a, H> {
+pub struct NippyJarCursor<'a, H = ()> {
     /// [`NippyJar`] which holds most of the required configuration to read from the file.
     jar: &'a NippyJar<H>,
     /// Optional dictionary decompressors.
@@ -3,6 +3,8 @@ use thiserror::Error;
 /// Errors associated with [`crate::NippyJar`].
 #[derive(Debug, Error)]
 pub enum NippyJarError {
+    #[error(transparent)]
+    Internal(#[from] Box<dyn std::error::Error + Send + Sync>),
     #[error(transparent)]
     Disconnect(#[from] std::io::Error),
     #[error(transparent)]
@@ -13,8 +13,8 @@
 use serde::{Deserialize, Serialize};
 use std::{
     clone::Clone,
+    error::Error as StdError,
     fs::File,
-    hash::Hash,
     io::{Seek, Write},
     marker::Sync,
     path::{Path, PathBuf},
@@ -32,6 +32,7 @@ pub mod compression;
 use compression::{Compression, Compressors};

 pub mod phf;
+pub use phf::PHFKey;
 use phf::{Fmph, Functions, GoFmph, PerfectHashingFunction};

 mod error;
@@ -45,6 +46,9 @@ const NIPPY_JAR_VERSION: usize = 1;
 /// A [`Row`] is a list of its selected column values.
 type Row = Vec<Vec<u8>>;

+/// Alias type for a column value wrapped in `Result`
+pub type ColumnResult<T> = Result<T, Box<dyn StdError + Send + Sync>>;
+
 /// `NippyJar` is a specialized storage format designed for immutable data.
 ///
 /// Data is organized into a columnar format, enabling column-based compression. Data retrieval
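`ColumnResult` lets column iterators surface per-row errors instead of failing opaquely mid-freeze: any error type can be boxed into it. A small standalone sketch of adapting a fallible row source into the shape the new `freeze` signature expects (the alias is re-declared locally so the example compiles on its own; `to_column` is a hypothetical helper, not from the codebase):

```rust
use std::error::Error as StdError;

/// Local re-declaration of the alias for a standalone example.
pub type ColumnResult<T> = Result<T, Box<dyn StdError + Send + Sync>>;

// Adapt a fallible row source into a column: an iterator of
// `ColumnResult<Vec<u8>>`, boxing whatever error the source produces.
fn to_column<I, E>(rows: I) -> impl Iterator<Item = ColumnResult<Vec<u8>>>
where
    I: Iterator<Item = Result<Vec<u8>, E>>,
    E: StdError + Send + Sync + 'static,
{
    rows.map(|r| r.map_err(|e| Box::new(e) as Box<dyn StdError + Send + Sync>))
}

fn main() {
    let ok_rows = vec![Ok::<_, std::io::Error>(vec![1u8]), Ok(vec![2u8])];
    let col: Vec<_> = to_column(ok_rows.into_iter()).collect();
    assert_eq!(col.len(), 2);
}
```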
@@ -106,6 +110,11 @@ impl NippyJar<()> {
     pub fn load_without_header(path: &Path) -> Result<Self, NippyJarError> {
         NippyJar::<()>::load(path)
     }
+
+    /// Whether this [`NippyJar`] uses a [`InclusionFilters`] and [`Functions`].
+    pub fn uses_filters(&self) -> bool {
+        self.filter.is_some() && self.phf.is_some()
+    }
 }

 impl<H> NippyJar<H>
@@ -210,19 +219,23 @@ where
     /// Prepares beforehand the offsets index for querying rows based on `values` (eg. transaction
     /// hash). Expects `values` to be sorted in the same way as the data that is going to be
     /// later on inserted.
-    pub fn prepare_index<T: AsRef<[u8]> + Sync + Clone + Hash>(
+    ///
+    /// Currently collecting all items before acting on them.
+    pub fn prepare_index<T: PHFKey>(
         &mut self,
-        values: &[T],
+        values: impl IntoIterator<Item = ColumnResult<T>>,
+        row_count: usize,
     ) -> Result<(), NippyJarError> {
-        let mut offsets_index = vec![0; values.len()];
+        let values = values.into_iter().collect::<Result<Vec<_>, _>>()?;
+        let mut offsets_index = vec![0; row_count];

         // Builds perfect hashing function from the values
         if let Some(phf) = self.phf.as_mut() {
-            phf.set_keys(values)?;
+            phf.set_keys(&values)?;
         }

         if self.filter.is_some() || self.phf.is_some() {
-            for (row_num, v) in values.iter().enumerate() {
+            for (row_num, v) in values.into_iter().enumerate() {
                 if let Some(filter) = self.filter.as_mut() {
                     filter.add(v.as_ref())?;
                 }
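The reworked `prepare_index` accepts fallible keys and, as its doc comment notes, collects everything up front: `collect::<Result<Vec<_>, _>>()` short-circuits on the first error before any PHF or filter state is touched. A standalone illustration of that collect behavior:

```rust
fn main() {
    // Collecting an iterator of Results short-circuits on the first Err,
    // which is how `prepare_index` bails out before building the PHF.
    let ok: Result<Vec<i32>, String> = vec![Ok(1), Ok(2)].into_iter().collect();
    assert_eq!(ok, Ok(vec![1, 2]));

    let err: Result<Vec<i32>, String> =
        vec![Ok(1), Err("bad key".to_string())].into_iter().collect();
    assert_eq!(err, Err("bad key".to_string()));
}
```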
@@ -242,7 +255,7 @@ where
     /// Writes all data and configuration to a file and the offset index to another.
     pub fn freeze(
         &mut self,
-        columns: Vec<impl IntoIterator<Item = Vec<u8>>>,
+        columns: Vec<impl IntoIterator<Item = ColumnResult<Vec<u8>>>>,
         total_rows: u64,
     ) -> Result<(), NippyJarError> {
         let mut file = self.freeze_check(&columns)?;
@@ -275,7 +288,7 @@ where
             offsets.push(file.stream_position()? as usize);

             match column_iter.next() {
-                Some(value) => {
+                Some(Ok(value)) => {
                     if let Some(compression) = &self.compressor {
                         // Special zstd case with dictionaries
                         if let (Some(dict_compressors), Compressors::Zstd(_)) =
@@ -300,6 +313,7 @@ where
                         column_number as u64,
                     ))
                 }
+                Some(Err(err)) => return Err(err.into()),
             }

             iterators.push(column_iter);
@@ -339,7 +353,7 @@ where
     /// Safety checks before creating and returning a [`File`] handle to write data to.
     fn freeze_check(
         &mut self,
-        columns: &Vec<impl IntoIterator<Item = Vec<u8>>>,
+        columns: &Vec<impl IntoIterator<Item = ColumnResult<Vec<u8>>>>,
     ) -> Result<File, NippyJarError> {
         if columns.len() != self.columns {
             return Err(NippyJarError::ColumnLenMismatch(self.columns, columns.len()))
@@ -384,10 +398,7 @@ impl<H> PerfectHashingFunction for NippyJar<H>
 where
     H: Send + Sync + Serialize + for<'a> Deserialize<'a>,
 {
-    fn set_keys<T: AsRef<[u8]> + Sync + Clone + Hash>(
-        &mut self,
-        keys: &[T],
-    ) -> Result<(), NippyJarError> {
+    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
         self.phf.as_mut().ok_or(NippyJarError::PHFMissing)?.set_keys(keys)
     }

@@ -402,6 +413,7 @@ mod tests {
     use rand::{rngs::SmallRng, seq::SliceRandom, RngCore, SeedableRng};
     use std::collections::HashSet;

+    type ColumnResults<T> = Vec<ColumnResult<T>>;
     type ColumnValues = Vec<Vec<u8>>;

     fn test_data(seed: Option<u64>) -> (ColumnValues, ColumnValues) {
@@ -423,6 +435,10 @@ mod tests {
         (gen(), gen())
     }

+    fn clone_with_result(col: &ColumnValues) -> ColumnResults<Vec<u8>> {
+        col.iter().map(|v| Ok(v.clone())).collect()
+    }
+
     #[test]
     fn test_phf() {
         let (col1, col2) = test_data(None);
@@ -455,8 +471,10 @@ mod tests {
         assert_eq!(indexes, collect_indexes(nippy));

         // Ensure that loaded phf provides the same function outputs
-        nippy.prepare_index(&col1).unwrap();
-        nippy.freeze(vec![col1.clone(), col2.clone()], num_rows).unwrap();
+        nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap();
+        nippy
+            .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
+            .unwrap();
         let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
         assert_eq!(indexes, collect_indexes(&loaded_nippy));
     };
@@ -504,7 +522,7 @@ mod tests {
             Err(NippyJarError::FilterMaxCapacity)
         ));

-        nippy.freeze(vec![col1.clone(), col2.clone()], num_rows).unwrap();
+        nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap();
         let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();

         assert_eq!(nippy, loaded_nippy);
@@ -540,16 +558,14 @@ mod tests {
             ));
         }

-        let data = vec![col1.clone(), col2.clone()];
-
         // If ZSTD is enabled, do not write to the file unless the column dictionaries have been
         // calculated.
         assert!(matches!(
-            nippy.freeze(data.clone(), num_rows),
+            nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows),
            Err(NippyJarError::CompressorNotReady)
         ));

-        nippy.prepare_compression(data.clone()).unwrap();
+        nippy.prepare_compression(vec![col1.clone(), col2.clone()]).unwrap();

         if let Some(Compressors::Zstd(zstd)) = &nippy.compressor {
             assert!(matches!(
@@ -558,7 +574,7 @@ mod tests {
             ));
         }

-        nippy.freeze(data.clone(), num_rows).unwrap();
+        nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap();

         let mut loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
         assert_eq!(nippy, loaded_nippy);
@@ -578,7 +594,7 @@ mod tests {
         // Iterate over compressed values and compare
         let mut row_index = 0usize;
         while let Some(row) = cursor.next_row().unwrap() {
-            assert_eq!((&row[0], &row[1]), (&data[0][row_index], &data[1][row_index]));
+            assert_eq!((&row[0], &row[1]), (&col1[row_index], &col2[row_index]));
             row_index += 1;
         }
     }
@@ -598,9 +614,7 @@ mod tests {
             NippyJar::new_without_header(num_columns, file_path.path()).with_zstd(false, 5000);
         assert!(nippy.compressor.is_some());

-        let data = vec![col1.clone(), col2.clone()];
-
-        nippy.freeze(data.clone(), num_rows).unwrap();
+        nippy.freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows).unwrap();

         let loaded_nippy = NippyJar::load_without_header(file_path.path()).unwrap();
         assert_eq!(nippy, loaded_nippy);
@@ -613,7 +627,7 @@ mod tests {
         // Iterate over compressed values and compare
         let mut row_index = 0usize;
         while let Some(row) = cursor.next_row().unwrap() {
-            assert_eq!((&row[0], &row[1]), (&data[0][row_index], &data[1][row_index]));
+            assert_eq!((&row[0], &row[1]), (&col1[row_index], &col2[row_index]));
             row_index += 1;
         }
     } else {
@@ -629,6 +643,7 @@ mod tests {
         let num_columns = 2;
         let file_path = tempfile::NamedTempFile::new().unwrap();
         let data = vec![col1.clone(), col2.clone()];
+
         let block_start = 500;

         #[derive(Serialize, Deserialize, Debug)]
@@ -645,8 +660,10 @@ mod tests {
             .with_mphf();

         nippy.prepare_compression(data.clone()).unwrap();
-        nippy.prepare_index(&col1).unwrap();
-        nippy.freeze(data.clone(), num_rows).unwrap();
+        nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap();
+        nippy
+            .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
+            .unwrap();
     }

     // Read file
@@ -710,8 +727,10 @@ mod tests {
             .with_mphf();

         nippy.prepare_compression(data.clone()).unwrap();
-        nippy.prepare_index(&col1).unwrap();
-        nippy.freeze(data.clone(), num_rows).unwrap();
+        nippy.prepare_index(clone_with_result(&col1), col1.len()).unwrap();
+        nippy
+            .freeze(vec![clone_with_result(&col1), clone_with_result(&col2)], num_rows)
+            .unwrap();
     }

     // Read file
@@ -1,10 +1,9 @@
-use crate::{NippyJarError, PerfectHashingFunction};
+use crate::{NippyJarError, PHFKey, PerfectHashingFunction};
 use ph::fmph::{BuildConf, Function};
 use serde::{
     de::Error as DeSerdeError, ser::Error as SerdeError, Deserialize, Deserializer, Serialize,
     Serializer,
 };
-use std::{clone::Clone, hash::Hash, marker::Sync};

 /// Wrapper struct for [`Function`]. Implementation of the following [paper](https://dl.acm.org/doi/10.1145/3596453).
 #[derive(Default)]
@@ -19,10 +18,7 @@ impl Fmph {
 }

 impl PerfectHashingFunction for Fmph {
-    fn set_keys<T: AsRef<[u8]> + Sync + Clone + Hash>(
-        &mut self,
-        keys: &[T],
-    ) -> Result<(), NippyJarError> {
+    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
         self.function = Some(Function::from_slice_with_conf(
             keys,
             BuildConf { use_multiple_threads: true, ..Default::default() },
@@ -1,10 +1,9 @@
-use crate::{NippyJarError, PerfectHashingFunction};
+use crate::{NippyJarError, PHFKey, PerfectHashingFunction};
 use ph::fmph::{GOBuildConf, GOFunction};
 use serde::{
     de::Error as DeSerdeError, ser::Error as SerdeError, Deserialize, Deserializer, Serialize,
     Serializer,
 };
-use std::{clone::Clone, hash::Hash, marker::Sync};

 /// Wrapper struct for [`GOFunction`]. Implementation of the following [paper](https://dl.acm.org/doi/10.1145/3596453).
 #[derive(Default)]
@@ -19,10 +18,7 @@ impl GoFmph {
 }

 impl PerfectHashingFunction for GoFmph {
-    fn set_keys<T: AsRef<[u8]> + Sync + Clone + Hash>(
-        &mut self,
-        keys: &[T],
-    ) -> Result<(), NippyJarError> {
+    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
         self.function = Some(GOFunction::from_slice_with_conf(
             keys,
             GOBuildConf { use_multiple_threads: true, ..Default::default() },
@@ -8,13 +8,14 @@ pub use fmph::Fmph;
 mod go_fmph;
 pub use go_fmph::GoFmph;

+/// Trait alias for [`PerfectHashingFunction`] keys.
+pub trait PHFKey: AsRef<[u8]> + Sync + Clone + Hash {}
+impl<T: AsRef<[u8]> + Sync + Clone + Hash> PHFKey for T {}
+
 /// Trait to build and query a perfect hashing function.
 pub trait PerfectHashingFunction: Serialize + for<'a> Deserialize<'a> {
     /// Adds the key set and builds the perfect hashing function.
-    fn set_keys<T: AsRef<[u8]> + Sync + Clone + Hash>(
-        &mut self,
-        keys: &[T],
-    ) -> Result<(), NippyJarError>;
+    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError>;

     /// Get corresponding associated integer. There might be false positives.
     fn get_index(&self, key: &[u8]) -> Result<Option<u64>, NippyJarError>;
@@ -29,10 +30,7 @@ pub enum Functions {
 }

 impl PerfectHashingFunction for Functions {
-    fn set_keys<T: AsRef<[u8]> + Sync + Clone + Hash>(
-        &mut self,
-        keys: &[T],
-    ) -> Result<(), NippyJarError> {
+    fn set_keys<T: PHFKey>(&mut self, keys: &[T]) -> Result<(), NippyJarError> {
         match self {
             Functions::Fmph(f) => f.set_keys(keys),
             Functions::GoFmph(f) => f.set_keys(keys),
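`PHFKey` uses the standard trait-alias trick for stable Rust: an empty trait plus a blanket impl, so every type that already meets the four bounds gets the alias for free and downstream signatures shrink to one bound. A minimal standalone sketch of the pattern:

```rust
use std::hash::Hash;

// Trait-alias pattern: an empty trait plus a blanket impl.
trait PHFKey: AsRef<[u8]> + Sync + Clone + Hash {}
impl<T: AsRef<[u8]> + Sync + Clone + Hash> PHFKey for T {}

// Downstream signatures can now name one bound instead of four.
fn set_keys<T: PHFKey>(keys: &[T]) -> usize {
    keys.len()
}

fn main() {
    // `Vec<u8>` satisfies all four bounds, so it is a `PHFKey` automatically.
    let keys: Vec<Vec<u8>> = vec![b"a".to_vec(), b"b".to_vec()];
    assert_eq!(set_keys(&keys), 2);
}
```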
@@ -15,6 +15,7 @@ reth-interfaces.workspace = true
 reth-revm-primitives = { path = "../../revm/revm-primitives" }
 reth-db.workspace = true
 reth-trie = { path = "../../trie" }
+reth-nippy-jar = { path = "../nippy-jar" }

 # async
 tokio = { workspace = true, features = ["sync", "macros", "rt-multi-thread"] }
@@ -45,6 +46,7 @@ reth-interfaces = { workspace = true, features = ["test-utils"] }
 parking_lot.workspace = true
 tempfile = "3.3"
 assert_matches.workspace = true
+rand.workspace = true

 [features]
 test-utils = ["reth-rlp"]
@@ -35,6 +35,8 @@ use tracing::trace;
 mod bundle_state_provider;
 mod chain_info;
 mod database;
+mod snapshot;
+pub use snapshot::SnapshotProvider;
 mod state;
 use crate::{providers::chain_info::ChainInfoTracker, traits::BlockSource};
 pub use bundle_state_provider::BundleStateProvider;
crates/storage/provider/src/providers/snapshot.rs (new file, 205 lines)
@@ -0,0 +1,205 @@
use crate::HeaderProvider;
use reth_db::{
    table::{Decompress, Table},
    HeaderTD,
};
use reth_interfaces::RethResult;
use reth_nippy_jar::{NippyJar, NippyJarCursor};
use reth_primitives::{BlockHash, BlockNumber, Header, SealedHeader, U256};
use std::ops::RangeBounds;

/// SnapshotProvider
///
/// WIP Rudimentary impl just for tests
/// TODO: should be able to walk through snapshot files/block_ranges
/// TODO: Arc over NippyJars and/or NippyJarCursors (LRU)
#[derive(Debug)]
pub struct SnapshotProvider<'a> {
    /// NippyJar
    pub jar: &'a NippyJar,
}

impl<'a> SnapshotProvider<'a> {
    fn cursor(&self) -> NippyJarCursor<'a> {
        NippyJarCursor::new(self.jar, None).unwrap()
    }
}

impl<'a> HeaderProvider for SnapshotProvider<'a> {
    fn header(&self, block_hash: &BlockHash) -> RethResult<Option<Header>> {
        // WIP
        let mut cursor = self.cursor();

        let header = Header::decompress(
            &cursor.row_by_key_with_cols::<0b01, 2>(&block_hash.0).unwrap().unwrap()[0],
        )
        .unwrap();

        if &header.hash_slow() == block_hash {
            return Ok(Some(header))
        } else {
            // check next snapshot
        }
        Ok(None)
    }

    fn header_by_number(&self, _num: BlockNumber) -> RethResult<Option<Header>> {
        unimplemented!();
    }

    fn header_td(&self, block_hash: &BlockHash) -> RethResult<Option<U256>> {
        // WIP
        let mut cursor = self.cursor();

        let row = cursor.row_by_key_with_cols::<0b11, 2>(&block_hash.0).unwrap().unwrap();

        let header = Header::decompress(&row[0]).unwrap();
        let td = <HeaderTD as Table>::Value::decompress(&row[1]).unwrap();

        if &header.hash_slow() == block_hash {
            return Ok(Some(td.0))
        } else {
            // check next snapshot
        }
        Ok(None)
    }

    fn header_td_by_number(&self, _number: BlockNumber) -> RethResult<Option<U256>> {
        unimplemented!();
    }

    fn headers_range(&self, _range: impl RangeBounds<BlockNumber>) -> RethResult<Vec<Header>> {
        unimplemented!();
    }

    fn sealed_headers_range(
        &self,
        _range: impl RangeBounds<BlockNumber>,
    ) -> RethResult<Vec<SealedHeader>> {
        unimplemented!();
    }

    fn sealed_header(&self, _number: BlockNumber) -> RethResult<Option<SealedHeader>> {
        unimplemented!();
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::ProviderFactory;
    use rand::{self, seq::SliceRandom};
    use reth_db::{
        cursor::DbCursorRO,
        database::Database,
        snapshot::create_snapshot_T1_T2,
        test_utils::create_test_rw_db,
        transaction::{DbTx, DbTxMut},
        CanonicalHeaders, DatabaseError, HeaderNumbers, HeaderTD, Headers, RawTable,
    };
    use reth_interfaces::test_utils::generators::{self, random_header_range};
    use reth_nippy_jar::NippyJar;
    use reth_primitives::{H256, MAINNET};

    #[test]
    fn test_snap() {
        // Ranges
        let row_count = 100u64;
        let range = 0..=(row_count - 1);

        // Data sources
        let db = create_test_rw_db();
        let factory = ProviderFactory::new(&db, MAINNET.clone());
        let snap_file = tempfile::NamedTempFile::new().unwrap();

        // Setup data
        let mut headers = random_header_range(
            &mut generators::rng(),
            *range.start()..(*range.end() + 1),
            H256::random(),
        );

        db.update(|tx| -> std::result::Result<(), DatabaseError> {
            let mut td = U256::ZERO;
            for header in headers.clone() {
                td += header.header.difficulty;
                let hash = header.hash();

                tx.put::<CanonicalHeaders>(header.number, hash)?;
                tx.put::<Headers>(header.number, header.clone().unseal())?;
                tx.put::<HeaderTD>(header.number, td.into())?;
                tx.put::<HeaderNumbers>(hash, header.number)?;
            }
            Ok(())
        })
        .unwrap()
        .unwrap();

        // Create Snapshot
        {
            let with_compression = true;
            let with_filter = true;

            let mut nippy_jar = NippyJar::new_without_header(2, snap_file.path());

            if with_compression {
                nippy_jar = nippy_jar.with_zstd(false, 0);
            }

            if with_filter {
                nippy_jar = nippy_jar.with_cuckoo_filter(row_count as usize + 10).with_mphf();
            }

            let tx = db.tx().unwrap();

            // Hacky type inference. TODO fix
            let mut none_vec = Some(vec![vec![vec![0u8]].into_iter()]);
            let _ = none_vec.take();

            // Generate list of hashes for filters & PHF
            let mut cursor = tx.cursor_read::<RawTable<CanonicalHeaders>>().unwrap();
            let hashes = cursor
                .walk(None)
                .unwrap()
                .map(|row| row.map(|(_key, value)| value.into_value()).map_err(|e| e.into()));

            create_snapshot_T1_T2::<Headers, HeaderTD, BlockNumber>(
                &tx,
                range,
                none_vec,
                Some(hashes),
                row_count as usize,
                &mut nippy_jar,
            )
            .unwrap();
        }

        // Use providers to query Header data and compare if it matches
        {
            let jar = NippyJar::load_without_header(snap_file.path()).unwrap();

            let db_provider = factory.provider().unwrap();
            let snap_provider = SnapshotProvider { jar: &jar };

            assert!(!headers.is_empty());

            // Shuffled for chaos.
            headers.shuffle(&mut generators::rng());

            for header in headers {
                let header_hash = header.hash();
                let header = header.unseal();

                // Compare Header
                assert_eq!(header, db_provider.header(&header_hash).unwrap().unwrap());
                assert_eq!(header, snap_provider.header(&header_hash).unwrap().unwrap());

                // Compare HeaderTD
                assert_eq!(
                    db_provider.header_td(&header_hash).unwrap().unwrap(),
                    snap_provider.header_td(&header_hash).unwrap().unwrap()
                );
            }
        }
    }
}
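The `0b01` and `0b11` const generics in `row_by_key_with_cols::<MASK, COLUMNS>` appear to act as column bitmasks: bit i selects column i of the jar's two columns, so `header()` reads only the Headers column while `header_td()` reads both. A standalone sketch of just the selection arithmetic (the cursor's real implementation lives in reth-nippy-jar; `selected_columns` is a hypothetical helper for illustration):

```rust
// Bit `i` of MASK selects column `i` out of COLUMNS total columns.
fn selected_columns<const MASK: usize, const COLUMNS: usize>() -> Vec<usize> {
    (0..COLUMNS).filter(|i| MASK & (1 << i) != 0).collect()
}

fn main() {
    // `header()` only needs column 0 (Headers).
    assert_eq!(selected_columns::<0b01, 2>(), vec![0]);
    // `header_td()` needs both columns (Headers + HeaderTD).
    assert_eq!(selected_columns::<0b11, 2>(), vec![0, 1]);
}
```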