From af8885172313aca66816dfc72537728d0c3c4e36 Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Fri, 24 Nov 2023 18:02:14 +0000 Subject: [PATCH] feat: refactor generation of snapshots from the cli (#5464) --- Cargo.lock | 2 + bin/reth/Cargo.toml | 1 + bin/reth/src/db/snapshots/headers.rs | 48 +---- bin/reth/src/db/snapshots/mod.rs | 177 +++++++++++++++--- bin/reth/src/db/snapshots/receipts.rs | 48 +---- bin/reth/src/db/snapshots/transactions.rs | 51 +---- crates/primitives/src/snapshot/segment.rs | 5 + crates/snapshot/src/segments/headers.rs | 4 +- crates/snapshot/src/segments/mod.rs | 2 +- crates/snapshot/src/segments/receipts.rs | 4 +- crates/snapshot/src/segments/transactions.rs | 4 +- crates/snapshot/src/snapshotter.rs | 5 +- crates/storage/nippy-jar/src/filter/cuckoo.rs | 4 + crates/storage/nippy-jar/src/filter/mod.rs | 9 + crates/storage/nippy-jar/src/lib.rs | 19 ++ 15 files changed, 225 insertions(+), 158 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2fc9c302f..6059240ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5581,6 +5581,7 @@ dependencies = [ "pretty_assertions", "proptest", "rand 0.8.5", + "rayon", "reth-auto-seal-consensus", "reth-basic-payload-builder", "reth-beacon-consensus", @@ -6378,6 +6379,7 @@ dependencies = [ "reth-rpc-api", "reth-rpc-types", "serde_json", + "similar-asserts", "tokio", ] diff --git a/bin/reth/Cargo.toml b/bin/reth/Cargo.toml index 6b04ae299..273466aa1 100644 --- a/bin/reth/Cargo.toml +++ b/bin/reth/Cargo.toml @@ -103,6 +103,7 @@ humantime = "2.1.0" const-str = "0.5.6" boyer-moore-magiclen = "0.2.16" itertools.workspace = true +rayon.workspace = true [target.'cfg(not(windows))'.dependencies] jemallocator = { version = "0.5.0", optional = true } diff --git a/bin/reth/src/db/snapshots/headers.rs b/bin/reth/src/db/snapshots/headers.rs index d05ff80c8..5253601e3 100644 --- a/bin/reth/src/db/snapshots/headers.rs +++ b/bin/reth/src/db/snapshots/headers.rs @@ -3,56 +3,22 @@ use 
super::{ Command, }; use rand::{seq::SliceRandom, Rng}; -use reth_db::{database::Database, open_db_read_only, snapshot::HeaderMask}; +use reth_db::{open_db_read_only, snapshot::HeaderMask}; use reth_interfaces::db::LogLevel; use reth_primitives::{ snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction}, BlockHash, ChainSpec, Header, SnapshotSegment, }; use reth_provider::{ - providers::SnapshotProvider, DatabaseProviderRO, HeaderProvider, ProviderError, - ProviderFactory, TransactionsProviderExt, + providers::SnapshotProvider, BlockNumReader, HeaderProvider, ProviderError, ProviderFactory, + TransactionsProviderExt, }; -use reth_snapshot::{segments, segments::Segment}; use std::{ path::{Path, PathBuf}, sync::Arc, }; impl Command { - pub(crate) fn generate_headers_snapshot( - &self, - provider: &DatabaseProviderRO, - compression: Compression, - inclusion_filter: InclusionFilter, - phf: PerfectHashingFunction, - ) -> eyre::Result<()> { - let range = self.block_range(); - let filters = if self.with_filters { - Filters::WithFilters(inclusion_filter, phf) - } else { - Filters::WithoutFilters - }; - - let segment = segments::Headers::new(compression, filters); - - segment.snapshot::(provider, PathBuf::default(), range.clone())?; - - // Default name doesn't have any configuration - let tx_range = provider.transaction_range_by_block_range(range.clone())?; - reth_primitives::fs::rename( - SnapshotSegment::Headers.filename(&range, &tx_range), - SnapshotSegment::Headers.filename_with_configuration( - filters, - compression, - &range, - &tx_range, - ), - )?; - - Ok(()) - } - pub(crate) fn bench_headers_snapshot( &self, db_path: &Path, @@ -62,14 +28,18 @@ impl Command { inclusion_filter: InclusionFilter, phf: PerfectHashingFunction, ) -> eyre::Result<()> { + let factory = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()); + let provider = factory.provider()?; + let tip = provider.last_block_number()?; + let block_range = + 
self.block_ranges(tip).first().expect("has been generated before").clone(); + let filters = if self.with_filters { Filters::WithFilters(inclusion_filter, phf) } else { Filters::WithoutFilters }; - let block_range = self.block_range(); - let mut row_indexes = block_range.clone().collect::>(); let mut rng = rand::thread_rng(); diff --git a/bin/reth/src/db/snapshots/mod.rs b/bin/reth/src/db/snapshots/mod.rs index 1113d7086..b3909c29d 100644 --- a/bin/reth/src/db/snapshots/mod.rs +++ b/bin/reth/src/db/snapshots/mod.rs @@ -1,13 +1,21 @@ -use clap::Parser; +use clap::{builder::RangedU64ValueParser, Parser}; use itertools::Itertools; -use reth_db::{open_db_read_only, DatabaseEnv}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use reth_db::{database::Database, open_db_read_only, DatabaseEnv}; use reth_interfaces::db::LogLevel; +use reth_nippy_jar::NippyJar; use reth_primitives::{ - snapshot::{Compression, InclusionFilter, PerfectHashingFunction}, + snapshot::{Compression, Filters, InclusionFilter, PerfectHashingFunction, SegmentHeader}, BlockNumber, ChainSpec, SnapshotSegment, }; -use reth_provider::ProviderFactory; -use std::{ops::RangeInclusive, path::Path, sync::Arc}; +use reth_provider::{BlockNumReader, ProviderFactory, TransactionsProviderExt}; +use reth_snapshot::{segments as snap_segments, segments::Segment}; +use std::{ + ops::RangeInclusive, + path::{Path, PathBuf}, + sync::Arc, + time::{Duration, Instant}, +}; mod bench; mod headers; @@ -28,6 +36,19 @@ pub struct Command { #[arg(long, short, default_value = "500000")] block_interval: u64, + /// Sets the number of snapshots built in parallel. Note: Each parallel build is + /// memory-intensive. + #[arg( + long, short, + default_value = "1", + value_parser = RangedU64ValueParser::::new().range(1..) + )] + parallel: u64, + + /// Flag to skip snapshot creation and print snapshot files stats. 
+ #[arg(long, default_value = "false")] + only_stats: bool, + /// Flag to enable database-to-snapshot benchmarking. #[arg(long, default_value = "false")] bench: bool, @@ -41,7 +62,7 @@ pub struct Command { compression: Vec, /// Flag to enable inclusion list filters and PHFs. - #[arg(long, default_value = "true")] + #[arg(long, default_value = "false")] with_filters: bool, /// Specifies the perfect hashing function to use. @@ -65,39 +86,36 @@ impl Command { { let db = open_db_read_only(db_path, None)?; - let factory = ProviderFactory::new(db, chain.clone()); - let provider = factory.provider()?; + let factory = Arc::new(ProviderFactory::new(db, chain.clone())); if !self.only_bench { for ((mode, compression), phf) in all_combinations.clone() { + let filters = if self.with_filters { + Filters::WithFilters(InclusionFilter::Cuckoo, *phf) + } else { + Filters::WithoutFilters + }; + match mode { - SnapshotSegment::Headers => self.generate_headers_snapshot::( - &provider, - *compression, - InclusionFilter::Cuckoo, - *phf, + SnapshotSegment::Headers => self.generate_snapshot::( + factory.clone(), + snap_segments::Headers::new(*compression, filters), + )?, + SnapshotSegment::Transactions => self.generate_snapshot::( + factory.clone(), + snap_segments::Transactions::new(*compression, filters), + )?, + SnapshotSegment::Receipts => self.generate_snapshot::( + factory.clone(), + snap_segments::Receipts::new(*compression, filters), )?, - SnapshotSegment::Transactions => self - .generate_transactions_snapshot::( - &provider, - *compression, - InclusionFilter::Cuckoo, - *phf, - )?, - SnapshotSegment::Receipts => self - .generate_receipts_snapshot::( - &provider, - *compression, - InclusionFilter::Cuckoo, - *phf, - )?, } } } } if self.only_bench || self.bench { - for ((mode, compression), phf) in all_combinations { + for ((mode, compression), phf) in all_combinations.clone() { match mode { SnapshotSegment::Headers => self.bench_headers_snapshot( db_path, @@ -130,8 +148,105 @@ impl 
Command { Ok(()) } - /// Gives out the inclusive block range for the snapshot requested by the user. - fn block_range(&self) -> RangeInclusive { - self.from..=(self.from + self.block_interval - 1) + /// Generates successive inclusive block ranges up to the tip starting at `self.from`. + fn block_ranges(&self, tip: BlockNumber) -> Vec> { + let mut from = self.from; + let mut ranges = Vec::new(); + + while from <= tip { + let end_range = std::cmp::min(from + self.block_interval - 1, tip); + ranges.push(from..=end_range); + from = end_range + 1; + } + + ranges + } + + /// Generates snapshots from `self.from` with a `self.block_interval`. Generates them in + /// parallel if specified. + fn generate_snapshot( + &self, + factory: Arc>, + segment: impl Segment + Send + Sync, + ) -> eyre::Result<()> { + let dir = PathBuf::default(); + let ranges = self.block_ranges(factory.last_block_number()?); + + let mut created_snapshots = vec![]; + + // Filter/PHF is memory intensive, so we have to limit the parallelism. + for block_ranges in ranges.chunks(self.parallel as usize) { + let created_files = block_ranges + .into_par_iter() + .map(|block_range| { + let provider = factory.provider()?; + + if !self.only_stats { + segment.snapshot::(&provider, &dir, block_range.clone())?; + } + + let tx_range = + provider.transaction_range_by_block_range(block_range.clone())?; + + Ok(segment.segment().filename(block_range, &tx_range)) + }) + .collect::, eyre::Report>>()?; + + created_snapshots.extend(created_files); + } + + self.stats(created_snapshots) + } + + /// Prints detailed statistics for each snapshot, including loading time. + /// + /// This function loads each snapshot from the provided paths and prints + /// statistics about various aspects of each snapshot, such as filters size, + /// offset index size, offset list size, and loading time. 
+ fn stats(&self, snapshots: Vec>) -> eyre::Result<()> { + let mb = 1024.0 * 1024.0; + let mut total_filters_size = 0; + let mut total_index_size = 0; + let mut total_offsets_size = 0; + let mut total_duration = Duration::new(0, 0); + let mut total_file_size = 0; + + for snap in &snapshots { + let start_time = Instant::now(); + let jar = NippyJar::::load(snap.as_ref())?; + let duration = start_time.elapsed(); + let file_size = snap.as_ref().metadata()?.len(); + + total_filters_size += jar.filter_size(); + total_index_size += jar.offsets_index_size(); + total_offsets_size += jar.offsets_size(); + total_duration += duration; + total_file_size += file_size; + + println!("Snapshot: {:?}", snap.as_ref().file_name()); + println!(" File Size: {:>7.2} MB", file_size as f64 / mb); + println!(" Filters Size: {:>7.2} MB", jar.filter_size() as f64 / mb); + println!(" Offset Index Size: {:>7.2} MB", jar.offsets_index_size() as f64 / mb); + println!(" Offset List Size: {:>7.2} MB", jar.offsets_size() as f64 / mb); + println!( + " Loading Time: {:>7.2} ms | {:>7.2} µs", + duration.as_millis() as f64, + duration.as_micros() as f64 + ); + } + + let avg_duration = total_duration / snapshots.len() as u32; + + println!("Total Filters Size: {:>7.2} MB", total_filters_size as f64 / mb); + println!("Total Offset Index Size: {:>7.2} MB", total_index_size as f64 / mb); + println!("Total Offset List Size: {:>7.2} MB", total_offsets_size as f64 / mb); + println!("Total File Size: {:>7.2} GB", total_file_size as f64 / (mb * 1024.0)); + println!( + "Average Loading Time: {:>7.2} ms | {:>7.2} µs", + avg_duration.as_millis() as f64, + avg_duration.as_micros() as f64 + ); + + Ok(()) } } diff --git a/bin/reth/src/db/snapshots/receipts.rs b/bin/reth/src/db/snapshots/receipts.rs index b24eccda5..ce028f79b 100644 --- a/bin/reth/src/db/snapshots/receipts.rs +++ b/bin/reth/src/db/snapshots/receipts.rs @@ -3,55 +3,23 @@ use super::{ Command, Compression, PerfectHashingFunction, }; use 
rand::{seq::SliceRandom, Rng}; -use reth_db::{database::Database, open_db_read_only, snapshot::ReceiptMask}; +use reth_db::{open_db_read_only, snapshot::ReceiptMask}; use reth_interfaces::db::LogLevel; use reth_primitives::{ snapshot::{Filters, InclusionFilter}, ChainSpec, Receipt, SnapshotSegment, }; use reth_provider::{ - providers::SnapshotProvider, DatabaseProviderRO, ProviderError, ProviderFactory, - ReceiptProvider, TransactionsProvider, TransactionsProviderExt, + providers::SnapshotProvider, BlockNumReader, ProviderError, ProviderFactory, ReceiptProvider, + TransactionsProvider, TransactionsProviderExt, }; -use reth_snapshot::{segments, segments::Segment}; + use std::{ path::{Path, PathBuf}, sync::Arc, }; impl Command { - pub(crate) fn generate_receipts_snapshot( - &self, - provider: &DatabaseProviderRO, - compression: Compression, - inclusion_filter: InclusionFilter, - phf: PerfectHashingFunction, - ) -> eyre::Result<()> { - let block_range = self.block_range(); - let filters = if self.with_filters { - Filters::WithFilters(inclusion_filter, phf) - } else { - Filters::WithoutFilters - }; - - let segment: segments::Receipts = segments::Receipts::new(compression, filters); - segment.snapshot::(provider, PathBuf::default(), block_range.clone())?; - - // Default name doesn't have any configuration - let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - reth_primitives::fs::rename( - SnapshotSegment::Receipts.filename(&block_range, &tx_range), - SnapshotSegment::Receipts.filename_with_configuration( - filters, - compression, - &block_range, - &tx_range, - ), - )?; - - Ok(()) - } - pub(crate) fn bench_receipts_snapshot( &self, db_path: &Path, @@ -61,14 +29,18 @@ impl Command { inclusion_filter: InclusionFilter, phf: PerfectHashingFunction, ) -> eyre::Result<()> { + let factory = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()); + let provider = factory.provider()?; + let tip = provider.last_block_number()?; + 
let block_range = + self.block_ranges(tip).first().expect("has been generated before").clone(); + let filters = if self.with_filters { Filters::WithFilters(inclusion_filter, phf) } else { Filters::WithoutFilters }; - let block_range = self.block_range(); - let mut rng = rand::thread_rng(); let tx_range = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()) diff --git a/bin/reth/src/db/snapshots/transactions.rs b/bin/reth/src/db/snapshots/transactions.rs index 94a61d262..690ca45b2 100644 --- a/bin/reth/src/db/snapshots/transactions.rs +++ b/bin/reth/src/db/snapshots/transactions.rs @@ -3,56 +3,23 @@ use super::{ Command, Compression, PerfectHashingFunction, }; use rand::{seq::SliceRandom, Rng}; -use reth_db::{database::Database, open_db_read_only, snapshot::TransactionMask}; +use reth_db::{open_db_read_only, snapshot::TransactionMask}; use reth_interfaces::db::LogLevel; use reth_primitives::{ snapshot::{Filters, InclusionFilter}, ChainSpec, SnapshotSegment, TransactionSignedNoHash, }; use reth_provider::{ - providers::SnapshotProvider, DatabaseProviderRO, ProviderError, ProviderFactory, + providers::SnapshotProvider, BlockNumReader, ProviderError, ProviderFactory, TransactionsProvider, TransactionsProviderExt, }; -use reth_snapshot::{segments, segments::Segment}; + use std::{ path::{Path, PathBuf}, sync::Arc, }; impl Command { - pub(crate) fn generate_transactions_snapshot( - &self, - provider: &DatabaseProviderRO, - compression: Compression, - inclusion_filter: InclusionFilter, - phf: PerfectHashingFunction, - ) -> eyre::Result<()> { - let block_range = self.block_range(); - let filters = if self.with_filters { - Filters::WithFilters(inclusion_filter, phf) - } else { - Filters::WithoutFilters - }; - - let segment = segments::Transactions::new(compression, filters); - - segment.snapshot::(provider, PathBuf::default(), block_range.clone())?; - - // Default name doesn't have any configuration - let tx_range = 
provider.transaction_range_by_block_range(block_range.clone())?; - reth_primitives::fs::rename( - SnapshotSegment::Transactions.filename(&block_range, &tx_range), - SnapshotSegment::Transactions.filename_with_configuration( - filters, - compression, - &block_range, - &tx_range, - ), - )?; - - Ok(()) - } - pub(crate) fn bench_transactions_snapshot( &self, db_path: &Path, @@ -62,19 +29,21 @@ impl Command { inclusion_filter: InclusionFilter, phf: PerfectHashingFunction, ) -> eyre::Result<()> { + let factory = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()); + let provider = factory.provider()?; + let tip = provider.last_block_number()?; + let block_range = + self.block_ranges(tip).first().expect("has been generated before").clone(); + let filters = if self.with_filters { Filters::WithFilters(inclusion_filter, phf) } else { Filters::WithoutFilters }; - let block_range = self.block_range(); - let mut rng = rand::thread_rng(); - let tx_range = ProviderFactory::new(open_db_read_only(db_path, log_level)?, chain.clone()) - .provider()? - .transaction_range_by_block_range(block_range.clone())?; + let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; let mut row_indexes = tx_range.clone().collect::>(); diff --git a/crates/primitives/src/snapshot/segment.rs b/crates/primitives/src/snapshot/segment.rs index d8357fc16..90145f14f 100644 --- a/crates/primitives/src/snapshot/segment.rs +++ b/crates/primitives/src/snapshot/segment.rs @@ -158,6 +158,11 @@ impl SegmentHeader { *self.block_range.start() } + /// Returns the last block number of the segment. + pub fn block_end(&self) -> BlockNumber { + *self.block_range.end() + } + /// Returns the first transaction number of the segment. 
pub fn tx_start(&self) -> TxNumber { *self.tx_range.start() diff --git a/crates/snapshot/src/segments/headers.rs b/crates/snapshot/src/segments/headers.rs index 2bf73b2f7..0a524e86c 100644 --- a/crates/snapshot/src/segments/headers.rs +++ b/crates/snapshot/src/segments/headers.rs @@ -31,7 +31,7 @@ impl Default for Headers { } impl Segment for Headers { - fn segment() -> SnapshotSegment { + fn segment(&self) -> SnapshotSegment { SnapshotSegment::Headers } @@ -45,7 +45,7 @@ impl Segment for Headers { let mut jar = prepare_jar::( provider, directory, - Self::segment(), + self.segment(), self.config, range.clone(), range_len, diff --git a/crates/snapshot/src/segments/mod.rs b/crates/snapshot/src/segments/mod.rs index 88cdc52ef..68b1b81b0 100644 --- a/crates/snapshot/src/segments/mod.rs +++ b/crates/snapshot/src/segments/mod.rs @@ -37,7 +37,7 @@ pub trait Segment: Default { ) -> ProviderResult<()>; /// Returns this struct's [`SnapshotSegment`]. - fn segment() -> SnapshotSegment; + fn segment(&self) -> SnapshotSegment; /// Generates the dataset to train a zstd dictionary with the most recent rows (at most 1000). 
fn dataset_for_compression>( diff --git a/crates/snapshot/src/segments/receipts.rs b/crates/snapshot/src/segments/receipts.rs index 4b82a7133..5c5a48112 100644 --- a/crates/snapshot/src/segments/receipts.rs +++ b/crates/snapshot/src/segments/receipts.rs @@ -28,7 +28,7 @@ impl Default for Receipts { } impl Segment for Receipts { - fn segment() -> SnapshotSegment { + fn segment(&self) -> SnapshotSegment { SnapshotSegment::Receipts } @@ -44,7 +44,7 @@ impl Segment for Receipts { let mut jar = prepare_jar::( provider, directory, - Self::segment(), + self.segment(), self.config, block_range, tx_range_len, diff --git a/crates/snapshot/src/segments/transactions.rs b/crates/snapshot/src/segments/transactions.rs index 585bc9625..ea936bd95 100644 --- a/crates/snapshot/src/segments/transactions.rs +++ b/crates/snapshot/src/segments/transactions.rs @@ -28,7 +28,7 @@ impl Default for Transactions { } impl Segment for Transactions { - fn segment() -> SnapshotSegment { + fn segment(&self) -> SnapshotSegment { SnapshotSegment::Transactions } @@ -44,7 +44,7 @@ impl Segment for Transactions { let mut jar = prepare_jar::( provider, directory, - Self::segment(), + self.segment(), self.config, block_range, tx_range_len, diff --git a/crates/snapshot/src/snapshotter.rs b/crates/snapshot/src/snapshotter.rs index 729b0c1b9..21a072ab3 100644 --- a/crates/snapshot/src/snapshotter.rs +++ b/crates/snapshot/src/snapshotter.rs @@ -210,9 +210,10 @@ impl Snapshotter { let temp = self.snapshots_path.join(TEMPORARY_SUBDIRECTORY); let provider = self.provider_factory.provider()?; let tx_range = provider.transaction_range_by_block_range(block_range.clone())?; - let filename = S::segment().filename(&block_range, &tx_range); + let segment = S::default(); + let filename = segment.segment().filename(&block_range, &tx_range); - S::default().snapshot::(&provider, temp.clone(), block_range)?; + segment.snapshot::(&provider, temp.clone(), block_range)?; reth_primitives::fs::rename(temp.join(&filename), 
self.snapshots_path.join(filename))?; } diff --git a/crates/storage/nippy-jar/src/filter/cuckoo.rs b/crates/storage/nippy-jar/src/filter/cuckoo.rs index 2e4110e58..70c3be24a 100644 --- a/crates/storage/nippy-jar/src/filter/cuckoo.rs +++ b/crates/storage/nippy-jar/src/filter/cuckoo.rs @@ -39,6 +39,10 @@ impl InclusionFilter for Cuckoo { fn contains(&self, element: &[u8]) -> Result { Ok(self.filter.contains(element)) } + + fn size(&self) -> usize { + self.filter.memory_usage() + } } impl std::fmt::Debug for Cuckoo { diff --git a/crates/storage/nippy-jar/src/filter/mod.rs b/crates/storage/nippy-jar/src/filter/mod.rs index e8e6294eb..dd3e78049 100644 --- a/crates/storage/nippy-jar/src/filter/mod.rs +++ b/crates/storage/nippy-jar/src/filter/mod.rs @@ -11,6 +11,8 @@ pub trait InclusionFilter { /// Checks if the element belongs to the inclusion list. **There might be false positives.** fn contains(&self, element: &[u8]) -> Result; + + fn size(&self) -> usize; } /// Enum with different [`InclusionFilter`] types. @@ -36,4 +38,11 @@ impl InclusionFilter for InclusionFilters { InclusionFilters::Unused => todo!(), } } + + fn size(&self) -> usize { + match self { + InclusionFilters::Cuckoo(c) => c.size(), + InclusionFilters::Unused => 0, + } + } } diff --git a/crates/storage/nippy-jar/src/lib.rs b/crates/storage/nippy-jar/src/lib.rs index c7515305d..435359e87 100644 --- a/crates/storage/nippy-jar/src/lib.rs +++ b/crates/storage/nippy-jar/src/lib.rs @@ -201,6 +201,21 @@ where &self.user_header } + /// Returns the size in bytes of `self.offsets`. + pub fn offsets_size(&self) -> usize { + self.offsets.size_in_bytes() + } + + /// Returns the size in bytes of the inclusion filter, if any. + pub fn filter_size(&self) -> usize { + self.size() + } + + /// Returns the size in bytes of `self.offsets_index`. + pub fn offsets_index_size(&self) -> usize { + self.offsets_index.size_in_bytes() + } + /// Gets a reference to the compressor. 
pub fn compressor(&self) -> Option<&Compressors> { self.compressor.as_ref() @@ -480,6 +495,10 @@ where fn contains(&self, element: &[u8]) -> Result { self.filter.as_ref().ok_or(NippyJarError::FilterMissing)?.contains(element) } + + fn size(&self) -> usize { + self.filter.as_ref().map(|f| f.size()).unwrap_or(0) + } } impl PerfectHashingFunction for NippyJar