doc(storage): add missing documentation for nippy-jar (#11913)

This commit is contained in:
Thomas Coratger
2024-10-21 01:10:36 +02:00
committed by GitHub
parent 0270128d4f
commit 2e8a8fe6f6
7 changed files with 72 additions and 4 deletions

View File

@@ -44,7 +44,9 @@ pub trait Compression: Serialize + for<'a> Deserialize<'a> {
// NOTE(review): an enum-level doc comment may already exist above this diff hunk;
// the `///` line below is added in case it does not (the crate no longer allows
// `missing_docs` after this change).
/// The compression algorithms available for `NippyJar` column data.
#[derive(Debug, Serialize, Deserialize)]
#[cfg_attr(test, derive(PartialEq))]
pub enum Compressors {
    /// Zstandard compression algorithm with custom settings.
    Zstd(Zstd),
    /// LZ4 compression algorithm with custom settings.
    Lz4(Lz4),
}

View File

@@ -12,10 +12,13 @@ pub use zstd::{bulk::Decompressor, dict::DecoderDictionary};
/// Raw, serialized bytes of a Zstd dictionary.
type RawDictionary = Vec<u8>;

/// Represents the state of a Zstandard compression operation.
#[derive(Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum ZstdState {
    /// The compressor is pending a dictionary.
    #[default]
    PendingDictionary,
    /// The compressor is ready to perform compression.
    Ready,
}
@@ -51,6 +54,7 @@ impl Zstd {
}
}
/// Sets the compression level for the Zstd compression instance.
pub const fn with_level(mut self, level: i32) -> Self {
self.level = level;
self

View File

@@ -28,6 +28,11 @@ pub struct NippyJarChecker<H: NippyJarHeader = ()> {
}
impl<H: NippyJarHeader> NippyJarChecker<H> {
/// Creates a new instance of [`NippyJarChecker`] with the provided [`NippyJar`].
///
/// This method initializes the checker without any associated file handles for
/// the data or offsets files — both start as `None`. The [`NippyJar`] passed in
/// contains all necessary configurations for handling data.
pub const fn new(jar: NippyJar<H>) -> Self {
    Self { jar, data_file: None, offsets_file: None }
}

View File

@@ -25,9 +25,10 @@ impl<H: NippyJarHeader> std::fmt::Debug for NippyJarCursor<'_, H> {
}
impl<'a, H: NippyJarHeader> NippyJarCursor<'a, H> {
/// Creates a new instance of [`NippyJarCursor`] for the given [`NippyJar`].
pub fn new(jar: &'a NippyJar<H>) -> Result<Self, NippyJarError> {
let max_row_size = jar.max_row_size;
Ok(NippyJarCursor {
Ok(Self {
jar,
reader: Arc::new(jar.open_data_reader()?),
// Makes sure that we have enough buffer capacity to decompress any row of data.
@@ -36,12 +37,14 @@ impl<'a, H: NippyJarHeader> NippyJarCursor<'a, H> {
})
}
/// Creates a new instance of [`NippyJarCursor`] with the specified [`NippyJar`] and data
/// reader.
pub fn with_reader(
jar: &'a NippyJar<H>,
reader: Arc<DataReader>,
) -> Result<Self, NippyJarError> {
let max_row_size = jar.max_row_size;
Ok(NippyJarCursor {
Ok(Self {
jar,
reader,
// Makes sure that we have enough buffer capacity to decompress any row of data.

View File

@@ -4,53 +4,92 @@ use thiserror::Error;
/// Errors associated with [`crate::NippyJar`].
#[derive(Error, Debug)]
pub enum NippyJarError {
/// An internal error occurred, wrapping any type of error.
#[error(transparent)]
Internal(#[from] Box<dyn core::error::Error + Send + Sync>),
/// An error occurred while disconnecting, wrapping a standard I/O error.
#[error(transparent)]
Disconnect(#[from] std::io::Error),
/// An error related to the file system occurred, wrapping a file system path error.
#[error(transparent)]
FileSystem(#[from] reth_fs_util::FsPathError),
/// A custom error message provided by the user.
#[error("{0}")]
Custom(String),
/// An error occurred during serialization/deserialization with Bincode.
#[error(transparent)]
Bincode(#[from] Box<bincode::ErrorKind>),
/// An error occurred with the Elias-Fano encoding/decoding process.
#[error(transparent)]
EliasFano(#[from] anyhow::Error),
/// Compression was enabled, but the compressor is not ready yet.
#[error("compression was enabled, but it's not ready yet")]
CompressorNotReady,
/// Decompression was enabled, but the decompressor is not ready yet.
#[error("decompression was enabled, but it's not ready yet")]
DecompressorNotReady,
/// The number of columns does not match the expected length.
#[error("number of columns does not match: {0} != {1}")]
ColumnLenMismatch(usize, usize),
/// An unexpected missing value was encountered at a specific row and column.
#[error("unexpected missing value: row:col {0}:{1}")]
UnexpectedMissingValue(u64, u64),
/// The size of an offset exceeds the maximum allowed size of 8 bytes.
#[error("the size of an offset must be at most 8 bytes, got {offset_size}")]
OffsetSizeTooBig {
/// The read offset size in number of bytes.
offset_size: u8,
},
/// The size of an offset is less than the minimum allowed size of 1 byte.
#[error("the size of an offset must be at least 1 byte, got {offset_size}")]
OffsetSizeTooSmall {
/// The read offset size in number of bytes.
offset_size: u8,
},
/// An attempt was made to read an offset that is out of bounds.
#[error("attempted to read an out of bounds offset: {index}")]
OffsetOutOfBounds {
/// The index of the offset that was being read.
index: usize,
},
/// The output buffer is too small for the compression or decompression operation.
#[error("compression or decompression requires a bigger destination output")]
OutputTooSmall,
/// A dictionary is not loaded when it is required for operations.
#[error("dictionary is not loaded.")]
DictionaryNotLoaded,
/// It's not possible to generate a compressor after loading a dictionary.
#[error("it's not possible to generate a compressor after loading a dictionary.")]
CompressorNotAllowed,
/// The number of offsets is smaller than the requested prune size.
#[error("number of offsets ({0}) is smaller than prune request ({1}).")]
InvalidPruning(u64, u64),
/// The jar has been frozen and cannot be modified.
#[error("jar has been frozen and cannot be modified.")]
FrozenJar,
/// The file is in an inconsistent state.
#[error("File is in an inconsistent state.")]
InconsistentState,
/// A specified file is missing.
#[error("Missing file: {0}.")]
MissingFile(PathBuf),
}

View File

@@ -10,7 +10,6 @@
issue_tracker_base_url = "https://github.com/paradigmxyz/reth/issues/"
)]
#![cfg_attr(not(test), warn(unused_crate_dependencies))]
#![allow(missing_docs)]
#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
use memmap2::Mmap;
@@ -28,6 +27,7 @@ use std::os::windows::prelude::OpenOptionsExt;
use tracing::*;
/// Compression algorithms supported by `NippyJar`.
pub mod compression;
#[cfg(test)]
use compression::Compression;
@@ -55,10 +55,13 @@ pub use writer::NippyJarWriter;
mod consistency;
pub use consistency::NippyJarChecker;
/// The version number of the Nippy Jar format.
const NIPPY_JAR_VERSION: usize = 1;
/// The file extension used for index files.
const INDEX_FILE_EXTENSION: &str = "idx";
/// The file extension used for offsets files.
const OFFSETS_FILE_EXTENSION: &str = "off";
// NOTE(review): unlike the other extensions this one is `pub` — presumably read
// by code outside this crate; confirm before tightening visibility.
/// The file extension used for configuration files.
pub const CONFIG_FILE_EXTENSION: &str = "conf";
/// A [`RefRow`] is a list of column value slices pointing to either an internal buffer or a

View File

@@ -354,6 +354,10 @@ impl<H: NippyJarHeader> NippyJarWriter<H> {
Ok(())
}
/// Commits changes to the data file and offsets without synchronizing all data to disk.
///
/// This function flushes the buffered data to the data file and commits the offsets,
/// but it does not guarantee that all data is synchronized to persistent storage.
#[cfg(feature = "test-utils")]
pub fn commit_without_sync_all(&mut self) -> Result<(), NippyJarError> {
self.data_file.flush()?;
@@ -412,41 +416,49 @@ impl<H: NippyJarHeader> NippyJarWriter<H> {
Ok(())
}
/// Returns the maximum row size for the associated [`NippyJar`].
///
/// Test-only accessor (gated behind `#[cfg(test)]`).
#[cfg(test)]
pub const fn max_row_size(&self) -> usize {
    self.jar.max_row_size
}
/// Returns the current column index of this writer.
///
/// Test-only accessor (gated behind `#[cfg(test)]`).
// NOTE(review): previous doc said "checker instance", but this reads the
// writer's own `column` field, not anything on the checker.
#[cfg(test)]
pub const fn column(&self) -> usize {
    self.column
}
/// Returns the writer's offsets as an immutable slice.
///
/// Test-only accessor (gated behind `#[cfg(test)]`).
#[cfg(test)]
pub fn offsets(&self) -> &[u64] {
    &self.offsets
}
/// Returns a mutable reference to the writer's offsets vector.
///
/// Test-only accessor (gated behind `#[cfg(test)]`).
#[cfg(test)]
pub fn offsets_mut(&mut self) -> &mut Vec<u64> {
    &mut self.offsets
}
/// Returns the path to the offsets file for the associated [`NippyJar`].
///
/// Test-only accessor (gated behind `#[cfg(test)]`).
#[cfg(test)]
pub fn offsets_path(&self) -> std::path::PathBuf {
    self.jar.offsets_path()
}
/// Returns the path to the data file for the associated [`NippyJar`].
///
/// Test-only accessor (gated behind `#[cfg(test)]`).
#[cfg(test)]
pub fn data_path(&self) -> &Path {
    self.jar.data_path()
}
/// Returns a mutable reference to the buffered writer for the data file.
///
/// Available in tests and with the `test-utils` feature.
#[cfg(any(test, feature = "test-utils"))]
pub fn data_file(&mut self) -> &mut BufWriter<File> {
    &mut self.data_file
}
/// Returns a reference to the associated [`NippyJar`] instance.
#[cfg(any(test, feature = "test-utils"))]
pub const fn jar(&self) -> &NippyJar<H> {
&self.jar