chore: move IntegerList to db-api (#13062)

This commit is contained in:
Matthias Seitz
2024-12-02 17:02:19 +01:00
committed by GitHub
parent 6789ff4a1e
commit 8a047ed6e4
14 changed files with 195 additions and 222 deletions

3
Cargo.lock generated
View File

@ -6953,6 +6953,7 @@ dependencies = [
"reth-stages-types",
"reth-storage-errors",
"reth-trie-common",
"roaring",
"serde",
"test-fuzz",
]
@ -8670,7 +8671,6 @@ dependencies = [
"rand 0.8.5",
"reth-codecs",
"revm-primitives",
"roaring",
"serde",
"serde_json",
"serde_with",
@ -9720,7 +9720,6 @@ checksum = "f81dc953b2244ddd5e7860cb0bb2a790494b898ef321d4aff8e260efab60cc88"
dependencies = [
"bytemuck",
"byteorder",
"serde",
]
[[package]]

View File

@ -28,7 +28,7 @@ tracing.workspace = true
# HeaderBytes
generic-array.workspace = true
typenum = "1.15.0"
byteorder = "1.4.3"
byteorder.workspace = true
# crypto
rand.workspace = true

View File

@ -30,7 +30,6 @@ op-alloy-consensus = { workspace = true, optional = true }
byteorder = { workspace = true, optional = true }
bytes.workspace = true
derive_more.workspace = true
roaring = "0.10.2"
serde_with = { workspace = true, optional = true }
auto_impl.workspace = true
@ -100,7 +99,6 @@ serde = [
"rand/serde",
"reth-codecs?/serde",
"revm-primitives/serde",
"roaring/serde",
"revm-primitives/serde",
"op-alloy-consensus?/serde"
]

View File

@ -1,196 +0,0 @@
use alloc::vec::Vec;
use core::fmt;
use bytes::BufMut;
use derive_more::Deref;
use roaring::RoaringTreemap;
/// A data structure that uses Roaring Bitmaps to efficiently store a list of integers.
///
/// This structure provides excellent compression while allowing direct access to individual
/// elements without the need for full decompression.
///
/// Key features:
/// - Efficient compression: the underlying Roaring Bitmaps significantly reduce memory usage.
/// - Direct access: elements can be accessed or queried without needing to decode the entire list.
/// - [`RoaringTreemap`] backing: internally backed by [`RoaringTreemap`], which supports 64-bit
///   integers.
///
/// The wrapper derives `Deref`, so read-only methods of the inner [`RoaringTreemap`] (for example
/// `len` and `iter`) can be called directly on an [`IntegerList`].
#[derive(Clone, PartialEq, Default, Deref)]
pub struct IntegerList(pub RoaringTreemap);
impl fmt::Debug for IntegerList {
    /// Writes the type name followed by the stored integers rendered as a debug list.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("IntegerList")?;

        let mut entries = f.debug_list();
        for value in self.0.iter() {
            entries.entry(&value);
        }
        entries.finish()
    }
}
impl IntegerList {
    /// Creates a new empty [`IntegerList`].
    pub fn empty() -> Self {
        Self(RoaringTreemap::new())
    }

    /// Creates an [`IntegerList`] from a list of integers.
    ///
    /// An empty input is accepted and yields an empty list.
    ///
    /// # Errors
    ///
    /// Returns [`IntegerListError::UnsortedInput`] if the list is not pre-sorted.
    pub fn new(list: impl IntoIterator<Item = u64>) -> Result<Self, IntegerListError> {
        RoaringTreemap::from_sorted_iter(list)
            .map(Self)
            .map_err(|_| IntegerListError::UnsortedInput)
    }

    /// Creates an [`IntegerList`] from a pre-sorted list of integers.
    ///
    /// An empty input is accepted and yields an empty list.
    ///
    /// # Panics
    ///
    /// Panics if the list is not pre-sorted.
    #[inline]
    #[track_caller]
    pub fn new_pre_sorted(list: impl IntoIterator<Item = u64>) -> Self {
        // Only ordering is enforced here; emptiness is not an error, so the message must not
        // claim otherwise.
        Self::new(list).expect("IntegerList must be pre-sorted")
    }

    /// Appends a list of integers to the current list.
    ///
    /// On success, forwards the `u64` returned by [`RoaringTreemap::append`].
    ///
    /// # Errors
    ///
    /// Returns [`IntegerListError::UnsortedInput`] if the underlying bitmap rejects the input.
    pub fn append(&mut self, list: impl IntoIterator<Item = u64>) -> Result<u64, IntegerListError> {
        self.0.append(list).map_err(|_| IntegerListError::UnsortedInput)
    }

    /// Pushes a new integer to the list.
    ///
    /// # Errors
    ///
    /// Returns [`IntegerListError::UnsortedInput`] if the underlying bitmap refuses the value,
    /// i.e. when [`RoaringTreemap::push`] reports that nothing was inserted.
    pub fn push(&mut self, value: u64) -> Result<(), IntegerListError> {
        self.0.push(value).then_some(()).ok_or(IntegerListError::UnsortedInput)
    }

    /// Clears the list.
    pub fn clear(&mut self) {
        self.0.clear();
    }

    /// Serializes a [`IntegerList`] into a newly allocated sequence of bytes.
    ///
    /// # Panics
    ///
    /// Panics if the bitmap cannot be encoded into the in-memory buffer.
    pub fn to_bytes(&self) -> Vec<u8> {
        // Pre-size the buffer so encoding does not need to grow it.
        let mut vec = Vec::with_capacity(self.0.serialized_size());
        self.0.serialize_into(&mut vec).expect("not able to encode IntegerList");
        vec
    }

    /// Serializes a [`IntegerList`] into the caller-provided buffer `buf`.
    ///
    /// The encoded bytes are written through `buf.writer()`.
    ///
    /// # Panics
    ///
    /// Panics if writing the encoded bitmap into `buf` fails.
    pub fn to_mut_bytes<B: bytes::BufMut>(&self, buf: &mut B) {
        self.0.serialize_into(buf.writer()).expect("not able to encode IntegerList");
    }

    /// Deserializes a sequence of bytes into a proper [`IntegerList`].
    ///
    /// # Errors
    ///
    /// Returns [`IntegerListError::FailedToDeserialize`] if `data` cannot be decoded.
    pub fn from_bytes(data: &[u8]) -> Result<Self, IntegerListError> {
        RoaringTreemap::deserialize_from(data)
            .map(Self)
            .map_err(|_| IntegerListError::FailedToDeserialize)
    }
}
#[cfg(feature = "serde")]
impl serde::Serialize for IntegerList {
    /// Serializes the list as a sequence containing every stored integer, in iteration order.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeSeq;

        // The bitmap length is a `u64`. Only pass a length hint when it fits into `usize`
        // instead of truncating with `as`, which would announce a wrong element count on
        // targets where `usize` is narrower than 64 bits.
        let len_hint = usize::try_from(self.len()).ok();
        let mut seq = serializer.serialize_seq(len_hint)?;
        for e in &self.0 {
            seq.serialize_element(&e)?;
        }
        seq.end()
    }
}
/// Serde visitor that rebuilds an [`IntegerList`] from a sequence of integers.
#[cfg(feature = "serde")]
struct IntegerListVisitor;

#[cfg(feature = "serde")]
impl<'de> serde::de::Visitor<'de> for IntegerListVisitor {
    type Value = IntegerList;

    /// Describes the accepted input for serde error messages.
    fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Elements are pushed as `u64` and rejected when out of order, so say so instead of
        // advertising a `usize` array.
        f.write_str("a sorted sequence of u64 integers")
    }

    /// Pushes each element of the sequence into a fresh list.
    ///
    /// Fails with a custom serde error as soon as an element is rejected by
    /// [`IntegerList::push`].
    fn visit_seq<E>(self, mut seq: E) -> Result<Self::Value, E::Error>
    where
        E: serde::de::SeqAccess<'de>,
    {
        let mut list = IntegerList::empty();
        while let Some(item) = seq.next_element()? {
            list.push(item).map_err(serde::de::Error::custom)?;
        }
        Ok(list)
    }
}
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for IntegerList {
    /// Deserializes an [`IntegerList`] from a sequence of integers.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // The value is serialized with `serialize_seq` and the visitor only implements
        // `visit_seq`, so request a sequence. A byte-buffer hint only works with
        // self-describing formats that ignore the hint and breaks round-trips elsewhere.
        deserializer.deserialize_seq(IntegerListVisitor)
    }
}
#[cfg(any(test, feature = "arbitrary"))]
use arbitrary::{Arbitrary, Unstructured};
#[cfg(any(test, feature = "arbitrary"))]
impl<'a> Arbitrary<'a> for IntegerList {
    /// Builds an [`IntegerList`] from arbitrary integers after sorting and deduplicating them.
    ///
    /// Returns [`arbitrary::Error::IncorrectFormat`] if the list still cannot be constructed.
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self, arbitrary::Error> {
        let mut nums: Vec<u64> = Vec::arbitrary(u)?;
        nums.sort_unstable();
        // The list behaves as a set; remove repeated values so that inputs containing
        // duplicates are not discarded by the sorted-input check in `Self::new`.
        nums.dedup();
        Self::new(nums).map_err(|_| arbitrary::Error::IncorrectFormat)
    }
}
/// Errors returned when building, extending, or decoding an [`IntegerList`].
#[derive(Debug, derive_more::Display, derive_more::Error)]
pub enum IntegerListError {
/// The provided input is unsorted.
///
/// Returned by the constructors and by `append`/`push` when the underlying bitmap rejects
/// the supplied values.
#[display("the provided input is unsorted")]
UnsortedInput,
/// Failed to deserialize data into type.
///
/// Returned by `from_bytes` when the byte slice cannot be decoded.
#[display("failed to deserialize data into type")]
FailedToDeserialize,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Both ways of building an empty list report a length of zero.
    #[test]
    fn empty_list() {
        let via_empty = IntegerList::empty();
        assert_eq!(via_empty.len(), 0);

        let via_pre_sorted = IntegerList::new_pre_sorted(std::iter::empty());
        assert_eq!(via_pre_sorted.len(), 0);
    }

    /// Iterating a list yields the values it was built from.
    #[test]
    fn test_integer_list() {
        let values = [1, 2, 3];
        let list = IntegerList::new(values).unwrap();
        let collected: Vec<_> = list.iter().collect();
        assert_eq!(collected, values);
    }

    /// Encoding to bytes and decoding again gives back an equal list.
    #[test]
    fn test_integer_list_serialization() {
        let list = IntegerList::new([1, 2, 3]).unwrap();
        let encoded = list.to_bytes();
        let decoded = IntegerList::from_bytes(&encoded).unwrap();
        assert_eq!(decoded, list)
    }

    /// A JSON round-trip through serde gives back an equal list.
    #[test]
    fn serde_serialize_deserialize() {
        let list = IntegerList::new([1, 2, 3]).unwrap();
        let json = serde_json::to_string(&list).unwrap();
        let round_tripped: IntegerList = serde_json::from_str(&json).unwrap();
        assert_eq!(round_tripped, list);
    }
}

View File

@ -31,9 +31,6 @@ pub use transaction::{
FullTransaction, Transaction,
};
mod integer_list;
pub use integer_list::{IntegerList, IntegerListError};
pub mod block;
pub use block::{
body::{BlockBody, FullBlockBody},

View File

@ -29,6 +29,7 @@ alloy-consensus.workspace = true
# codecs
modular-bitfield.workspace = true
roaring = "0.10.2"
parity-scale-codec = { version = "3.2.1", features = ["bytes"] }
serde = { workspace = true, default-features = false }

View File

@ -4,7 +4,159 @@ use crate::{
table::{Compress, Decompress},
DatabaseError,
};
use reth_primitives_traits::IntegerList;
use bytes::BufMut;
use core::fmt;
use derive_more::Deref;
use roaring::RoaringTreemap;
/// A data structure that uses Roaring Bitmaps to efficiently store a list of integers.
///
/// This structure provides excellent compression while allowing direct access to individual
/// elements without the need for full decompression.
///
/// Key features:
/// - Efficient compression: the underlying Roaring Bitmaps significantly reduce memory usage.
/// - Direct access: elements can be accessed or queried without needing to decode the entire list.
/// - [`RoaringTreemap`] backing: internally backed by [`RoaringTreemap`], which supports 64-bit
///   integers.
///
/// The wrapper derives `Deref`, so read-only methods of the inner [`RoaringTreemap`] (for example
/// `len` and `iter`) can be called directly on an [`IntegerList`].
#[derive(Clone, PartialEq, Default, Deref)]
pub struct IntegerList(pub RoaringTreemap);
impl fmt::Debug for IntegerList {
    /// Writes the type name followed by the stored integers rendered as a debug list.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("IntegerList")?;

        let mut entries = f.debug_list();
        for value in self.0.iter() {
            entries.entry(&value);
        }
        entries.finish()
    }
}
impl IntegerList {
    /// Creates a new empty [`IntegerList`].
    pub fn empty() -> Self {
        Self(RoaringTreemap::new())
    }

    /// Creates an [`IntegerList`] from a list of integers.
    ///
    /// An empty input is accepted and yields an empty list.
    ///
    /// # Errors
    ///
    /// Returns [`IntegerListError::UnsortedInput`] if the list is not pre-sorted.
    pub fn new(list: impl IntoIterator<Item = u64>) -> Result<Self, IntegerListError> {
        RoaringTreemap::from_sorted_iter(list)
            .map(Self)
            .map_err(|_| IntegerListError::UnsortedInput)
    }

    /// Creates an [`IntegerList`] from a pre-sorted list of integers.
    ///
    /// An empty input is accepted and yields an empty list.
    ///
    /// # Panics
    ///
    /// Panics if the list is not pre-sorted.
    #[inline]
    #[track_caller]
    pub fn new_pre_sorted(list: impl IntoIterator<Item = u64>) -> Self {
        // Only ordering is enforced here; emptiness is not an error, so the message must not
        // claim otherwise.
        Self::new(list).expect("IntegerList must be pre-sorted")
    }

    /// Appends a list of integers to the current list.
    ///
    /// On success, forwards the `u64` returned by [`RoaringTreemap::append`].
    ///
    /// # Errors
    ///
    /// Returns [`IntegerListError::UnsortedInput`] if the underlying bitmap rejects the input.
    pub fn append(&mut self, list: impl IntoIterator<Item = u64>) -> Result<u64, IntegerListError> {
        self.0.append(list).map_err(|_| IntegerListError::UnsortedInput)
    }

    /// Pushes a new integer to the list.
    ///
    /// # Errors
    ///
    /// Returns [`IntegerListError::UnsortedInput`] if the underlying bitmap refuses the value,
    /// i.e. when [`RoaringTreemap::push`] reports that nothing was inserted.
    pub fn push(&mut self, value: u64) -> Result<(), IntegerListError> {
        self.0.push(value).then_some(()).ok_or(IntegerListError::UnsortedInput)
    }

    /// Clears the list.
    pub fn clear(&mut self) {
        self.0.clear();
    }

    /// Serializes a [`IntegerList`] into a newly allocated sequence of bytes.
    ///
    /// # Panics
    ///
    /// Panics if the bitmap cannot be encoded into the in-memory buffer.
    pub fn to_bytes(&self) -> Vec<u8> {
        // Pre-size the buffer so encoding does not need to grow it.
        let mut vec = Vec::with_capacity(self.0.serialized_size());
        self.0.serialize_into(&mut vec).expect("not able to encode IntegerList");
        vec
    }

    /// Serializes a [`IntegerList`] into the caller-provided buffer `buf`.
    ///
    /// The encoded bytes are written through `buf.writer()`.
    ///
    /// # Panics
    ///
    /// Panics if writing the encoded bitmap into `buf` fails.
    pub fn to_mut_bytes<B: bytes::BufMut>(&self, buf: &mut B) {
        self.0.serialize_into(buf.writer()).expect("not able to encode IntegerList");
    }

    /// Deserializes a sequence of bytes into a proper [`IntegerList`].
    ///
    /// # Errors
    ///
    /// Returns [`IntegerListError::FailedToDeserialize`] if `data` cannot be decoded.
    pub fn from_bytes(data: &[u8]) -> Result<Self, IntegerListError> {
        RoaringTreemap::deserialize_from(data)
            .map(Self)
            .map_err(|_| IntegerListError::FailedToDeserialize)
    }
}
impl serde::Serialize for IntegerList {
    /// Serializes the list as a sequence containing every stored integer, in iteration order.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::SerializeSeq;

        // The bitmap length is a `u64`. Only pass a length hint when it fits into `usize`
        // instead of truncating with `as`, which would announce a wrong element count on
        // targets where `usize` is narrower than 64 bits.
        let len_hint = usize::try_from(self.len()).ok();
        let mut seq = serializer.serialize_seq(len_hint)?;
        for e in &self.0 {
            seq.serialize_element(&e)?;
        }
        seq.end()
    }
}
/// Serde visitor that rebuilds an [`IntegerList`] from a sequence of integers.
struct IntegerListVisitor;

impl<'de> serde::de::Visitor<'de> for IntegerListVisitor {
    type Value = IntegerList;

    /// Describes the accepted input for serde error messages.
    fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Elements are pushed as `u64` and rejected when out of order, so say so instead of
        // advertising a `usize` array.
        f.write_str("a sorted sequence of u64 integers")
    }

    /// Pushes each element of the sequence into a fresh list.
    ///
    /// Fails with a custom serde error as soon as an element is rejected by
    /// [`IntegerList::push`].
    fn visit_seq<E>(self, mut seq: E) -> Result<Self::Value, E::Error>
    where
        E: serde::de::SeqAccess<'de>,
    {
        let mut list = IntegerList::empty();
        while let Some(item) = seq.next_element()? {
            list.push(item).map_err(serde::de::Error::custom)?;
        }
        Ok(list)
    }
}
impl<'de> serde::Deserialize<'de> for IntegerList {
    /// Deserializes an [`IntegerList`] from a sequence of integers.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // The value is serialized with `serialize_seq` and the visitor only implements
        // `visit_seq`, so request a sequence. A byte-buffer hint only works with
        // self-describing formats that ignore the hint and breaks round-trips elsewhere.
        deserializer.deserialize_seq(IntegerListVisitor)
    }
}
#[cfg(any(test, feature = "arbitrary"))]
use arbitrary::{Arbitrary, Unstructured};
#[cfg(any(test, feature = "arbitrary"))]
impl<'a> Arbitrary<'a> for IntegerList {
    /// Builds an [`IntegerList`] from arbitrary integers after sorting and deduplicating them.
    ///
    /// Returns [`arbitrary::Error::IncorrectFormat`] if the list still cannot be constructed.
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self, arbitrary::Error> {
        let mut nums: Vec<u64> = Vec::arbitrary(u)?;
        nums.sort_unstable();
        // The list behaves as a set; remove repeated values so that inputs containing
        // duplicates are not discarded by the sorted-input check in `Self::new`.
        nums.dedup();
        Self::new(nums).map_err(|_| arbitrary::Error::IncorrectFormat)
    }
}
/// Errors returned when building, extending, or decoding an [`IntegerList`].
#[derive(Debug, derive_more::Display, derive_more::Error)]
pub enum IntegerListError {
/// The provided input is unsorted.
///
/// Returned by the constructors and by `append`/`push` when the underlying bitmap rejects
/// the supplied values.
#[display("the provided input is unsorted")]
UnsortedInput,
/// Failed to deserialize data into type.
///
/// Returned by `from_bytes` when the byte slice cannot be decoded.
#[display("failed to deserialize data into type")]
FailedToDeserialize,
}
impl Compress for IntegerList {
type Compressed = Vec<u8>;
@ -23,3 +175,30 @@ impl Decompress for IntegerList {
Self::from_bytes(value).map_err(|_| DatabaseError::Decode)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Both ways of building an empty list report a length of zero.
    #[test]
    fn empty_list() {
        let via_empty = IntegerList::empty();
        assert_eq!(via_empty.len(), 0);

        let via_pre_sorted = IntegerList::new_pre_sorted(std::iter::empty());
        assert_eq!(via_pre_sorted.len(), 0);
    }

    /// Iterating a list yields the values it was built from.
    #[test]
    fn test_integer_list() {
        let values = [1, 2, 3];
        let list = IntegerList::new(values).unwrap();
        let collected: Vec<_> = list.iter().collect();
        assert_eq!(collected, values);
    }

    /// Encoding to bytes and decoding again gives back an equal list.
    #[test]
    fn test_integer_list_serialization() {
        let list = IntegerList::new([1, 2, 3]).unwrap();
        let encoded = list.to_bytes();
        let decoded = IntegerList::from_bytes(&encoded).unwrap();
        assert_eq!(decoded, list)
    }
}

View File

@ -8,9 +8,8 @@ use alloy_consensus::Header;
use alloy_genesis::GenesisAccount;
use alloy_primitives::{Address, Bytes, Log, B256, U256};
use reth_codecs::{add_arbitrary_tests, Compact};
use reth_primitives::{
Account, Bytecode, Receipt, StorageEntry, TransactionSigned, TransactionSignedNoHash, TxType,
};
use reth_primitives::{Receipt, StorageEntry, TransactionSigned, TransactionSignedNoHash, TxType};
use reth_primitives_traits::{Account, Bytecode};
use reth_prune_types::{PruneCheckpoint, PruneSegment};
use reth_stages_types::StageCheckpoint;
use reth_trie_common::{StoredNibbles, StoredNibblesSubKey, *};
@ -24,6 +23,7 @@ pub mod storage_sharded_key;
pub use accounts::*;
pub use blocks::*;
pub use integer_list::IntegerList;
pub use reth_db_models::{
AccountBeforeTx, ClientVersion, StoredBlockBodyIndices, StoredBlockWithdrawals,
};

View File

@ -609,12 +609,11 @@ mod tests {
use reth_db::DatabaseEnv;
use reth_db_api::{
cursor::DbCursorRO,
models::{storage_sharded_key::StorageShardedKey, ShardedKey},
models::{storage_sharded_key::StorageShardedKey, IntegerList, ShardedKey},
table::{Table, TableRow},
transaction::DbTx,
Database,
};
use reth_primitives_traits::IntegerList;
use reth_provider::{
test_utils::{create_test_provider_factory_with_chain_spec, MockNodeTypesWithDB},
ProviderFactory,

View File

@ -507,12 +507,11 @@ mod tests {
use alloy_primitives::{Address, B256, U256};
use reth_db_api::{
cursor::{DbDupCursorRO, DbDupCursorRW, ReverseWalker, Walker},
models::{AccountBeforeTx, ShardedKey},
models::{AccountBeforeTx, IntegerList, ShardedKey},
table::{Encode, Table},
};
use reth_libmdbx::Error;
use reth_primitives::{Account, StorageEntry};
use reth_primitives_traits::IntegerList;
use reth_primitives_traits::{Account, StorageEntry};
use reth_storage_errors::db::{DatabaseWriteError, DatabaseWriteOperation};
use std::str::FromStr;
use tempfile::TempDir;

View File

@ -1,6 +1,6 @@
//! Curates the input coming from the fuzzer for certain types.
use reth_primitives_traits::IntegerList;
use reth_db_api::models::IntegerList;
use serde::{Deserialize, Serialize};
/// Makes sure that the list provided by the fuzzer is not empty and pre-sorted

View File

@ -16,9 +16,6 @@ macro_rules! impl_fuzzer_with_input {
pub mod $name {
use reth_db_api::table;
#[allow(unused_imports)]
#[allow(unused_imports)]
use reth_primitives_traits::*;

View File

@ -26,13 +26,13 @@ use reth_db_api::{
accounts::BlockNumberAddress,
blocks::{HeaderHash, StoredBlockOmmers},
storage_sharded_key::StorageShardedKey,
AccountBeforeTx, ClientVersion, CompactU256, ShardedKey, StoredBlockBodyIndices,
StoredBlockWithdrawals,
AccountBeforeTx, ClientVersion, CompactU256, IntegerList, ShardedKey,
StoredBlockBodyIndices, StoredBlockWithdrawals,
},
table::{Decode, DupSort, Encode, Table},
};
use reth_primitives::{Account, Bytecode, Receipt, StorageEntry, TransactionSignedNoHash};
use reth_primitives_traits::IntegerList;
use reth_primitives::{Receipt, StorageEntry, TransactionSignedNoHash};
use reth_primitives_traits::{Account, Bytecode};
use reth_prune_types::{PruneCheckpoint, PruneSegment};
use reth_stages_types::StageCheckpoint;
use reth_trie_common::{BranchNodeCompact, StorageTrieEntry, StoredNibbles, StoredNibblesSubKey};

View File

@ -15,7 +15,7 @@ workspace = true
reth-mdbx-sys.workspace = true
bitflags.workspace = true
byteorder = "1"
byteorder.workspace = true
derive_more.workspace = true
indexmap = "2"
parking_lot.workspace = true