perf: improve IntegerList API to avoid allocations (#11292)

This commit is contained in:
DaniPopes
2024-09-27 18:46:33 +02:00
committed by GitHub
parent 8dfab3354b
commit bf18fd927c
13 changed files with 71 additions and 71 deletions

1
Cargo.lock generated
View File

@ -2762,7 +2762,6 @@ dependencies = [
"futures-util", "futures-util",
"reth", "reth",
"reth-node-ethereum", "reth-node-ethereum",
"reth-rpc-types",
] ]
[[package]] [[package]]

View File

@ -4,7 +4,7 @@ use core::fmt;
use derive_more::Deref; use derive_more::Deref;
use roaring::RoaringTreemap; use roaring::RoaringTreemap;
use serde::{ use serde::{
de::{SeqAccess, Unexpected, Visitor}, de::{SeqAccess, Visitor},
ser::SerializeSeq, ser::SerializeSeq,
Deserialize, Deserializer, Serialize, Serializer, Deserialize, Deserializer, Serialize, Serializer,
}; };
@ -16,34 +16,54 @@ pub struct IntegerList(pub RoaringTreemap);
impl fmt::Debug for IntegerList { impl fmt::Debug for IntegerList {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let vec: Vec<u64> = self.0.iter().collect(); f.write_str("IntegerList")?;
write!(f, "IntegerList {vec:?}") f.debug_list().entries(self.0.iter()).finish()
} }
} }
impl IntegerList { impl IntegerList {
/// Builds an `IntegerList` that holds no integers.
pub fn empty() -> Self {
    let bitmap = RoaringTreemap::new();
    Self(bitmap)
}
/// Creates an `IntegerList` from a list of integers. /// Creates an `IntegerList` from a list of integers.
/// ///
/// # Returns /// Returns an error if the list is not pre-sorted.
/// pub fn new(list: impl IntoIterator<Item = u64>) -> Result<Self, IntegerListError> {
/// Returns an error if the list is empty or not pre-sorted. RoaringTreemap::from_sorted_iter(list)
pub fn new<T: AsRef<[u64]>>(list: T) -> Result<Self, RoaringBitmapError> { .map(Self)
Ok(Self( .map_err(|_| IntegerListError::UnsortedInput)
RoaringTreemap::from_sorted_iter(list.as_ref().iter().copied())
.map_err(|_| RoaringBitmapError::InvalidInput)?,
))
} }
// Creates an IntegerList from a pre-sorted list of integers. // Creates an IntegerList from a pre-sorted list of integers.
/// ///
/// # Panics /// # Panics
/// ///
/// Panics if the list is empty or not pre-sorted. /// Panics if the list is not pre-sorted.
pub fn new_pre_sorted<T: AsRef<[u64]>>(list: T) -> Self { #[inline]
Self( #[track_caller]
RoaringTreemap::from_sorted_iter(list.as_ref().iter().copied()) pub fn new_pre_sorted(list: impl IntoIterator<Item = u64>) -> Self {
.expect("IntegerList must be pre-sorted and non-empty"), Self::new(list).expect("IntegerList must be pre-sorted and non-empty")
) }
/// Appends the integers yielded by `list` to the current list.
///
/// On success, the `u64` reported by the underlying treemap's `append` is returned
/// unchanged (presumably the number of values added; confirm against the `roaring` docs).
///
/// # Errors
///
/// Returns [`IntegerListError::UnsortedInput`] whenever the underlying treemap's
/// `append` call fails; the original error value is discarded.
pub fn append(&mut self, list: impl IntoIterator<Item = u64>) -> Result<u64, IntegerListError> {
    match self.0.append(list) {
        Ok(reported) => Ok(reported),
        Err(_) => Err(IntegerListError::UnsortedInput),
    }
}
/// Pushes a single integer into the list.
///
/// # Errors
///
/// Returns [`IntegerListError::UnsortedInput`] when the underlying treemap's `push`
/// reports `false`, i.e. the value was not accepted.
pub fn push(&mut self, value: u64) -> Result<(), IntegerListError> {
    let accepted = self.0.push(value);
    if !accepted {
        return Err(IntegerListError::UnsortedInput);
    }
    Ok(())
}
/// Clears the list.
pub fn clear(&mut self) {
self.0.clear();
} }
/// Serializes a [`IntegerList`] into a sequence of bytes. /// Serializes a [`IntegerList`] into a sequence of bytes.
@ -59,36 +79,21 @@ impl IntegerList {
} }
/// Deserializes a sequence of bytes into a proper [`IntegerList`]. /// Deserializes a sequence of bytes into a proper [`IntegerList`].
pub fn from_bytes(data: &[u8]) -> Result<Self, RoaringBitmapError> { pub fn from_bytes(data: &[u8]) -> Result<Self, IntegerListError> {
Ok(Self( Ok(Self(
RoaringTreemap::deserialize_from(data) RoaringTreemap::deserialize_from(data)
.map_err(|_| RoaringBitmapError::FailedToDeserialize)?, .map_err(|_| IntegerListError::FailedToDeserialize)?,
)) ))
} }
} }
macro_rules! impl_uint {
($($w:tt),+) => {
$(
impl From<Vec<$w>> for IntegerList {
fn from(v: Vec<$w>) -> Self {
Self::new_pre_sorted(v.iter().map(|v| *v as u64).collect::<Vec<_>>())
}
}
)+
};
}
impl_uint!(usize, u64, u32, u8, u16);
impl Serialize for IntegerList { impl Serialize for IntegerList {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where where
S: Serializer, S: Serializer,
{ {
let vec = self.0.iter().collect::<Vec<u64>>();
let mut seq = serializer.serialize_seq(Some(self.len() as usize))?; let mut seq = serializer.serialize_seq(Some(self.len() as usize))?;
for e in vec { for e in &self.0 {
seq.serialize_element(&e)?; seq.serialize_element(&e)?;
} }
seq.end() seq.end()
@ -107,12 +112,11 @@ impl<'de> Visitor<'de> for IntegerListVisitor {
where where
E: SeqAccess<'de>, E: SeqAccess<'de>,
{ {
let mut list = Vec::new(); let mut list = IntegerList::empty();
while let Some(item) = seq.next_element()? { while let Some(item) = seq.next_element()? {
list.push(item); list.push(item).map_err(serde::de::Error::custom)?;
} }
Ok(list)
IntegerList::new(list).map_err(|_| serde::de::Error::invalid_value(Unexpected::Seq, &self))
} }
} }
@ -132,17 +136,17 @@ use arbitrary::{Arbitrary, Unstructured};
impl<'a> Arbitrary<'a> for IntegerList { impl<'a> Arbitrary<'a> for IntegerList {
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self, arbitrary::Error> { fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self, arbitrary::Error> {
let mut nums: Vec<u64> = Vec::arbitrary(u)?; let mut nums: Vec<u64> = Vec::arbitrary(u)?;
nums.sort(); nums.sort_unstable();
Self::new(nums).map_err(|_| arbitrary::Error::IncorrectFormat) Self::new(nums).map_err(|_| arbitrary::Error::IncorrectFormat)
} }
} }
/// Primitives error type. /// Primitives error type.
#[derive(Debug, derive_more::Display, derive_more::Error)] #[derive(Debug, derive_more::Display, derive_more::Error)]
pub enum RoaringBitmapError { pub enum IntegerListError {
/// The provided input is invalid. /// The provided input is unsorted.
#[display("the provided input is invalid")] #[display("the provided input is unsorted")]
InvalidInput, UnsortedInput,
/// Failed to deserialize data into type. /// Failed to deserialize data into type.
#[display("failed to deserialize data into type")] #[display("failed to deserialize data into type")]
FailedToDeserialize, FailedToDeserialize,
@ -152,6 +156,12 @@ pub enum RoaringBitmapError {
mod tests { mod tests {
use super::*; use super::*;
#[test]
fn empty_list() {
    // A list built through the dedicated constructor has length zero.
    let from_constructor = IntegerList::empty();
    assert_eq!(from_constructor.len(), 0);

    // A list built from an iterator that yields nothing has length zero too.
    let from_iterator = IntegerList::new_pre_sorted(std::iter::empty());
    assert_eq!(from_iterator.len(), 0);
}
#[test] #[test]
fn test_integer_list() { fn test_integer_list() {
let original_list = [1, 2, 3]; let original_list = [1, 2, 3];

View File

@ -21,7 +21,7 @@ pub mod account;
pub use account::{Account, Bytecode}; pub use account::{Account, Bytecode};
mod integer_list; mod integer_list;
pub use integer_list::{IntegerList, RoaringBitmapError}; pub use integer_list::{IntegerList, IntegerListError};
pub mod request; pub mod request;
pub use request::{Request, Requests}; pub use request::{Request, Requests};

View File

@ -275,10 +275,8 @@ mod tests {
.iter() .iter()
.filter(|(key, _)| key.highest_block_number > last_pruned_block_number) .filter(|(key, _)| key.highest_block_number > last_pruned_block_number)
.map(|(key, blocks)| { .map(|(key, blocks)| {
let new_blocks = blocks let new_blocks =
.iter() blocks.iter().skip_while(|block| *block <= last_pruned_block_number);
.skip_while(|block| *block <= last_pruned_block_number)
.collect::<Vec<_>>();
(key.clone(), BlockNumberList::new_pre_sorted(new_blocks)) (key.clone(), BlockNumberList::new_pre_sorted(new_blocks))
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();

View File

@ -281,10 +281,8 @@ mod tests {
.iter() .iter()
.filter(|(key, _)| key.sharded_key.highest_block_number > last_pruned_block_number) .filter(|(key, _)| key.sharded_key.highest_block_number > last_pruned_block_number)
.map(|(key, blocks)| { .map(|(key, blocks)| {
let new_blocks = blocks let new_blocks =
.iter() blocks.iter().skip_while(|block| *block <= last_pruned_block_number);
.skip_while(|block| *block <= last_pruned_block_number)
.collect::<Vec<_>>();
(key.clone(), BlockNumberList::new_pre_sorted(new_blocks)) (key.clone(), BlockNumberList::new_pre_sorted(new_blocks))
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();

View File

@ -182,7 +182,7 @@ mod tests {
} }
fn list(list: &[u64]) -> BlockNumberList { fn list(list: &[u64]) -> BlockNumberList {
BlockNumberList::new(list).unwrap() BlockNumberList::new(list.iter().copied()).unwrap()
} }
fn cast( fn cast(

View File

@ -197,7 +197,7 @@ mod tests {
} }
fn list(list: &[u64]) -> BlockNumberList { fn list(list: &[u64]) -> BlockNumberList {
BlockNumberList::new(list).unwrap() BlockNumberList::new(list.iter().copied()).unwrap()
} }
fn cast( fn cast(

View File

@ -54,11 +54,11 @@ where
let mut cache: HashMap<P, Vec<u64>> = HashMap::default(); let mut cache: HashMap<P, Vec<u64>> = HashMap::default();
let mut collect = |cache: &HashMap<P, Vec<u64>>| { let mut collect = |cache: &HashMap<P, Vec<u64>>| {
for (key, indice_list) in cache { for (key, indices) in cache {
let last = indice_list.last().expect("qed"); let last = indices.last().expect("qed");
collector.insert( collector.insert(
sharded_key_factory(*key, *last), sharded_key_factory(*key, *last),
BlockNumberList::new_pre_sorted(indice_list), BlockNumberList::new_pre_sorted(indices.iter().copied()),
)?; )?;
} }
Ok::<(), StageError>(()) Ok::<(), StageError>(())

View File

@ -12,6 +12,7 @@ impl Compress for IntegerList {
fn compress(self) -> Self::Compressed { fn compress(self) -> Self::Compressed {
self.to_bytes() self.to_bytes()
} }
fn compress_to_buf<B: bytes::BufMut + AsMut<[u8]>>(self, buf: &mut B) { fn compress_to_buf<B: bytes::BufMut + AsMut<[u8]>>(self, buf: &mut B) {
self.to_mut_bytes(buf) self.to_mut_bytes(buf)
} }

View File

@ -1319,7 +1319,7 @@ mod tests {
for i in 1..5 { for i in 1..5 {
let key = ShardedKey::new(real_key, i * 100); let key = ShardedKey::new(real_key, i * 100);
let list: IntegerList = vec![i * 100u64].into(); let list = IntegerList::new_pre_sorted([i * 100u64]);
db.update(|tx| tx.put::<AccountsHistory>(key.clone(), list.clone()).expect("")) db.update(|tx| tx.put::<AccountsHistory>(key.clone(), list.clone()).expect(""))
.unwrap(); .unwrap();
@ -1340,7 +1340,7 @@ mod tests {
.expect("should be able to retrieve it."); .expect("should be able to retrieve it.");
assert_eq!(ShardedKey::new(real_key, 200), key); assert_eq!(ShardedKey::new(real_key, 200), key);
let list200: IntegerList = vec![200u64].into(); let list200 = IntegerList::new_pre_sorted([200u64]);
assert_eq!(list200, list); assert_eq!(list200, list);
} }
// Seek greatest index // Seek greatest index
@ -1357,7 +1357,7 @@ mod tests {
.expect("should be able to retrieve it."); .expect("should be able to retrieve it.");
assert_eq!(ShardedKey::new(real_key, 400), key); assert_eq!(ShardedKey::new(real_key, 400), key);
let list400: IntegerList = vec![400u64].into(); let list400 = IntegerList::new_pre_sorted([400u64]);
assert_eq!(list400, list); assert_eq!(list400, list);
} }
} }

View File

@ -10,12 +10,7 @@ pub struct IntegerListInput(pub Vec<u64>);
impl From<IntegerListInput> for IntegerList { impl From<IntegerListInput> for IntegerList {
fn from(list: IntegerListInput) -> Self { fn from(list: IntegerListInput) -> Self {
let mut v = list.0; let mut v = list.0;
v.sort_unstable();
// Empty lists are not supported by `IntegerList`, so we want to skip these cases. Self::new_pre_sorted(v)
if v.is_empty() {
return vec![1u64].into()
}
v.sort();
v.into()
} }
} }

View File

@ -1356,7 +1356,7 @@ impl<TX: DbTxMut + DbTx, Spec: Send + Sync> DatabaseProvider<TX, Spec> {
}; };
self.tx.put::<T>( self.tx.put::<T>(
sharded_key_factory(partial_key, highest_block_number), sharded_key_factory(partial_key, highest_block_number),
BlockNumberList::new_pre_sorted(list), BlockNumberList::new_pre_sorted(list.iter().copied()),
)?; )?;
} }
} }

View File

@ -8,7 +8,6 @@ license.workspace = true
[dependencies] [dependencies]
reth.workspace = true reth.workspace = true
reth-node-ethereum.workspace = true reth-node-ethereum.workspace = true
reth-rpc-types.workspace = true
alloy-rpc-types.workspace = true alloy-rpc-types.workspace = true
clap = { workspace = true, features = ["derive"] } clap = { workspace = true, features = ["derive"] }
futures-util.workspace = true futures-util.workspace = true