perf: improve IntegerList API to avoid allocations (#11292)

This commit is contained in:
DaniPopes
2024-09-27 18:46:33 +02:00
committed by GitHub
parent 8dfab3354b
commit bf18fd927c
13 changed files with 71 additions and 71 deletions

1
Cargo.lock generated
View File

@ -2762,7 +2762,6 @@ dependencies = [
"futures-util",
"reth",
"reth-node-ethereum",
"reth-rpc-types",
]
[[package]]

View File

@ -4,7 +4,7 @@ use core::fmt;
use derive_more::Deref;
use roaring::RoaringTreemap;
use serde::{
de::{SeqAccess, Unexpected, Visitor},
de::{SeqAccess, Visitor},
ser::SerializeSeq,
Deserialize, Deserializer, Serialize, Serializer,
};
@ -16,34 +16,54 @@ pub struct IntegerList(pub RoaringTreemap);
impl fmt::Debug for IntegerList {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let vec: Vec<u64> = self.0.iter().collect();
write!(f, "IntegerList {vec:?}")
f.write_str("IntegerList")?;
f.debug_list().entries(self.0.iter()).finish()
}
}
impl IntegerList {
/// Creates a new empty `IntegerList`.
pub fn empty() -> Self {
Self(RoaringTreemap::new())
}
/// Creates an `IntegerList` from a list of integers.
///
/// # Returns
///
/// Returns an error if the list is empty or not pre-sorted.
pub fn new<T: AsRef<[u64]>>(list: T) -> Result<Self, RoaringBitmapError> {
Ok(Self(
RoaringTreemap::from_sorted_iter(list.as_ref().iter().copied())
.map_err(|_| RoaringBitmapError::InvalidInput)?,
))
/// Returns an error if the list is not pre-sorted.
pub fn new(list: impl IntoIterator<Item = u64>) -> Result<Self, IntegerListError> {
RoaringTreemap::from_sorted_iter(list)
.map(Self)
.map_err(|_| IntegerListError::UnsortedInput)
}
// Creates an IntegerList from a pre-sorted list of integers.
///
/// # Panics
///
/// Panics if the list is empty or not pre-sorted.
pub fn new_pre_sorted<T: AsRef<[u64]>>(list: T) -> Self {
Self(
RoaringTreemap::from_sorted_iter(list.as_ref().iter().copied())
.expect("IntegerList must be pre-sorted and non-empty"),
)
/// Panics if the list is not pre-sorted.
#[inline]
#[track_caller]
pub fn new_pre_sorted(list: impl IntoIterator<Item = u64>) -> Self {
Self::new(list).expect("IntegerList must be pre-sorted and non-empty")
}
/// Appends a list of integers to the current list.
pub fn append(&mut self, list: impl IntoIterator<Item = u64>) -> Result<u64, IntegerListError> {
self.0.append(list).map_err(|_| IntegerListError::UnsortedInput)
}
/// Pushes a new integer to the list.
pub fn push(&mut self, value: u64) -> Result<(), IntegerListError> {
if self.0.push(value) {
Ok(())
} else {
Err(IntegerListError::UnsortedInput)
}
}
/// Clears the list.
pub fn clear(&mut self) {
self.0.clear();
}
/// Serializes a [`IntegerList`] into a sequence of bytes.
@ -59,36 +79,21 @@ impl IntegerList {
}
/// Deserializes a sequence of bytes into a proper [`IntegerList`].
pub fn from_bytes(data: &[u8]) -> Result<Self, RoaringBitmapError> {
pub fn from_bytes(data: &[u8]) -> Result<Self, IntegerListError> {
Ok(Self(
RoaringTreemap::deserialize_from(data)
.map_err(|_| RoaringBitmapError::FailedToDeserialize)?,
.map_err(|_| IntegerListError::FailedToDeserialize)?,
))
}
}
macro_rules! impl_uint {
($($w:tt),+) => {
$(
impl From<Vec<$w>> for IntegerList {
fn from(v: Vec<$w>) -> Self {
Self::new_pre_sorted(v.iter().map(|v| *v as u64).collect::<Vec<_>>())
}
}
)+
};
}
impl_uint!(usize, u64, u32, u8, u16);
impl Serialize for IntegerList {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let vec = self.0.iter().collect::<Vec<u64>>();
let mut seq = serializer.serialize_seq(Some(self.len() as usize))?;
for e in vec {
for e in &self.0 {
seq.serialize_element(&e)?;
}
seq.end()
@ -107,12 +112,11 @@ impl<'de> Visitor<'de> for IntegerListVisitor {
where
E: SeqAccess<'de>,
{
let mut list = Vec::new();
let mut list = IntegerList::empty();
while let Some(item) = seq.next_element()? {
list.push(item);
list.push(item).map_err(serde::de::Error::custom)?;
}
IntegerList::new(list).map_err(|_| serde::de::Error::invalid_value(Unexpected::Seq, &self))
Ok(list)
}
}
@ -132,17 +136,17 @@ use arbitrary::{Arbitrary, Unstructured};
impl<'a> Arbitrary<'a> for IntegerList {
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self, arbitrary::Error> {
let mut nums: Vec<u64> = Vec::arbitrary(u)?;
nums.sort();
nums.sort_unstable();
Self::new(nums).map_err(|_| arbitrary::Error::IncorrectFormat)
}
}
/// Primitives error type.
#[derive(Debug, derive_more::Display, derive_more::Error)]
pub enum RoaringBitmapError {
/// The provided input is invalid.
#[display("the provided input is invalid")]
InvalidInput,
pub enum IntegerListError {
/// The provided input is unsorted.
#[display("the provided input is unsorted")]
UnsortedInput,
/// Failed to deserialize data into type.
#[display("failed to deserialize data into type")]
FailedToDeserialize,
@ -152,6 +156,12 @@ pub enum RoaringBitmapError {
mod tests {
use super::*;
#[test]
fn empty_list() {
assert_eq!(IntegerList::empty().len(), 0);
assert_eq!(IntegerList::new_pre_sorted(std::iter::empty()).len(), 0);
}
#[test]
fn test_integer_list() {
let original_list = [1, 2, 3];

View File

@ -21,7 +21,7 @@ pub mod account;
pub use account::{Account, Bytecode};
mod integer_list;
pub use integer_list::{IntegerList, RoaringBitmapError};
pub use integer_list::{IntegerList, IntegerListError};
pub mod request;
pub use request::{Request, Requests};

View File

@ -275,10 +275,8 @@ mod tests {
.iter()
.filter(|(key, _)| key.highest_block_number > last_pruned_block_number)
.map(|(key, blocks)| {
let new_blocks = blocks
.iter()
.skip_while(|block| *block <= last_pruned_block_number)
.collect::<Vec<_>>();
let new_blocks =
blocks.iter().skip_while(|block| *block <= last_pruned_block_number);
(key.clone(), BlockNumberList::new_pre_sorted(new_blocks))
})
.collect::<Vec<_>>();

View File

@ -281,10 +281,8 @@ mod tests {
.iter()
.filter(|(key, _)| key.sharded_key.highest_block_number > last_pruned_block_number)
.map(|(key, blocks)| {
let new_blocks = blocks
.iter()
.skip_while(|block| *block <= last_pruned_block_number)
.collect::<Vec<_>>();
let new_blocks =
blocks.iter().skip_while(|block| *block <= last_pruned_block_number);
(key.clone(), BlockNumberList::new_pre_sorted(new_blocks))
})
.collect::<Vec<_>>();

View File

@ -182,7 +182,7 @@ mod tests {
}
fn list(list: &[u64]) -> BlockNumberList {
BlockNumberList::new(list).unwrap()
BlockNumberList::new(list.iter().copied()).unwrap()
}
fn cast(

View File

@ -197,7 +197,7 @@ mod tests {
}
fn list(list: &[u64]) -> BlockNumberList {
BlockNumberList::new(list).unwrap()
BlockNumberList::new(list.iter().copied()).unwrap()
}
fn cast(

View File

@ -54,11 +54,11 @@ where
let mut cache: HashMap<P, Vec<u64>> = HashMap::default();
let mut collect = |cache: &HashMap<P, Vec<u64>>| {
for (key, indice_list) in cache {
let last = indice_list.last().expect("qed");
for (key, indices) in cache {
let last = indices.last().expect("qed");
collector.insert(
sharded_key_factory(*key, *last),
BlockNumberList::new_pre_sorted(indice_list),
BlockNumberList::new_pre_sorted(indices.iter().copied()),
)?;
}
Ok::<(), StageError>(())

View File

@ -12,6 +12,7 @@ impl Compress for IntegerList {
fn compress(self) -> Self::Compressed {
self.to_bytes()
}
fn compress_to_buf<B: bytes::BufMut + AsMut<[u8]>>(self, buf: &mut B) {
self.to_mut_bytes(buf)
}

View File

@ -1319,7 +1319,7 @@ mod tests {
for i in 1..5 {
let key = ShardedKey::new(real_key, i * 100);
let list: IntegerList = vec![i * 100u64].into();
let list = IntegerList::new_pre_sorted([i * 100u64]);
db.update(|tx| tx.put::<AccountsHistory>(key.clone(), list.clone()).expect(""))
.unwrap();
@ -1340,7 +1340,7 @@ mod tests {
.expect("should be able to retrieve it.");
assert_eq!(ShardedKey::new(real_key, 200), key);
let list200: IntegerList = vec![200u64].into();
let list200 = IntegerList::new_pre_sorted([200u64]);
assert_eq!(list200, list);
}
// Seek greatest index
@ -1357,7 +1357,7 @@ mod tests {
.expect("should be able to retrieve it.");
assert_eq!(ShardedKey::new(real_key, 400), key);
let list400: IntegerList = vec![400u64].into();
let list400 = IntegerList::new_pre_sorted([400u64]);
assert_eq!(list400, list);
}
}

View File

@ -10,12 +10,7 @@ pub struct IntegerListInput(pub Vec<u64>);
impl From<IntegerListInput> for IntegerList {
fn from(list: IntegerListInput) -> Self {
let mut v = list.0;
// Empty lists are not supported by `IntegerList`, so we want to skip these cases.
if v.is_empty() {
return vec![1u64].into()
}
v.sort();
v.into()
v.sort_unstable();
Self::new_pre_sorted(v)
}
}

View File

@ -1356,7 +1356,7 @@ impl<TX: DbTxMut + DbTx, Spec: Send + Sync> DatabaseProvider<TX, Spec> {
};
self.tx.put::<T>(
sharded_key_factory(partial_key, highest_block_number),
BlockNumberList::new_pre_sorted(list),
BlockNumberList::new_pre_sorted(list.iter().copied()),
)?;
}
}

View File

@ -8,7 +8,6 @@ license.workspace = true
[dependencies]
reth.workspace = true
reth-node-ethereum.workspace = true
reth-rpc-types.workspace = true
alloy-rpc-types.workspace = true
clap = { workspace = true, features = ["derive"] }
futures-util.workspace = true