From 8bc8bd68205243dff84f93ec12567d4be80a1552 Mon Sep 17 00:00:00 2001 From: joshieDo <93316087+joshieDo@users.noreply.github.com> Date: Mon, 24 Oct 2022 21:14:59 +0800 Subject: [PATCH] feat: add `IntegerList` type Elias-Fano (#111) * add elias * docs: should not be empty * change models doc * add arbitrary feature * add simple test * add test-utils to primitives * cfg arbitrary --- Cargo.lock | 31 +++++ .../interfaces/src/db/codecs/fuzz/inputs.rs | 21 +++ .../src/db/codecs/{fuzz.rs => fuzz/mod.rs} | 33 ++++- .../interfaces/src/db/models/integer_list.rs | 22 +++ crates/interfaces/src/db/models/mod.rs | 1 + crates/interfaces/src/db/tables.rs | 10 +- crates/primitives/Cargo.toml | 5 +- crates/primitives/src/error.rs | 13 ++ crates/primitives/src/integer_list.rs | 127 ++++++++++++++++++ crates/primitives/src/lib.rs | 3 + 10 files changed, 257 insertions(+), 9 deletions(-) create mode 100644 crates/interfaces/src/db/codecs/fuzz/inputs.rs rename crates/interfaces/src/db/codecs/{fuzz.rs => fuzz/mod.rs} (57%) create mode 100644 crates/interfaces/src/db/models/integer_list.rs create mode 100644 crates/primitives/src/error.rs create mode 100644 crates/primitives/src/integer_list.rs diff --git a/Cargo.lock b/Cargo.lock index 494a59c41..a18bfbefb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,6 +58,15 @@ dependencies = [ "syn", ] +[[package]] +name = "arbitrary" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29d47fbf90d5149a107494b15a7dc8d69b351be2db3bb9691740e88ec17fd880" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arrayref" version = "0.3.6" @@ -759,6 +768,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "derive_arbitrary" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4903dff04948f22033ca30232ab8eca2c3fc4c913a8b6a34ee5199699814817f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "derive_more" version = "0.99.17" 
@@ -2573,6 +2593,7 @@ dependencies = [ name = "reth-primitives" version = "0.1.0" dependencies = [ + "arbitrary", "bytes", "crc", "ethers-core", @@ -2583,6 +2604,7 @@ dependencies = [ "reth-rlp", "serde", "serde_json", + "sucds", "thiserror", "tiny-keccak 0.3.0", ] @@ -3319,6 +3341,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" +[[package]] +name = "sucds" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c1c7f814471a34d2355f9eb25ef3517ec491ac243612b1c83137739998c5444" +dependencies = [ + "anyhow", +] + [[package]] name = "syn" version = "1.0.103" diff --git a/crates/interfaces/src/db/codecs/fuzz/inputs.rs b/crates/interfaces/src/db/codecs/fuzz/inputs.rs new file mode 100644 index 000000000..ff8d8b39c --- /dev/null +++ b/crates/interfaces/src/db/codecs/fuzz/inputs.rs @@ -0,0 +1,21 @@ +//! Curates the input coming from the fuzzer for certain types. + +use reth_primitives::IntegerList; +use serde::{Deserialize, Serialize}; + +/// Makes sure that the list provided by the fuzzer is not empty and pre-sorted +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct IntegerListInput(pub Vec); + +impl From for IntegerList { + fn from(list: IntegerListInput) -> IntegerList { + let mut v = list.0; + + // Empty lists are not supported by `IntegerList`, so we want to skip these cases. + if v.is_empty() { + return vec![1u64].into() + } + v.sort(); + v.into() + } +} diff --git a/crates/interfaces/src/db/codecs/fuzz.rs b/crates/interfaces/src/db/codecs/fuzz/mod.rs similarity index 57% rename from crates/interfaces/src/db/codecs/fuzz.rs rename to crates/interfaces/src/db/codecs/fuzz/mod.rs index e4abb5d1d..15b24dbaf 100644 --- a/crates/interfaces/src/db/codecs/fuzz.rs +++ b/crates/interfaces/src/db/codecs/fuzz/mod.rs @@ -1,9 +1,14 @@ //! 
Implements fuzzing targets to be used by test-fuzz +mod inputs; + /// Fuzzer generates a random instance of the object and proceeds to encode and decode it. It then /// makes sure that it matches the original object. -macro_rules! impl_fuzzer { - ($($name:tt),+) => { +/// +/// Some types like [`IntegerList`] might have some restrictions on how they're fuzzed. For example, +/// the list is assumed to be sorted before creating the object. +macro_rules! impl_fuzzer_with_input { + ($(($name:tt, $input_type:tt)),+) => { $( /// Macro generated module to be used by test-fuzz and `bench` if it applies. #[allow(non_snake_case)] @@ -12,8 +17,13 @@ macro_rules! impl_fuzzer { use reth_primitives::$name; use crate::db::table; + #[allow(unused_imports)] + use super::inputs::*; + /// Encodes and decodes table types returning its encoded size and the decoded object. - pub fn encode_and_decode(obj: $name) -> (usize, $name) { + /// This method is used for benchmarking, so its parameter should be the actual type that is being tested. + pub fn encode_and_decode(obj: $name) -> (usize, $name) + { let data = table::Encode::encode(obj); let size = data.len(); (size, table::Decode::decode(data).expect("failed to decode")) @@ -22,13 +32,14 @@ macro_rules! impl_fuzzer { #[cfg(test)] #[allow(dead_code)] #[test_fuzz::test_fuzz] - pub fn fuzz(obj: $name) { + pub fn fuzz(obj: $input_type) { + let obj: $name = obj.into(); assert!(encode_and_decode(obj.clone()).1 == obj ); } #[test] pub fn test() { - encode_and_decode($name::default()); + fuzz($input_type::default()) } } @@ -36,4 +47,16 @@ macro_rules! impl_fuzzer { }; } +/// Fuzzer generates a random instance of the object and proceeds to encode and decode it. It then +/// makes sure that it matches the original object. +macro_rules! 
impl_fuzzer { + ($($name:tt),+) => { + $( + impl_fuzzer_with_input!(($name, $name)); + )+ + }; +} + impl_fuzzer!(Header, Account); + +impl_fuzzer_with_input!((IntegerList, IntegerListInput)); diff --git a/crates/interfaces/src/db/models/integer_list.rs b/crates/interfaces/src/db/models/integer_list.rs new file mode 100644 index 000000000..64a1b3154 --- /dev/null +++ b/crates/interfaces/src/db/models/integer_list.rs @@ -0,0 +1,22 @@ +//! Implements [`Encode`] and [`Decode`] for [`IntegerList`] + +use crate::db::{ + error::Error, + table::{Decode, Encode}, +}; +use bytes::Bytes; +use reth_primitives::IntegerList; + +impl Encode for IntegerList { + type Encoded = Vec; + + fn encode(self) -> Self::Encoded { + self.to_bytes() + } +} + +impl Decode for IntegerList { + fn decode>(value: B) -> Result { + IntegerList::from_bytes(&value.into()).map_err(|e| Error::Decode(eyre::eyre!("{e}"))) + } +} diff --git a/crates/interfaces/src/db/models/mod.rs b/crates/interfaces/src/db/models/mod.rs index fee897b16..4c0933f9f 100644 --- a/crates/interfaces/src/db/models/mod.rs +++ b/crates/interfaces/src/db/models/mod.rs @@ -1,3 +1,4 @@ //! Implements data structures specific to the database pub mod blocks; +pub mod integer_list; diff --git a/crates/interfaces/src/db/tables.rs b/crates/interfaces/src/db/tables.rs index 46c42eab3..a387c32ba 100644 --- a/crates/interfaces/src/db/tables.rs +++ b/crates/interfaces/src/db/tables.rs @@ -1,7 +1,7 @@ //! Declaration of all Database tables. use crate::db::models::blocks::{BlockNumHash, HeaderHash, NumTransactions, NumTxesInBlock}; -use reth_primitives::{Account, Address, BlockNumber, Header, Receipt}; +use reth_primitives::{Account, Address, BlockNumber, Header, IntegerList, Receipt}; /// Enum for the type of table present in libmdbx. 
#[derive(Debug)] @@ -97,6 +97,12 @@ table!(Config => ConfigKey => ConfigValue); table!(SyncStage => StageId => BlockNumber); +/// +/// Alias Types + +type TxNumberList = IntegerList; +type TxNumber = u64; + // // TODO: Temporary types, until they're properly defined alongside with the Encode and Decode Trait // @@ -107,9 +113,7 @@ type ConfigValue = Vec; type BlockNumHashTxNumber = Vec; type RlpTotalDifficulty = Vec; type RlpTxBody = Vec; -type TxNumber = u64; // TODO check size type PlainStateKey = Address; // TODO new type will have to account for address_incarna_skey as well -type TxNumberList = Vec; #[allow(non_camel_case_types)] type Address_StorageKey = Vec; type AccountBeforeTx = Vec; diff --git a/crates/primitives/Cargo.toml b/crates/primitives/Cargo.toml index 439bff159..24f7821af 100644 --- a/crates/primitives/Cargo.toml +++ b/crates/primitives/Cargo.toml @@ -25,7 +25,10 @@ maplit = "1" bytes = "1.2" serde = "1.0" thiserror = "1" +sucds = "0.5.0" +arbitrary = { version = "1.1.7", features = ["derive"], optional = true} [dev-dependencies] +arbitrary = { version = "1.1.7", features = ["derive"]} serde_json = "1.0" -hex-literal = "0.3" \ No newline at end of file +hex-literal = "0.3" diff --git a/crates/primitives/src/error.rs b/crates/primitives/src/error.rs new file mode 100644 index 000000000..2360eeb8c --- /dev/null +++ b/crates/primitives/src/error.rs @@ -0,0 +1,13 @@ +//! Primitive errors +use thiserror::Error; + +/// Primitives error type. +#[derive(Debug, Error)] +pub enum Error { + /// Input provided is invalid. + #[error("Input provided is invalid.")] + InvalidInput, + /// Failed to deserialize data into type. 
+ #[error("Failed to deserialize data into type.")] + FailedDeserialize, +} diff --git a/crates/primitives/src/integer_list.rs b/crates/primitives/src/integer_list.rs new file mode 100644 index 000000000..880c0e2e3 --- /dev/null +++ b/crates/primitives/src/integer_list.rs @@ -0,0 +1,127 @@ +use crate::error::Error; +use serde::{ + de::{Unexpected, Visitor}, + Deserialize, Deserializer, Serialize, Serializer, +}; +use std::ops::Deref; +use sucds::{EliasFano, Searial}; + +/// Uses EliasFano to hold a list of integers. It provides really good compression with the +/// capability to access its elements without decoding it. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct IntegerList(pub EliasFano); + +impl Deref for IntegerList { + type Target = EliasFano; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl IntegerList { + /// Creates an IntegerList from a list of integers. `usize` is safe to use since + /// [`sucds::EliasFano`] restricts its compilation to 64bits. + /// + /// List should be pre-sorted and not empty. + pub fn new>(list: T) -> Result { + Ok(Self(EliasFano::from_ints(list.as_ref()).map_err(|_| Error::InvalidInput)?)) + } + + /// Serializes a [`IntegerList`] into a sequence of bytes. + pub fn to_bytes(&self) -> Vec { + let mut vec = Vec::with_capacity(self.0.size_in_bytes()); + self.0.serialize_into(&mut vec).expect("not able to encode integer list."); + vec + } + + /// Deserializes a sequence of bytes into a proper [`IntegerList`]. + pub fn from_bytes(data: &[u8]) -> Result { + Ok(Self(EliasFano::deserialize_from(data).map_err(|_| Error::FailedDeserialize)?)) + } +} + +macro_rules! 
impl_uint { + ($($w:tt),+) => { + $( + impl From> for IntegerList { + fn from(v: Vec<$w>) -> Self { + let v: Vec = v.iter().map(|v| *v as usize).collect(); + Self(EliasFano::from_ints(v.as_slice()).expect("could not create list.")) + } + } + )+ + }; +} + +impl_uint!(usize, u64, u32, u8, u16); + +impl Serialize for IntegerList { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(&self.to_bytes()) + } +} + +struct IntegerListVisitor; +impl<'de> Visitor<'de> for IntegerListVisitor { + type Value = IntegerList; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("a byte array") + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: serde::de::Error, + { + IntegerList::from_bytes(v) + .map_err(|_| serde::de::Error::invalid_type(Unexpected::Bytes(v), &self)) + } +} + +impl<'de> Deserialize<'de> for IntegerList { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + deserializer.deserialize_byte_buf(IntegerListVisitor) + } +} + +#[cfg(any(test, feature = "arbitrary"))] +use arbitrary::{Arbitrary, Unstructured}; + +#[cfg(any(test, feature = "arbitrary"))] +impl<'a> Arbitrary<'a> for IntegerList { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + let mut nums: Vec = Vec::arbitrary(u)?; + nums.sort(); + Ok(Self(EliasFano::from_ints(&nums).map_err(|_| arbitrary::Error::IncorrectFormat)?)) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_integer_list() { + let original_list = [1, 2, 3]; + + let ef_list = IntegerList::new(&original_list).unwrap(); + + assert!(ef_list.iter(0).collect::>() == original_list); + } + + #[test] + fn test_integer_list_serialization() { + let original_list = [1, 2, 3]; + let ef_list = IntegerList::new(&original_list).unwrap(); + + let blist = ef_list.to_bytes(); + assert!(IntegerList::from_bytes(&blist).unwrap() == ef_list) + } +} diff --git a/crates/primitives/src/lib.rs 
b/crates/primitives/src/lib.rs index f04122385..9a3ad7e86 100644 --- a/crates/primitives/src/lib.rs +++ b/crates/primitives/src/lib.rs @@ -10,7 +10,9 @@ mod account; mod block; mod chain; +mod error; mod header; +mod integer_list; mod jsonu256; mod log; mod receipt; @@ -20,6 +22,7 @@ pub use account::Account; pub use block::{Block, BlockLocked}; pub use chain::Chain; pub use header::{Header, HeaderLocked}; +pub use integer_list::IntegerList; pub use jsonu256::JsonU256; pub use log::Log; pub use receipt::Receipt;