feat: add IntegerList type Elias-Fano (#111)

* add elias

* docs: should not be empty

* change models doc

* add arbitrary feature

* add simple test

* add test-utils to primitives

* cfg arbitrary
This commit is contained in:
joshieDo
2022-10-24 21:14:59 +08:00
committed by GitHub
parent 6d3ebe5096
commit 8bc8bd6820
10 changed files with 257 additions and 9 deletions

31
Cargo.lock generated
View File

@ -58,6 +58,15 @@ dependencies = [
"syn",
]
[[package]]
name = "arbitrary"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29d47fbf90d5149a107494b15a7dc8d69b351be2db3bb9691740e88ec17fd880"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "arrayref"
version = "0.3.6"
@ -759,6 +768,17 @@ dependencies = [
"zeroize",
]
[[package]]
name = "derive_arbitrary"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4903dff04948f22033ca30232ab8eca2c3fc4c913a8b6a34ee5199699814817f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "derive_more"
version = "0.99.17"
@ -2573,6 +2593,7 @@ dependencies = [
name = "reth-primitives"
version = "0.1.0"
dependencies = [
"arbitrary",
"bytes",
"crc",
"ethers-core",
@ -2583,6 +2604,7 @@ dependencies = [
"reth-rlp",
"serde",
"serde_json",
"sucds",
"thiserror",
"tiny-keccak 0.3.0",
]
@ -3319,6 +3341,15 @@ version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
[[package]]
name = "sucds"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c1c7f814471a34d2355f9eb25ef3517ec491ac243612b1c83137739998c5444"
dependencies = [
"anyhow",
]
[[package]]
name = "syn"
version = "1.0.103"

View File

@ -0,0 +1,21 @@
//! Curates the input coming from the fuzzer for certain types.
use reth_primitives::IntegerList;
use serde::{Deserialize, Serialize};
/// Raw fuzzer-provided list of integers used as the input type when fuzzing [`IntegerList`].
///
/// The wrapped vector is taken as-is from the fuzzer; it is curated (made non-empty and sorted)
/// when it is converted into an [`IntegerList`].
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct IntegerListInput(pub Vec<u64>);
impl From<IntegerListInput> for IntegerList {
fn from(list: IntegerListInput) -> IntegerList {
let mut v = list.0;
// Empty lists are not supported by `IntegerList`, so we want to skip these cases.
if v.is_empty() {
return vec![1u64].into()
}
v.sort();
v.into()
}
}

View File

@ -1,9 +1,14 @@
//! Implements fuzzing targets to be used by test-fuzz
mod inputs;
/// Fuzzer generates a random instance of the object and proceeds to encode and decode it. It then
/// makes sure that it matches the original object.
macro_rules! impl_fuzzer {
($($name:tt),+) => {
///
/// Some types like [`IntegerList`] might have some restrictions on how they're fuzzed. For example,
/// the list is assumed to be sorted before creating the object.
macro_rules! impl_fuzzer_with_input {
($(($name:tt, $input_type:tt)),+) => {
$(
/// Macro generated module to be used by test-fuzz and `bench` if it applies.
#[allow(non_snake_case)]
@ -12,8 +17,13 @@ macro_rules! impl_fuzzer {
use reth_primitives::$name;
use crate::db::table;
#[allow(unused_imports)]
use super::inputs::*;
/// Encodes and decodes table types returning its encoded size and the decoded object.
pub fn encode_and_decode(obj: $name) -> (usize, $name) {
/// This method is used for benchmarking, so its parameter should be the actual type that is being tested.
pub fn encode_and_decode(obj: $name) -> (usize, $name)
{
let data = table::Encode::encode(obj);
let size = data.len();
(size, table::Decode::decode(data).expect("failed to decode"))
@ -22,13 +32,14 @@ macro_rules! impl_fuzzer {
#[cfg(test)]
#[allow(dead_code)]
#[test_fuzz::test_fuzz]
pub fn fuzz(obj: $name) {
pub fn fuzz(obj: $input_type) {
let obj: $name = obj.into();
assert!(encode_and_decode(obj.clone()).1 == obj );
}
#[test]
pub fn test() {
encode_and_decode($name::default());
fuzz($input_type::default())
}
}
@ -36,4 +47,16 @@ macro_rules! impl_fuzzer {
};
}
/// Fuzzer generates a random instance of the object and proceeds to encode and decode it. It then
/// makes sure that it matches the original object.
///
/// Convenience wrapper around `impl_fuzzer_with_input!` for types that can be fuzzed directly:
/// every listed type is passed as both the tested type and its own fuzzer input type.
macro_rules! impl_fuzzer {
($($name:tt),+) => {
$(
// The type doubles as its own input type, so the generated fuzz target receives it directly.
impl_fuzzer_with_input!(($name, $name));
)+
};
}
impl_fuzzer!(Header, Account);
impl_fuzzer_with_input!((IntegerList, IntegerListInput));

View File

@ -0,0 +1,22 @@
//! Implements [`Encode`] and [`Decode`] for [`IntegerList`]
use crate::db::{
error::Error,
table::{Decode, Encode},
};
use bytes::Bytes;
use reth_primitives::IntegerList;
impl Encode for IntegerList {
type Encoded = Vec<u8>;
fn encode(self) -> Self::Encoded {
self.to_bytes()
}
}
impl Decode for IntegerList {
fn decode<B: Into<Bytes>>(value: B) -> Result<Self, Error> {
IntegerList::from_bytes(&value.into()).map_err(|e| Error::Decode(eyre::eyre!("{e}")))
}
}

View File

@ -1,3 +1,4 @@
//! Implements data structures specific to the database
pub mod blocks;
pub mod integer_list;

View File

@ -1,7 +1,7 @@
//! Declaration of all Database tables.
use crate::db::models::blocks::{BlockNumHash, HeaderHash, NumTransactions, NumTxesInBlock};
use reth_primitives::{Account, Address, BlockNumber, Header, Receipt};
use reth_primitives::{Account, Address, BlockNumber, Header, IntegerList, Receipt};
/// Enum for the type of table present in libmdbx.
#[derive(Debug)]
@ -97,6 +97,12 @@ table!(Config => ConfigKey => ConfigValue);
table!(SyncStage => StageId => BlockNumber);
///
/// Alias Types
type TxNumberList = IntegerList;
type TxNumber = u64;
//
// TODO: Temporary types, until they're properly defined alongside with the Encode and Decode Trait
//
@ -107,9 +113,7 @@ type ConfigValue = Vec<u8>;
type BlockNumHashTxNumber = Vec<u8>;
type RlpTotalDifficulty = Vec<u8>;
type RlpTxBody = Vec<u8>;
type TxNumber = u64; // TODO check size
type PlainStateKey = Address; // TODO new type will have to account for address_incarna_skey as well
type TxNumberList = Vec<u8>;
#[allow(non_camel_case_types)]
type Address_StorageKey = Vec<u8>;
type AccountBeforeTx = Vec<u8>;

View File

@ -25,7 +25,10 @@ maplit = "1"
bytes = "1.2"
serde = "1.0"
thiserror = "1"
sucds = "0.5.0"
arbitrary = { version = "1.1.7", features = ["derive"], optional = true}
[dev-dependencies]
arbitrary = { version = "1.1.7", features = ["derive"]}
serde_json = "1.0"
hex-literal = "0.3"

View File

@ -0,0 +1,13 @@
//! Primitive errors
use thiserror::Error;
/// Errors produced by the primitive types of this crate.
#[derive(Debug, Error)]
pub enum Error {
    /// The caller supplied input that the type cannot be built from.
    #[error("Input provided is invalid.")]
    InvalidInput,

    /// The provided data could not be deserialized into the requested type.
    #[error("Failed to deserialize data into type.")]
    FailedDeserialize,
}

View File

@ -0,0 +1,127 @@
use crate::error::Error;
use serde::{
de::{Unexpected, Visitor},
Deserialize, Deserializer, Serialize, Serializer,
};
use std::ops::Deref;
use sucds::{EliasFano, Searial};
/// A list of integers backed by an [`EliasFano`] structure.
///
/// Elias-Fano provides really good compression while still allowing the elements to be accessed
/// without decoding the whole list.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct IntegerList(pub EliasFano);
impl Deref for IntegerList {
type Target = EliasFano;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl IntegerList {
/// Creates an IntegerList from a list of integers. `usize` is safe to use since
/// [`sucds::EliasFano`] restricts its compilation to 64bits.
///
/// List should be pre-sorted and not empty.
pub fn new<T: AsRef<[usize]>>(list: T) -> Result<Self, Error> {
Ok(Self(EliasFano::from_ints(list.as_ref()).map_err(|_| Error::InvalidInput)?))
}
/// Serializes a [`IntegerList`] into a sequence of bytes.
pub fn to_bytes(&self) -> Vec<u8> {
let mut vec = Vec::with_capacity(self.0.size_in_bytes());
self.0.serialize_into(&mut vec).expect("not able to encode integer list.");
vec
}
/// Deserializes a sequence of bytes into a proper [`IntegerList`].
pub fn from_bytes(data: &[u8]) -> Result<Self, Error> {
Ok(Self(EliasFano::deserialize_from(data).map_err(|_| Error::FailedDeserialize)?))
}
}
/// Implements `From<Vec<T>>` for [`IntegerList`] for every listed unsigned integer type.
///
/// Each value is cast to `usize` before the list is built.
///
/// # Panics
///
/// The generated conversions panic when the underlying [`EliasFano`] cannot be built from the
/// provided values.
macro_rules! impl_uint {
    ($($w:tt),+) => {
        $(
            impl From<Vec<$w>> for IntegerList {
                fn from(v: Vec<$w>) -> Self {
                    let ints = v.into_iter().map(|n| n as usize).collect::<Vec<usize>>();
                    Self(EliasFano::from_ints(&ints).expect("could not create list."))
                }
            }
        )+
    };
}

impl_uint!(usize, u64, u32, u8, u16);
impl Serialize for IntegerList {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_bytes(&self.to_bytes())
}
}
struct IntegerListVisitor;
impl<'de> Visitor<'de> for IntegerListVisitor {
type Value = IntegerList;
fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
formatter.write_str("a byte array")
}
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
IntegerList::from_bytes(v)
.map_err(|_| serde::de::Error::invalid_type(Unexpected::Bytes(v), &self))
}
}
impl<'de> Deserialize<'de> for IntegerList {
fn deserialize<D>(deserializer: D) -> Result<IntegerList, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_byte_buf(IntegerListVisitor)
}
}
#[cfg(any(test, feature = "arbitrary"))]
use arbitrary::{Arbitrary, Unstructured};
#[cfg(any(test, feature = "arbitrary"))]
impl<'a> Arbitrary<'a> for IntegerList {
    /// Generates an arbitrary list of integers, sorts it and builds an [`IntegerList`] from it.
    ///
    /// # Errors
    ///
    /// Returns [`arbitrary::Error::IncorrectFormat`] when the generated values cannot be turned
    /// into an [`EliasFano`] structure.
    fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self, arbitrary::Error> {
        let mut sorted = Vec::<usize>::arbitrary(u)?;
        sorted.sort();

        EliasFano::from_ints(&sorted).map(Self).map_err(|_| arbitrary::Error::IncorrectFormat)
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// The list must yield back exactly the integers it was built from.
    #[test]
    fn test_integer_list() {
        let original_list = [1, 2, 3];
        let ef_list = IntegerList::new(&original_list).unwrap();

        // `assert_eq!` prints both sides on failure, unlike `assert!(a == b)`.
        assert_eq!(ef_list.iter(0).collect::<Vec<usize>>(), original_list);
    }

    /// Serializing to bytes and deserializing again must give back an equal list.
    #[test]
    fn test_integer_list_serialization() {
        let original_list = [1, 2, 3];
        let ef_list = IntegerList::new(&original_list).unwrap();

        let blist = ef_list.to_bytes();
        assert_eq!(IntegerList::from_bytes(&blist).unwrap(), ef_list);
    }
}

View File

@ -10,7 +10,9 @@
mod account;
mod block;
mod chain;
mod error;
mod header;
mod integer_list;
mod jsonu256;
mod log;
mod receipt;
@ -20,6 +22,7 @@ pub use account::Account;
pub use block::{Block, BlockLocked};
pub use chain::Chain;
pub use header::{Header, HeaderLocked};
pub use integer_list::IntegerList;
pub use jsonu256::JsonU256;
pub use log::Log;
pub use receipt::Receipt;