feat(db): add zstd and CompactZstd to Transactions and Receipts (#2483)

Co-authored-by: Georgios Konstantopoulos <me@gakonst.com>
This commit is contained in:
joshieDo
2023-05-12 18:30:15 +01:00
committed by GitHub
parent 4056b15882
commit 047f1e513c
12 changed files with 8729 additions and 46 deletions

9
Cargo.lock generated
View File

@ -5177,6 +5177,7 @@ dependencies = [
"tracing",
"triehash",
"url",
"zstd",
]
[[package]]
@ -7766,18 +7767,18 @@ dependencies = [
[[package]]
name = "zstd"
version = "0.11.2+zstd.1.5.2"
version = "0.12.3+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "5.0.2+zstd.1.5.2"
version = "6.0.5+zstd.1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db"
checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b"
dependencies = [
"libc",
"zstd-sys",

View File

@ -57,6 +57,7 @@ derive_more = "0.99"
url = "2.3"
impl-serde = "0.4.0"
once_cell = "1.17.0"
zstd = { version = "0.12", features = ["experimental"] }
# proof related
triehash = "0.8"

View File

@ -0,0 +1,28 @@
mod receipt_dictionary;
mod transaction_dictionary;
pub use receipt_dictionary::RECEIPT_DICTIONARY;
pub use transaction_dictionary::TRANSACTION_DICTIONARY;
use std::{cell::RefCell, thread_local};
use zstd::bulk::{Compressor, Decompressor};
// Reason for using static compressors is that dictionaries can be quite big, and zstd-rs
// recommends to use one context/compressor per thread. Thus the usage of `thread_local`.
thread_local! {
/// Thread Transaction compressor.
pub static TRANSACTION_COMPRESSOR: RefCell<Compressor<'static>> = RefCell::new(Compressor::with_dictionary(0, &TRANSACTION_DICTIONARY)
.expect("Failed to initialize compressor."));
/// Thread Transaction decompressor.
pub static TRANSACTION_DECOMPRESSOR: RefCell<Decompressor<'static>> = RefCell::new(Decompressor::with_dictionary(&TRANSACTION_DICTIONARY)
.expect("Failed to initialize decompressor."));
/// Thread receipt compressor.
pub static RECEIPT_COMPRESSOR: RefCell<Compressor<'static>> = RefCell::new(Compressor::with_dictionary(0, &RECEIPT_DICTIONARY)
.expect("Failed to initialize compressor."));
/// Thread receipt decompressor.
pub static RECEIPT_DECOMPRESSOR: RefCell<Decompressor<'static>> = RefCell::new(Decompressor::with_dictionary(&RECEIPT_DICTIONARY)
.expect("Failed to initialize decompressor."));
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -17,6 +17,7 @@ mod block;
pub mod bloom;
mod chain;
mod checkpoints;
mod compression;
pub mod constants;
pub mod contract;
mod forkid;
@ -50,6 +51,7 @@ pub use chain::{
MAINNET, SEPOLIA,
};
pub use checkpoints::{AccountHashingCheckpoint, MerkleCheckpoint, StorageHashingCheckpoint};
pub use compression::*;
pub use constants::{
EMPTY_OMMER_ROOT, GOERLI_GENESIS, KECCAK_EMPTY, MAINNET_GENESIS, SEPOLIA_GENESIS,
};

View File

@ -1,11 +1,15 @@
use crate::{bloom::logs_bloom, Bloom, Log, TxType};
use crate::{
bloom::logs_bloom,
compression::{RECEIPT_COMPRESSOR, RECEIPT_DECOMPRESSOR},
Bloom, Log, TxType,
};
use bytes::{Buf, BufMut, BytesMut};
use reth_codecs::{main_codec, Compact};
use reth_codecs::{main_codec, Compact, CompactZstd};
use reth_rlp::{length_of_length, Decodable, Encodable};
use std::cmp::Ordering;
/// Receipt containing result of transaction execution.
#[main_codec]
#[main_codec(zstd)]
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct Receipt {
/// Receipt type.

View File

@ -1,4 +1,7 @@
use crate::{keccak256, Address, Bytes, ChainId, TxHash, H256};
use crate::{
compression::{TRANSACTION_COMPRESSOR, TRANSACTION_DECOMPRESSOR},
keccak256, Address, Bytes, ChainId, TxHash, H256,
};
pub use access_list::{AccessList, AccessListItem, AccessListWithGasUsed};
use bytes::{Buf, BytesMut};
use derive_more::{AsRef, Deref};
@ -782,25 +785,70 @@ impl Compact for TransactionSignedNoHash {
where
B: bytes::BufMut + AsMut<[u8]>,
{
let before = buf.as_mut().len();
let start = buf.as_mut().len();
// placeholder for bitflags
// Placeholder for bitflags.
// The first byte uses 4 bits as flags: IsCompressed[1bit], TxType[2bits], Signature[1bit]
buf.put_u8(0);
let sig_bit = self.signature.to_compact(buf) as u8;
let tx_bit = self.transaction.to_compact(buf) as u8;
let zstd_bit = self.transaction.input().len() >= 32;
// replace with actual flags
buf.as_mut()[before] = sig_bit | (tx_bit << 1);
let tx_bits = if zstd_bit {
TRANSACTION_COMPRESSOR.with(|compressor| {
let mut compressor = compressor.borrow_mut();
let mut tmp = bytes::BytesMut::with_capacity(200);
let tx_bits = self.transaction.to_compact(&mut tmp);
buf.as_mut().len() - before
buf.put_slice(&compressor.compress(&tmp).expect("Failed to compress"));
tx_bits as u8
})
} else {
self.transaction.to_compact(buf) as u8
};
// Replace bitflags with the actual values
buf.as_mut()[start] = sig_bit | (tx_bits << 1) | ((zstd_bit as u8) << 3);
buf.as_mut().len() - start
}
fn from_compact(mut buf: &[u8], _: usize) -> (Self, &[u8]) {
let prefix = buf.get_u8() as usize;
fn from_compact(mut buf: &[u8], _len: usize) -> (Self, &[u8]) {
// The first byte uses 4 bits as flags: IsCompressed[1], TxType[2], Signature[1]
let bitflags = buf.get_u8() as usize;
let (signature, buf) = Signature::from_compact(buf, prefix & 1);
let (transaction, buf) = Transaction::from_compact(buf, prefix >> 1);
let sig_bit = bitflags & 1;
let (signature, buf) = Signature::from_compact(buf, sig_bit);
let zstd_bit = bitflags >> 3;
let (transaction, buf) = if zstd_bit != 0 {
TRANSACTION_DECOMPRESSOR.with(|decompressor| {
let mut decompressor = decompressor.borrow_mut();
let mut tmp: Vec<u8> = Vec::with_capacity(200);
// `decompress_to_buffer` will return an error if the output buffer doesn't have
// enough capacity. However we don't actually have information on the required
// length. So we hope for the best, and keep trying again with a fairly bigger size
// if it fails.
while let Err(err) = decompressor.decompress_to_buffer(buf, &mut tmp) {
let err = err.to_string();
if !err.contains("Destination buffer is too small") {
panic!("Failed to decompress: {}", err);
}
tmp.reserve(tmp.capacity() + 24_000);
}
// TODO: enforce that zstd is only present at a "top" level type
let transaction_type = (bitflags & 0b110) >> 1;
let (transaction, _) = Transaction::from_compact(tmp.as_slice(), transaction_type);
(transaction, buf)
})
} else {
let transaction_type = bitflags >> 1;
Transaction::from_compact(buf, transaction_type)
};
(TransactionSignedNoHash { signature, transaction }, buf)
}

View File

@ -2,7 +2,11 @@ use super::*;
/// Generates the flag fieldset struct that is going to be used to store the length of fields and
/// their potential presence.
pub(crate) fn generate_flag_struct(ident: &Ident, fields: &FieldList) -> TokenStream2 {
pub(crate) fn generate_flag_struct(
ident: &Ident,
fields: &FieldList,
is_zstd: bool,
) -> TokenStream2 {
let is_enum = fields.iter().any(|field| matches!(field, FieldTypes::EnumVariant(_)));
let flags_ident = format_ident!("{ident}Flags");
@ -27,6 +31,7 @@ pub(crate) fn generate_flag_struct(ident: &Ident, fields: &FieldList) -> TokenSt
})
.collect::<Vec<_>>(),
&mut field_flags,
is_zstd,
)
};
@ -34,7 +39,7 @@ pub(crate) fn generate_flag_struct(ident: &Ident, fields: &FieldList) -> TokenSt
return placeholder_flag_struct(&flags_ident)
}
let total_bytes = pad_flag_struct(total_bits, &mut field_flags);
let (total_bytes, unused_bits) = pad_flag_struct(total_bits, &mut field_flags);
// Provides the number of bytes used to represent the flag struct.
let readable_bytes = vec![
@ -44,6 +49,9 @@ pub(crate) fn generate_flag_struct(ident: &Ident, fields: &FieldList) -> TokenSt
total_bytes.into()
];
let docs =
format!("Fieldset that facilitates compacting the parent type. Used bytes: {total_bytes} | Unused bits: {unused_bits}");
// Generate the flag struct.
quote! {
@ -52,7 +60,7 @@ pub(crate) fn generate_flag_struct(ident: &Ident, fields: &FieldList) -> TokenSt
use bytes::Buf;
use modular_bitfield::prelude::*;
/// Fieldset that facilitates compacting the parent type.
#[doc = #docs]
#[bitfield]
#[derive(Clone, Copy, Debug, Default)]
pub struct #flags_ident {
@ -77,6 +85,7 @@ pub(crate) fn generate_flag_struct(ident: &Ident, fields: &FieldList) -> TokenSt
fn build_struct_field_flags(
fields: Vec<&StructFieldDescriptor>,
field_flags: &mut Vec<TokenStream2>,
is_zstd: bool,
) -> u8 {
let mut total_bits = 0;
@ -106,14 +115,23 @@ fn build_struct_field_flags(
}
}
}
if is_zstd {
field_flags.push(quote! {
pub __zstd: B1,
});
total_bits += 1;
}
total_bits
}
/// Total number of bits should be divisible by 8, so we might need to pad the struct with an unused
/// skipped field.
///
/// Returns the total number of bytes used by the flags struct.
fn pad_flag_struct(total_bits: u8, field_flags: &mut Vec<TokenStream2>) -> u8 {
/// Returns the total number of bytes used by the flags struct and how many unused bits.
fn pad_flag_struct(total_bits: u8, field_flags: &mut Vec<TokenStream2>) -> (u8, u8) {
let remaining = 8 - total_bits % 8;
if remaining != 8 {
let bsize = format_ident!("B{remaining}");
@ -121,9 +139,9 @@ fn pad_flag_struct(total_bits: u8, field_flags: &mut Vec<TokenStream2>) -> u8 {
#[skip]
unused: #bsize ,
});
(total_bits + remaining) / 8
((total_bits + remaining) / 8, remaining)
} else {
total_bits / 8
(total_bits / 8, 0)
}
}

View File

@ -4,11 +4,11 @@ use super::*;
use convert_case::{Case, Casing};
/// Generates code to implement the `Compact` trait for a data type.
pub fn generate_from_to(ident: &Ident, fields: &FieldList) -> TokenStream2 {
pub fn generate_from_to(ident: &Ident, fields: &FieldList, is_zstd: bool) -> TokenStream2 {
let flags = format_ident!("{ident}Flags");
let to_compact = generate_to_compact(fields, ident);
let from_compact = generate_from_compact(fields, ident);
let to_compact = generate_to_compact(fields, ident, is_zstd);
let from_compact = generate_from_compact(fields, ident, is_zstd);
let snake_case_ident = ident.to_string().to_case(Case::Snake);
@ -43,15 +43,14 @@ pub fn generate_from_to(ident: &Ident, fields: &FieldList) -> TokenStream2 {
fn from_compact(mut buf: &[u8], len: usize) -> (Self, &[u8]) {
let (flags, mut buf) = #flags::from(buf);
#(#from_compact)*
(obj, buf)
#from_compact
}
}
}
}
/// Generates code to implement the `Compact` trait method `to_compact`.
fn generate_from_compact(fields: &FieldList, ident: &Ident) -> Vec<TokenStream2> {
fn generate_from_compact(fields: &FieldList, ident: &Ident, is_zstd: bool) -> TokenStream2 {
let mut lines = vec![];
let mut known_types = vec!["H256", "H160", "Address", "Bloom", "Vec", "TxHash"];
@ -102,11 +101,48 @@ fn generate_from_compact(fields: &FieldList, ident: &Ident) -> Vec<TokenStream2>
}
}
lines
// If the type has compression support, then check the `__zstd` flag. Otherwise, use the default
// code branch. However, even if it's a type with compression support, not all values are
// to be compressed (thus the zstd flag). Ideally only the bigger ones.
is_zstd
.then(|| {
let decompressor = format_ident!("{}_DECOMPRESSOR", ident.to_string().to_uppercase());
quote! {
if flags.__zstd() != 0 {
#decompressor.with(|decompressor| {
let mut decompressor = decompressor.borrow_mut();
let mut tmp: Vec<u8> = Vec::with_capacity(300);
while let Err(err) = decompressor.decompress_to_buffer(&buf[..], &mut tmp) {
let err = err.to_string();
if !err.contains("Destination buffer is too small") {
panic!("Failed to decompress: {}", err);
}
tmp.reserve(tmp.capacity() + 10_000);
}
let mut original_buf = buf;
let mut buf: &[u8] = tmp.as_slice();
#(#lines)*
(obj, original_buf)
})
} else {
#(#lines)*
(obj, buf)
}
}
})
.unwrap_or_else(|| {
quote! {
#(#lines)*
(obj, buf)
}
})
}
/// Generates code to implement the `Compact` trait method `from_compact`.
fn generate_to_compact(fields: &FieldList, ident: &Ident) -> Vec<TokenStream2> {
fn generate_to_compact(fields: &FieldList, ident: &Ident, is_zstd: bool) -> Vec<TokenStream2> {
let mut lines = vec![quote! {
let mut buffer = bytes::BytesMut::new();
}];
@ -125,13 +161,45 @@ fn generate_to_compact(fields: &FieldList, ident: &Ident) -> Vec<TokenStream2> {
lines.append(&mut StructHandler::new(fields).generate_to());
}
// Just because a type supports compression, doesn't mean all its values are to be compressed.
// We skip the smaller ones, and thus require a flag `__zstd` to specify if this value is
// compressed or not.
if is_zstd {
lines.push(quote! {
let mut zstd = buffer.len() > 7;
if zstd {
flags.set___zstd(1);
}
});
}
// Places the flag bits.
lines.push(quote! {
let flags = flags.into_bytes();
total_len += flags.len() + buffer.len();
buf.put_slice(&flags);
buf.put(buffer);
});
if is_zstd {
let compressor = format_ident!("{}_COMPRESSOR", ident.to_string().to_uppercase());
lines.push(quote! {
if zstd {
#compressor.with(|compressor| {
let mut compressor = compressor.borrow_mut();
let compressed = compressor.compress(&buffer).expect("Failed to compress.");
buf.put(compressed.as_slice());
});
} else {
buf.put(buffer);
}
});
} else {
lines.push(quote! {
buf.put(buffer);
})
}
lines
}

View File

@ -41,13 +41,13 @@ pub enum FieldTypes {
}
/// Derives the `Compact` trait and its from/to implementations.
pub fn derive(input: TokenStream) -> TokenStream {
pub fn derive(input: TokenStream, is_zstd: bool) -> TokenStream {
let mut output = quote! {};
let DeriveInput { ident, data, .. } = parse_macro_input!(input);
let fields = get_fields(&data);
output.extend(generate_flag_struct(&ident, &fields));
output.extend(generate_from_to(&ident, &fields));
output.extend(generate_flag_struct(&ident, &fields, is_zstd));
output.extend(generate_from_to(&ident, &fields, is_zstd));
output.into()
}
@ -201,8 +201,8 @@ mod tests {
let mut output = quote! {};
let DeriveInput { ident, data, .. } = parse2(f_struct).unwrap();
let fields = get_fields(&data);
output.extend(generate_flag_struct(&ident, &fields));
output.extend(generate_from_to(&ident, &fields));
output.extend(generate_flag_struct(&ident, &fields, false));
output.extend(generate_from_to(&ident, &fields, false));
// Expected output in a TokenStream format. Commas matter!
let should_output = quote! {
@ -211,7 +211,7 @@ mod tests {
use bytes::Buf;
use modular_bitfield::prelude::*;
#[doc=r" Fieldset that facilitates compacting the parent type."]
#[doc="Fieldset that facilitates compacting the parent type. Used bytes: 2 | Unused bits: 1"]
#[bitfield]
#[derive(Clone, Copy, Debug, Default)]
pub struct TestStructFlags {

View File

@ -7,7 +7,14 @@ mod compact;
#[proc_macro_derive(Compact, attributes(maybe_zero))]
pub fn derive(input: TokenStream) -> TokenStream {
compact::derive(input)
let is_zstd = false;
compact::derive(input, is_zstd)
}
#[proc_macro_derive(CompactZstd, attributes(maybe_zero))]
pub fn derive_zstd(input: TokenStream) -> TokenStream {
let is_zstd = true;
compact::derive(input, is_zstd)
}
/// Implements the main codec. If the codec supports it, it will call `derive_arbitrary(..)`.
@ -78,11 +85,22 @@ pub fn use_postcard(_args: TokenStream, input: TokenStream) -> TokenStream {
#[proc_macro_attribute]
pub fn use_compact(args: TokenStream, input: TokenStream) -> TokenStream {
let ast = parse_macro_input!(input as DeriveInput);
let compact = quote! {
#[derive(Compact, serde::Serialize, serde::Deserialize)]
#ast
}
.into();
let with_zstd = args.clone().into_iter().any(|tk| tk.to_string() == "zstd");
let compact = if with_zstd {
quote! {
#[derive(CompactZstd, serde::Serialize, serde::Deserialize)]
#ast
}
.into()
} else {
quote! {
#[derive(Compact, serde::Serialize, serde::Deserialize)]
#ast
}
.into()
};
if let Some(first_arg) = args.clone().into_iter().next() {
if first_arg.to_string() == "no_arbitrary" {