feat(trie): trie nodes (#2174)

This commit is contained in:
Roman Krasiuk
2023-04-10 19:18:19 +03:00
committed by GitHub
parent bbdeda3246
commit a6366de1cd
7 changed files with 254 additions and 0 deletions

5
Cargo.lock generated
View File

@ -5310,6 +5310,11 @@ dependencies = [
[[package]]
name = "reth-trie"
version = "0.1.0"
dependencies = [
"hex",
"reth-primitives",
"reth-rlp",
]
[[package]]
name = "revm"

View File

@ -10,3 +10,13 @@ Merkle trie implementation
"""
[dependencies]
# reth
reth-primitives = { path = "../primitives" }
reth-rlp = { path = "../rlp" }
# misc
hex = "0.4"
[dev-dependencies]
# reth
reth-primitives = { path = "../primitives", features = ["test-utils"] }

View File

@ -8,3 +8,6 @@
//! The implementation of Merkle Patricia Trie, a cryptographically
//! authenticated radix trie that is used to store key-value bindings.
//! <https://ethereum.org/en/developers/docs/data-structures-and-encoding/patricia-merkle-trie/>
/// Various branch nodes produced by the hash builder.
pub mod nodes;

View File

@ -0,0 +1,73 @@
use super::{matches_mask, rlp_node};
use reth_primitives::{bytes::BytesMut, H256};
use reth_rlp::{BufMut, EMPTY_STRING_CODE};
/// A Branch node is only a pointer to the stack of nodes and is used to
/// create the RLP encoding of the node using masks which filter from
/// the stack of nodes.
///
/// The children of the branch are the trailing `state_mask.count_ones()`
/// entries of the stack, laid out in ascending nibble order (one entry per
/// bit set in the state mask).
#[derive(Clone, Debug)]
pub struct BranchNode<'a> {
/// RLP encoded nodes. Only the tail of this slice is read; which entries
/// belong to the branch is decided by the state mask passed to the methods.
pub stack: &'a [Vec<u8>],
}
impl<'a> BranchNode<'a> {
/// Create a new branch node from the stack of nodes.
pub fn new(stack: &'a [Vec<u8>]) -> Self {
Self { stack }
}
/// Given the hash and state mask of children present, return an iterator over the stack items
/// that match the mask.
pub fn children(&self, state_mask: u16, hash_mask: u16) -> impl Iterator<Item = H256> + '_ {
let mut index = self.stack.len() - state_mask.count_ones() as usize;
(0..16).filter_map(move |digit| {
let mut child = None;
if matches_mask(state_mask, digit) {
if matches_mask(hash_mask, digit) {
child = Some(&self.stack[index]);
}
index += 1;
}
child.map(|child| H256::from_slice(&child[1..]))
})
}
/// Returns the RLP encoding of the branch node given the state mask of children present.
pub fn rlp(&self, state_mask: u16) -> Vec<u8> {
let first_child_idx = self.stack.len() - state_mask.count_ones() as usize;
let mut buf = BytesMut::new();
// Create the RLP header from the mask elements present.
let mut i = first_child_idx;
let header = (0..16).fold(
reth_rlp::Header { list: true, payload_length: 1 },
|mut header, digit| {
if matches_mask(state_mask, digit) {
header.payload_length += self.stack[i].len();
i += 1;
} else {
header.payload_length += 1;
}
header
},
);
header.encode(&mut buf);
// Extend the RLP buffer with the present children
let mut i = first_child_idx;
(0..16).for_each(|idx| {
if matches_mask(state_mask, idx) {
buf.extend_from_slice(&self.stack[i]);
i += 1;
} else {
buf.put_u8(EMPTY_STRING_CODE)
}
});
// Is this needed?
buf.put_u8(EMPTY_STRING_CODE);
rlp_node(&buf)
}
}

View File

@ -0,0 +1,54 @@
use super::rlp_node;
use reth_primitives::{bytes::BytesMut, trie::Nibbles};
use reth_rlp::{BufMut, Encodable};
/// An intermediate node that exists solely to compress the trie's paths. It contains a path segment
/// (a shared prefix of keys) and a single child pointer. Essentially, an extension node can be
/// thought of as a shortcut within the trie to reduce its overall depth.
///
/// The purpose of an extension node is to optimize the trie structure by collapsing multiple nodes
/// with a single child into one node. This simplification reduces the space and computational
/// complexity when performing operations on the trie.
pub struct ExtensionNode<'a> {
/// A common prefix for keys, stored in the byte form returned by
/// `Nibbles::encode_path_leaf(false)`.
pub prefix: Vec<u8>,
/// A pointer to the child. These bytes are written verbatim when the node is encoded, so
/// they are expected to be RLP encoded already.
pub node: &'a [u8],
}
impl<'a> ExtensionNode<'a> {
/// Creates a new extension node with the given prefix and child.
pub fn new(prefix: &Nibbles, node: &'a [u8]) -> Self {
Self { prefix: prefix.encode_path_leaf(false), node }
}
/// RLP encodes the node and returns either RLP(Node) or RLP(keccak(RLP(node))).
pub fn rlp(&self) -> Vec<u8> {
let mut buf = BytesMut::new();
self.encode(&mut buf);
rlp_node(&buf)
}
}
impl Encodable for ExtensionNode<'_> {
    /// Writes the node as an RLP list of two items: the encoded prefix and the child.
    fn encode(&self, out: &mut dyn BufMut) {
        // Slices have different RLP encoding from Vectors so we need to `as_slice()`.
        let path = self.prefix.as_slice();

        // The child contributes its raw length because it is copied without re-encoding.
        let payload_length = path.length() + self.node.len();
        reth_rlp::Header { list: true, payload_length }.encode(out);

        path.encode(out);
        // The nodes are already RLP encoded.
        out.put_slice(self.node);
    }
}
impl std::fmt::Debug for ExtensionNode<'_> {
    /// Formats the node with both byte fields rendered as hex strings.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let prefix_hex = hex::encode(&self.prefix);
        let node_hex = hex::encode(self.node);

        let mut out = f.debug_struct("ExtensionNode");
        out.field("prefix", &prefix_hex);
        out.field("node", &node_hex);
        out.finish()
    }
}

View File

@ -0,0 +1,80 @@
use super::rlp_node;
use reth_primitives::{bytes::BytesMut, trie::Nibbles};
use reth_rlp::{BufMut, Encodable};
/// A leaf node represents the endpoint or terminal node in the trie. In other words, a leaf node is
/// where actual values are stored.
///
/// A leaf node consists of two parts: the key (or path) and the value. The key is typically the
/// remaining portion of the key after following the path through the trie, and the value is the
/// data associated with the full key. When searching the trie for a specific key, reaching a leaf
/// node means that the search has successfully found the value associated with that key.
#[derive(Default)]
pub struct LeafNode<'a> {
/// The key path, stored in the byte form returned by `Nibbles::encode_path_leaf(true)`.
pub key: Vec<u8>,
/// The value stored at this leaf, borrowed from the caller.
///
/// NOTE(review): an earlier note on this field read `value: SmallVec<[u8; 36]>`, presumably
/// a candidate owned representation — confirm whether that is still planned.
pub value: &'a [u8],
}
impl<'a> LeafNode<'a> {
/// Creates a new leaf node with the given key and value.
pub fn new(key: &Nibbles, value: &'a [u8]) -> Self {
Self { key: key.encode_path_leaf(true), value }
}
/// RLP encodes the node and returns either RLP(Node) or RLP(keccak(RLP(node)))
/// depending on if the serialized node was longer than a keccak).
pub fn rlp(&self) -> Vec<u8> {
let mut out = BytesMut::new();
self.encode(&mut out);
rlp_node(&out)
}
}
// Hand-rolled because `key` must be encoded as a slice rather than as a `Vec`.
impl Encodable for LeafNode<'_> {
    /// Writes the node as an RLP list of two byte strings: the encoded path and the value.
    fn encode(&self, out: &mut dyn BufMut) {
        // Borrowed mirror of the node; the derive produces the list encoding of its fields
        // in declaration order.
        #[derive(reth_rlp::RlpEncodable)]
        struct LeafRlp<'a> {
            encoded_path: &'a [u8],
            value: &'a [u8],
        }

        let fields = LeafRlp { encoded_path: self.key.as_slice(), value: self.value };
        fields.encode(out);
    }
}
impl std::fmt::Debug for LeafNode<'_> {
    /// Formats the node with both byte fields rendered as hex strings.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let key_hex = hex::encode(&self.key);
        let value_hex = hex::encode(self.value);

        let mut out = f.debug_struct("LeafNode");
        out.field("key", &key_hex);
        out.field("value", &value_hex);
        out.finish()
    }
}
#[cfg(test)]
mod tests {
use super::*;
use reth_primitives::hex_literal::hex;
// From manual regression test
//
// Four nibbles (6, 4, 6, f) with the leaf flag set are expected to pack into a `0x20`
// prefix byte followed by the nibbles paired into bytes `0x64 0x6f`.
#[test]
fn encode_leaf_node_nibble() {
let nibble = Nibbles { hex_data: hex!("0604060f").to_vec() };
let encoded = nibble.encode_path_leaf(true);
let expected = hex!("20646f").to_vec();
assert_eq!(encoded, expected);
}
// Checks the full RLP of a small leaf. Expected bytes, left to right:
//   c9          list header, 9 byte payload
//   83 20646f   3 byte string: the encoded path from the test above
//   84 76657262 4 byte string: the value
// The encoding is 10 bytes long, so `rlp_node` returns it as is instead of hashing it.
// Note that only encoding is exercised here; nothing is decoded back.
#[test]
fn rlp_leaf_node_roundtrip() {
let nibble = Nibbles { hex_data: hex!("0604060f").to_vec() };
let val = hex!("76657262").to_vec();
let leaf = LeafNode::new(&nibble, &val);
let rlp = leaf.rlp();
let expected = hex!("c98320646f8476657262").to_vec();
assert_eq!(rlp, expected);
}
}

View File

@ -0,0 +1,29 @@
use reth_primitives::{keccak256, H256};
use reth_rlp::EMPTY_STRING_CODE;
mod branch;
pub use branch::BranchNode;
mod extension;
pub use extension::ExtensionNode;
mod leaf;
pub use leaf::LeafNode;
/// Given an RLP encoded node, returns either RLP(Node) or RLP(keccak(RLP(node))).
///
/// Encodings shorter than `H256::len_bytes()` are returned unchanged; anything else is
/// replaced by the RLP encoding of its keccak hash (see `rlp_hash`).
fn rlp_node(rlp: &[u8]) -> Vec<u8> {
    if rlp.len() >= H256::len_bytes() {
        return rlp_hash(keccak256(rlp));
    }
    rlp.to_vec()
}
/// Optimization for quick encoding of a hash as RLP.
///
/// The output is a single prefix byte, `EMPTY_STRING_CODE` plus the hash length, followed by
/// the raw hash bytes.
pub fn rlp_hash(hash: H256) -> Vec<u8> {
    let hash_len = H256::len_bytes();
    let mut encoded = Vec::with_capacity(1 + hash_len);
    encoded.push(EMPTY_STRING_CODE + hash_len as u8);
    encoded.extend_from_slice(hash.0.as_slice());
    encoded
}
/// Returns `true` if bit number `idx` (0 = least significant) is set in `mask`.
///
/// `idx` values outside `0..16` never match. Shifting a `u16` by such an amount would
/// otherwise panic in builds with overflow checks and silently wrap the shift amount in
/// builds without them, so the range is checked before shifting.
fn matches_mask(mask: u16, idx: i32) -> bool {
    (0..16).contains(&idx) && mask & (1u16 << idx) != 0
}