Final tidyups to the trie benchmarking

This commit is contained in:
Gav 2018-09-23 14:26:36 +01:00
parent c33f024c80
commit 87fb2de0f3
6 changed files with 208 additions and 56 deletions

View File

@ -89,7 +89,7 @@ fn bench_contents(b: &mut Criterion, name: &str, d: Vec<(Vec<u8>, Vec<u8>)>) {
{
let mut rlp_t = RlpTrieDBMut::new(&mut rlp_memdb, &mut rlp_root);
let mut codec_t = TrieDBMut::new(&mut codec_memdb, &mut codec_root);
let mut alt_t = TrieDBMut::new(&mut alt_memdb, &mut alt_root);
let mut alt_t = AltTrieDBMut::new(&mut alt_memdb, &mut alt_root);
for i in d.iter() {
rlp_t.insert(&i.0, &i.1).unwrap();
codec_t.insert(&i.0, &i.1).unwrap();
@ -98,13 +98,14 @@ fn bench_contents(b: &mut Criterion, name: &str, d: Vec<(Vec<u8>, Vec<u8>)>) {
}
let funs = vec![
Fun::new("Rlp", |b, d: &TrieInsertionList| b.iter(&mut ||{
let mut memdb = MemoryDB::<KeccakHasher, DBValue>::new();
let mut root = H256::default();
let mut t = RlpTrieDBMut::new(&mut memdb, &mut root);
for i in d.0.iter() {
t.insert(&i.0, &i.1).unwrap();
}
Fun::new("ClosedCodec", |b, d: &TrieInsertionList| b.iter(&mut ||{
trie_root::<KeccakHasher, CodecTrieStream, _, _, _>(d.0.clone())
})),
Fun::new("ClosedAlt", |b, d: &TrieInsertionList| b.iter(&mut ||{
trie_root::<KeccakHasher, CodecTrieStreamAlt, _, _, _>(d.0.clone())
})),
Fun::new("ClosedRlp", |b, d: &TrieInsertionList| b.iter(&mut ||{
trie_root::<KeccakHasher, RlpTrieStream, _, _, _>(d.0.clone())
})),
Fun::new("Codec", |b, d: &TrieInsertionList| b.iter(&mut ||{
let mut memdb = MemoryDB::<KeccakHasher, DBValue>::new_codec();
@ -122,10 +123,12 @@ fn bench_contents(b: &mut Criterion, name: &str, d: Vec<(Vec<u8>, Vec<u8>)>) {
t.insert(&i.0, &i.1).unwrap();
}
})),
Fun::new("IterRlp", move |b, _d| b.iter(&mut ||{
let t = RlpTrieDB::new(&rlp_memdb, &rlp_root).unwrap();
for n in t.iter().unwrap() {
black_box(n).unwrap();
Fun::new("Rlp", |b, d: &TrieInsertionList| b.iter(&mut ||{
let mut memdb = MemoryDB::<KeccakHasher, DBValue>::new();
let mut root = H256::default();
let mut t = RlpTrieDBMut::new(&mut memdb, &mut root);
for i in d.0.iter() {
t.insert(&i.0, &i.1).unwrap();
}
})),
Fun::new("IterCodec", move |b, _d| b.iter(&mut ||{
@ -135,19 +138,16 @@ fn bench_contents(b: &mut Criterion, name: &str, d: Vec<(Vec<u8>, Vec<u8>)>) {
}
})),
Fun::new("IterAlt", move |b, _d| b.iter(&mut ||{
let t = AltTrieDB::new(&alt_memdb, &codec_root).unwrap();
let t = AltTrieDB::new(&alt_memdb, &alt_root).unwrap();
for n in t.iter().unwrap() {
black_box(n).unwrap();
}
})),
Fun::new("ClosedRlp", |b, d: &TrieInsertionList| b.iter(&mut ||{
trie_root::<KeccakHasher, RlpTrieStream, _, _, _>(d.0.clone())
})),
Fun::new("ClosedCodec", |b, d: &TrieInsertionList| b.iter(&mut ||{
trie_root::<KeccakHasher, CodecTrieStream, _, _, _>(d.0.clone())
})),
Fun::new("ClosedAlt", |b, d: &TrieInsertionList| b.iter(&mut ||{
trie_root::<KeccakHasher, CodecTrieStreamAlt, _, _, _>(d.0.clone())
Fun::new("IterRlp", move |b, _d| b.iter(&mut ||{
let t = RlpTrieDB::new(&rlp_memdb, &rlp_root).unwrap();
for n in t.iter().unwrap() {
black_box(n).unwrap();
}
}))
];

View File

@ -29,6 +29,10 @@ pub const EXTENSION_NODE_OFFSET: u8 = 128;
pub const EXTENSION_NODE_BIG: u8 = 253;
pub const BRANCH_NODE_NO_VALUE: u8 = 254;
pub const BRANCH_NODE_WITH_VALUE: u8 = 255;
pub const LEAF_NODE_THRESHOLD: u8 = LEAF_NODE_BIG - LEAF_NODE_OFFSET;
pub const EXTENSION_NODE_THRESHOLD: u8 = EXTENSION_NODE_BIG - EXTENSION_NODE_OFFSET; //125
pub const LEAF_NODE_SMALL_MAX: u8 = LEAF_NODE_BIG - 1;
pub const EXTENSION_NODE_SMALL_MAX: u8 = EXTENSION_NODE_BIG - 1;
impl CodecTrieStream {
// useful for debugging but not used otherwise

View File

@ -105,13 +105,14 @@ impl TrieStream for CodecTrieStreamAlt {
match data.len() {
0...31 => {
// println!("[append_substream] appending data, because data.len() = {}", data.len());
data.encode_to(&mut self.buffer)
self.buffer.extend_from_slice(&data[..]);
},
_ => {
// println!("[append_substream] would have hashed, because data.len() = {}", data.len());
// data.encode_to(&mut self.buffer)
// TODO: re-enable hashing before merging
H::hash(&data).as_ref().encode_to(&mut self.buffer)
self.buffer.push(EMPTY_TRIE);
self.buffer.extend_from_slice(H::hash(&data).as_ref());
}
}
}

View File

@ -26,6 +26,8 @@ extern crate patricia_trie;
extern crate memorydb;
#[cfg(test)]
extern crate keccak_hasher;
#[cfg(test)]
extern crate trie_standardmap;
mod codec_error;
mod node_header;
@ -34,6 +36,7 @@ mod parity_node_codec_alt;
mod codec_triestream;
mod codec_triestream_alt;
use codec::{Decode, Compact};
pub use codec_error::CodecError;
pub use parity_node_codec::ParityNodeCodec;
pub use parity_node_codec_alt::ParityNodeCodecAlt;
@ -61,6 +64,66 @@ fn partial_to_key(partial: &[u8], offset: u8, big: u8) -> Vec<u8> {
output
}
/// Number of bytes a `Compact<u32>` length prefix occupies for the value `n`.
///
/// Mirrors the parity-codec compact encoding: values up to 2^6 - 1 take one
/// byte, values up to 2^14 - 1 take two bytes, larger values take four.
/// NOTE(review): values above 2^30 - 1 use the big-integer mode (5 bytes for
/// a u32); callers pass node/value lengths assumed to stay below that — confirm.
fn compact_len(n: usize) -> usize {
	match n {
		0..=0b0011_1111 => 1,
		// Start the second arm where the first ends; the original began it at
		// 0 again, shadowing it for 0..=63 and tripping the overlap lint.
		0b0100_0000..=0b0011_1111_1111_1111 => 2,
		_ => 4,
	}
}
/// Returns the encoded size of the node that `data` begins with and whether it
/// is a hash reference, or `None` if no well-formed node starts there.
///
/// The returned `bool` is `true` only for a hash reference, i.e. an
/// `EMPTY_TRIE` marker byte followed by `hash_len` bytes of hash.
fn node_len(data: &[u8], hash_len: usize) -> Option<(usize, bool)> {
	use codec_triestream::{EMPTY_TRIE, LEAF_NODE_OFFSET, LEAF_NODE_BIG, EXTENSION_NODE_OFFSET,
		EXTENSION_NODE_BIG, BRANCH_NODE_NO_VALUE, BRANCH_NODE_WITH_VALUE,
		LEAF_NODE_SMALL_MAX, EXTENSION_NODE_SMALL_MAX};
	if data.is_empty() {
		return None
	}
	// Truncated input is reported as `None` via checked `get` rather than
	// panicking on out-of-range indexing/slicing.
	Some((match data[0] {
		// Hash reference: one marker byte followed by the hash itself.
		EMPTY_TRIE => return Some((1 + hash_len, true)),
		// Small leaf: nibble count is packed into the header byte.
		i @ LEAF_NODE_OFFSET..=LEAF_NODE_SMALL_MAX => {
			let nibbles_len = (((i - LEAF_NODE_OFFSET) + 1) / 2) as usize;
			let value_len = <Compact<u32>>::decode(&mut data.get(1 + nibbles_len..)?)?.0 as usize;
			1 + nibbles_len + compact_len(value_len) + value_len
		}
		// Big leaf: the second byte extends the nibble count.
		i @ LEAF_NODE_BIG => {
			let nibbles_len = ((((i - LEAF_NODE_OFFSET) as usize + *data.get(1)? as usize) + 1) / 2) as usize;
			let value_len = <Compact<u32>>::decode(&mut data.get(2 + nibbles_len..)?)?.0 as usize;
			2 + nibbles_len + compact_len(value_len) + value_len
		}
		// Small extension: nibbles followed by a single child node.
		i @ EXTENSION_NODE_OFFSET..=EXTENSION_NODE_SMALL_MAX => {
			let nibbles_len = (((i - EXTENSION_NODE_OFFSET) + 1) / 2) as usize;
			1 + nibbles_len + node_len(data.get(1 + nibbles_len..)?, hash_len)?.0
		}
		// Big extension: the second byte extends the nibble count.
		i @ EXTENSION_NODE_BIG => {
			let nibbles_len = ((((i - EXTENSION_NODE_OFFSET) as usize + *data.get(1)? as usize) + 1) / 2) as usize;
			2 + nibbles_len + node_len(data.get(2 + nibbles_len..)?, hash_len)?.0
		}
		// Branch: header byte, two-byte child bitmap, optional value, then one
		// encoded node per set bitmap bit.
		x @ BRANCH_NODE_NO_VALUE | x @ BRANCH_NODE_WITH_VALUE => {
			let child_count = data.get(1)?.count_ones() + data.get(2)?.count_ones();
			let mut offset = 3;
			if x == BRANCH_NODE_WITH_VALUE {
				let value_len = <Compact<u32>>::decode(&mut data.get(3..)?)?.0 as usize;
				offset += compact_len(value_len) + value_len;
			}
			for _ in 0..child_count {
				offset += node_len(data.get(offset..)?, hash_len)?.0;
			}
			offset
		}
		_ => unreachable!(),
	}, false))
}
#[cfg(test)]
mod tests {
use super::*;
@ -68,10 +131,11 @@ mod tests {
use triehash::{unhashed_trie, trie_root};
use keccak_hasher::KeccakHasher;
use memorydb::MemoryDB;
use patricia_trie::{Hasher, DBValue, TrieMut, TrieDBMut};
use patricia_trie::{Hasher, DBValue, TrieMut, Trie, TrieDB, TrieDBMut};
use trie_standardmap::{Alphabet, ValueMode, StandardMap};
fn check_equivalent(input: Vec<(&[u8], &[u8])>) {
{
fn check_equivalent(input: &Vec<(&[u8], &[u8])>) {
/* {
let closed_form = trie_root::<KeccakHasher, CodecTrieStream, _, _, _>(input.clone());
let d = unhashed_trie::<KeccakHasher, CodecTrieStream, _, _, _>(input.clone());
println!("Data: {:#x?}, {:#x?}", d, KeccakHasher::hash(&d[..]));
@ -79,13 +143,13 @@ mod tests {
let mut memdb = MemoryDB::<KeccakHasher, DBValue>::from_null_node(&[0u8][..], [0u8][..].into());
let mut root = <KeccakHasher as Hasher>::Out::default();
let mut t = TrieDBMut::<KeccakHasher, ParityNodeCodec<KeccakHasher>>::new(&mut memdb, &mut root);
for (x, y) in input.clone() {
for (x, y) in input {
t.insert(x, y).unwrap();
}
t.root().clone()
};
assert_eq!(closed_form, persistent);
}
}*/
{
let closed_form = trie_root::<KeccakHasher, CodecTrieStreamAlt, _, _, _>(input.clone());
let d = unhashed_trie::<KeccakHasher, CodecTrieStreamAlt, _, _, _>(input.clone());
@ -94,7 +158,7 @@ mod tests {
let mut memdb = MemoryDB::<KeccakHasher, DBValue>::from_null_node(&[0u8][..], [0u8][..].into());
let mut root = <KeccakHasher as Hasher>::Out::default();
let mut t = TrieDBMut::<KeccakHasher, ParityNodeCodecAlt<KeccakHasher>>::new(&mut memdb, &mut root);
for (x, y) in input {
for (x, y) in input.iter().rev() {
t.insert(x, y).unwrap();
}
t.root().clone()
@ -103,34 +167,108 @@ mod tests {
}
}
/// Builds a trie from `input` using the alt codec and asserts that iterating
/// it back yields exactly the input pairs, in order.
fn check_iteration(input: &Vec<(&[u8], &[u8])>) {
	let mut db = MemoryDB::<KeccakHasher, DBValue>::from_null_node(&[0u8][..], [0u8][..].into());
	let mut root = <KeccakHasher as Hasher>::Out::default();
	{
		// Populate inside a scope: the mutable trie must be dropped (committing
		// the root) before the read-only view is opened.
		let mut trie = TrieDBMut::<KeccakHasher, ParityNodeCodecAlt<KeccakHasher>>::new(&mut db, &mut root);
		for &(key, value) in input.iter() {
			trie.insert(key, value).unwrap();
		}
	}
	{
		let trie = TrieDB::<KeccakHasher, ParityNodeCodecAlt<KeccakHasher>>::new(&mut db, &root).unwrap();
		let expected: Vec<_> = input.iter().map(|&(k, v)| (k.to_vec(), v.to_vec())).collect();
		let actual: Vec<_> = trie.iter().unwrap().map(|item| {
			let (k, v) = item.unwrap();
			(k, v.to_vec())
		}).collect();
		assert_eq!(expected, actual);
	}
}
#[test]
fn empty_is_equivalent() {
	// The empty input must round-trip through both the closed-form and the
	// persistent trie implementations.
	let input: Vec<(&[u8], &[u8])> = vec![];
	// The stale by-value call `check_equivalent(input)` is dropped: the helper
	// now takes its input by reference, and the old call moved `input` before
	// the later uses.
	check_equivalent(&input);
	check_iteration(&input);
}
#[test]
fn leaf_is_equivalent() {
	// A single pair produces a lone leaf node.
	let input: Vec<(&[u8], &[u8])> = vec![(&[0xaa][..], &[0xbb][..])];
	// Dropped the stale by-value `check_equivalent(input)` call — the helper
	// takes a reference, and the old call moved `input` before its later uses.
	check_equivalent(&input);
	check_iteration(&input);
}
#[test]
fn branch_is_equivalent() {
	// Keys diverging in the first nibble produce a branch node.
	let input: Vec<(&[u8], &[u8])> = vec![(&[0xaa][..], &[0x10][..]), (&[0xba][..], &[0x11][..])];
	// Dropped the stale by-value `check_equivalent(input)` call — the helper
	// takes a reference, and the old call moved `input` before its later uses.
	check_equivalent(&input);
	check_iteration(&input);
}
#[test]
fn extension_and_branch_is_equivalent() {
	// Keys sharing the first nibble produce an extension into a branch.
	let input: Vec<(&[u8], &[u8])> = vec![(&[0xaa][..], &[0x10][..]), (&[0xab][..], &[0x11][..])];
	// Dropped the stale by-value `check_equivalent(input)` call — the helper
	// takes a reference, and the old call moved `input` before its later uses.
	check_equivalent(&input);
	check_iteration(&input);
}
#[test]
fn standard_is_equivalent() {
	// A larger generated key/value set, sorted so trie iteration order
	// matches the input order.
	let map = StandardMap {
		alphabet: Alphabet::All,
		min_key: 32,
		journal_key: 0,
		value_mode: ValueMode::Random,
		count: 1000,
	};
	let mut pairs = map.make();
	pairs.sort_unstable_by(|a, b| a.0.cmp(&b.0));
	let input: Vec<(&[u8], &[u8])> = pairs.iter().map(|&(ref k, ref v)| (&k[..], &v[..])).collect();
	check_equivalent(&input);
	check_iteration(&input);
}
#[test]
fn extension_and_branch_with_value_is_equivalent() {
	// Shared prefix 0xaa yields an extension into a branch that also carries
	// a value of its own (the entry for the one-byte key).
	let pairs: Vec<(&[u8], &[u8])> = vec![
		(&[0xaa][..], &[0xa0][..]),
		(&[0xaa, 0xaa][..], &[0xaa][..]),
		(&[0xaa, 0xbb][..], &[0xab][..]),
	];
	check_equivalent(&pairs);
	check_iteration(&pairs);
}
#[test]
fn bigger_extension_and_branch_with_value_is_equivalent() {
	// Two prefix groups (0xaa… and 0xbb…), each an extension into a
	// value-carrying branch, joined under a top-level branch.
	let pairs: Vec<(&[u8], &[u8])> = vec![
		(&[0xaa][..], &[0xa0][..]),
		(&[0xaa, 0xaa][..], &[0xaa][..]),
		(&[0xaa, 0xbb][..], &[0xab][..]),
		(&[0xbb][..], &[0xb0][..]),
		(&[0xbb, 0xbb][..], &[0xbb][..]),
		(&[0xbb, 0xcc][..], &[0xbc][..]),
	];
	check_equivalent(&pairs);
	check_iteration(&pairs);
}
#[test]
fn single_long_leaf_is_equivalent() {
	// One value long enough that it cannot be inlined into its parent node
	// (presumably forcing the hashed path — see `append_substream`'s 0..=31
	// inline threshold).
	let input: Vec<(&[u8], &[u8])> = vec![(&[0xaa][..], &b"ABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABC"[..]), (&[0xba][..], &[0x11][..])];
	// Dropped the stale by-value `check_equivalent(input)` call — the helper
	// takes a reference, and the old call moved `input` before its later uses.
	check_equivalent(&input);
	check_iteration(&input);
}
#[test]
fn two_long_leaves_is_equivalent() {
	// Two identical long values; hoisting the literal avoids repeating the
	// 72-byte string while keeping the encoded trie byte-for-byte the same.
	let long_value: &[u8] = &b"ABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABCABC"[..];
	let pairs: Vec<(&[u8], &[u8])> = vec![
		(&[0xaa][..], long_value),
		(&[0xba][..], long_value),
	];
	check_equivalent(&pairs);
	check_iteration(&pairs);
}
fn to_compact(n: u8) -> u8 {

View File

@ -18,7 +18,8 @@
use codec::{Encode, Decode, Input, Output};
use codec_triestream::{EMPTY_TRIE, LEAF_NODE_OFFSET, LEAF_NODE_BIG, EXTENSION_NODE_OFFSET,
EXTENSION_NODE_BIG, BRANCH_NODE_NO_VALUE, BRANCH_NODE_WITH_VALUE};
EXTENSION_NODE_BIG, BRANCH_NODE_NO_VALUE, BRANCH_NODE_WITH_VALUE, LEAF_NODE_THRESHOLD,
EXTENSION_NODE_THRESHOLD, LEAF_NODE_SMALL_MAX, EXTENSION_NODE_SMALL_MAX};
/// A node header.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
@ -29,11 +30,6 @@ pub enum NodeHeader {
Leaf(usize),
}
const LEAF_NODE_THRESHOLD: u8 = LEAF_NODE_BIG - LEAF_NODE_OFFSET;
const EXTENSION_NODE_THRESHOLD: u8 = EXTENSION_NODE_BIG - EXTENSION_NODE_OFFSET; //125
const LEAF_NODE_SMALL_MAX: u8 = LEAF_NODE_BIG - 1;
const EXTENSION_NODE_SMALL_MAX: u8 = EXTENSION_NODE_BIG - 1;
impl Encode for NodeHeader {
fn encode_to<T: Output>(&self, output: &mut T) {
match self {

View File

@ -22,7 +22,7 @@ use codec::{Encode, Decode, Compact};
use codec_error::CodecError;
use codec_triestream::{EMPTY_TRIE, LEAF_NODE_OFFSET, LEAF_NODE_BIG, EXTENSION_NODE_OFFSET,
EXTENSION_NODE_BIG, branch_node};
use super::{take, partial_to_key, node_header::NodeHeader};
use super::{take, partial_to_key, node_len, node_header::NodeHeader};
/// Concrete implementation of a `NodeCodec` with Parity Codec encoding, generic over the `Hasher`
#[derive(Default, Clone)]
@ -40,10 +40,12 @@ impl<H: Hasher> NodeCodec<H> for ParityNodeCodecAlt<H> {
}
fn decode(data: &[u8]) -> ::std::result::Result<Node, Self::Error> {
//println!("decoding... {:#x?}", data);
let input = &mut &*data;
match NodeHeader::decode(input).ok_or(CodecError::BadFormat)? {
let r = match NodeHeader::decode(input).ok_or(CodecError::BadFormat)? {
NodeHeader::Null => Ok(Node::Empty),
NodeHeader::Branch(has_value) => {
// //println!("decode: branch({})", has_value);
let bitmap = u16::decode(input).ok_or(CodecError::BadFormat)?;
let value = if has_value {
let count = <Compact<u32>>::decode(input).ok_or(CodecError::BadFormat)?.0 as usize;
@ -55,7 +57,7 @@ impl<H: Hasher> NodeCodec<H> for ParityNodeCodecAlt<H> {
let mut pot_cursor = 1;
for i in 0..16 {
if bitmap & pot_cursor != 0 {
let count = <Compact<u32>>::decode(input).ok_or(CodecError::BadFormat)?.0 as usize;
let count = node_len(*input, H::LENGTH).ok_or(CodecError::BadFormat)?.0;
children[i] = Some(take(input, count).ok_or(CodecError::BadFormat)?);
}
pot_cursor <<= 1;
@ -63,28 +65,36 @@ impl<H: Hasher> NodeCodec<H> for ParityNodeCodecAlt<H> {
Ok(Node::Branch(children, value))
}
NodeHeader::Extension(nibble_count) => {
// //println!("decode: ext({})", nibble_count);
let nibble_data = take(input, (nibble_count + 1) / 2).ok_or(CodecError::BadFormat)?;
let nibble_slice = NibbleSlice::new_offset(nibble_data, nibble_count % 2);
let count = <Compact<u32>>::decode(input).ok_or(CodecError::BadFormat)?.0 as usize;
// //println!("decode: ext: nibble_slice({:?})", nibble_slice);
let count = node_len(*input, H::LENGTH).ok_or(CodecError::BadFormat)?.0;
// //println!("decode: ext: node_len {}", count);
Ok(Node::Extension(nibble_slice, take(input, count).ok_or(CodecError::BadFormat)?))
}
NodeHeader::Leaf(nibble_count) => {
// //println!("decode: leaf({})", nibble_count);
let nibble_data = take(input, (nibble_count + 1) / 2).ok_or(CodecError::BadFormat)?;
let nibble_slice = NibbleSlice::new_offset(nibble_data, nibble_count % 2);
let count = <Compact<u32>>::decode(input).ok_or(CodecError::BadFormat)?.0 as usize;
Ok(Node::Leaf(nibble_slice, take(input, count).ok_or(CodecError::BadFormat)?))
}
}
};
//println!("decode: {:#x?} -> {:#x?}", data, r);
r
}
fn try_decode_hash(data: &[u8]) -> Option<H::Out> {
if data.len() == H::LENGTH {
let r = if data.len() == H::LENGTH + 1 && data[0] == EMPTY_TRIE {
let mut r = H::Out::default();
r.as_mut().copy_from_slice(data);
r.as_mut().copy_from_slice(&data[1..]);
Some(r)
} else {
None
}
};
//println!("try_decode_hash: {:#x?} -> {:#x?}", data, r);
r
}
fn is_empty_node(data: &[u8]) -> bool {
@ -98,7 +108,7 @@ impl<H: Hasher> NodeCodec<H> for ParityNodeCodecAlt<H> {
fn leaf_node(partial: &[u8], value: &[u8]) -> Vec<u8> {
let mut output = partial_to_key(partial, LEAF_NODE_OFFSET, LEAF_NODE_BIG);
value.encode_to(&mut output);
// println!("leaf_node: {:#x?}", output);
//println!("leaf_node: {:#x?}", output);
output
}
@ -106,12 +116,14 @@ impl<H: Hasher> NodeCodec<H> for ParityNodeCodecAlt<H> {
fn ext_node(partial: &[u8], child: ChildReference<H::Out>) -> Vec<u8> {
let mut output = partial_to_key(partial, EXTENSION_NODE_OFFSET, EXTENSION_NODE_BIG);
match child {
ChildReference::Hash(h) =>
h.as_ref().encode_to(&mut output),
ChildReference::Hash(h) => {
output.push(EMPTY_TRIE);
output.extend_from_slice(h.as_ref());
}
ChildReference::Inline(inline_data, len) =>
(&AsRef::<[u8]>::as_ref(&inline_data)[..len]).encode_to(&mut output),
output.extend_from_slice(&AsRef::<[u8]>::as_ref(&inline_data)[..len]),
};
// println!("ext_node: {:#x?}", output);
//println!("ext_node: {:#x?}", output);
output
}
@ -127,17 +139,18 @@ impl<H: Hasher> NodeCodec<H> for ParityNodeCodecAlt<H> {
};
let prefix = branch_node(have_value, children.map(|maybe_child| match maybe_child {
Some(ChildReference::Hash(h)) => {
h.as_ref().encode_to(&mut output);
output.push(EMPTY_TRIE);
output.extend_from_slice(h.as_ref());
true
}
Some(ChildReference::Inline(inline_data, len)) => {
(&AsRef::<[u8]>::as_ref(&inline_data)[..len]).encode_to(&mut output);
output.extend_from_slice(&AsRef::<[u8]>::as_ref(&inline_data)[..len]);
true
}
None => false,
}));
output[0..3].copy_from_slice(&prefix[..]);
// println!("branch_node: {:#x?}", output);
//println!("branch_node: {:#x?}", output);
output
}
}