diff --git a/triehash/Cargo.toml b/triehash/Cargo.toml new file mode 100644 index 0000000..ee42b9d --- /dev/null +++ b/triehash/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "triehash" +version = "0.1.0" +authors = ["Parity Technologies "] +description = "in memory patricia trie operations" +license = "GPL-3.0" + +[dependencies] +elastic-array = "0.10" +rlp = { version = "0.2.1", path = "../rlp" } +ethereum-types = "0.3" +keccak-hash = { version = "0.1", path = "../hash" } + +[dev-dependencies] +trie-standardmap = { path = "../trie-standardmap" } diff --git a/triehash/benches/triehash.rs b/triehash/benches/triehash.rs new file mode 100644 index 0000000..505ea12 --- /dev/null +++ b/triehash/benches/triehash.rs @@ -0,0 +1,147 @@ +// Copyright 2015-2018 Parity Technologies (UK) Ltd. +// This file is part of Parity. + +// Parity is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Parity is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Parity. If not, see <http://www.gnu.org/licenses/>.
+ +#![feature(test)] + +extern crate ethereum_types; +extern crate keccak_hash; +extern crate test; +extern crate trie_standardmap; +extern crate triehash; + +use ethereum_types::H256; +use keccak_hash::keccak; +use test::Bencher; +use trie_standardmap::{Alphabet, ValueMode, StandardMap}; +use triehash::trie_root; + +fn random_word(alphabet: &[u8], min_count: usize, diff_count: usize, seed: &mut H256) -> Vec { + assert!(min_count + diff_count <= 32); + *seed = keccak(&seed); + let r = min_count + (seed[31] as usize % (diff_count + 1)); + let mut ret: Vec = Vec::with_capacity(r); + for i in 0..r { + ret.push(alphabet[seed[i] as usize % alphabet.len()]); + } + ret +} + +fn random_bytes(min_count: usize, diff_count: usize, seed: &mut H256) -> Vec { + assert!(min_count + diff_count <= 32); + *seed = keccak(&seed); + let r = min_count + (seed[31] as usize % (diff_count + 1)); + seed[0..r].to_vec() +} + +fn random_value(seed: &mut H256) -> Vec { + *seed = keccak(&seed); + match seed[0] % 2 { + 1 => vec![seed[31];1], + _ => seed.to_vec(), + } +} + +#[bench] +fn triehash_insertions_32_mir_1k(b: &mut Bencher) { + let st = StandardMap { + alphabet: Alphabet::All, + min_key: 32, + journal_key: 0, + value_mode: ValueMode::Mirror, + count: 1000, + }; + let d = st.make(); + b.iter(&mut ||{ + trie_root(d.clone()).clone(); + }); +} + +#[bench] +fn triehash_insertions_32_ran_1k(b: &mut Bencher) { + let st = StandardMap { + alphabet: Alphabet::All, + min_key: 32, + journal_key: 0, + value_mode: ValueMode::Random, + count: 1000, + }; + let d = st.make(); + b.iter(&mut ||{ + trie_root(d.clone()).clone(); + }); +} + +#[bench] +fn triehash_insertions_six_high(b: &mut Bencher) { + let mut d: Vec<(Vec, Vec)> = Vec::new(); + let mut seed = H256::new(); + for _ in 0..1000 { + let k = random_bytes(6, 0, &mut seed); + let v = random_value(&mut seed); + d.push((k, v)) + } + + b.iter(&||{ + trie_root(d.clone()); + }) +} + +#[bench] +fn triehash_insertions_six_mid(b: &mut Bencher) { + let 
alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_"; + let mut d: Vec<(Vec, Vec)> = Vec::new(); + let mut seed = H256::new(); + for _ in 0..1000 { + let k = random_word(alphabet, 6, 0, &mut seed); + let v = random_value(&mut seed); + d.push((k, v)) + } + b.iter(||{ + trie_root(d.clone()); + }) +} + +#[bench] +fn triehash_insertions_random_mid(b: &mut Bencher) { + let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_"; + let mut d: Vec<(Vec, Vec)> = Vec::new(); + let mut seed = H256::new(); + for _ in 0..1000 { + let k = random_word(alphabet, 1, 5, &mut seed); + let v = random_value(&mut seed); + d.push((k, v)) + } + + b.iter(||{ + trie_root(d.clone()); + }) +} + +#[bench] +fn triehash_insertions_six_low(b: &mut Bencher) { + let alphabet = b"abcdef"; + let mut d: Vec<(Vec, Vec)> = Vec::new(); + let mut seed = H256::new(); + for _ in 0..1000 { + let k = random_word(alphabet, 6, 0, &mut seed); + let v = random_value(&mut seed); + d.push((k, v)) + } + + b.iter(||{ + trie_root(d.clone()); + }) +} diff --git a/triehash/src/lib.rs b/triehash/src/lib.rs new file mode 100644 index 0000000..c78ed0c --- /dev/null +++ b/triehash/src/lib.rs @@ -0,0 +1,376 @@ +// Copyright 2015-2018 Parity Technologies (UK) Ltd. +// This file is part of Parity. + +// Parity is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Parity is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Parity. If not, see . + +//! Generetes trie root. +//! +//! This module should be used to generate trie root hash. 
+ +extern crate elastic_array; +extern crate ethereum_types; +extern crate keccak_hash as hash; +extern crate rlp; + +use std::collections::BTreeMap; +use std::cmp; +use elastic_array::{ElasticArray4, ElasticArray8}; +use ethereum_types::H256; +use hash::keccak; +use rlp::RlpStream; + +fn shared_prefix_len(first: &[T], second: &[T]) -> usize { + let len = cmp::min(first.len(), second.len()); + (0..len).take_while(|&i| first[i] == second[i]).count() +} + +/// Generates a trie root hash for a vector of values +/// +/// ```rust +/// extern crate triehash; +/// use triehash::ordered_trie_root; +/// +/// fn main() { +/// let v = &["doe", "reindeer"]; +/// let root = "e766d5d51b89dc39d981b41bda63248d7abce4f0225eefd023792a540bcffee3"; +/// assert_eq!(ordered_trie_root(v), root.into()); +/// } +/// ``` +pub fn ordered_trie_root(input: I) -> H256 + where I: IntoIterator, + A: AsRef<[u8]>, +{ + let gen_input: Vec<_> = input + // first put elements into btree to sort them by nibbles + // optimize it later + .into_iter() + .enumerate() + .map(|(i, slice)| (rlp::encode(&i), slice)) + .collect::>() + // then move them to a vector + .into_iter() + .map(|(k, v)| (as_nibbles(&k), v) ) + .collect(); + + gen_trie_root(&gen_input) +} + +/// Generates a trie root hash for a vector of key-values +/// +/// ```rust +/// extern crate triehash; +/// use triehash::trie_root; +/// +/// fn main() { +/// let v = vec![ +/// ("doe", "reindeer"), +/// ("dog", "puppy"), +/// ("dogglesworth", "cat"), +/// ]; +/// +/// let root = "8aad789dff2f538bca5d8ea56e8abe10f4c7ba3a5dea95fea4cd6e7c3a1168d3"; +/// assert_eq!(trie_root(v), root.into()); +/// } +/// ``` +pub fn trie_root(input: I) -> H256 + where I: IntoIterator, + A: AsRef<[u8]> + Ord, + B: AsRef<[u8]>, +{ + let gen_input: Vec<_> = input + // first put elements into btree to sort them and to remove duplicates + .into_iter() + .collect::>() + // then move them to a vector + .into_iter() + .map(|(k, v)| (as_nibbles(k.as_ref()), v) ) + .collect(); + 
+ gen_trie_root(&gen_input) +} + +/// Generates a key-hashed (secure) trie root hash for a vector of key-values. +/// +/// ```rust +/// extern crate triehash; +/// use triehash::sec_trie_root; +/// +/// fn main() { +/// let v = vec![ +/// ("doe", "reindeer"), +/// ("dog", "puppy"), +/// ("dogglesworth", "cat"), +/// ]; +/// +/// let root = "d4cd937e4a4368d7931a9cf51686b7e10abb3dce38a39000fd7902a092b64585"; +/// assert_eq!(sec_trie_root(v), root.into()); +/// } +/// ``` +pub fn sec_trie_root(input: I) -> H256 + where I: IntoIterator, + A: AsRef<[u8]>, + B: AsRef<[u8]>, +{ + let gen_input: Vec<_> = input + // first put elements into btree to sort them and to remove duplicates + .into_iter() + .map(|(k, v)| (keccak(k), v)) + .collect::>() + // then move them to a vector + .into_iter() + .map(|(k, v)| (as_nibbles(&k), v) ) + .collect(); + + gen_trie_root(&gen_input) +} + +fn gen_trie_root, B: AsRef<[u8]>>(input: &[(A, B)]) -> H256 { + let mut stream = RlpStream::new(); + hash256rlp(input, 0, &mut stream); + keccak(stream.out()) +} + +/// Hex-prefix Notation. First nibble has flags: oddness = 2^0 & termination = 2^1. +/// +/// The "termination marker" and "leaf-node" specifier are completely equivalent. +/// +/// Input values are in range `[0, 0xf]`. 
+/// +/// ```markdown +/// [0,0,1,2,3,4,5] 0x10012345 // 7 > 4 +/// [0,1,2,3,4,5] 0x00012345 // 6 > 4 +/// [1,2,3,4,5] 0x112345 // 5 > 3 +/// [0,0,1,2,3,4] 0x00001234 // 6 > 3 +/// [0,1,2,3,4] 0x101234 // 5 > 3 +/// [1,2,3,4] 0x001234 // 4 > 3 +/// [0,0,1,2,3,4,5,T] 0x30012345 // 7 > 4 +/// [0,0,1,2,3,4,T] 0x20001234 // 6 > 4 +/// [0,1,2,3,4,5,T] 0x20012345 // 6 > 4 +/// [1,2,3,4,5,T] 0x312345 // 5 > 3 +/// [1,2,3,4,T] 0x201234 // 4 > 3 +/// ``` +fn hex_prefix_encode(nibbles: &[u8], leaf: bool) -> ElasticArray4 { + let inlen = nibbles.len(); + let oddness_factor = inlen % 2; + let mut res = ElasticArray4::new(); + + let first_byte = { + let mut bits = ((inlen as u8 & 1) + (2 * leaf as u8)) << 4; + if oddness_factor == 1 { + bits += nibbles[0]; + } + bits + }; + + res.push(first_byte); + + let mut offset = oddness_factor; + while offset < inlen { + let byte = (nibbles[offset] << 4) + nibbles[offset + 1]; + res.push(byte); + offset += 2; + } + + res +} + +/// Converts slice of bytes to nibbles. 
+fn as_nibbles(bytes: &[u8]) -> ElasticArray8 { + let mut res = ElasticArray8::new(); + for i in 0..bytes.len() { + let byte = bytes[i]; + res.push(byte >> 4); + res.push(byte & 0b1111); + } + res +} + +fn hash256rlp, B: AsRef<[u8]>>(input: &[(A, B)], pre_len: usize, stream: &mut RlpStream) { + let inlen = input.len(); + + // in case of empty slice, just append empty data + if inlen == 0 { + stream.append_empty_data(); + return; + } + + // take slices + let key: &[u8] = &input[0].0.as_ref(); + let value: &[u8] = &input[0].1.as_ref(); + + // if the slice contains just one item, append the suffix of the key + // and then append value + if inlen == 1 { + stream.begin_list(2); + stream.append(&&*hex_prefix_encode(&key[pre_len..], true)); + stream.append(&value); + return; + } + + // get length of the longest shared prefix in slice keys + let shared_prefix = input.iter() + // skip first element + .skip(1) + // get minimum number of shared nibbles between first and each successive + .fold(key.len(), | acc, &(ref k, _) | { + cmp::min(shared_prefix_len(key, k.as_ref()), acc) + }); + + // if shared prefix is higher than current prefix append its + // new part of the key to the stream + // then recursively append suffixes of all items who had this key + if shared_prefix > pre_len { + stream.begin_list(2); + stream.append(&&*hex_prefix_encode(&key[pre_len..shared_prefix], false)); + hash256aux(input, shared_prefix, stream); + return; + } + + // an item for every possible nibble/suffix + // + 1 for data + stream.begin_list(17); + + // if first key len is equal to prefix_len, move to next element + let mut begin = match pre_len == key.len() { + true => 1, + false => 0 + }; + + // iterate over all possible nibbles + for i in 0..16 { + // cout how many successive elements have same next nibble + let len = match begin < input.len() { + true => input[begin..].iter() + .take_while(| pair | pair.0.as_ref()[pre_len] == i ) + .count(), + false => 0 + }; + + // if at least 1 successive 
element has the same nibble + // append their suffixes + match len { + 0 => { stream.append_empty_data(); }, + _ => hash256aux(&input[begin..(begin + len)], pre_len + 1, stream) + } + begin += len; + } + + // if fist key len is equal prefix, append its value + match pre_len == key.len() { + true => { stream.append(&value); }, + false => { stream.append_empty_data(); } + }; +} + +fn hash256aux, B: AsRef<[u8]>>(input: &[(A, B)], pre_len: usize, stream: &mut RlpStream) { + let mut s = RlpStream::new(); + hash256rlp(input, pre_len, &mut s); + let out = s.out(); + match out.len() { + 0...31 => stream.append_raw(&out, 1), + _ => stream.append(&keccak(out)) + }; +} + +#[test] +fn test_nibbles() { + let v = vec![0x31, 0x23, 0x45]; + let e = vec![3, 1, 2, 3, 4, 5]; + assert_eq!(as_nibbles(&v), e); + + // A => 65 => 0x41 => [4, 1] + let v: Vec = From::from("A"); + let e = vec![4, 1]; + assert_eq!(as_nibbles(&v), e); +} + +#[cfg(test)] +mod tests { + use super::{trie_root, shared_prefix_len, hex_prefix_encode}; + + #[test] + fn test_hex_prefix_encode() { + let v = vec![0, 0, 1, 2, 3, 4, 5]; + let e = vec![0x10, 0x01, 0x23, 0x45]; + let h = hex_prefix_encode(&v, false); + assert_eq!(h, e); + + let v = vec![0, 1, 2, 3, 4, 5]; + let e = vec![0x00, 0x01, 0x23, 0x45]; + let h = hex_prefix_encode(&v, false); + assert_eq!(h, e); + + let v = vec![0, 1, 2, 3, 4, 5]; + let e = vec![0x20, 0x01, 0x23, 0x45]; + let h = hex_prefix_encode(&v, true); + assert_eq!(h, e); + + let v = vec![1, 2, 3, 4, 5]; + let e = vec![0x31, 0x23, 0x45]; + let h = hex_prefix_encode(&v, true); + assert_eq!(h, e); + + let v = vec![1, 2, 3, 4]; + let e = vec![0x00, 0x12, 0x34]; + let h = hex_prefix_encode(&v, false); + assert_eq!(h, e); + + let v = vec![4, 1]; + let e = vec![0x20, 0x41]; + let h = hex_prefix_encode(&v, true); + assert_eq!(h, e); + } + + #[test] + fn simple_test() { + assert_eq!(trie_root(vec![ + (b"A", b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" as &[u8]) + ]), 
"d23786fb4a010da3ce639d66d5e904a11dbc02746d1ce25029e53290cabf28ab".into()); + } + + #[test] + fn test_triehash_out_of_order() { + assert!(trie_root(vec![ + (vec![0x01u8, 0x23], vec![0x01u8, 0x23]), + (vec![0x81u8, 0x23], vec![0x81u8, 0x23]), + (vec![0xf1u8, 0x23], vec![0xf1u8, 0x23]), + ]) == + trie_root(vec![ + (vec![0x01u8, 0x23], vec![0x01u8, 0x23]), + (vec![0xf1u8, 0x23], vec![0xf1u8, 0x23]), + (vec![0x81u8, 0x23], vec![0x81u8, 0x23]), + ])); + } + + #[test] + fn test_shared_prefix() { + let a = vec![1,2,3,4,5,6]; + let b = vec![4,2,3,4,5,6]; + assert_eq!(shared_prefix_len(&a, &b), 0); + } + + #[test] + fn test_shared_prefix2() { + let a = vec![1,2,3,3,5]; + let b = vec![1,2,3]; + assert_eq!(shared_prefix_len(&a, &b), 3); + } + + #[test] + fn test_shared_prefix3() { + let a = vec![1,2,3,4,5,6]; + let b = vec![1,2,3,4,5,6]; + assert_eq!(shared_prefix_len(&a, &b), 6); + } +}