From 19343a2d153a9d68395cba61e73edb2313bfb92c Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Mon, 27 Aug 2018 14:27:01 +0800 Subject: [PATCH] remove elastic-array in triehash --- patricia_trie/Cargo.toml | 2 +- rlp/src/impls.rs | 11 ++- rlp/src/stream.rs | 59 +++++++++++++--- triehash/Cargo.toml | 3 +- triehash/src/lib.rs | 142 +++++++++++++-------------------------- 5 files changed, 102 insertions(+), 115 deletions(-) diff --git a/patricia_trie/Cargo.toml b/patricia_trie/Cargo.toml index c374ba3..2e5f576 100644 --- a/patricia_trie/Cargo.toml +++ b/patricia_trie/Cargo.toml @@ -20,7 +20,7 @@ keccak-hash = { version = "0.1", path = "../keccak-hash" } memorydb = { version = "0.3", path = "../memorydb", default-features = false } rlp = { version = "0.3.0", path = "../rlp", default-features = false } trie-standardmap = { version = "0.1", path = "../trie-standardmap", default-features = false } -triehash = { version = "0.2", path = "../triehash", default-features = false } +triehash = { version = "0.3", path = "../triehash", default-features = false } parity-bytes = { version = "0.1.0", path = "../parity-bytes" } # REVIEW: what's a better way to deal with this? The tests here in diff --git a/rlp/src/impls.rs b/rlp/src/impls.rs index f896239..4abc6d4 100644 --- a/rlp/src/impls.rs +++ b/rlp/src/impls.rs @@ -7,6 +7,7 @@ // except according to those terms. use std::{mem, str}; +use std::iter::{once, empty}; use byteorder::{ByteOrder, BigEndian}; use traits::{Encodable, Decodable}; use stream::RlpStream; @@ -31,11 +32,7 @@ pub fn decode_usize(bytes: &[u8]) -> Result { impl Encodable for bool { fn rlp_append(&self, s: &mut RlpStream) { - if *self { - s.encoder().encode_value(&[1]); - } else { - s.encoder().encode_value(&[0]); - } + s.encoder().encode_iter(once(if *self { 1u8 } else { 0 })); } } @@ -99,9 +96,9 @@ impl Decodable for Option where T: Decodable { impl Encodable for u8 { fn rlp_append(&self, s: &mut RlpStream) { if *self != 0 { - s.encoder().encode_value(&[*self]); + s.encoder().encode_iter(once(*self)); } else { - s.encoder().encode_value(&[]); + s.encoder().encode_iter(empty()); } } } diff --git a/rlp/src/stream.rs b/rlp/src/stream.rs index 6b608fc..b13fd3b 100644 --- a/rlp/src/stream.rs +++ b/rlp/src/stream.rs @@ -120,6 +120,30 @@ impl RlpStream { self } + /// Appends iterator to the end of stream, chainable. + /// + /// ```rust + /// extern crate rlp; + /// use rlp::*; + /// + /// fn main () { + /// let mut stream = RlpStream::new_list(2); + /// stream.append(&"cat").append_iter("dog".as_bytes().iter().cloned()); + /// let out = stream.out(); + /// assert_eq!(out, vec![0xc8, 0x83, b'c', b'a', b't', 0x83, b'd', b'o', b'g']); + /// } + /// ``` + pub fn append_iter<'a, I>(&'a mut self, value: I) -> &'a mut Self + where I: IntoIterator, + { + self.finished_list = false; + self.encoder().encode_iter(value); + if !self.finished_list { + self.note_appended(1); + } + self + } + /// Appends list of values to the end of stream, chainable. pub fn append_list<'a, E, K>(&'a mut self, values: &[K]) -> &'a mut Self where E: Encodable, K: Borrow { self.begin_list(values.len()); @@ -353,17 +377,36 @@ impl<'a> BasicEncoder<'a> { }; } - /// Pushes encoded value to the end of buffer pub fn encode_value(&mut self, value: &[u8]) { - match value.len() { + self.encode_iter(value.iter().cloned()); + } + + /// Pushes encoded value to the end of buffer + pub fn encode_iter(&mut self, value: I) + where I: IntoIterator, + { + let mut value = value.into_iter(); + let len = match value.size_hint() { + (lower, Some(upper)) if lower == upper => lower, + _ => { + let value = value.collect::>(); + return self.encode_iter(value); + } + }; + match len { // just 0 0 => self.buffer.push(0x80u8), - // byte is its own encoding if < 0x80 - 1 if value[0] < 0x80 => self.buffer.push(value[0]), - // (prefix + length), followed by the string len @ 1 ... 55 => { - self.buffer.push(0x80u8 + len as u8); - self.buffer.extend_from_slice(value); + let first = value.next().expect("iterator length is higher than 1"); + if len == 1 && first < 0x80 { + // byte is its own encoding if < 0x80 + self.buffer.push(first); + } else { + // (prefix + length), followed by the string + self.buffer.push(0x80u8 + len as u8); + self.buffer.push(first); + self.buffer.extend(value); + } } // (prefix + length of length), followed by the length, followd by the string len => { @@ -371,7 +414,7 @@ impl<'a> BasicEncoder<'a> { let position = self.buffer.len(); let inserted_bytes = self.insert_size(len, position); self.buffer[position - 1] = 0xb7 + inserted_bytes; - self.buffer.extend_from_slice(value); + self.buffer.extend(value); } } } diff --git a/triehash/Cargo.toml b/triehash/Cargo.toml index c7bf6c0..be50218 100644 --- a/triehash/Cargo.toml +++ b/triehash/Cargo.toml @@ -1,13 +1,12 @@ [package] name = "triehash" -version = "0.2.3" +version = "0.3.0" authors = ["Parity Technologies "] description = "In-memory patricia trie operations" repository = "https://github.com/paritytech/parity-common" license = "GPL-3.0" [dependencies] -elastic-array = "0.10" hashdb = { version = "0.3", path = "../hashdb", default-features = false } rlp = { version = "0.3", path = "../rlp", default-features = false } diff --git a/triehash/src/lib.rs b/triehash/src/lib.rs index 8f02989..8125abb 100644 --- a/triehash/src/lib.rs +++ b/triehash/src/lib.rs @@ -18,20 +18,21 @@ //! //! This module should be used to generate trie root hash. -extern crate elastic_array; extern crate hashdb; extern crate rlp; #[cfg(test)] extern crate keccak_hasher; use std::collections::BTreeMap; use std::cmp; -use elastic_array::{ElasticArray4, ElasticArray8}; +use std::iter::once; use hashdb::Hasher; use rlp::RlpStream; fn shared_prefix_len(first: &[T], second: &[T]) -> usize { - let len = cmp::min(first.len(), second.len()); - (0..len).take_while(|&i| first[i] == second[i]).count() + first.iter() + .zip(second.iter()) + .position(|(f, s)| f != s) + .unwrap_or_else(|| cmp::min(first.len(), second.len())) } /// Generates a trie root hash for a vector of values @@ -45,29 +46,17 @@ fn shared_prefix_len(first: &[T], second: &[T]) -> usize { /// fn main() { /// let v = &["doe", "reindeer"]; /// let root = "e766d5d51b89dc39d981b41bda63248d7abce4f0225eefd023792a540bcffee3"; -/// assert_eq!(ordered_trie_root::(v), root.into()); +/// assert_eq!(ordered_trie_root::(v), root.into()); /// } /// ``` -pub fn ordered_trie_root(input: I) -> H::Out +pub fn ordered_trie_root(input: I) -> H::Out where - I: IntoIterator, - A: AsRef<[u8]>, + I: IntoIterator, + I::Item: AsRef<[u8]>, H: Hasher, ::Out: cmp::Ord + rlp::Encodable, { - let gen_input: Vec<_> = input - // first put elements into btree to sort them by nibbles (key'd by index) - // optimize it later - .into_iter() - .enumerate() - .map(|(i, slice)| (rlp::encode(&i), slice)) - .collect::>() - // then move them to a vector - .into_iter() - .map(|(k, v)| (as_nibbles(&k), v) ) - .collect(); - - gen_trie_root::(&gen_input) + trie_root::(input.into_iter().enumerate().map(|(i, v)| (rlp::encode(&i), v))) } /// Generates a trie root hash for a vector of key-value tuples @@ -97,16 +86,31 @@ where H: Hasher, ::Out: cmp::Ord + rlp::Encodable, { - let gen_input: Vec<_> = input - // first put elements into btree to sort them and to remove duplicates - .into_iter() - .collect::>() - // then move them to a vector - .into_iter() - .map(|(k, v)| (as_nibbles(k.as_ref()), v) ) - .collect(); - gen_trie_root::(&gen_input) + // first put elements into btree to sort them and to remove duplicates + let input = input + .into_iter() + .collect::>(); + + let mut nibbles = Vec::with_capacity(input.keys().map(|k| k.as_ref().len()).sum::() * 2); + let mut lens = Vec::with_capacity(input.len() + 1); + lens.push(0); + for k in input.keys() { + for &b in k.as_ref() { + nibbles.push(b >> 4); + nibbles.push(b & 0x0F); + } + lens.push(nibbles.len()); + } + + // then move them to a vector + let input = input.into_iter().zip(lens.windows(2)) + .map(|((_, v), w)| (&nibbles[w[0]..w[1]], v)) + .collect::>(); + + let mut stream = RlpStream::new(); + hash256rlp::(&input, 0, &mut stream); + H::hash(&stream.out()) } /// Generates a key-hashed (secure) trie root hash for a vector of key-value tuples. @@ -136,29 +140,7 @@ where H: Hasher, ::Out: cmp::Ord + rlp::Encodable, { - let gen_input: Vec<_> = input - // first put elements into btree to sort them and to remove duplicates - .into_iter() - .map(|(k, v)| (H::hash(k.as_ref()), v)) - .collect::>() - // then move them to a vector - .into_iter() - .map(|(k, v)| (as_nibbles(k.as_ref()), v) ) - .collect(); - - gen_trie_root::(&gen_input) -} - -fn gen_trie_root(input: &[(A, B)]) -> H::Out -where - A: AsRef<[u8]>, - B: AsRef<[u8]>, - H: Hasher, - ::Out: cmp::Ord + rlp::Encodable, -{ - let mut stream = RlpStream::new(); - hash256rlp::(input, 0, &mut stream); - H::hash(&stream.out()) + trie_root::(input.into_iter().map(|(k, v)| (H::hash(k.as_ref()), v))) } /// Hex-prefix Notation. First nibble has flags: oddness = 2^0 & termination = 2^1. @@ -180,10 +162,9 @@ where /// [1,2,3,4,5,T] 0x312345 // 5 > 3 /// [1,2,3,4,T] 0x201234 // 4 > 3 /// ``` -fn hex_prefix_encode(nibbles: &[u8], leaf: bool) -> ElasticArray4 { +fn hex_prefix_encode<'a>(nibbles: &'a [u8], leaf: bool) -> impl Iterator + 'a { let inlen = nibbles.len(); let oddness_factor = inlen % 2; - let mut res = ElasticArray4::new(); let first_byte = { let mut bits = ((inlen as u8 & 1) + (2 * leaf as u8)) << 4; @@ -192,28 +173,7 @@ fn hex_prefix_encode(nibbles: &[u8], leaf: bool) -> ElasticArray4 { } bits }; - - res.push(first_byte); - - let mut offset = oddness_factor; - while offset < inlen { - let byte = (nibbles[offset] << 4) + nibbles[offset + 1]; - res.push(byte); - offset += 2; - } - - res -} - -/// Converts slice of bytes to nibbles. -fn as_nibbles(bytes: &[u8]) -> ElasticArray8 { - let mut res = ElasticArray8::new(); - for i in 0..bytes.len() { - let byte = bytes[i]; - res.push(byte >> 4); - res.push(byte & 0b1111); - } - res + once(first_byte).chain(nibbles[oddness_factor..].chunks(2).map(|ch| ch[0] << 4 | ch[1])) } fn hash256rlp(input: &[(A, B)], pre_len: usize, stream: &mut RlpStream) @@ -239,7 +199,7 @@ where // and then append value if inlen == 1 { stream.begin_list(2); - stream.append(&&*hex_prefix_encode(&key[pre_len..], true)); + stream.append_iter(hex_prefix_encode(&key[pre_len..], true)); stream.append(&value); return; } @@ -258,7 +218,7 @@ where // then recursively append suffixes of all items who had this key if shared_prefix > pre_len { stream.begin_list(2); - stream.append(&&*hex_prefix_encode(&key[pre_len..shared_prefix], false)); + stream.append_iter(hex_prefix_encode(&key[pre_len..shared_prefix], false)); hash256aux::(input, shared_prefix, stream); return; } @@ -315,18 +275,6 @@ where }; } -#[test] -fn test_nibbles() { - let v = vec![0x31, 0x23, 0x45]; - let e = vec![3, 1, 2, 3, 4, 5]; - assert_eq!(as_nibbles(&v), e); - - // A => 65 => 0x41 => [4, 1] - let v: Vec = From::from("A"); - let e = vec![4, 1]; - assert_eq!(as_nibbles(&v), e); -} - #[cfg(test)] mod tests { use super::{trie_root, shared_prefix_len, hex_prefix_encode}; @@ -336,32 +284,32 @@ mod tests { fn test_hex_prefix_encode() { let v = vec![0, 0, 1, 2, 3, 4, 5]; let e = vec![0x10, 0x01, 0x23, 0x45]; - let h = hex_prefix_encode(&v, false); + let h = hex_prefix_encode(&v, false).collect::>(); assert_eq!(h, e); let v = vec![0, 1, 2, 3, 4, 5]; let e = vec![0x00, 0x01, 0x23, 0x45]; - let h = hex_prefix_encode(&v, false); + let h = hex_prefix_encode(&v, false).collect::>(); assert_eq!(h, e); let v = vec![0, 1, 2, 3, 4, 5]; let e = vec![0x20, 0x01, 0x23, 0x45]; - let h = hex_prefix_encode(&v, true); + let h = hex_prefix_encode(&v, true).collect::>(); assert_eq!(h, e); let v = vec![1, 2, 3, 4, 5]; let e = vec![0x31, 0x23, 0x45]; - let h = hex_prefix_encode(&v, true); + let h = hex_prefix_encode(&v, true).collect::>(); assert_eq!(h, e); let v = vec![1, 2, 3, 4]; let e = vec![0x00, 0x12, 0x34]; - let h = hex_prefix_encode(&v, false); + let h = hex_prefix_encode(&v, false).collect::>(); assert_eq!(h, e); let v = vec![4, 1]; let e = vec![0x20, 0x41]; - let h = hex_prefix_encode(&v, true); + let h = hex_prefix_encode(&v, true).collect::>(); assert_eq!(h, e); }