From c6a41651abe6cc8c5a0b6396bc6525d4a524a5c9 Mon Sep 17 00:00:00 2001 From: Sebastian Geisler Date: Thu, 3 Jan 2019 17:24:18 -0800 Subject: [PATCH 1/3] Replace slow hex decoding function with optimized version Fixes #207. --- src/util/misc.rs | 68 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/src/util/misc.rs b/src/util/misc.rs index f94a274..b3b485e 100644 --- a/src/util/misc.rs +++ b/src/util/misc.rs @@ -17,29 +17,59 @@ //! Various utility functions use blockdata::opcodes; -use util::iter::Pairable; use consensus::encode; +/// Helper function to convert hex nibble characters to their respective value +#[inline] +fn hex_val(c: u8) -> Result { + let res = match c { + b'0' ... b'9' => c - '0' as u8, + b'a' ... b'f' => c - 'a' as u8 + 10, + b'A' ... b'F' => c - 'A' as u8 + 10, + _ => return Err(encode::Error::UnexpectedHexDigit(c as char)), + }; + Ok(res) +} + /// Convert a hexadecimal-encoded string to its corresponding bytes -pub fn hex_bytes(s: &str) -> Result, encode::Error> { - let mut v = vec![]; - let mut iter = s.chars().pair(); - // Do the parsing - iter.by_ref().fold(Ok(()), |e, (f, s)| - if e.is_err() { e } - else { - match (f.to_digit(16), s.to_digit(16)) { - (None, _) => Err(encode::Error::UnexpectedHexDigit(f)), - (_, None) => Err(encode::Error::UnexpectedHexDigit(s)), - (Some(f), Some(s)) => { v.push((f * 0x10 + s) as u8); Ok(()) } - } - } - )?; - // Check that there was no remainder - match iter.remainder() { - Some(_) => Err(encode::Error::ParseFailed("hexstring of odd length")), - None => Ok(v) +pub fn hex_bytes(data: &str) -> Result, encode::Error> { + // This code is optimized to be as fast as possible without using unsafe or platform specific + // features. If you want to refactor it please make sure you don't introduce performance + // regressions (see benches/from_hex.rs). 
+ + // If the hex string has an uneven length fail early + if data.len() % 2 != 0 { + return Err(encode::Error::ParseFailed("hexstring of odd length")); } + + // Preallocate the uninitialized memory for the byte array + let mut res = Vec::with_capacity(data.len() / 2); + + let mut hex_it = data.bytes(); + loop { + // Get most significant nibble of current byte or end iteration + let msn = match hex_it.next() { + None => break, + Some(x) => x, + }; + + // Get least significant nibble of current byte + let lsn = match hex_it.next() { + None => unreachable!("len % 2 == 0"), + Some(x) => x, + }; + + // Convert bytes representing characters to their represented value and combine lsn and msn. + // The and_then and map are crucial for performance, in comparison to using ? and then + // using the results of that for the calculation it's nearly twice as fast. Using bit + // shifting and or instead of multiply and add on the other hand doesn't show a significant + // increase in performance. + match hex_val(msn).and_then(|msn_val| hex_val(lsn).map(|lsn_val| msn_val * 16 + lsn_val)) { + Ok(x) => res.push(x), + Err(e) => return Err(e), + } + } + Ok(res) } /// Search for `needle` in the vector `haystack` and remove every From 7c7ec02ed27aa520a205ce274eddd514fe907839 Mon Sep 17 00:00:00 2001 From: Sebastian Geisler Date: Thu, 3 Jan 2019 17:25:56 -0800 Subject: [PATCH 2/3] Remove unused Pair iterator and util::iter module --- src/util/iter.rs | 80 ------------------------------------------------ src/util/mod.rs | 1 - 2 files changed, 81 deletions(-) delete mode 100644 src/util/iter.rs diff --git a/src/util/iter.rs b/src/util/iter.rs deleted file mode 100644 index e950d78..0000000 --- a/src/util/iter.rs +++ /dev/null @@ -1,80 +0,0 @@ -// Rust Bitcoin Library -// Written in 2014 by -// Andrew Poelstra -// -// To the extent possible under law, the author(s) have dedicated all -// copyright and related and neighboring rights to this software to -// the public domain worldwide. 
This software is distributed without -// any warranty. -// -// You should have received a copy of the CC0 Public Domain Dedication -// along with this software. -// If not, see . -// - -//! Iterator adaptors -//! -//! Iterator adaptors needed by Bitcoin but not provided by the Rust -//! standard library. - -/// An iterator that returns pairs of elements -pub struct Pair - where I: Iterator -{ - iter: I, - last_elem: Option -} - -impl Iterator for Pair { - type Item = (I::Item, I::Item); - - #[inline] - fn next(&mut self) -> Option<(I::Item, I::Item)> { - let elem1 = self.iter.next(); - if elem1.is_none() { - None - } else { - let elem2 = self.iter.next(); - if elem2.is_none() { - self.last_elem = elem1; - None - } else { - Some((elem1.unwrap(), elem2.unwrap())) - } - } - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - match self.iter.size_hint() { - (n, None) => (n/2, None), - (n, Some(m)) => (n/2, Some(m/2)) - } - } -} - -impl Pair { - /// Returns the last element of the iterator if there were an odd - /// number of elements remaining before it was Pair-ified. - #[inline] - pub fn remainder(self) -> Option { - self.last_elem - } -} - -/// Returns an iterator that returns elements of the original iterator 2 at a time -pub trait Pairable : Sized + Iterator { - /// Returns an iterator that returns elements of the original iterator 2 at a time - fn pair(self) -> Pair; -} - -impl Pairable for I { - /// Creates an iterator that yields pairs of elements from the underlying - /// iterator, yielding `None` when there are fewer than two elements to - /// return. 
- #[inline] - fn pair(self) -> Pair { - Pair {iter: self, last_elem: None } - } -} - diff --git a/src/util/mod.rs b/src/util/mod.rs index 055c8a1..8ae3f56 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -24,7 +24,6 @@ pub mod bip143; pub mod contracthash; pub mod decimal; pub mod hash; -pub mod iter; pub mod misc; pub mod uint; From 4c29fc0e8db049406d48c46e4d1285ad3ceb30ee Mon Sep 17 00:00:00 2001 From: Sebastian Geisler Date: Mon, 7 Jan 2019 13:43:23 -0800 Subject: [PATCH 3/3] Add feature gated hex decode benchmark --- Cargo.toml | 2 +- src/util/misc.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f5b76ee..0ff37b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,3 @@ - [package] name = "bitcoin" version = "0.15.1" @@ -18,6 +17,7 @@ path = "src/lib.rs" [features] fuzztarget = ["secp256k1/fuzztarget"] serde-decimal = ["serde", "strason"] +unstable = [] [dependencies] bitcoin-bech32 = "0.8.0" diff --git a/src/util/misc.rs b/src/util/misc.rs index b3b485e..058e750 100644 --- a/src/util/misc.rs +++ b/src/util/misc.rs @@ -35,7 +35,7 @@ fn hex_val(c: u8) -> Result { pub fn hex_bytes(data: &str) -> Result, encode::Error> { // This code is optimized to be as fast as possible without using unsafe or platform specific // features. If you want to refactor it please make sure you don't introduce performance - // regressions (see benches/from_hex.rs). + // regressions (run the benchmark with `cargo bench --features unstable`). 
// If the hex string has an uneven length fail early if data.len() % 2 != 0 { @@ -107,6 +107,55 @@ pub fn script_find_and_remove(haystack: &mut Vec, needle: &[u8]) -> usize { n_deleted } +#[cfg(all(test, feature="unstable"))] +mod benches { + use rand::{Rng, thread_rng}; + use super::hex_bytes; + use test::Bencher; + + fn join, IT: AsRef>(iter: I, expected_len: usize) -> String { + let mut res = String::with_capacity(expected_len); + for s in iter { + res.push_str(s.as_ref()); + } + res + } + + fn bench_from_hex(b: &mut Bencher, data_size: usize) { + let data_bytes = thread_rng() + .gen_iter() + .take(data_size) + .collect::>(); + let data = join(data_bytes.iter().map(|x| format!("{:02x}", x)), data_size * 2); + + assert_eq!(hex_bytes(&data).unwrap(), data_bytes); + + b.iter(move || { + hex_bytes(&data).unwrap() + }) + } + + #[bench] + fn from_hex_16_bytes(b: &mut Bencher) { + bench_from_hex(b, 16); + } + + #[bench] + fn from_hex_64_bytes(b: &mut Bencher) { + bench_from_hex(b, 64); + } + + #[bench] + fn from_hex_256_bytes(b: &mut Bencher) { + bench_from_hex(b, 256); + } + + #[bench] + fn from_hex_4m_bytes(b: &mut Bencher) { + bench_from_hex(b, 1024 * 1024 * 4); + } +} + #[cfg(test)] mod tests { use super::script_find_and_remove;