Merge pull request #208 from sgeisler/hex_bytes

Use more performant hex_bytes function
This commit is contained in:
Matt Corallo 2019-01-15 14:06:00 -05:00 committed by GitHub
commit 2ed4b1f246
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 99 additions and 101 deletions

View File

@ -1,4 +1,3 @@
[package]
name = "bitcoin"
version = "0.15.1"
@ -18,6 +17,7 @@ path = "src/lib.rs"
[features]
fuzztarget = ["secp256k1/fuzztarget"]
serde-decimal = ["serde", "strason"]
unstable = []
[dependencies]
bitcoin-bech32 = "0.8.0"

View File

@ -1,80 +0,0 @@
// Rust Bitcoin Library
// Written in 2014 by
// Andrew Poelstra <apoelstra@wpsoftware.net>
//
// To the extent possible under law, the author(s) have dedicated all
// copyright and related and neighboring rights to this software to
// the public domain worldwide. This software is distributed without
// any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication
// along with this software.
// If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
//
//! Iterator adaptors
//!
//! Iterator adaptors needed by Bitcoin but not provided by the Rust
//! standard library.
/// Iterator adaptor that groups the items of an inner iterator into
/// consecutive, non-overlapping 2-tuples.
///
/// If the inner iterator runs dry after handing out the first half of a
/// pair, that lone item is stashed and can be fetched with `remainder`.
pub struct Pair<I: Iterator> {
    iter: I,
    last_elem: Option<I::Item>,
}

impl<I: Iterator> Iterator for Pair<I> {
    type Item = (I::Item, I::Item);

    /// Pulls two items from the inner iterator and returns them as a tuple.
    ///
    /// Returns `None` as soon as fewer than two items are available; a single
    /// leftover item is kept in `last_elem` rather than being dropped.
    #[inline]
    fn next(&mut self) -> Option<(I::Item, I::Item)> {
        let first = match self.iter.next() {
            Some(item) => item,
            None => return None,
        };
        match self.iter.next() {
            Some(second) => Some((first, second)),
            None => {
                // Odd item count: remember the unpaired item for `remainder`.
                self.last_elem = Some(first);
                None
            }
        }
    }

    /// Halves both bounds reported by the inner iterator (integer division).
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, upper) = self.iter.size_hint();
        (lower / 2, upper.map(|bound| bound / 2))
    }
}

impl<I: Iterator> Pair<I> {
    /// Consumes the adaptor and returns the unpaired trailing item, if
    /// iteration ended on an odd item; `None` otherwise.
    #[inline]
    pub fn remainder(self) -> Option<I::Item> {
        self.last_elem
    }
}
/// Extension trait that adds pairwise iteration to any sized iterator.
pub trait Pairable: Iterator + Sized {
    /// Wraps `self` in a `Pair` adaptor, which hands out the elements of the
    /// original iterator two at a time.
    fn pair(self) -> Pair<Self>;
}

impl<T> Pairable for T
where
    T: Iterator,
{
    /// Builds a `Pair` around this iterator with no remainder recorded yet.
    /// The resulting adaptor yields `None` once fewer than two elements are
    /// left to return.
    #[inline]
    fn pair(self) -> Pair<T> {
        Pair {
            last_elem: None,
            iter: self,
        }
    }
}

View File

@ -17,29 +17,59 @@
//! Various utility functions
use blockdata::opcodes;
use util::iter::Pairable;
use consensus::encode;
/// Helper function to convert hex nibble characters to their respective value
#[inline]
fn hex_val(c: u8) -> Result<u8, encode::Error> {
let res = match c {
b'0' ... b'9' => c - '0' as u8,
b'a' ... b'f' => c - 'a' as u8 + 10,
b'A' ... b'F' => c - 'A' as u8 + 10,
_ => return Err(encode::Error::UnexpectedHexDigit(c as char)),
};
Ok(res)
}
/// Convert a hexadecimal-encoded string to its corresponding bytes
pub fn hex_bytes(s: &str) -> Result<Vec<u8>, encode::Error> {
let mut v = vec![];
let mut iter = s.chars().pair();
// Do the parsing
iter.by_ref().fold(Ok(()), |e, (f, s)|
if e.is_err() { e }
else {
match (f.to_digit(16), s.to_digit(16)) {
(None, _) => Err(encode::Error::UnexpectedHexDigit(f)),
(_, None) => Err(encode::Error::UnexpectedHexDigit(s)),
(Some(f), Some(s)) => { v.push((f * 0x10 + s) as u8); Ok(()) }
}
}
)?;
// Check that there was no remainder
match iter.remainder() {
Some(_) => Err(encode::Error::ParseFailed("hexstring of odd length")),
None => Ok(v)
/// Decodes the hexadecimal string `data` into the bytes it encodes, two hex
/// digits per output byte (most significant nibble first).
///
/// # Errors
///
/// * `encode::Error::ParseFailed` if `data` has an odd length in bytes.
/// * Whatever error `hex_val` returns for the first byte that is not a hex digit.
///
/// NOTE(review): the input is walked byte-wise (`data.bytes()`) and `hex_val`
/// reports the offending *byte* cast to `char`, so for non-ASCII input the
/// reported character is not necessarily a character of `data`.
pub fn hex_bytes(data: &str) -> Result<Vec<u8>, encode::Error> {
// This code is optimized to be as fast as possible without using unsafe or platform specific
// features. If you want to refactor it please make sure you don't introduce performance
// regressions (run the benchmark with `cargo bench --features unstable`).
// If the hex string has an odd length (in bytes) fail early
if data.len() % 2 != 0 {
return Err(encode::Error::ParseFailed("hexstring of odd length"));
}
// Reserve capacity for the decoded bytes up front (two hex digits per byte) so that the
// pushes below never have to grow the vector
let mut res = Vec::with_capacity(data.len() / 2);
let mut hex_it = data.bytes();
loop {
// Get most significant nibble of current byte or end iteration
let msn = match hex_it.next() {
None => break,
Some(x) => x,
};
// Get least significant nibble of current byte; the length check above guarantees that
// bytes always come in pairs, so running out here is impossible
let lsn = match hex_it.next() {
None => unreachable!("len % 2 == 0"),
Some(x) => x,
};
// Convert bytes representing characters to their represented value and combine lsn and msn.
// The and_then and map are crucial for performance: compared to using ? on each nibble and
// then using the results of that for the calculation, it's nearly twice as fast. Using bit
// shifting and or instead of multiply and add, on the other hand, doesn't show a significant
// increase in performance.
// Both nibble values are at most 15, so `msn_val * 16 + lsn_val` cannot overflow a u8.
match hex_val(msn).and_then(|msn_val| hex_val(lsn).map(|lsn_val| msn_val * 16 + lsn_val)) {
Ok(x) => res.push(x),
Err(e) => return Err(e),
}
}
Ok(res)
}
/// Search for `needle` in the vector `haystack` and remove every
@ -77,6 +107,55 @@ pub fn script_find_and_remove(haystack: &mut Vec<u8>, needle: &[u8]) -> usize {
n_deleted
}
// Benchmarks for `hex_bytes`; only compiled for test builds that enable the
// `unstable` feature.
#[cfg(all(test, feature="unstable"))]
mod benches {
    use rand::{Rng, thread_rng};
    use super::hex_bytes;
    use test::Bencher;

    /// Concatenates every string piece produced by `iter` into one `String`
    /// whose buffer is preallocated to `expected_len` bytes.
    fn join<I: Iterator<Item=IT>, IT: AsRef<str>>(iter: I, expected_len: usize) -> String {
        iter.fold(String::with_capacity(expected_len), |mut joined, piece| {
            joined.push_str(piece.as_ref());
            joined
        })
    }

    /// Generates `data_size` random bytes, hex-encodes them, checks once that
    /// `hex_bytes` decodes them back, and then benchmarks the decoding.
    fn bench_from_hex(b: &mut Bencher, data_size: usize) {
        let data_bytes: Vec<u8> = thread_rng().gen_iter().take(data_size).collect();
        let hex_pieces = data_bytes.iter().map(|byte| format!("{:02x}", byte));
        let data = join(hex_pieces, data_size * 2);

        // Sanity check outside of the measured closure.
        assert_eq!(hex_bytes(&data).unwrap(), data_bytes);

        b.iter(move || hex_bytes(&data).unwrap())
    }

    #[bench]
    fn from_hex_16_bytes(b: &mut Bencher) {
        bench_from_hex(b, 16);
    }

    #[bench]
    fn from_hex_64_bytes(b: &mut Bencher) {
        bench_from_hex(b, 64);
    }

    #[bench]
    fn from_hex_256_bytes(b: &mut Bencher) {
        bench_from_hex(b, 256);
    }

    #[bench]
    fn from_hex_4m_bytes(b: &mut Bencher) {
        bench_from_hex(b, 1024 * 1024 * 4);
    }
}
#[cfg(test)]
mod tests {
use super::script_find_and_remove;

View File

@ -24,7 +24,6 @@ pub mod bip143;
pub mod contracthash;
pub mod decimal;
pub mod hash;
pub mod iter;
pub mod misc;
pub mod uint;