add bloom benchmarking, perf improvement from Fnv ~= 8X (#2477)

* add bloom benchmarking, perf improvement from Fnv ~= 8X
* have a look at bits.set()
* ignore new benches to pacify CI (solana_upload_perf?)
This commit is contained in:
Rob Walker 2019-01-17 18:22:21 -08:00 committed by GitHub
parent 14267e172d
commit 1f87d9ba4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 140 additions and 35 deletions

1
Cargo.lock generated
View File

@ -1853,6 +1853,7 @@ dependencies = [
"bv 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"hashbrown 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"hex-literal 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@ -29,6 +29,7 @@ bs58 = "0.2.0"
bv = { version = "0.11.0", features = ["serde"] }
byteorder = "1.2.1"
chrono = { version = "0.4.0", features = ["serde"] }
fnv = "1.0.6"
hashbrown = "0.1.8"
indexmap = "1.0"
itertools = "0.8.0"

101
benches/bloom.rs Normal file
View File

@ -0,0 +1,101 @@
#![feature(test)]
extern crate test;
use bv::BitVec;
use fnv::FnvHasher;
use solana::bloom::{Bloom, BloomHashIndex};
use solana_sdk::hash::{hash, Hash};
use solana_sdk::signature::Signature;
//use std::collections::HashSet;
use hashbrown::HashSet;
use std::hash::Hasher;
use test::Bencher;
/// Benchmark `BitVec::set` at indices derived from a running FNV hash.
///
/// Every iteration also pays for one `finish()` and one `write_u64()`; subtract the
/// `bench_bits_set_hasher` result from this one to get a number for raw `bits.set()`.
#[bench]
#[ignore]
fn bench_bits_set(bencher: &mut Bencher) {
    let mut bit_vec = BitVec::<u8>::new_fill(false, 38_340_234_u64);
    let mut fnv = FnvHasher::default();
    bencher.iter(|| {
        // The current hash state selects the bit; the selected index is then fed back
        // into the hasher so the next iteration lands somewhere else.
        let position = fnv.finish() % bit_vec.len();
        bit_vec.set(position, true);
        fnv.write_u64(position);
    });
}
/// Baseline for `bench_bits_set`: the same hash-and-modulo loop, minus the `set()` call.
#[bench]
#[ignore]
fn bench_bits_set_hasher(bencher: &mut Bencher) {
    // Only `len()` is read from this vector; it supplies the modulus for the index.
    let bit_vec = BitVec::<u8>::new_fill(false, 38_340_234_u64);
    let mut fnv = FnvHasher::default();
    bencher.iter(|| {
        let position = fnv.finish() % bit_vec.len();
        fnv.write_u64(position);
    });
}
/// Benchmark `Bloom<Signature>`: each iteration does a lookup before insertion, an `add`,
/// and a lookup after insertion.
///
/// Filter sizing, per the original notes:
/// 1M TPS * 1s (length of block in sigs) == 1M items in filter,
/// 1.0E-8 false positive rate.
/// https://hur.st/bloomfilter/?n=1000000&p=1.0E-8&m=&k=
#[bench]
#[ignore]
fn bench_sigs_bloom(bencher: &mut Bencher) {
    let last_id = hash(Hash::default().as_ref());
    // 27 keys, each derived from `last_id` through `hash_at_index`.
    let keys = (0..27).map(|i| last_id.hash_at_index(i)).collect();
    let mut sigs: Bloom<Signature> = Bloom::new(38_340_234, keys);
    let mut id = last_id;
    // Number of signatures reported as present before they were added.
    let mut falses = 0;
    bencher.iter(|| {
        // Signature bytes are two successive links of the hash chain, concatenated.
        id = hash(id.as_ref());
        let mut sigbytes = Vec::from(id.as_ref());
        id = hash(id.as_ref());
        sigbytes.extend(id.as_ref());
        let sig = Signature::new(&sigbytes);
        if sigs.contains(&sig) {
            falses += 1;
        }
        sigs.add(&sig);
        // The result is otherwise unused; black_box keeps the lookup from being optimised out.
        test::black_box(sigs.contains(&sig));
    });
    assert_eq!(falses, 0);
}
/// Benchmark `HashSet<Signature>` with the same per-iteration workload as
/// `bench_sigs_bloom` (lookup, insert, lookup), for comparison against the bloom filter.
#[bench]
#[ignore]
fn bench_sigs_hashmap(bencher: &mut Bencher) {
    let last_id = hash(Hash::default().as_ref());
    let mut sigs: HashSet<Signature> = HashSet::new();
    let mut id = last_id;
    // Number of signatures found in the set before they were inserted.
    let mut falses = 0;
    bencher.iter(|| {
        // Signature bytes are two successive links of the hash chain, concatenated.
        id = hash(id.as_ref());
        let mut sigbytes = Vec::from(id.as_ref());
        id = hash(id.as_ref());
        sigbytes.extend(id.as_ref());
        let sig = Signature::new(&sigbytes);
        if sigs.contains(&sig) {
            falses += 1;
        }
        sigs.insert(sig);
        // The result is otherwise unused; black_box keeps the lookup from being optimised out.
        test::black_box(sigs.contains(&sig));
    });
    assert_eq!(falses, 0);
}

View File

@ -1,8 +1,9 @@
//! Simple Bloom Filter
use bv::BitVec;
use fnv::FnvHasher;
use rand::{self, Rng};
use solana_sdk::hash::hashv;
use std::cmp;
use std::hash::Hasher;
use std::marker::PhantomData;
/// Generate a stable hash of `self` for each `hash_index`
@ -63,33 +64,34 @@ impl<T: BloomHashIndex> Bloom<T> {
}
}
/// Split `v` into its eight bytes, least-significant byte first.
fn to_slice(v: u64) -> [u8; 8] {
    let mut bytes = [0u8; 8];
    for (position, byte) in bytes.iter_mut().enumerate() {
        // The truncating cast keeps only the low byte of the shifted value.
        *byte = (v >> (8 * position)) as u8;
    }
    bytes
}
/// Assemble a `u64` from the first eight bytes of `v`, least-significant byte first.
///
/// Bytes beyond the eighth are ignored. Panics if `v` holds fewer than eight bytes.
fn from_slice(v: &[u8]) -> u64 {
    // Walk from the most significant byte down, shifting the accumulator up each step.
    v[..8]
        .iter()
        .rev()
        .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte))
}
//fn to_slice(v: u64) -> [u8; 8] {
// [
// v as u8,
// (v >> 8) as u8,
// (v >> 16) as u8,
// (v >> 24) as u8,
// (v >> 32) as u8,
// (v >> 40) as u8,
// (v >> 48) as u8,
// (v >> 56) as u8,
// ]
//}
//fn from_slice(v: &[u8]) -> u64 {
// u64::from(v[0])
// | u64::from(v[1]) << 8
// | u64::from(v[2]) << 16
// | u64::from(v[3]) << 24
// | u64::from(v[4]) << 32
// | u64::from(v[5]) << 40
// | u64::from(v[6]) << 48
// | u64::from(v[7]) << 56
//}
//
/// Map `slice` to a `u64` that depends on `hash_index`.
// NOTE(review): this listing interleaves two bodies without +/- markers. The
// `hashv`/`from_slice` pair is presumably the body being removed and the `FnvHasher`
// lines its replacement — confirm against the real file before relying on either.
fn slice_hash(slice: &[u8], hash_index: u64) -> u64 {
// `hashv` over `slice` followed by the `to_slice` bytes of `hash_index`; `from_slice`
// then decodes the resulting hash bytes into the returned `u64`.
let hash = hashv(&[slice, &to_slice(hash_index)]);
from_slice(hash.as_ref())
// `FnvHasher` keyed with `hash_index` and fed `slice`; `finish()` is the returned value.
let mut hasher = FnvHasher::with_key(hash_index);
hasher.write(slice);
hasher.finish()
}
impl<T: AsRef<[u8]>> BloomHashIndex for T {
@ -102,15 +104,15 @@ impl<T: AsRef<[u8]>> BloomHashIndex for T {
mod test {
use super::*;
use solana_sdk::hash::{hash, Hash};
/// `to_slice` followed by `from_slice` must give back the starting value.
#[test]
fn test_slice() {
    // Small, 32-bit-wide and 64-bit-wide samples.
    for &value in &[10u64, 0x7fff7fff, 0x7fff7fff7fff7fff] {
        assert_eq!(from_slice(&to_slice(value)), value);
    }
}
// #[test]
// fn test_slice() {
// assert_eq!(from_slice(&to_slice(10)), 10);
// assert_eq!(from_slice(&to_slice(0x7fff7fff)), 0x7fff7fff);
// assert_eq!(
// from_slice(&to_slice(0x7fff7fff7fff7fff)),
// 0x7fff7fff7fff7fff
// );
// }
#[test]
fn test_bloom_filter() {