Only send sigverify to GPU if batch size is >= 64

A batch size of 64 seems to be a decent crossover point for a Xeon E5-2620 v4 (8 cores, 16 threads) vs. an NVIDIA 1080 Ti.
This commit is contained in:
Stephen Akridge 2018-08-01 14:10:39 -07:00 committed by sakridge
parent e9f8b5b9db
commit 7ccd771ccc
4 changed files with 55 additions and 2 deletions

View File

@ -116,3 +116,7 @@ harness = false
[[bench]]
name = "signature"
harness = false
[[bench]]
name = "sigverify"
harness = false

36
benches/sigverify.rs Normal file
View File

@ -0,0 +1,36 @@
#[macro_use]
extern crate criterion;
extern crate bincode;
extern crate rayon;
extern crate solana;
use criterion::{Bencher, Criterion};
use solana::packet::{to_packets, PacketRecycler};
use solana::sigverify;
use solana::transaction::test_tx;
/// Benchmarks `sigverify::ed25519_verify` over packet batches built from
/// 128 copies of a single test transaction.
///
/// The packet batches are built once, outside the timed closure, so only the
/// verification call itself is measured.
fn bench_sig_verify(bencher: &mut Bencher) {
    let tx = test_tx();

    // generate packet vector
    let packet_recycler = PacketRecycler::default();
    let batches = to_packets(&packet_recycler, &vec![tx; 128]);

    // verify packets; the result is returned from the closure (rather than
    // bound to a throwaway local) so that criterion passes it through its
    // black box and the verification work cannot be optimised away
    bencher.iter(|| sigverify::ed25519_verify(&batches))
}
fn bench(criterion: &mut Criterion) {
criterion.bench_function("bench_sig_verify", |bencher| {
bench_sig_verify(bencher);
});
}
// Declare the `benches` group: a single target, `bench`, run with a Criterion
// configuration whose sample size is set to 2.
// NOTE(review): the small sample size presumably keeps the run short; newer
// criterion releases may reject values this low — confirm before upgrading.
criterion_group!(
name = benches;
config = Criterion::default().sample_size(2);
targets = bench
);
// Generate the benchmark binary's entry point, which runs the `benches` group.
criterion_main!(benches);

View File

@ -41,7 +41,6 @@ pub fn init() {
// stub
}
#[cfg(not(feature = "cuda"))]
fn verify_packet(packet: &Packet) -> u8 {
use ring::signature;
use signature::{PublicKey, Signature};
@ -81,6 +80,11 @@ fn batch_size(batches: &[SharedPackets]) -> usize {
/// Verifies the given packet batches when the crate is built without the
/// `cuda` feature.
///
/// This build has no GPU path, so the call is forwarded unchanged to
/// `ed25519_verify_cpu` and its result is returned as-is.
///
/// NOTE(review): the result presumably holds one `u8` per packet for each
/// batch (cf. `verify_packet` returning `u8`) — confirm against
/// `ed25519_verify_cpu`.
#[cfg_attr(feature = "cargo-clippy", allow(ptr_arg))]
#[cfg(not(feature = "cuda"))]
pub fn ed25519_verify(batches: &Vec<SharedPackets>) -> Vec<Vec<u8>> {
ed25519_verify_cpu(batches)
}
#[cfg_attr(feature = "cargo-clippy", allow(ptr_arg))]
pub fn ed25519_verify_cpu(batches: &Vec<SharedPackets>) -> Vec<Vec<u8>> {
use rayon::prelude::*;
let count = batch_size(batches);
info!("CPU ECDSA for {}", batch_size(batches));
@ -134,6 +138,16 @@ pub fn init() {
pub fn ed25519_verify(batches: &Vec<SharedPackets>) -> Vec<Vec<u8>> {
use packet::PACKET_DATA_SIZE;
let count = batch_size(batches);
// micro-benchmarks show GPU time for smallest batch around 15-20ms
// and CPU speed for 64-128 sig verifies around 10-20ms. 64 is a nice
// power-of-two number around that accounting for the fact that the CPU
// may be busy doing other things while being a real fullnode
// TODO: dynamically adjust this crossover
if count < 64 {
return ed25519_verify_cpu(batches);
}
info!("CUDA ECDSA for {}", batch_size(batches));
let mut out = Vec::new();
let mut elems = Vec::new();

View File

@ -207,7 +207,6 @@ impl Transaction {
}
}
#[cfg(test)]
pub fn test_tx() -> Transaction {
let keypair1 = KeyPair::new();
let pubkey1 = keypair1.pubkey();