Only send sigverify to GPU if batch size is >= 64
This seems to be a decent crossover point for a Xeon E5-2620 v4 (8c/16t) vs. an NVIDIA 1080 Ti
This commit is contained in:
parent
e9f8b5b9db
commit
7ccd771ccc
|
@ -116,3 +116,7 @@ harness = false
|
|||
[[bench]]
|
||||
name = "signature"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "sigverify"
|
||||
harness = false
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
#[macro_use]
|
||||
extern crate criterion;
|
||||
extern crate bincode;
|
||||
extern crate rayon;
|
||||
extern crate solana;
|
||||
|
||||
use criterion::{Bencher, Criterion};
|
||||
use solana::packet::{to_packets, PacketRecycler};
|
||||
use solana::sigverify;
|
||||
use solana::transaction::test_tx;
|
||||
|
||||
fn bench_sig_verify(bencher: &mut Bencher) {
|
||||
let tx = test_tx();
|
||||
|
||||
// generate packet vector
|
||||
let packet_recycler = PacketRecycler::default();
|
||||
let batches = to_packets(&packet_recycler, &vec![tx; 128]);
|
||||
|
||||
// verify packets
|
||||
bencher.iter(|| {
|
||||
let _ans = sigverify::ed25519_verify(&batches);
|
||||
})
|
||||
}
|
||||
|
||||
fn bench(criterion: &mut Criterion) {
|
||||
criterion.bench_function("bench_sig_verify", |bencher| {
|
||||
bench_sig_verify(bencher);
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
name = benches;
|
||||
config = Criterion::default().sample_size(2);
|
||||
targets = bench
|
||||
);
|
||||
criterion_main!(benches);
|
|
@ -41,7 +41,6 @@ pub fn init() {
|
|||
// stub
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
fn verify_packet(packet: &Packet) -> u8 {
|
||||
use ring::signature;
|
||||
use signature::{PublicKey, Signature};
|
||||
|
@ -81,6 +80,11 @@ fn batch_size(batches: &[SharedPackets]) -> usize {
|
|||
#[cfg_attr(feature = "cargo-clippy", allow(ptr_arg))]
|
||||
#[cfg(not(feature = "cuda"))]
|
||||
/// Entry point for signature verification in builds without the `cuda`
/// feature: forwards `batches` unchanged to `ed25519_verify_cpu` and returns
/// its result.
pub fn ed25519_verify(batches: &Vec<SharedPackets>) -> Vec<Vec<u8>> {
|
||||
ed25519_verify_cpu(batches)
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(ptr_arg))]
|
||||
pub fn ed25519_verify_cpu(batches: &Vec<SharedPackets>) -> Vec<Vec<u8>> {
|
||||
use rayon::prelude::*;
|
||||
let count = batch_size(batches);
|
||||
info!("CPU ECDSA for {}", batch_size(batches));
|
||||
|
@ -134,6 +138,16 @@ pub fn init() {
|
|||
pub fn ed25519_verify(batches: &Vec<SharedPackets>) -> Vec<Vec<u8>> {
|
||||
use packet::PACKET_DATA_SIZE;
|
||||
let count = batch_size(batches);
|
||||
|
||||
// micro-benchmarks show GPU time for smallest batch around 15-20ms
|
||||
// and CPU speed for 64-128 sig verifies around 10-20ms. 64 is a nice
|
||||
// power-of-two number around that accounting for the fact that the CPU
|
||||
// may be busy doing other things while being a real fullnode
|
||||
// TODO: dynamically adjust this crossover
|
||||
if count < 64 {
|
||||
return ed25519_verify_cpu(batches);
|
||||
}
|
||||
|
||||
info!("CUDA ECDSA for {}", batch_size(batches));
|
||||
let mut out = Vec::new();
|
||||
let mut elems = Vec::new();
|
||||
|
|
|
@ -207,7 +207,6 @@ impl Transaction {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn test_tx() -> Transaction {
|
||||
let keypair1 = KeyPair::new();
|
||||
let pubkey1 = keypair1.pubkey();
|
||||
|
|
Loading…
Reference in New Issue