solana/poh-bench/src/main.rs

150 lines
5.0 KiB
Rust

#![allow(clippy::arithmetic_side_effects)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use solana_entry::entry::{self, create_ticks, init_poh, EntrySlice, VerifyRecyclers};
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
use solana_entry::entry::{create_ticks, init_poh, EntrySlice, VerifyRecyclers};
use {
clap::{crate_description, crate_name, Arg, Command},
solana_measure::measure::Measure,
solana_perf::perf_libs,
solana_sdk::hash::hash,
};
/// Entry point of the PoH verification benchmark.
///
/// Creates `max-num-entries` ticks once, then repeatedly verifies a prefix of
/// them, starting at `start-num-entries` entries and doubling the prefix
/// length each round while it does not exceed `max-num-entries`.
///
/// For every round and every available verification path one line is printed:
///
/// ```text
/// <num_entries>,<label>,<Measure::as_us() of the whole run / iterations>
/// ```
///
/// where `<label>` is one of `cpu_generic`, `cpu_simd_avx2`,
/// `cpu_simd_avx512` or `gpu_cuda`. A blank line separates rounds.
///
/// Numeric options that are absent (or that fail to parse) fall back to their
/// defaults: 64 entries, 10 iterations, 10 000 hashes per tick, and a start
/// size equal to the maximum size.
///
/// The process exits with status 1 if `iterations` or the effective start size
/// is zero, because the former would divide by zero when averaging and the
/// latter could never grow by doubling (the loop would not terminate).
///
/// # Panics
///
/// Panics if any verification reports failure (`finish_verify()` is asserted).
fn main() {
    solana_logger::setup();

    let matches = Command::new(crate_name!())
        .about(crate_description!())
        .version(solana_version::version!())
        .arg(
            Arg::new("max_num_entries")
                .long("max-num-entries")
                .takes_value(true)
                .value_name("SIZE")
                .help("Largest number of entries to verify in one batch [default: 64]"),
        )
        .arg(
            Arg::new("start_num_entries")
                .long("start-num-entries")
                .takes_value(true)
                .value_name("SIZE")
                .help(
                    "Number of entries in the first batch; doubled every round \
                     [default: max-num-entries]",
                ),
        )
        .arg(
            Arg::new("hashes_per_tick")
                .long("hashes-per-tick")
                .takes_value(true)
                .value_name("SIZE")
                .help("Hashes per tick [default: 10000]"),
        )
        .arg(
            // Accepted for command-line compatibility; the value is never read.
            Arg::new("num_transactions_per_entry")
                .long("num-transactions-per-entry")
                .takes_value(true)
                .value_name("NUM")
                .help("Number of transactions per entry (currently ignored)"),
        )
        .arg(
            Arg::new("iterations")
                .long("iterations")
                .takes_value(true)
                .help("Number of iterations per measurement [default: 10]"),
        )
        .arg(
            // Accepted for command-line compatibility; the value is never read.
            Arg::new("num_threads")
                .long("num-threads")
                .takes_value(true)
                .help("Number of threads (currently ignored)"),
        )
        .arg(
            Arg::new("cuda")
                .long("cuda")
                .takes_value(false)
                .help("Use cuda"),
        )
        .get_matches();

    let max_num_entries: u64 = matches.value_of_t("max_num_entries").unwrap_or(64);
    let start_num_entries: u64 = matches
        .value_of_t("start_num_entries")
        .unwrap_or(max_num_entries);
    let iterations: usize = matches.value_of_t("iterations").unwrap_or(10);
    let hashes_per_tick: u64 = matches.value_of_t("hashes_per_tick").unwrap_or(10_000);

    // Every result line divides the elapsed time by `iterations`.
    if iterations == 0 {
        eprintln!("error: --iterations must be greater than zero");
        std::process::exit(1);
    }
    // The batch size grows by doubling, so a start size of zero would never
    // advance and the benchmark loop would spin forever.
    if start_num_entries == 0 {
        eprintln!(
            "error: --start-num-entries (defaults to --max-num-entries) must be greater than zero"
        );
        std::process::exit(1);
    }

    let start_hash = hash(&[1, 2, 3, 4]);
    let ticks = create_ticks(max_num_entries, hashes_per_tick, start_hash);
    let max_entries = max_num_entries as usize;
    let mut num_entries = start_num_entries as usize;

    if matches.is_present("cuda") {
        perf_libs::init_cuda();
    }
    init_poh();

    while num_entries <= max_entries {
        let mut time = Measure::start("time");
        for _ in 0..iterations {
            assert!(ticks[..num_entries]
                .verify_cpu_generic(&start_hash)
                .finish_verify());
        }
        time.stop();
        println!(
            "{},cpu_generic,{}",
            num_entries,
            time.as_us() / iterations as u64
        );

        // A target_arch check is required here since calling
        // is_x86_feature_detected from a non-x86_64 arch results in a build
        // error.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") && entry::api().is_some() {
                let mut time = Measure::start("time");
                for _ in 0..iterations {
                    assert!(ticks[..num_entries]
                        .verify_cpu_x86_simd(&start_hash, 8)
                        .finish_verify());
                }
                time.stop();
                println!(
                    "{},cpu_simd_avx2,{}",
                    num_entries,
                    time.as_us() / iterations as u64
                );
            }

            if is_x86_feature_detected!("avx512f") && entry::api().is_some() {
                let mut time = Measure::start("time");
                for _ in 0..iterations {
                    assert!(ticks[..num_entries]
                        .verify_cpu_x86_simd(&start_hash, 16)
                        .finish_verify());
                }
                time.stop();
                println!(
                    "{},cpu_simd_avx512,{}",
                    num_entries,
                    time.as_us() / iterations as u64
                );
            }
        }

        if perf_libs::api().is_some() {
            // The recyclers are created inside the timed region, so their
            // construction is part of the reported figure.
            let mut time = Measure::start("time");
            let recyclers = VerifyRecyclers::default();
            for _ in 0..iterations {
                assert!(ticks[..num_entries]
                    .start_verify(&start_hash, recyclers.clone())
                    .finish_verify());
            }
            time.stop();
            println!(
                "{},gpu_cuda,{}",
                num_entries,
                time.as_us() / iterations as u64
            );
        }

        println!();
        num_entries *= 2;
    }
}