#![allow(clippy::arithmetic_side_effects)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use solana_entry::entry::{self, create_ticks, init_poh, EntrySlice, VerifyRecyclers};
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
use solana_entry::entry::{create_ticks, init_poh, EntrySlice, VerifyRecyclers};
use {
    clap::{crate_description, crate_name, Arg, Command},
    solana_measure::measure::Measure,
    solana_perf::perf_libs,
    solana_sdk::hash::hash,
};

/// Times `iterations` runs of `verify` and prints one CSV row to stdout.
///
/// The row has the form `<num_entries>,<label>,<average microseconds per run>`.
///
/// # Panics
///
/// Panics if any run of `verify` returns `false`, or if `iterations` is zero
/// (it is the divisor of the average).
fn bench_verify(
    label: &str,
    num_entries: usize,
    iterations: usize,
    mut verify: impl FnMut() -> bool,
) {
    let mut time = Measure::start("time");
    for _ in 0..iterations {
        assert!(
            verify(),
            "{} verification failed for {} entries",
            label,
            num_entries
        );
    }
    time.stop();
    println!(
        "{},{},{}",
        num_entries,
        label,
        time.as_us() / iterations as u64
    );
}

/// Benchmarks tick verification over a doubling range of entry counts.
///
/// Starting at `--start-num-entries` and doubling until `--max-num-entries` is
/// exceeded, each round verifies the first `num_entries` ticks `--iterations`
/// times with every available backend (generic CPU, AVX2/AVX-512 SIMD on x86
/// when detected, and the perf-libs backend when loaded) and prints the
/// average time per run in microseconds as CSV rows.
///
/// The process exits with an error if a supplied numeric flag cannot be
/// parsed, or if the starting entry count or the iteration count is zero.
fn main() {
    solana_logger::setup();

    let matches = Command::new(crate_name!())
        .about(crate_description!())
        .version(solana_version::version!())
        .arg(
            Arg::new("max_num_entries")
                .long("max-num-entries")
                .takes_value(true)
                .value_name("SIZE")
                .help("Number of entries."),
        )
        .arg(
            Arg::new("start_num_entries")
                .long("start-num-entries")
                .takes_value(true)
                .value_name("SIZE")
                .help(
                    "Number of entries to verify in the first round, doubled each round \
                     [default: --max-num-entries]",
                ),
        )
        .arg(
            Arg::new("hashes_per_tick")
                .long("hashes-per-tick")
                .takes_value(true)
                .value_name("SIZE")
                .help("hashes per tick"),
        )
        // Accepted for command-line compatibility; nothing below reads it.
        .arg(
            Arg::new("num_transactions_per_entry")
                .long("num-transactions-per-entry")
                .takes_value(true)
                .value_name("NUM")
                .help("Number of transactions per entry (currently unused)"),
        )
        .arg(
            Arg::new("iterations")
                .long("iterations")
                .takes_value(true)
                .help("Number of iterations"),
        )
        // Accepted for command-line compatibility; nothing below reads it.
        .arg(
            Arg::new("num_threads")
                .long("num-threads")
                .takes_value(true)
                .help("Number of threads"),
        )
        .arg(
            Arg::new("cuda")
                .long("cuda")
                .takes_value(false)
                .help("Use cuda"),
        )
        .get_matches();

    // A flag that was supplied must parse; only an absent flag falls back to
    // its default. Otherwise a typo would silently benchmark the wrong size.
    let max_num_entries: u64 = if matches.is_present("max_num_entries") {
        matches.value_of_t_or_exit("max_num_entries")
    } else {
        64
    };
    let start_num_entries: u64 = if matches.is_present("start_num_entries") {
        matches.value_of_t_or_exit("start_num_entries")
    } else {
        max_num_entries
    };
    let iterations: usize = if matches.is_present("iterations") {
        matches.value_of_t_or_exit("iterations")
    } else {
        10
    };
    let hashes_per_tick: u64 = if matches.is_present("hashes_per_tick") {
        matches.value_of_t_or_exit("hashes_per_tick")
    } else {
        10_000
    };

    // Doubling zero never advances, so a zero start would spin forever.
    if start_num_entries == 0 {
        eprintln!(
            "error: --start-num-entries (which defaults to --max-num-entries) must be greater \
             than zero"
        );
        std::process::exit(1);
    }
    // The per-run average divides by the iteration count.
    if iterations == 0 {
        eprintln!("error: --iterations must be greater than zero");
        std::process::exit(1);
    }

    let start_hash = hash(&[1, 2, 3, 4]);
    let ticks = create_ticks(max_num_entries, hashes_per_tick, start_hash);
    let mut num_entries = start_num_entries as usize;
    if matches.is_present("cuda") {
        perf_libs::init_cuda();
    }
    init_poh();
    while num_entries <= max_num_entries as usize {
        bench_verify("cpu_generic", num_entries, iterations, || {
            ticks[..num_entries]
                .verify_cpu_generic(&start_hash)
                .finish_verify()
        });

        // A target_arch check is required here since calling
        // is_x86_feature_detected from a non-x86_64 arch results in a build
        // error.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") && entry::api().is_some() {
                bench_verify("cpu_simd_avx2", num_entries, iterations, || {
                    ticks[..num_entries]
                        .verify_cpu_x86_simd(&start_hash, 8)
                        .finish_verify()
                });
            }

            if is_x86_feature_detected!("avx512f") && entry::api().is_some() {
                bench_verify("cpu_simd_avx512", num_entries, iterations, || {
                    ticks[..num_entries]
                        .verify_cpu_x86_simd(&start_hash, 16)
                        .finish_verify()
                });
            }
        }

        if perf_libs::api().is_some() {
            // One set of recyclers is shared by every iteration of this round.
            let recyclers = VerifyRecyclers::default();
            bench_verify("gpu_cuda", num_entries, iterations, || {
                ticks[..num_entries]
                    .start_verify(&start_hash, recyclers.clone())
                    .finish_verify()
            });
        }

        println!();
        num_entries *= 2;
    }
}