solana/bench-tps/src/bench.rs

535 lines
18 KiB
Rust

use solana_metrics;
use rayon::prelude::*;
use solana::client::mk_client;
use solana::cluster_info::NodeInfo;
use solana::thin_client::ThinClient;
use solana_drone::drone::request_airdrop_transaction;
use solana_metrics::influxdb;
use solana_sdk::hash::Hash;
use solana_sdk::signature::{Keypair, KeypairUtil};
use solana_sdk::system_transaction::SystemTransaction;
use solana_sdk::timing::timestamp;
use solana_sdk::timing::{duration_as_ms, duration_as_s};
use solana_sdk::transaction::Transaction;
use std::cmp;
use std::collections::VecDeque;
use std::net::SocketAddr;
use std::process::exit;
use std::sync::atomic::{AtomicBool, AtomicIsize, AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
use std::thread::sleep;
use std::time::Duration;
use std::time::Instant;
pub struct NodeStats {
/// Maximum TPS reported by this node
pub tps: f64,
/// Total transactions reported by this node
pub tx: u64,
}
pub const MAX_SPENDS_PER_TX: usize = 4;
pub type SharedTransactions = Arc<RwLock<VecDeque<Vec<(Transaction, u64)>>>>;
pub fn metrics_submit_token_balance(token_balance: u64) {
println!("Token balance: {}", token_balance);
solana_metrics::submit(
influxdb::Point::new("bench-tps")
.add_tag("op", influxdb::Value::String("token_balance".to_string()))
.add_field("balance", influxdb::Value::Integer(token_balance as i64))
.to_owned(),
);
}
pub fn sample_tx_count(
exit_signal: &Arc<AtomicBool>,
maxes: &Arc<RwLock<Vec<(SocketAddr, NodeStats)>>>,
first_tx_count: u64,
v: &NodeInfo,
sample_period: u64,
) {
let mut client = mk_client(&v);
let mut now = Instant::now();
let mut initial_tx_count = client.transaction_count();
let mut max_tps = 0.0;
let mut total;
let log_prefix = format!("{:21}:", v.tpu.to_string());
loop {
let tx_count = client.transaction_count();
assert!(
tx_count >= initial_tx_count,
"expected tx_count({}) >= initial_tx_count({})",
tx_count,
initial_tx_count
);
let duration = now.elapsed();
now = Instant::now();
let sample = tx_count - initial_tx_count;
initial_tx_count = tx_count;
let ns = duration.as_secs() * 1_000_000_000 + u64::from(duration.subsec_nanos());
let tps = (sample * 1_000_000_000) as f64 / ns as f64;
if tps > max_tps {
max_tps = tps;
}
if tx_count > first_tx_count {
total = tx_count - first_tx_count;
} else {
total = 0;
}
println!(
"{} {:9.2} TPS, Transactions: {:6}, Total transactions: {}",
log_prefix, tps, sample, total
);
sleep(Duration::new(sample_period, 0));
if exit_signal.load(Ordering::Relaxed) {
println!("{} Exiting validator thread", log_prefix);
let stats = NodeStats {
tps: max_tps,
tx: total,
};
maxes.write().unwrap().push((v.tpu, stats));
break;
}
}
}
/// Send loopback payment of 0 tokens and confirm the network processed it
pub fn send_barrier_transaction(barrier_client: &mut ThinClient, last_id: &mut Hash, id: &Keypair) {
let transfer_start = Instant::now();
let mut poll_count = 0;
loop {
if poll_count > 0 && poll_count % 8 == 0 {
println!(
"polling for barrier transaction confirmation, attempt {}",
poll_count
);
}
*last_id = barrier_client.get_last_id();
let signature = barrier_client
.transfer(0, &id, id.pubkey(), last_id)
.expect("Unable to send barrier transaction");
let confirmatiom = barrier_client.poll_for_signature(&signature);
let duration_ms = duration_as_ms(&transfer_start.elapsed());
if confirmatiom.is_ok() {
println!("barrier transaction confirmed in {} ms", duration_ms);
solana_metrics::submit(
influxdb::Point::new("bench-tps")
.add_tag(
"op",
influxdb::Value::String("send_barrier_transaction".to_string()),
)
.add_field("poll_count", influxdb::Value::Integer(poll_count))
.add_field("duration", influxdb::Value::Integer(duration_ms as i64))
.to_owned(),
);
// Sanity check that the client balance is still 1
let balance = barrier_client
.poll_balance_with_timeout(
&id.pubkey(),
&Duration::from_millis(100),
&Duration::from_secs(10),
)
.expect("Failed to get balance");
if balance != 1 {
panic!("Expected an account balance of 1 (balance: {}", balance);
}
break;
}
// Timeout after 3 minutes. When running a CPU-only leader+validator+drone+bench-tps on a dev
// machine, some batches of transactions can take upwards of 1 minute...
if duration_ms > 1000 * 60 * 3 {
println!("Error: Couldn't confirm barrier transaction!");
exit(1);
}
let new_last_id = barrier_client.get_last_id();
if new_last_id == *last_id {
if poll_count > 0 && poll_count % 8 == 0 {
println!("last_id is not advancing, still at {:?}", *last_id);
}
} else {
*last_id = new_last_id;
}
poll_count += 1;
}
}
pub fn generate_txs(
shared_txs: &SharedTransactions,
source: &[Keypair],
dest: &[Keypair],
threads: usize,
reclaim: bool,
leader: &NodeInfo,
) {
let mut client = mk_client(leader);
let last_id = client.get_last_id();
let tx_count = source.len();
println!("Signing transactions... {} (reclaim={})", tx_count, reclaim);
let signing_start = Instant::now();
let pairs: Vec<_> = if !reclaim {
source.iter().zip(dest.iter()).collect()
} else {
dest.iter().zip(source.iter()).collect()
};
let transactions: Vec<_> = pairs
.par_iter()
.map(|(id, keypair)| {
(
SystemTransaction::new_account(id, keypair.pubkey(), 1, last_id, 0),
timestamp(),
)
})
.collect();
let duration = signing_start.elapsed();
let ns = duration.as_secs() * 1_000_000_000 + u64::from(duration.subsec_nanos());
let bsps = (tx_count) as f64 / ns as f64;
let nsps = ns as f64 / (tx_count) as f64;
println!(
"Done. {:.2} thousand signatures per second, {:.2} us per signature, {} ms total time, {}",
bsps * 1_000_000_f64,
nsps / 1_000_f64,
duration_as_ms(&duration),
last_id,
);
solana_metrics::submit(
influxdb::Point::new("bench-tps")
.add_tag("op", influxdb::Value::String("generate_txs".to_string()))
.add_field(
"duration",
influxdb::Value::Integer(duration_as_ms(&duration) as i64),
)
.to_owned(),
);
let sz = transactions.len() / threads;
let chunks: Vec<_> = transactions.chunks(sz).collect();
{
let mut shared_txs_wl = shared_txs.write().unwrap();
for chunk in chunks {
shared_txs_wl.push_back(chunk.to_vec());
}
}
}
pub fn do_tx_transfers(
exit_signal: &Arc<AtomicBool>,
shared_txs: &SharedTransactions,
leader: &NodeInfo,
shared_tx_thread_count: &Arc<AtomicIsize>,
total_tx_sent_count: &Arc<AtomicUsize>,
thread_batch_sleep_ms: usize,
) {
let client = mk_client(&leader);
loop {
if thread_batch_sleep_ms > 0 {
sleep(Duration::from_millis(thread_batch_sleep_ms as u64));
}
let txs;
{
let mut shared_txs_wl = shared_txs.write().unwrap();
txs = shared_txs_wl.pop_front();
}
if let Some(txs0) = txs {
shared_tx_thread_count.fetch_add(1, Ordering::Relaxed);
println!(
"Transferring 1 unit {} times... to {}",
txs0.len(),
leader.tpu
);
let tx_len = txs0.len();
let transfer_start = Instant::now();
for tx in txs0 {
let now = timestamp();
if now > tx.1 && now - tx.1 > 1000 * 30 {
continue;
}
client.transfer_signed(&tx.0).unwrap();
}
shared_tx_thread_count.fetch_add(-1, Ordering::Relaxed);
total_tx_sent_count.fetch_add(tx_len, Ordering::Relaxed);
println!(
"Tx send done. {} ms {} tps",
duration_as_ms(&transfer_start.elapsed()),
tx_len as f32 / duration_as_s(&transfer_start.elapsed()),
);
solana_metrics::submit(
influxdb::Point::new("bench-tps")
.add_tag("op", influxdb::Value::String("do_tx_transfers".to_string()))
.add_field(
"duration",
influxdb::Value::Integer(duration_as_ms(&transfer_start.elapsed()) as i64),
)
.add_field("count", influxdb::Value::Integer(tx_len as i64))
.to_owned(),
);
}
if exit_signal.load(Ordering::Relaxed) {
break;
}
}
}
pub fn verify_funding_transfer(client: &mut ThinClient, tx: &Transaction, amount: u64) -> bool {
for a in &tx.account_keys[1..] {
if client.get_balance(a).unwrap_or(0) >= amount {
return true;
}
}
false
}
/// fund the dests keys by spending all of the source keys into MAX_SPENDS_PER_TX
/// on every iteration. This allows us to replay the transfers because the source is either empty,
/// or full
pub fn fund_keys(client: &mut ThinClient, source: &Keypair, dests: &[Keypair], tokens: u64) {
let total = tokens * dests.len() as u64;
let mut funded: Vec<(&Keypair, u64)> = vec![(source, total)];
let mut notfunded: Vec<&Keypair> = dests.iter().collect();
println!("funding keys {}", dests.len());
while !notfunded.is_empty() {
let mut new_funded: Vec<(&Keypair, u64)> = vec![];
let mut to_fund = vec![];
println!("creating from... {}", funded.len());
for f in &mut funded {
let max_units = cmp::min(notfunded.len(), MAX_SPENDS_PER_TX);
if max_units == 0 {
break;
}
let start = notfunded.len() - max_units;
let per_unit = f.1 / (max_units as u64);
let moves: Vec<_> = notfunded[start..]
.iter()
.map(|k| (k.pubkey(), per_unit))
.collect();
notfunded[start..]
.iter()
.for_each(|k| new_funded.push((k, per_unit)));
notfunded.truncate(start);
if !moves.is_empty() {
to_fund.push((f.0, moves));
}
}
// try to transfer a "few" at a time with recent last_id
// assume 4MB network buffers, and 512 byte packets
const FUND_CHUNK_LEN: usize = 4 * 1024 * 1024 / 512;
to_fund.chunks(FUND_CHUNK_LEN).for_each(|chunk| {
let mut tries = 0;
// this set of transactions just initializes us for bookkeeping
#[allow(clippy::clone_double_ref)] // sigh
let mut to_fund_txs: Vec<_> = chunk
.par_iter()
.map(|(k, m)| {
(
k.clone(),
SystemTransaction::new_move_many(k, &m, Hash::default(), 0),
)
})
.collect();
let amount = chunk[0].1[0].1;
while !to_fund_txs.is_empty() {
let receivers = to_fund_txs
.iter()
.fold(0, |len, (_, tx)| len + tx.instructions.len());
println!(
"{} {} to {} in {} txs",
if tries == 0 {
"transferring"
} else {
" retrying"
},
amount,
receivers,
to_fund_txs.len(),
);
let last_id = client.get_last_id();
// re-sign retained to_fund_txes with updated last_id
to_fund_txs.par_iter_mut().for_each(|(k, tx)| {
tx.sign(&[*k], last_id);
});
to_fund_txs.iter().for_each(|(_, tx)| {
client.transfer_signed(&tx).expect("transfer");
});
// retry anything that seems to have dropped through cracks
// again since these txs are all or nothing, they're fine to
// retry
to_fund_txs.retain(|(_, tx)| !verify_funding_transfer(client, &tx, amount));
tries += 1;
}
println!("transferred");
});
println!("funded: {} left: {}", new_funded.len(), notfunded.len());
funded = new_funded;
}
}
pub fn airdrop_tokens(
client: &mut ThinClient,
drone_addr: &SocketAddr,
id: &Keypair,
tx_count: u64,
) {
let starting_balance = client.poll_get_balance(&id.pubkey()).unwrap_or(0);
metrics_submit_token_balance(starting_balance);
println!("starting balance {}", starting_balance);
if starting_balance < tx_count {
let airdrop_amount = tx_count - starting_balance;
println!(
"Airdropping {:?} tokens from {} for {}",
airdrop_amount,
drone_addr,
id.pubkey(),
);
let last_id = client.get_last_id();
match request_airdrop_transaction(&drone_addr, &id.pubkey(), airdrop_amount, last_id) {
Ok(transaction) => {
let signature = client.transfer_signed(&transaction).unwrap();
client.poll_for_signature(&signature).unwrap();
}
Err(err) => {
panic!(
"Error requesting airdrop: {:?} to addr: {:?} amount: {}",
err, drone_addr, airdrop_amount
);
}
};
let current_balance = client.poll_get_balance(&id.pubkey()).unwrap_or_else(|e| {
println!("airdrop error {}", e);
starting_balance
});
println!("current balance {}...", current_balance);
metrics_submit_token_balance(current_balance);
if current_balance - starting_balance != airdrop_amount {
println!(
"Airdrop failed! {} {} {}",
id.pubkey(),
current_balance,
starting_balance
);
exit(1);
}
}
}
pub fn compute_and_report_stats(
maxes: &Arc<RwLock<Vec<(SocketAddr, NodeStats)>>>,
sample_period: u64,
tx_send_elapsed: &Duration,
total_tx_send_count: usize,
) {
// Compute/report stats
let mut max_of_maxes = 0.0;
let mut max_tx_count = 0;
let mut nodes_with_zero_tps = 0;
let mut total_maxes = 0.0;
println!(" Node address | Max TPS | Total Transactions");
println!("---------------------+---------------+--------------------");
for (sock, stats) in maxes.read().unwrap().iter() {
let maybe_flag = match stats.tx {
0 => "!!!!!",
_ => "",
};
println!(
"{:20} | {:13.2} | {} {}",
(*sock).to_string(),
stats.tps,
stats.tx,
maybe_flag
);
if stats.tps == 0.0 {
nodes_with_zero_tps += 1;
}
total_maxes += stats.tps;
if stats.tps > max_of_maxes {
max_of_maxes = stats.tps;
}
if stats.tx > max_tx_count {
max_tx_count = stats.tx;
}
}
if total_maxes > 0.0 {
let num_nodes_with_tps = maxes.read().unwrap().len() - nodes_with_zero_tps;
let average_max = total_maxes / num_nodes_with_tps as f64;
println!(
"\nAverage max TPS: {:.2}, {} nodes had 0 TPS",
average_max, nodes_with_zero_tps
);
}
println!(
"\nHighest TPS: {:.2} sampling period {}s max transactions: {} clients: {} drop rate: {:.2}",
max_of_maxes,
sample_period,
max_tx_count,
maxes.read().unwrap().len(),
(total_tx_send_count as u64 - max_tx_count) as f64 / total_tx_send_count as f64,
);
println!(
"\tAverage TPS: {}",
max_tx_count as f32 / duration_as_s(tx_send_elapsed)
);
}
// First transfer 3/4 of the tokens to the dest accounts
// then ping-pong 1/4 of the tokens back to the other account
// this leaves 1/4 token buffer in each account
pub fn should_switch_directions(num_tokens_per_account: u64, i: u64) -> bool {
i % (num_tokens_per_account / 4) == 0 && (i >= (3 * num_tokens_per_account) / 4)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_switch_directions() {
assert_eq!(should_switch_directions(20, 0), false);
assert_eq!(should_switch_directions(20, 1), false);
assert_eq!(should_switch_directions(20, 14), false);
assert_eq!(should_switch_directions(20, 15), true);
assert_eq!(should_switch_directions(20, 16), false);
assert_eq!(should_switch_directions(20, 19), false);
assert_eq!(should_switch_directions(20, 20), true);
assert_eq!(should_switch_directions(20, 21), false);
assert_eq!(should_switch_directions(20, 99), false);
assert_eq!(should_switch_directions(20, 100), true);
assert_eq!(should_switch_directions(20, 101), false);
}
}