tpu-client: Refactor to prep for async client (#25432)

Author: Jon Cinque, 2022-05-21 20:22:30 +02:00 (committed by GitHub)
parent 3b3046ab3d
commit 09ea899a2d
2 changed files with 203 additions and 105 deletions


@@ -9,3 +9,26 @@ pub(crate) fn new_progress_bar() -> ProgressBar {
     progress_bar.enable_steady_tick(100);
     progress_bar
 }
+
+pub(crate) fn set_message_for_confirmed_transactions(
+    progress_bar: &ProgressBar,
+    confirmed_transactions: u32,
+    total_transactions: usize,
+    block_height: Option<u64>,
+    last_valid_block_height: u64,
+    status: &str,
+) {
+    progress_bar.set_message(format!(
+        "{:>5.1}% | {:<40}{}",
+        confirmed_transactions as f64 * 100. / total_transactions as f64,
+        status,
+        match block_height {
+            Some(block_height) => format!(
+                " [block height {}; re-sign in {} blocks]",
+                block_height,
+                last_valid_block_height.saturating_sub(block_height),
+            ),
+            None => String::new(),
+        },
+    ));
+}
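The helper added above lifts the progress-bar message out of a closure in `TpuClient::send_and_confirm_messages_with_spinner` (second file below) so other clients, including the planned async one, can reuse it. A minimal usage sketch, assuming a caller inside the crate; the batch size and block heights are placeholder values, not taken from the commit:

```rust
// Illustrative only: drives the pub(crate) spinner helpers from a send loop.
use crate::spinner::{new_progress_bar, set_message_for_confirmed_transactions};

fn report_progress_demo() {
    let progress_bar = new_progress_bar();
    let total_transactions: usize = 42; // placeholder batch size
    let last_valid_block_height: u64 = 1_500; // placeholder re-sign deadline

    for confirmed in 0..=total_transactions as u32 {
        set_message_for_confirmed_transactions(
            &progress_bar,
            confirmed,               // transactions confirmed so far
            total_transactions,      // total transactions in the batch
            Some(1_450),             // current block height, if known
            last_valid_block_height, // height at which the blockhash expires
            "Checking transaction status...",
        );
    }
    progress_bar.finish_with_message("42 transactions confirmed");
}
```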


@@ -1,11 +1,11 @@
 use {
     crate::{
-        client_error::ClientError,
+        client_error::{ClientError, Result as ClientResult},
         connection_cache::send_wire_transaction_async,
         pubsub_client::{PubsubClient, PubsubClientError, PubsubClientSubscription},
         rpc_client::RpcClient,
         rpc_request::MAX_GET_SIGNATURE_STATUSES_QUERY_ITEMS,
-        rpc_response::SlotUpdate,
+        rpc_response::{RpcContactInfo, SlotUpdate},
        spinner,
     },
     bincode::serialize,
@@ -13,6 +13,7 @@ use {
     solana_sdk::{
         clock::Slot,
         commitment_config::CommitmentConfig,
+        epoch_info::EpochInfo,
         message::Message,
         pubkey::Pubkey,
         signature::SignerError,
@@ -29,9 +30,9 @@ use {
             Arc, RwLock,
         },
         thread::{sleep, JoinHandle},
-        time::{Duration, Instant},
     },
     thiserror::Error,
+    tokio::time::{Duration, Instant},
 };
 
 #[derive(Error, Debug)]
@@ -56,6 +57,11 @@ pub const DEFAULT_FANOUT_SLOTS: u64 = 12;
 /// Maximum number of slots used to build TPU socket fanout set
 pub const MAX_FANOUT_SLOTS: u64 = 100;
 
+/// Send at ~100 TPS
+pub(crate) const SEND_TRANSACTION_INTERVAL: Duration = Duration::from_millis(10);
+/// Retry batch send after 4 seconds
+pub(crate) const TRANSACTION_RESEND_INTERVAL: Duration = Duration::from_secs(4);
+
 /// Config params for `TpuClient`
 #[derive(Clone, Debug)]
 pub struct TpuClientConfig {
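Promoting these constants to module scope makes the pacing reusable outside `send_and_confirm_messages_with_spinner`. The arithmetic behind the doc comments: one send every 10 ms works out to at most ~100 transactions per second, and a full re-broadcast of the pending batch happens at most once every 4 seconds. A rough sketch of that pacing; the `send` callback and `resend_pending` wrapper are hypothetical, only the two intervals come from the code above:

```rust
// Illustrative pacing sketch (not from the commit).
use std::thread::sleep;

fn resend_pending(pending: &[Vec<u8>], mut send: impl FnMut(&[u8])) {
    for wire_transaction in pending {
        send(wire_transaction.as_slice());
        // 10 ms between sends caps a pass over the batch at ~100 TPS.
        sleep(SEND_TRANSACTION_INTERVAL);
    }
    // A caller would wait TRANSACTION_RESEND_INTERVAL (4 s) before the next pass.
}
```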
@@ -156,10 +162,6 @@ impl TpuClient {
         signers: &T,
     ) -> Result<Vec<Option<TransactionError>>> {
         let mut expired_blockhash_retries = 5;
-        /* Send at ~100 TPS */
-        const SEND_TRANSACTION_INTERVAL: Duration = Duration::from_millis(10);
-        /* Retry batch send after 4 seconds */
-        const TRANSACTION_RESEND_INTERVAL: Duration = Duration::from_secs(4);
 
         let progress_bar = spinner::new_progress_bar();
         progress_bar.set_message("Setting up...");
@@ -169,29 +171,11 @@ impl TpuClient {
             .enumerate()
             .map(|(i, message)| (i, Transaction::new_unsigned(message.clone())))
             .collect::<Vec<_>>();
-        let num_transactions = transactions.len() as f64;
+        let total_transactions = transactions.len();
         let mut transaction_errors = vec![None; transactions.len()];
-        let set_message = |confirmed_transactions,
-                           block_height: Option<u64>,
-                           last_valid_block_height: u64,
-                           status: &str| {
-            progress_bar.set_message(format!(
-                "{:>5.1}% | {:<40}{}",
-                confirmed_transactions as f64 * 100. / num_transactions,
-                status,
-                match block_height {
-                    Some(block_height) => format!(
-                        " [block height {}; re-sign in {} blocks]",
-                        block_height,
-                        last_valid_block_height.saturating_sub(block_height),
-                    ),
-                    None => String::new(),
-                },
-            ));
-        };
         let mut confirmed_transactions = 0;
         let mut block_height = self.rpc_client.get_block_height()?;
 
         while expired_blockhash_retries > 0 {
             let (blockhash, last_valid_block_height) = self
                 .rpc_client
@@ -213,8 +197,10 @@ impl TpuClient {
                         if !self.send_transaction(transaction) {
                             let _result = self.rpc_client.send_transaction(transaction).ok();
                         }
-                        set_message(
+                        spinner::set_message_for_confirmed_transactions(
+                            &progress_bar,
                             confirmed_transactions,
+                            total_transactions,
                             None, //block_height,
                             last_valid_block_height,
                             &format!("Sending {}/{} transactions", index + 1, num_transactions,),
@@ -226,8 +212,10 @@ impl TpuClient {
 
                 // Wait for the next block before checking for transaction statuses
                 let mut block_height_refreshes = 10;
-                set_message(
+                spinner::set_message_for_confirmed_transactions(
+                    &progress_bar,
                     confirmed_transactions,
+                    total_transactions,
                     Some(block_height),
                     last_valid_block_height,
                     &format!("Waiting for next block, {} pending...", num_transactions),
@@ -269,8 +257,10 @@ impl TpuClient {
                         }
                     }
                 }
-                set_message(
+                spinner::set_message_for_confirmed_transactions(
+                    &progress_bar,
                     confirmed_transactions,
+                    total_transactions,
                     Some(block_height),
                     last_valid_block_height,
                     "Checking transaction status...",
@@ -304,7 +294,20 @@ impl Drop for TpuClient {
     }
 }
 
-struct LeaderTpuCache {
+pub(crate) struct LeaderTpuCacheUpdateInfo {
+    pub(crate) maybe_cluster_nodes: Option<ClientResult<Vec<RpcContactInfo>>>,
+    pub(crate) maybe_epoch_info: Option<ClientResult<EpochInfo>>,
+    pub(crate) maybe_slot_leaders: Option<ClientResult<Vec<Pubkey>>>,
+}
+impl LeaderTpuCacheUpdateInfo {
+    pub(crate) fn has_some(&self) -> bool {
+        self.maybe_cluster_nodes.is_some()
+            || self.maybe_epoch_info.is_some()
+            || self.maybe_slot_leaders.is_some()
+    }
+}
+
+pub(crate) struct LeaderTpuCache {
     first_slot: Slot,
     leaders: Vec<Pubkey>,
     leader_tpu_map: HashMap<Pubkey, SocketAddr>,
@@ -313,26 +316,41 @@ struct LeaderTpuCache {
 }
 
 impl LeaderTpuCache {
-    fn new(rpc_client: &RpcClient, first_slot: Slot) -> Result<Self> {
-        let slots_in_epoch = rpc_client.get_epoch_info()?.slots_in_epoch;
-        let leaders = Self::fetch_slot_leaders(rpc_client, first_slot, slots_in_epoch)?;
-        let leader_tpu_map = Self::fetch_cluster_tpu_sockets(rpc_client)?;
-        Ok(Self {
+    pub(crate) fn new(
+        first_slot: Slot,
+        slots_in_epoch: Slot,
+        leaders: Vec<Pubkey>,
+        cluster_nodes: Vec<RpcContactInfo>,
+    ) -> Self {
+        let leader_tpu_map = Self::extract_cluster_tpu_sockets(cluster_nodes);
+        Self {
             first_slot,
             leaders,
             leader_tpu_map,
             slots_in_epoch,
             last_epoch_info_slot: first_slot,
-        })
+        }
     }
 
     // Last slot that has a cached leader pubkey
-    fn last_slot(&self) -> Slot {
+    pub(crate) fn last_slot(&self) -> Slot {
         self.first_slot + self.leaders.len().saturating_sub(1) as u64
     }
 
+    pub(crate) fn slot_info(&self) -> (Slot, Slot, Slot) {
+        (
+            self.last_slot(),
+            self.last_epoch_info_slot,
+            self.slots_in_epoch,
+        )
+    }
+
     // Get the TPU sockets for the current leader and upcoming leaders according to fanout size
-    fn get_leader_sockets(&self, current_slot: Slot, fanout_slots: u64) -> Vec<SocketAddr> {
+    pub(crate) fn get_leader_sockets(
+        &self,
+        current_slot: Slot,
+        fanout_slots: u64,
+    ) -> Vec<SocketAddr> {
         let mut leader_set = HashSet::new();
         let mut leader_sockets = Vec::new();
         for leader_slot in current_slot..current_slot + fanout_slots {
@@ -358,7 +376,7 @@ impl LeaderTpuCache {
         leader_sockets
     }
 
-    fn get_slot_leader(&self, slot: Slot) -> Option<&Pubkey> {
+    pub(crate) fn get_slot_leader(&self, slot: Slot) -> Option<&Pubkey> {
         if slot >= self.first_slot {
             let index = slot - self.first_slot;
             self.leaders.get(index as usize)
@@ -367,9 +385,10 @@ impl LeaderTpuCache {
         }
     }
 
-    fn fetch_cluster_tpu_sockets(rpc_client: &RpcClient) -> Result<HashMap<Pubkey, SocketAddr>> {
-        let cluster_contact_info = rpc_client.get_cluster_nodes()?;
-        Ok(cluster_contact_info
+    pub(crate) fn extract_cluster_tpu_sockets(
+        cluster_contact_info: Vec<RpcContactInfo>,
+    ) -> HashMap<Pubkey, SocketAddr> {
+        cluster_contact_info
             .into_iter()
             .filter_map(|contact_info| {
                 Some((
@@ -377,16 +396,97 @@ impl LeaderTpuCache {
                     contact_info.tpu?,
                 ))
             })
-            .collect())
+            .collect()
     }
 
-    fn fetch_slot_leaders(
-        rpc_client: &RpcClient,
-        start_slot: Slot,
-        slots_in_epoch: Slot,
-    ) -> Result<Vec<Pubkey>> {
-        let fanout = (2 * MAX_FANOUT_SLOTS).min(slots_in_epoch);
-        Ok(rpc_client.get_slot_leaders(start_slot, fanout)?)
+    pub(crate) fn fanout(slots_in_epoch: Slot) -> Slot {
+        (2 * MAX_FANOUT_SLOTS).min(slots_in_epoch)
+    }
+
+    pub(crate) fn update_all(
+        &mut self,
+        estimated_current_slot: Slot,
+        cache_update_info: LeaderTpuCacheUpdateInfo,
+    ) -> (bool, bool) {
+        let mut has_error = false;
+        let mut cluster_refreshed = false;
+        if let Some(cluster_nodes) = cache_update_info.maybe_cluster_nodes {
+            match cluster_nodes {
+                Ok(cluster_nodes) => {
+                    let leader_tpu_map = LeaderTpuCache::extract_cluster_tpu_sockets(cluster_nodes);
+                    self.leader_tpu_map = leader_tpu_map;
+                    cluster_refreshed = true;
+                }
+                Err(err) => {
+                    warn!("Failed to fetch cluster tpu sockets: {}", err);
+                    has_error = true;
+                }
+            }
+        }
+
+        if let Some(Ok(epoch_info)) = cache_update_info.maybe_epoch_info {
+            self.slots_in_epoch = epoch_info.slots_in_epoch;
+            self.last_epoch_info_slot = estimated_current_slot;
+        }
+
+        if let Some(slot_leaders) = cache_update_info.maybe_slot_leaders {
+            match slot_leaders {
+                Ok(slot_leaders) => {
+                    self.first_slot = estimated_current_slot;
+                    self.leaders = slot_leaders;
+                }
+                Err(err) => {
+                    warn!(
+                        "Failed to fetch slot leaders (current estimated slot: {}): {}",
+                        estimated_current_slot, err
+                    );
+                    has_error = true;
+                }
+            }
+        }
+        (has_error, cluster_refreshed)
     }
 }
+
+fn maybe_fetch_cache_info(
+    leader_tpu_cache: &Arc<RwLock<LeaderTpuCache>>,
+    last_cluster_refresh: Instant,
+    rpc_client: &RpcClient,
+    recent_slots: &RecentLeaderSlots,
+) -> LeaderTpuCacheUpdateInfo {
+    // Refresh cluster TPU ports every 5min in case validators restart with new port configuration
+    // or new validators come online
+    let maybe_cluster_nodes = if last_cluster_refresh.elapsed() > Duration::from_secs(5 * 60) {
+        Some(rpc_client.get_cluster_nodes())
+    } else {
+        None
+    };
+
+    let estimated_current_slot = recent_slots.estimated_current_slot();
+    let (last_slot, last_epoch_info_slot, slots_in_epoch) = {
+        let leader_tpu_cache = leader_tpu_cache.read().unwrap();
+        leader_tpu_cache.slot_info()
+    };
+    let maybe_epoch_info =
+        if estimated_current_slot >= last_epoch_info_slot.saturating_sub(slots_in_epoch) {
+            Some(rpc_client.get_epoch_info())
+        } else {
+            None
+        };
+
+    let maybe_slot_leaders = if estimated_current_slot >= last_slot.saturating_sub(MAX_FANOUT_SLOTS)
+    {
+        Some(rpc_client.get_slot_leaders(
+            estimated_current_slot,
+            LeaderTpuCache::fanout(slots_in_epoch),
+        ))
+    } else {
+        None
+    };
+    LeaderTpuCacheUpdateInfo {
+        maybe_cluster_nodes,
+        maybe_epoch_info,
+        maybe_slot_leaders,
+    }
+}
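Taken together, `maybe_fetch_cache_info` and `update_all` split leader-cache maintenance into a fetch phase that only briefly takes the read lock and an apply phase that takes the write lock, which is the shape an async client needs so RPC calls are never awaited while the lock is held. A condensed sketch of one refresh cycle for a caller inside the crate; every item used here comes from this diff, but the wrapper function itself is illustrative:

```rust
// Illustrative only: one refresh cycle, equivalent to what the rewritten
// LeaderTpuService::run loop (last hunk below) does on every iteration.
fn refresh_leader_cache_once(
    rpc_client: &RpcClient,
    recent_slots: &RecentLeaderSlots,
    leader_tpu_cache: &Arc<RwLock<LeaderTpuCache>>,
    last_cluster_refresh: Instant,
) -> (bool, bool) {
    // Phase 1: perform all RPC calls without holding the write lock.
    let cache_update_info = maybe_fetch_cache_info(
        leader_tpu_cache,
        last_cluster_refresh,
        rpc_client,
        recent_slots,
    );

    // Phase 2: apply whatever was fetched under a short write lock.
    if cache_update_info.has_some() {
        let mut cache = leader_tpu_cache.write().unwrap();
        cache.update_all(recent_slots.estimated_current_slot(), cache_update_info)
    } else {
        (false, false)
    }
}
```

The returned pair is `(has_error, cluster_refreshed)`: the run loop uses the first to shorten its sleep on errors and the second to reset the five-minute cluster-refresh timer.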
@@ -394,15 +494,15 @@ impl LeaderTpuCache {
 const MAX_SLOT_SKIP_DISTANCE: u64 = 48;
 
 #[derive(Clone, Debug)]
-struct RecentLeaderSlots(Arc<RwLock<VecDeque<Slot>>>);
+pub(crate) struct RecentLeaderSlots(Arc<RwLock<VecDeque<Slot>>>);
 impl RecentLeaderSlots {
-    fn new(current_slot: Slot) -> Self {
+    pub(crate) fn new(current_slot: Slot) -> Self {
         let mut recent_slots = VecDeque::new();
         recent_slots.push_back(current_slot);
         Self(Arc::new(RwLock::new(recent_slots)))
     }
 
-    fn record_slot(&self, current_slot: Slot) {
+    pub(crate) fn record_slot(&self, current_slot: Slot) {
         let mut recent_slots = self.0.write().unwrap();
         recent_slots.push_back(current_slot);
         // 12 recent slots should be large enough to avoid a misbehaving
@@ -413,7 +513,7 @@ impl RecentLeaderSlots {
     }
 
     // Estimate the current slot from recent slot notifications.
-    fn estimated_current_slot(&self) -> Slot {
+    pub(crate) fn estimated_current_slot(&self) -> Slot {
         let mut recent_slots: Vec<Slot> = self.0.read().unwrap().iter().cloned().collect();
         assert!(!recent_slots.is_empty());
         recent_slots.sort_unstable();
@@ -458,7 +558,16 @@ impl LeaderTpuService {
         let start_slot = rpc_client.get_slot_with_commitment(CommitmentConfig::processed())?;
 
         let recent_slots = RecentLeaderSlots::new(start_slot);
-        let leader_tpu_cache = Arc::new(RwLock::new(LeaderTpuCache::new(&rpc_client, start_slot)?));
+        let slots_in_epoch = rpc_client.get_epoch_info()?.slots_in_epoch;
+        let leaders =
+            rpc_client.get_slot_leaders(start_slot, LeaderTpuCache::fanout(slots_in_epoch))?;
+        let cluster_nodes = rpc_client.get_cluster_nodes()?;
+        let leader_tpu_cache = Arc::new(RwLock::new(LeaderTpuCache::new(
+            start_slot,
+            slots_in_epoch,
+            leaders,
+            cluster_nodes,
+        )));
 
         let subscription = if !websocket_url.is_empty() {
             let recent_slots = recent_slots.clone();
@@ -530,59 +639,25 @@ impl LeaderTpuService {
             }
 
             // Sleep a few slots before checking if leader cache needs to be refreshed again
-            std::thread::sleep(Duration::from_millis(sleep_ms));
+            sleep(Duration::from_millis(sleep_ms));
             sleep_ms = 1000;
 
-            // Refresh cluster TPU ports every 5min in case validators restart with new port configuration
-            // or new validators come online
-            if last_cluster_refresh.elapsed() > Duration::from_secs(5 * 60) {
-                match LeaderTpuCache::fetch_cluster_tpu_sockets(&rpc_client) {
-                    Ok(leader_tpu_map) => {
-                        leader_tpu_cache.write().unwrap().leader_tpu_map = leader_tpu_map;
-                        last_cluster_refresh = Instant::now();
-                    }
-                    Err(err) => {
-                        warn!("Failed to fetch cluster tpu sockets: {}", err);
-                        sleep_ms = 100;
-                    }
-                }
-            }
-
-            let estimated_current_slot = recent_slots.estimated_current_slot();
-            let (last_slot, last_epoch_info_slot, mut slots_in_epoch) = {
-                let leader_tpu_cache = leader_tpu_cache.read().unwrap();
-                (
-                    leader_tpu_cache.last_slot(),
-                    leader_tpu_cache.last_epoch_info_slot,
-                    leader_tpu_cache.slots_in_epoch,
-                )
-            };
-            if estimated_current_slot >= last_epoch_info_slot.saturating_sub(slots_in_epoch) {
-                if let Ok(epoch_info) = rpc_client.get_epoch_info() {
-                    slots_in_epoch = epoch_info.slots_in_epoch;
-                    let mut leader_tpu_cache = leader_tpu_cache.write().unwrap();
-                    leader_tpu_cache.slots_in_epoch = slots_in_epoch;
-                    leader_tpu_cache.last_epoch_info_slot = estimated_current_slot;
-                }
-            }
-
-            if estimated_current_slot >= last_slot.saturating_sub(MAX_FANOUT_SLOTS) {
-                match LeaderTpuCache::fetch_slot_leaders(
-                    &rpc_client,
-                    estimated_current_slot,
-                    slots_in_epoch,
-                ) {
-                    Ok(slot_leaders) => {
-                        let mut leader_tpu_cache = leader_tpu_cache.write().unwrap();
-                        leader_tpu_cache.first_slot = estimated_current_slot;
-                        leader_tpu_cache.leaders = slot_leaders;
-                    }
-                    Err(err) => {
-                        warn!(
-                            "Failed to fetch slot leaders (current estimated slot: {}): {}",
-                            estimated_current_slot, err
-                        );
-                        sleep_ms = 100;
-                    }
-                }
+            let cache_update_info = maybe_fetch_cache_info(
+                &leader_tpu_cache,
+                last_cluster_refresh,
+                &rpc_client,
+                &recent_slots,
+            );
+
+            if cache_update_info.has_some() {
+                let mut leader_tpu_cache = leader_tpu_cache.write().unwrap();
+                let (has_error, cluster_refreshed) = leader_tpu_cache
+                    .update_all(recent_slots.estimated_current_slot(), cache_update_info);
+                if has_error {
+                    sleep_ms = 100;
+                }
+                if cluster_refreshed {
+                    last_cluster_refresh = Instant::now();
+                }
             }
         }
     }
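With the fetch/apply split in place and `Duration`/`Instant` now coming from `tokio::time`, the same loop body can be lifted into an async task, which is the follow-up this commit prepares for. A hypothetical sketch of that direction; the async fetch helper (`maybe_fetch_cache_info_async`) and the nonblocking RPC client wiring are assumptions, not part of this commit:

```rust
// Hypothetical follow-up (not in this commit): the refresh cycle as an async
// task. Assumes an async counterpart of maybe_fetch_cache_info built on
// crate::nonblocking::rpc_client::RpcClient.
async fn run_async(
    rpc_client: Arc<crate::nonblocking::rpc_client::RpcClient>,
    recent_slots: RecentLeaderSlots,
    leader_tpu_cache: Arc<RwLock<LeaderTpuCache>>,
    exit: Arc<AtomicBool>,
) {
    let mut last_cluster_refresh = Instant::now();
    let mut sleep_ms = 1000;
    while !exit.load(Ordering::Relaxed) {
        tokio::time::sleep(Duration::from_millis(sleep_ms)).await;
        sleep_ms = 1000;

        // Await the RPC calls without holding the cache lock...
        let cache_update_info = maybe_fetch_cache_info_async(
            &leader_tpu_cache,
            last_cluster_refresh,
            &rpc_client,
            &recent_slots,
        )
        .await;

        // ...then apply them under a short write lock, exactly as in run().
        if cache_update_info.has_some() {
            let mut cache = leader_tpu_cache.write().unwrap();
            let (has_error, cluster_refreshed) =
                cache.update_all(recent_slots.estimated_current_slot(), cache_update_info);
            if has_error {
                sleep_ms = 100;
            }
            if cluster_refreshed {
                last_cluster_refresh = Instant::now();
            }
        }
    }
}
```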