uses cluster-nodes cache in broadcast-stage
* The current caching mechanism does not update cluster-nodes when the epoch (and therefore the epoch staked nodes) changes: https://github.com/solana-labs/solana/blob/19bd30262/core/src/broadcast_stage/standard_broadcast_run.rs#L332-L344 * Additionally, the cache update has a concurrency bug: the thread that performs the compare_and_swap may be blocked while it tries to obtain the write-lock on the cache, while other threads keep running ahead with the outdated cache (since the atomic timestamp has already been updated). In the new ClusterNodesCache, entries are keyed by epoch, so if the epoch changes the cluster-nodes will be recalculated. The time-to-live eviction policy is also encapsulated and rigidly enforced.
This commit is contained in:
parent
30bec3921e
commit
aa32738dd5
|
@ -44,6 +44,9 @@ pub(crate) mod broadcast_utils;
|
||||||
mod fail_entry_verification_broadcast_run;
|
mod fail_entry_verification_broadcast_run;
|
||||||
mod standard_broadcast_run;
|
mod standard_broadcast_run;
|
||||||
|
|
||||||
|
const CLUSTER_NODES_CACHE_NUM_EPOCH_CAP: usize = 8;
|
||||||
|
const CLUSTER_NODES_CACHE_TTL: Duration = Duration::from_secs(5);
|
||||||
|
|
||||||
pub(crate) const NUM_INSERT_THREADS: usize = 2;
|
pub(crate) const NUM_INSERT_THREADS: usize = 2;
|
||||||
pub(crate) type RetransmitSlotsSender = CrossbeamSender<HashMap<Slot, Arc<Bank>>>;
|
pub(crate) type RetransmitSlotsSender = CrossbeamSender<HashMap<Slot, Arc<Bank>>>;
|
||||||
pub(crate) type RetransmitSlotsReceiver = CrossbeamReceiver<HashMap<Slot, Arc<Bank>>>;
|
pub(crate) type RetransmitSlotsReceiver = CrossbeamReceiver<HashMap<Slot, Arc<Bank>>>;
|
||||||
|
|
|
@ -1,11 +1,14 @@
|
||||||
use super::*;
|
use {
|
||||||
use solana_entry::entry::Entry;
|
super::*,
|
||||||
use solana_ledger::shred::Shredder;
|
crate::cluster_nodes::ClusterNodesCache,
|
||||||
use solana_runtime::blockhash_queue::BlockhashQueue;
|
solana_entry::entry::Entry,
|
||||||
use solana_sdk::{
|
solana_ledger::shred::Shredder,
|
||||||
hash::Hash,
|
solana_runtime::blockhash_queue::BlockhashQueue,
|
||||||
signature::{Keypair, Signer},
|
solana_sdk::{
|
||||||
system_transaction,
|
hash::Hash,
|
||||||
|
signature::{Keypair, Signer},
|
||||||
|
system_transaction,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const MINIMUM_DUPLICATE_SLOT: Slot = 20;
|
pub const MINIMUM_DUPLICATE_SLOT: Slot = 20;
|
||||||
|
@ -32,10 +35,15 @@ pub(super) struct BroadcastDuplicatesRun {
|
||||||
recent_blockhash: Option<Hash>,
|
recent_blockhash: Option<Hash>,
|
||||||
prev_entry_hash: Option<Hash>,
|
prev_entry_hash: Option<Hash>,
|
||||||
num_slots_broadcasted: usize,
|
num_slots_broadcasted: usize,
|
||||||
|
cluster_nodes_cache: Arc<ClusterNodesCache<BroadcastStage>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl BroadcastDuplicatesRun {
|
impl BroadcastDuplicatesRun {
|
||||||
pub(super) fn new(shred_version: u16, config: BroadcastDuplicatesConfig) -> Self {
|
pub(super) fn new(shred_version: u16, config: BroadcastDuplicatesConfig) -> Self {
|
||||||
|
let cluster_nodes_cache = Arc::new(ClusterNodesCache::<BroadcastStage>::new(
|
||||||
|
CLUSTER_NODES_CACHE_NUM_EPOCH_CAP,
|
||||||
|
CLUSTER_NODES_CACHE_TTL,
|
||||||
|
));
|
||||||
Self {
|
Self {
|
||||||
config,
|
config,
|
||||||
duplicate_queue: BlockhashQueue::default(),
|
duplicate_queue: BlockhashQueue::default(),
|
||||||
|
@ -47,6 +55,7 @@ impl BroadcastDuplicatesRun {
|
||||||
recent_blockhash: None,
|
recent_blockhash: None,
|
||||||
prev_entry_hash: None,
|
prev_entry_hash: None,
|
||||||
num_slots_broadcasted: 0,
|
num_slots_broadcasted: 0,
|
||||||
|
cluster_nodes_cache,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -220,11 +229,8 @@ impl BroadcastRun for BroadcastDuplicatesRun {
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let ((slot, shreds), _) = receiver.lock().unwrap().recv()?;
|
let ((slot, shreds), _) = receiver.lock().unwrap().recv()?;
|
||||||
let root_bank = bank_forks.read().unwrap().root_bank();
|
let root_bank = bank_forks.read().unwrap().root_bank();
|
||||||
let epoch = root_bank.get_leader_schedule_epoch(slot);
|
|
||||||
let stakes = root_bank.epoch_staked_nodes(epoch);
|
|
||||||
// Broadcast data
|
// Broadcast data
|
||||||
let cluster_nodes =
|
let cluster_nodes = self.cluster_nodes_cache.get(slot, &root_bank, cluster_info);
|
||||||
ClusterNodes::<BroadcastStage>::new(cluster_info, &stakes.unwrap_or_default());
|
|
||||||
broadcast_shreds(
|
broadcast_shreds(
|
||||||
sock,
|
sock,
|
||||||
&shreds,
|
&shreds,
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
use super::*;
|
use {
|
||||||
use crate::cluster_nodes::ClusterNodes;
|
super::*,
|
||||||
use solana_ledger::shred::Shredder;
|
crate::cluster_nodes::ClusterNodesCache,
|
||||||
use solana_sdk::hash::Hash;
|
solana_ledger::shred::Shredder,
|
||||||
use solana_sdk::signature::Keypair;
|
solana_sdk::{hash::Hash, signature::Keypair},
|
||||||
use std::{thread::sleep, time::Duration};
|
std::{thread::sleep, time::Duration},
|
||||||
|
};
|
||||||
|
|
||||||
pub const NUM_BAD_SLOTS: u64 = 10;
|
pub const NUM_BAD_SLOTS: u64 = 10;
|
||||||
pub const SLOT_TO_RESOLVE: u64 = 32;
|
pub const SLOT_TO_RESOLVE: u64 = 32;
|
||||||
|
@ -14,15 +15,21 @@ pub(super) struct FailEntryVerificationBroadcastRun {
|
||||||
good_shreds: Vec<Shred>,
|
good_shreds: Vec<Shred>,
|
||||||
current_slot: Slot,
|
current_slot: Slot,
|
||||||
next_shred_index: u32,
|
next_shred_index: u32,
|
||||||
|
cluster_nodes_cache: Arc<ClusterNodesCache<BroadcastStage>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FailEntryVerificationBroadcastRun {
|
impl FailEntryVerificationBroadcastRun {
|
||||||
pub(super) fn new(shred_version: u16) -> Self {
|
pub(super) fn new(shred_version: u16) -> Self {
|
||||||
|
let cluster_nodes_cache = Arc::new(ClusterNodesCache::<BroadcastStage>::new(
|
||||||
|
CLUSTER_NODES_CACHE_NUM_EPOCH_CAP,
|
||||||
|
CLUSTER_NODES_CACHE_TTL,
|
||||||
|
));
|
||||||
Self {
|
Self {
|
||||||
shred_version,
|
shred_version,
|
||||||
good_shreds: vec![],
|
good_shreds: vec![],
|
||||||
current_slot: 0,
|
current_slot: 0,
|
||||||
next_shred_index: 0,
|
next_shred_index: 0,
|
||||||
|
cluster_nodes_cache,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -132,11 +139,8 @@ impl BroadcastRun for FailEntryVerificationBroadcastRun {
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let ((slot, shreds), _) = receiver.lock().unwrap().recv()?;
|
let ((slot, shreds), _) = receiver.lock().unwrap().recv()?;
|
||||||
let root_bank = bank_forks.read().unwrap().root_bank();
|
let root_bank = bank_forks.read().unwrap().root_bank();
|
||||||
let epoch = root_bank.get_leader_schedule_epoch(slot);
|
|
||||||
let stakes = root_bank.epoch_staked_nodes(epoch);
|
|
||||||
// Broadcast data
|
// Broadcast data
|
||||||
let cluster_nodes =
|
let cluster_nodes = self.cluster_nodes_cache.get(slot, &root_bank, cluster_info);
|
||||||
ClusterNodes::<BroadcastStage>::new(cluster_info, &stakes.unwrap_or_default());
|
|
||||||
broadcast_shreds(
|
broadcast_shreds(
|
||||||
sock,
|
sock,
|
||||||
&shreds,
|
&shreds,
|
||||||
|
|
|
@ -5,7 +5,9 @@ use {
|
||||||
broadcast_utils::{self, ReceiveResults},
|
broadcast_utils::{self, ReceiveResults},
|
||||||
*,
|
*,
|
||||||
},
|
},
|
||||||
crate::{broadcast_stage::broadcast_utils::UnfinishedSlotInfo, cluster_nodes::ClusterNodes},
|
crate::{
|
||||||
|
broadcast_stage::broadcast_utils::UnfinishedSlotInfo, cluster_nodes::ClusterNodesCache,
|
||||||
|
},
|
||||||
solana_entry::entry::Entry,
|
solana_entry::entry::Entry,
|
||||||
solana_ledger::shred::{
|
solana_ledger::shred::{
|
||||||
ProcessShredsStats, Shred, Shredder, MAX_DATA_SHREDS_PER_FEC_BLOCK,
|
ProcessShredsStats, Shred, Shredder, MAX_DATA_SHREDS_PER_FEC_BLOCK,
|
||||||
|
@ -29,12 +31,16 @@ pub struct StandardBroadcastRun {
|
||||||
shred_version: u16,
|
shred_version: u16,
|
||||||
last_datapoint_submit: Arc<AtomicInterval>,
|
last_datapoint_submit: Arc<AtomicInterval>,
|
||||||
num_batches: usize,
|
num_batches: usize,
|
||||||
cluster_nodes: Arc<RwLock<ClusterNodes<BroadcastStage>>>,
|
cluster_nodes_cache: Arc<ClusterNodesCache<BroadcastStage>>,
|
||||||
last_peer_update: Arc<AtomicInterval>,
|
last_peer_update: Arc<AtomicInterval>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StandardBroadcastRun {
|
impl StandardBroadcastRun {
|
||||||
pub(super) fn new(shred_version: u16) -> Self {
|
pub(super) fn new(shred_version: u16) -> Self {
|
||||||
|
let cluster_nodes_cache = Arc::new(ClusterNodesCache::<BroadcastStage>::new(
|
||||||
|
CLUSTER_NODES_CACHE_NUM_EPOCH_CAP,
|
||||||
|
CLUSTER_NODES_CACHE_TTL,
|
||||||
|
));
|
||||||
Self {
|
Self {
|
||||||
process_shreds_stats: ProcessShredsStats::default(),
|
process_shreds_stats: ProcessShredsStats::default(),
|
||||||
transmit_shreds_stats: Arc::default(),
|
transmit_shreds_stats: Arc::default(),
|
||||||
|
@ -45,7 +51,7 @@ impl StandardBroadcastRun {
|
||||||
shred_version,
|
shred_version,
|
||||||
last_datapoint_submit: Arc::default(),
|
last_datapoint_submit: Arc::default(),
|
||||||
num_batches: 0,
|
num_batches: 0,
|
||||||
cluster_nodes: Arc::default(),
|
cluster_nodes_cache,
|
||||||
last_peer_update: Arc::new(AtomicInterval::default()),
|
last_peer_update: Arc::new(AtomicInterval::default()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -342,12 +348,8 @@ impl StandardBroadcastRun {
|
||||||
// Get the list of peers to broadcast to
|
// Get the list of peers to broadcast to
|
||||||
let mut get_peers_time = Measure::start("broadcast::get_peers");
|
let mut get_peers_time = Measure::start("broadcast::get_peers");
|
||||||
let root_bank = bank_forks.read().unwrap().root_bank();
|
let root_bank = bank_forks.read().unwrap().root_bank();
|
||||||
let epoch = root_bank.get_leader_schedule_epoch(slot);
|
let cluster_nodes = self.cluster_nodes_cache.get(slot, &root_bank, cluster_info);
|
||||||
let stakes = root_bank.epoch_staked_nodes(epoch);
|
|
||||||
*self.cluster_nodes.write().unwrap() =
|
|
||||||
ClusterNodes::<BroadcastStage>::new(cluster_info, &stakes.unwrap_or_default());
|
|
||||||
get_peers_time.stop();
|
get_peers_time.stop();
|
||||||
let cluster_nodes = self.cluster_nodes.read().unwrap();
|
|
||||||
|
|
||||||
let mut transmit_stats = TransmitShredsStats::default();
|
let mut transmit_stats = TransmitShredsStats::default();
|
||||||
// Broadcast the shreds
|
// Broadcast the shreds
|
||||||
|
@ -363,7 +365,6 @@ impl StandardBroadcastRun {
|
||||||
bank_forks,
|
bank_forks,
|
||||||
cluster_info.socket_addr_space(),
|
cluster_info.socket_addr_space(),
|
||||||
)?;
|
)?;
|
||||||
drop(cluster_nodes);
|
|
||||||
transmit_time.stop();
|
transmit_time.stop();
|
||||||
|
|
||||||
transmit_stats.transmit_elapsed = transmit_time.as_us();
|
transmit_stats.transmit_elapsed = transmit_time.as_us();
|
||||||
|
|
Loading…
Reference in New Issue