parent e1643c91c4
commit 59413b3124
@@ -11,7 +11,7 @@ use std::{
 pub const VOTE_THRESHOLD_DEPTH: usize = 8;
 pub const VOTE_THRESHOLD_SIZE: f64 = 2f64 / 3f64;

-#[derive(Default, Debug)]
+#[derive(Default, Debug, Clone)]
 pub struct StakeLockout {
     lockout: u64,
     stake: u64,
@@ -258,7 +258,7 @@ impl Tower {
     }

     // a slot is not recent if it's older than the newest vote we have
-    fn is_recent(&self, slot: u64) -> bool {
+    pub fn is_recent(&self, slot: u64) -> bool {
         if let Some(last_vote) = self.lockouts.votes.back() {
             if slot <= last_vote.slot {
                 return false;
@@ -316,7 +316,15 @@
         let vote = lockouts.nth_recent_vote(self.threshold_depth);
         if let Some(vote) = vote {
             if let Some(fork_stake) = stake_lockouts.get(&vote.slot) {
-                (fork_stake.stake as f64 / total_staked as f64) > self.threshold_size
+                let lockout = fork_stake.stake as f64 / total_staked as f64;
+                trace!(
+                    "fork_stake {} {} {} {}",
+                    slot,
+                    lockout,
+                    fork_stake.stake,
+                    total_staked
+                );
+                lockout > self.threshold_size
             } else {
                 false
             }
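The rewrite above splits the threshold comparison so the stake ratio can be traced before it is returned. As a cross-check, here is a standalone sketch of the rule itself, with illustrative names rather than the crate's API: a vote at `threshold_depth` passes only if the fork holds more than `threshold_size` (2/3 by default) of the total stake.

```rust
/// Sketch of the vote-stake-threshold rule: accept only if more than
/// `threshold_size` of the total stake is locked out on this fork.
fn passes_threshold(fork_stake: u64, total_staked: u64, threshold_size: f64) -> bool {
    let lockout = fork_stake as f64 / total_staked as f64;
    lockout > threshold_size
}

fn main() {
    // 67 of 100 staked lamports: 0.67 > 2/3 holds; 0.66 does not.
    assert!(passes_threshold(67, 100, 2f64 / 3f64));
    assert!(!passes_threshold(66, 100, 2f64 / 3f64));
}
```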
@@ -33,6 +33,7 @@ pub mod gossip_service;
 pub mod ledger_cleanup_service;
 pub mod local_vote_signer_service;
 pub mod packet;
+pub mod partition_cfg;
 pub mod poh_recorder;
 pub mod poh_service;
 pub mod recvmmsg;
@@ -0,0 +1,92 @@
+use solana_ledger::leader_schedule_cache::LeaderScheduleCache;
+use solana_ledger::shred::Shred;
+use solana_runtime::bank::Bank;
+use solana_sdk::pubkey::Pubkey;
+use solana_sdk::timing::timestamp;
+use std::collections::HashSet;
+use std::sync::Arc;
+use std::sync::RwLock;
+
+///Configure a partition in the retransmit stage
+#[derive(Debug, Clone)]
+pub struct Partition {
+    pub num_partitions: usize,
+    pub my_partition: usize,
+    pub start_ts: u64,
+    pub end_ts: u64,
+    leaders: Arc<RwLock<Vec<Pubkey>>>,
+}
+impl Default for Partition {
+    fn default() -> Self {
+        Self {
+            num_partitions: 0,
+            my_partition: 0,
+            start_ts: 0,
+            end_ts: 0,
+            leaders: Arc::new(RwLock::new(vec![])),
+        }
+    }
+}
+
+#[derive(Default, Debug, Clone)]
+pub struct PartitionCfg {
+    partitions: Vec<Partition>,
+}
+
+impl PartitionCfg {
+    pub fn new(partitions: Vec<Partition>) -> Self {
+        Self { partitions }
+    }
+    pub fn is_connected(
+        &self,
+        bank: &Option<Arc<Bank>>,
+        leader_schedule_cache: &Arc<LeaderScheduleCache>,
+        shred: &Shred,
+    ) -> bool {
+        if bank.is_none() {
+            return true;
+        }
+        let bank = bank.as_ref().unwrap().clone();
+        let slot_leader_pubkey = leader_schedule_cache.slot_leader_at(shred.slot(), Some(&bank));
+        let slot_leader_pubkey = slot_leader_pubkey.unwrap_or_default();
+        let time = timestamp();
+        for p in &self.partitions {
+            let is_time = (p.start_ts <= time) && (time < p.end_ts);
+            if !is_time {
+                continue;
+            }
+            trace!("PARTITION_TEST partition time! {}", p.my_partition);
+            if p.num_partitions == 0 {
+                continue;
+            }
+            if p.leaders.read().unwrap().is_empty() {
+                let mut leader_vec = p.leaders.write().unwrap();
+                let mut leaders: Vec<Pubkey> = bank.vote_accounts().keys().cloned().collect();
+                leaders.sort();
+                *leader_vec = leaders;
+                warn!("PARTITION_TEST partition enabled {}", p.my_partition);
+            }
+            let is_connected: bool = {
+                let leaders = p.leaders.read().unwrap();
+                let start = p.my_partition * leaders.len() / p.num_partitions;
+                let partition_size = leaders.len() / p.num_partitions;
+                let end = start + partition_size;
+                let end = if leaders.len() - end < partition_size {
+                    leaders.len()
+                } else {
+                    end
+                };
+                let my_leaders: HashSet<_> = leaders[start..end].iter().collect();
+                my_leaders.contains(&slot_leader_pubkey)
+            };
+            if is_connected {
+                trace!("PARTITION_TEST connected {}", p.my_partition);
+                continue;
+            }
+            trace!("PARTITION_TEST not connected {}", p.my_partition);
+            return false;
+        }
+        trace!("PARTITION_TEST connected");
+        true
+    }
+}
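`is_connected` carves the sorted leader set into contiguous ranges, with the final range absorbing any remainder. A standalone sketch of just that slicing arithmetic (hypothetical helper, not part of the patch):

```rust
/// Sketch of the leader-range arithmetic in `is_connected`: partition `my`
/// of `num` owns `[start, end)` of the sorted leader list; the last range
/// is widened to absorb the leftover leaders.
fn partition_range(leader_count: usize, my: usize, num: usize) -> (usize, usize) {
    let start = my * leader_count / num;
    let partition_size = leader_count / num;
    let end = start + partition_size;
    let end = if leader_count - end < partition_size {
        leader_count
    } else {
        end
    };
    (start, end)
}

fn main() {
    // 7 leaders split across 3 partitions: sizes 2, 2, and 3 (remainder).
    assert_eq!(partition_range(7, 0, 3), (0, 2));
    assert_eq!(partition_range(7, 1, 3), (2, 4));
    assert_eq!(partition_range(7, 2, 3), (4, 7));
}
```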
@@ -42,6 +42,8 @@ use std::{

 pub const MAX_ENTRY_RECV_PER_ITER: usize = 512;

+type VoteAndPoHBank = (Option<(Arc<Bank>, u64)>, Option<Arc<Bank>>);
+
 // Implement a destructor for the ReplayStage thread to signal it exited
 // even on panics
 struct Finalizer {
@@ -76,6 +78,20 @@ struct ReplaySlotStats {
     replay_start: Instant,
 }

+#[derive(Debug, Clone, Default)]
+struct ForkStats {
+    weight: u128,
+    total_staked: u64,
+    slot: Slot,
+    block_height: u64,
+    has_voted: bool,
+    is_recent: bool,
+    vote_threshold: bool,
+    is_locked_out: bool,
+    stake_lockouts: HashMap<u64, StakeLockout>,
+    computed: bool,
+}
+
 impl ReplaySlotStats {
     pub fn new(slot: Slot) -> Self {
         Self {
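`ForkStats` carries a `computed` flag so the expensive lockout aggregation runs once per bank and is reused on later replay iterations. A minimal sketch of that memoization pattern, with illustrative types rather than the crate's:

```rust
#[derive(Default, Clone)]
struct Stats {
    weight: u128,
    computed: bool,
}

// Sketch of the `computed` memoization in ForkStats: the expensive
// aggregation runs once; later passes reuse the cached value.
fn get_or_compute(stats: &mut Stats, expensive: impl FnOnce() -> u128) -> u128 {
    if !stats.computed {
        stats.weight = expensive();
        stats.computed = true;
    }
    stats.weight
}

fn main() {
    let mut s = Stats::default();
    assert_eq!(get_or_compute(&mut s, || 42), 42);
    // Second call does not re-run the closure.
    assert_eq!(get_or_compute(&mut s, || panic!("not recomputed")), 42);
}
```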
@@ -123,6 +139,7 @@ struct ForkProgress {
     started_ms: u64,
     is_dead: bool,
     stats: ReplaySlotStats,
+    fork_stats: ForkStats,
 }

 impl ForkProgress {
@@ -135,6 +152,7 @@ impl ForkProgress {
             started_ms: timing::timestamp(),
             is_dead: false,
             stats: ReplaySlotStats::new(slot),
+            fork_stats: ForkStats::default(),
         }
     }
 }
@@ -186,7 +204,8 @@ impl ReplayStage {
             let _exit = Finalizer::new(exit_.clone());
             let mut progress = HashMap::new();
             let mut current_leader = None;
+            let mut last_reset = Hash::default();
+            let mut partition = false;
             loop {
                 let now = Instant::now();
                 // Stop getting entries if we get exit signal
@@ -211,16 +230,15 @@
                 );

                 let ancestors = Arc::new(bank_forks.read().unwrap().ancestors());
-                let votable = Self::generate_votable_banks(
-                    &ancestors,
-                    &bank_forks,
-                    &tower,
-                    &mut progress,
-                );
-                if let Some((_, bank, _, total_staked)) = votable.into_iter().last() {
+                loop {
+                    let (vote_bank, heaviest) =
+                        Self::select_fork(&ancestors, &bank_forks, &tower, &mut progress);
+                    let done = vote_bank.is_none();
+                    let mut vote_bank_slot = 0;
+                    let reset_bank = vote_bank.as_ref().map(|b| b.0.clone()).or(heaviest);
+                    if let Some((bank, total_staked)) = vote_bank {
+                        info!("voting: {}", bank.slot());
                     subscriptions.notify_subscribers(bank.slot(), &bank_forks);

                     if let Some(votable_leader) =
                         leader_schedule_cache.slot_leader_at(bank.slot(), Some(&bank))
                     {
@@ -231,7 +249,7 @@
                             &votable_leader,
                         );
                     }
+                    vote_bank_slot = bank.slot();
                     Self::handle_votable_bank(
                         &bank,
                         &bank_forks,
@@ -247,7 +265,9 @@
                         &lockouts_sender,
                         &snapshot_package_sender,
                     )?;
+                    }
+                    if let Some(bank) = reset_bank {
+                        if last_reset != bank.last_blockhash() {
                     Self::reset_poh_recorder(
                         &my_pubkey,
                         &blocktree,
@@ -255,7 +275,23 @@
                         &poh_recorder,
                         &leader_schedule_cache,
                     );
+                            last_reset = bank.last_blockhash();
                     tpu_has_bank = false;
+                            info!("vote bank: {} reset bank: {}", vote_bank_slot, bank.slot());
+                            if !partition && vote_bank_slot != bank.slot() {
+                                warn!("PARTITION DETECTED waiting to join fork: {} last vote: {:?}", bank.slot(), tower.last_vote());
+                                inc_new_counter_info!("replay_stage-partition_detected", 1);
+                                partition = true;
+                            } else if partition && vote_bank_slot == bank.slot() {
+                                warn!("PARTITION resolved fork: {} last vote: {:?}", bank.slot(), tower.last_vote());
+                                partition = false;
+                                inc_new_counter_info!("replay_stage-partition_resolved", 1);
+                            }
+                        }
+                    }
+                    if done {
+                        break;
+                    }
                 }

                 if !tpu_has_bank {
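The new branch treats "the slot we voted on differs from the slot PoH was reset to" as the partition signal, and clears the flag once the two agree again. A standalone sketch of that detect/resolve toggle, with illustrative names rather than the crate's code:

```rust
/// Sketch of the partition toggle above: flagged when the voted slot
/// diverges from the PoH reset slot, cleared once they converge.
fn update_partition(partition: bool, vote_slot: u64, reset_slot: u64) -> bool {
    if !partition && vote_slot != reset_slot {
        true // detected: we reset onto a fork we could not vote on
    } else if partition && vote_slot == reset_slot {
        false // resolved: vote fork and reset fork agree again
    } else {
        partition
    }
}

fn main() {
    let p = update_partition(false, 10, 12);
    assert!(p); // diverged -> detected
    assert!(!update_partition(p, 12, 12)); // converged -> resolved
}
```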
@@ -277,7 +313,7 @@
                 }

                 inc_new_counter_info!(
-                    "replicate_stage-duration",
+                    "replay_stage-duration",
                     duration_as_ms(&now.elapsed()) as usize
                 );
                 if did_complete_bank {
@@ -582,7 +618,7 @@
         };

         info!(
-            "{} voted and reset PoH to tick {} (within slot {}). {}",
+            "{} reset PoH to tick {} (within slot {}). {}",
             my_pubkey,
             bank.tick_height(),
             bank.slot(),
@@ -643,72 +679,145 @@
         did_complete_bank
     }

-    #[allow(clippy::type_complexity)]
-    fn generate_votable_banks(
+    fn select_fork(
         ancestors: &HashMap<u64, HashSet<u64>>,
         bank_forks: &Arc<RwLock<BankForks>>,
         tower: &Tower,
         progress: &mut HashMap<u64, ForkProgress>,
-    ) -> Vec<(u128, Arc<Bank>, HashMap<u64, StakeLockout>, u64)> {
+    ) -> VoteAndPoHBank {
         let tower_start = Instant::now();
-        let frozen_banks = bank_forks.read().unwrap().frozen_banks();
-        trace!("frozen_banks {}", frozen_banks.len());
-        let mut votable: Vec<(u128, Arc<Bank>, HashMap<u64, StakeLockout>, u64)> = frozen_banks
+        let mut frozen_banks: Vec<_> = bank_forks
+            .read()
+            .unwrap()
+            .frozen_banks()
             .values()
-            .filter(|b| {
-                let has_voted = tower.has_voted(b.slot());
-                trace!("bank has_voted: {} {}", b.slot(), has_voted);
-                !has_voted
-            })
-            .filter(|b| {
-                let is_locked_out = tower.is_locked_out(b.slot(), &ancestors);
-                trace!("bank is is_locked_out: {} {}", b.slot(), is_locked_out);
-                !is_locked_out
-            })
+            .cloned()
+            .collect();
+        frozen_banks.sort_by_key(|bank| bank.slot());
+
+        trace!("frozen_banks {}", frozen_banks.len());
+        let stats: Vec<ForkStats> = frozen_banks
+            .iter()
             .map(|bank| {
-                (
-                    bank,
-                    tower.collect_vote_lockouts(
+                let mut stats = progress
+                    .get(&bank.slot())
+                    .map(|s| s.fork_stats.clone())
+                    .unwrap_or_default();
+                if !stats.computed {
+                    stats.slot = bank.slot();
+                    let (stake_lockouts, total_staked) = tower.collect_vote_lockouts(
                         bank.slot(),
                         bank.vote_accounts().into_iter(),
                         &ancestors,
-                    ),
-                )
-            })
-            .filter(|(b, (stake_lockouts, total_staked))| {
-                let vote_threshold =
-                    tower.check_vote_stake_threshold(b.slot(), &stake_lockouts, *total_staked);
-                Self::confirm_forks(tower, &stake_lockouts, *total_staked, progress, bank_forks);
-                debug!("bank vote_threshold: {} {}", b.slot(), vote_threshold);
-                vote_threshold
-            })
-            .map(|(b, (stake_lockouts, total_staked))| {
-                (
-                    tower.calculate_weight(&stake_lockouts),
-                    b.clone(),
-                    stake_lockouts,
-                    total_staked,
-                )
+                    );
+                    Self::confirm_forks(tower, &stake_lockouts, total_staked, progress, bank_forks);
+                    stats.total_staked = total_staked;
+                    stats.weight = tower.calculate_weight(&stake_lockouts);
+                    stats.stake_lockouts = stake_lockouts;
+                    stats.block_height = bank.block_height();
+                    stats.computed = true;
+                }
+                stats.vote_threshold = tower.check_vote_stake_threshold(
+                    bank.slot(),
+                    &stats.stake_lockouts,
+                    stats.total_staked,
+                );
+                stats.is_locked_out = tower.is_locked_out(bank.slot(), &ancestors);
+                stats.has_voted = tower.has_voted(bank.slot());
+                stats.is_recent = tower.is_recent(bank.slot());
+                if let Some(fp) = progress.get_mut(&bank.slot()) {
+                    fp.fork_stats = stats.clone();
+                }
+                stats
             })
             .collect();

-        votable.sort_by_key(|b| b.0);
-        let ms = timing::duration_as_ms(&tower_start.elapsed());
+        let mut votable: Vec<_> = frozen_banks
+            .iter()
+            .zip(stats.iter())
+            .filter(|(_, stats)| stats.is_recent && !stats.has_voted && stats.vote_threshold)
+            .collect();
+
+        //highest weight, lowest slot first
+        votable.sort_by_key(|b| (b.1.weight, 0i64 - b.1.slot as i64));
+
+        votable.iter().for_each(|(_, stats)| {
+            let mut parents: Vec<_> = if let Some(set) = ancestors.get(&stats.slot) {
+                set.iter().collect()
+            } else {
+                vec![]
+            };
+            parents.sort();
+            debug!("{}: {:?} {:?}", stats.slot, stats, parents,);
+        });
         trace!("votable_banks {}", votable.len());
-        if !votable.is_empty() {
-            let weights: Vec<u128> = votable.iter().map(|x| x.0).collect();
-            info!(
-                "@{:?} tower duration: {:?} len: {} weights: {:?}",
-                timing::timestamp(),
-                ms,
-                votable.len(),
-                weights
-            );
-        }
+        let rv = Self::pick_best_fork(ancestors, &votable);
+        let ms = timing::duration_as_ms(&tower_start.elapsed());
+        let weights: Vec<(u128, u64, u64)> = votable
+            .iter()
+            .map(|x| (x.1.weight, x.1.slot, x.1.block_height))
+            .collect();
+        debug!(
+            "@{:?} tower duration: {:?} len: {}/{} weights: {:?} voting: {}",
+            timing::timestamp(),
+            ms,
+            votable.len(),
+            stats.iter().filter(|s| !s.has_voted).count(),
+            weights,
+            rv.0.is_some()
+        );
         inc_new_counter_info!("replay_stage-tower_duration", ms as usize);
-
-        votable
+        rv
+    }
+
+    fn pick_best_fork(
+        ancestors: &HashMap<u64, HashSet<u64>>,
+        best_banks: &[(&Arc<Bank>, &ForkStats)],
+    ) -> VoteAndPoHBank {
+        if best_banks.is_empty() {
+            return (None, None);
+        }
+        let mut rv = None;
+        let (best_bank, best_stats) = best_banks.last().unwrap();
+        debug!("best bank: {:?}", best_stats);
+        let mut by_slot: Vec<_> = best_banks.iter().collect();
+        by_slot.sort_by_key(|x| x.1.slot);
+        //look for the oldest ancestors of the best bank
+        if let Some(best_ancestors) = ancestors.get(&best_stats.slot) {
+            for (parent, parent_stats) in by_slot.iter() {
+                if parent_stats.is_locked_out {
+                    continue;
+                }
+                if !best_ancestors.contains(&parent_stats.slot) {
+                    continue;
+                }
+                debug!("best bank found ancestor: {}", parent_stats.slot);
+                inc_new_counter_info!("replay_stage-pick_best_fork-ancestor", 1);
+                rv = Some(((*parent).clone(), parent_stats.total_staked));
+            }
+        }
+        //look for the oldest child of the best bank
+        if rv.is_none() {
+            for (child, child_stats) in by_slot.iter().rev() {
+                if child_stats.is_locked_out {
+                    continue;
+                }
+                let has_best = best_stats.slot == child_stats.slot
+                    || ancestors
+                        .get(&child.slot())
+                        .map(|set| set.contains(&best_stats.slot))
+                        .unwrap_or(false);
+                if !has_best {
+                    continue;
+                }
+                inc_new_counter_info!("replay_stage-pick_best_fork-child", 1);
+                debug!("best bank found child: {}", child_stats.slot);
+                rv = Some(((*child).clone(), child_stats.total_staked));
+            }
+        }
+        if rv.is_none() {
+            inc_new_counter_info!("replay_stage-fork_selection-heavy_bank_lockout", 1);
+        }
+        (rv, Some((*best_bank).clone()))
     }

     fn confirm_forks(
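`select_fork` sorts ascending by `(weight, 0 - slot)`, so the heaviest fork ends up last, with ties going to the lowest slot. A small self-contained demonstration of that sort key using plain tuples instead of the crate's types:

```rust
fn main() {
    // Sketch of the select_fork sort key: ascending by (weight, -slot)
    // puts the heaviest fork last; among equal weights, the lowest slot
    // sorts last because 0 - slot is largest for the smallest slot.
    let mut votable: Vec<(u128, u64)> = vec![(5, 30), (9, 20), (9, 10), (7, 40)];
    votable.sort_by_key(|&(weight, slot)| (weight, 0i64 - slot as i64));
    // Heaviest weight is 9; the tie breaks toward the lower slot 10.
    assert_eq!(*votable.last().unwrap(), (9, 10));
}
```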
@@ -3,6 +3,7 @@
 use crate::{
     cluster_info::{compute_retransmit_peers, ClusterInfo, DATA_PLANE_FANOUT},
     packet::Packets,
+    partition_cfg::PartitionCfg,
     repair_service::RepairStrategy,
     result::{Error, Result},
     streamer::PacketReceiver,
@@ -211,6 +212,7 @@ impl RetransmitStage {
         exit: &Arc<AtomicBool>,
         completed_slots_receiver: CompletedSlotsReceiver,
         epoch_schedule: EpochSchedule,
+        cfg: Option<PartitionCfg>,
     ) -> Self {
         let (retransmit_sender, retransmit_receiver) = channel();

@@ -239,13 +241,18 @@ impl RetransmitStage {
             repair_strategy,
             &leader_schedule_cache.clone(),
             move |id, shred, working_bank, last_root| {
-                should_retransmit_and_persist(
+                let is_connected = cfg
+                    .as_ref()
+                    .map(|x| x.is_connected(&working_bank, &leader_schedule_cache, shred))
+                    .unwrap_or(true);
+                let rv = should_retransmit_and_persist(
                     shred,
                     working_bank,
                     &leader_schedule_cache,
                     id,
                     last_root,
-                )
+                );
+                rv && is_connected
             },
         );

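With no `PartitionCfg` supplied, the retransmit filter must behave exactly as before; the `map(...).unwrap_or(true)` composition defaults the partition check to "connected". A minimal sketch of that default-open pattern, with simplified types standing in for the real shred and config:

```rust
// Sketch of the default-open composition in the retransmit filter:
// absent a partition config the check is vacuously true, so behavior
// is unchanged for ordinary validators.
fn filter_shred(cfg: &Option<Vec<u64>>, leader_slot: u64, base_ok: bool) -> bool {
    let is_connected = cfg
        .as_ref()
        .map(|allowed| allowed.contains(&leader_slot))
        .unwrap_or(true);
    base_ok && is_connected
}

fn main() {
    assert!(filter_shred(&None, 7, true)); // no config: pass through
    assert!(!filter_shred(&Some(vec![1, 2]), 7, true)); // vetoed by partition
}
```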
@@ -5,6 +5,7 @@ use crate::blockstream_service::BlockstreamService;
 use crate::cluster_info::ClusterInfo;
 use crate::commitment::BlockCommitmentCache;
 use crate::ledger_cleanup_service::LedgerCleanupService;
+use crate::partition_cfg::PartitionCfg;
 use crate::poh_recorder::PohRecorder;
 use crate::replay_stage::ReplayStage;
 use crate::retransmit_stage::RetransmitStage;
@@ -72,6 +73,7 @@ impl Tvu {
         completed_slots_receiver: CompletedSlotsReceiver,
         block_commitment_cache: Arc<RwLock<BlockCommitmentCache>>,
         sigverify_disabled: bool,
+        cfg: Option<PartitionCfg>,
     ) -> Self
     where
         T: 'static + KeypairUtil + Sync + Send,
@@ -129,6 +131,7 @@ impl Tvu {
             &exit,
             completed_slots_receiver,
             *bank_forks.read().unwrap().working_bank().epoch_schedule(),
+            cfg,
         );

         let (blockstream_slot_sender, blockstream_slot_receiver) = channel();
@@ -290,6 +293,7 @@ pub mod tests {
             completed_slots_receiver,
             block_commitment_cache,
             false,
+            None,
         );
         exit.store(true, Ordering::Relaxed);
         tvu.join().unwrap();
@@ -6,6 +6,7 @@ use crate::{
     commitment::BlockCommitmentCache,
     contact_info::ContactInfo,
     gossip_service::{discover_cluster, GossipService},
+    partition_cfg::PartitionCfg,
     poh_recorder::PohRecorder,
     poh_service::PohService,
     rpc::JsonRpcConfig,
@@ -59,6 +60,7 @@ pub struct ValidatorConfig {
     pub snapshot_config: Option<SnapshotConfig>,
     pub max_ledger_slots: Option<u64>,
     pub broadcast_stage_type: BroadcastStageType,
+    pub partition_cfg: Option<PartitionCfg>,
 }

 impl Default for ValidatorConfig {
@@ -75,6 +77,7 @@ impl Default for ValidatorConfig {
             rpc_config: JsonRpcConfig::default(),
             snapshot_config: None,
             broadcast_stage_type: BroadcastStageType::Standard,
+            partition_cfg: None,
         }
     }
 }
@@ -342,6 +345,7 @@ impl Validator {
             completed_slots_receiver,
             block_commitment_cache,
             config.dev_sigverify_disabled,
+            config.partition_cfg.clone(),
         );

         if config.dev_sigverify_disabled {
@@ -192,7 +192,8 @@ pub fn kill_entry_and_spend_and_verify_rest(
     let (cluster_nodes, _) = discover_cluster(&entry_point_info.gossip, nodes).unwrap();
     assert!(cluster_nodes.len() >= nodes);
     let client = create_client(entry_point_info.client_facing_addr(), VALIDATOR_PORT_RANGE);
-    let first_two_epoch_slots = MINIMUM_SLOTS_PER_EPOCH * 3;
+    // sleep long enough to make sure we are in epoch 3
+    let first_two_epoch_slots = MINIMUM_SLOTS_PER_EPOCH * (3 + 1);

     for ingress_node in &cluster_nodes {
         client
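The old bound only reached the first slot of epoch 3; multiplying by `(3 + 1)` waits one extra epoch so the whole of epoch 3 has elapsed. A quick check of the arithmetic, assuming no epoch warmup and `MINIMUM_SLOTS_PER_EPOCH` of 32 (its value in solana-sdk, as far as I can tell):

```rust
fn main() {
    // Sketch of the bound change: with fixed-length 32-slot epochs,
    // slot 96 is merely the first slot of epoch 3, while slot 128
    // guarantees the whole of epoch 3 has passed.
    const MINIMUM_SLOTS_PER_EPOCH: u64 = 32; // assumed value
    assert_eq!(MINIMUM_SLOTS_PER_EPOCH * 3, 96); // old bound
    assert_eq!(MINIMUM_SLOTS_PER_EPOCH * (3 + 1), 128); // new bound
}
```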
@@ -2,8 +2,11 @@ use log::*;
 use serial_test_derive::serial;
 use solana_client::thin_client::create_client;
 use solana_core::{
-    broadcast_stage::BroadcastStageType, consensus::VOTE_THRESHOLD_DEPTH,
-    gossip_service::discover_cluster, validator::ValidatorConfig,
+    broadcast_stage::BroadcastStageType,
+    consensus::VOTE_THRESHOLD_DEPTH,
+    gossip_service::discover_cluster,
+    partition_cfg::{Partition, PartitionCfg},
+    validator::ValidatorConfig,
 };
 use solana_ledger::{bank_forks::SnapshotConfig, blocktree::Blocktree, snapshot_utils};
 use solana_local_cluster::{
@@ -12,6 +15,7 @@ use solana_local_cluster::{
     local_cluster::{ClusterConfig, LocalCluster},
 };
 use solana_runtime::accounts_db::AccountsDB;
+use solana_sdk::timing::timestamp;
 use solana_sdk::{
     client::SyncClient,
     clock,
@@ -183,6 +187,96 @@ fn test_leader_failure_4() {
         config.ticks_per_slot * config.poh_config.target_tick_duration.as_millis() as u64,
     );
 }

+fn run_network_partition(partitions: &[usize]) {
+    solana_logger::setup();
+    info!("PARTITION_TEST!");
+    let num_nodes = partitions.iter().sum();
+    let validator_config = ValidatorConfig::default();
+    let mut config = ClusterConfig {
+        cluster_lamports: 10_000,
+        node_stakes: vec![100; num_nodes],
+        validator_configs: vec![validator_config.clone(); num_nodes],
+        ..ClusterConfig::default()
+    };
+    let now = timestamp();
+    let partition_start = now + 30_000;
+    let partition_end = partition_start + 10_000;
+    let mut total = 0;
+    for (j, pn) in partitions.iter().enumerate() {
+        info!(
+            "PARTITION_TEST configuring partition {} for nodes {} - {}",
+            j,
+            total,
+            total + *pn
+        );
+        for i in total..(total + *pn) {
+            let mut p1 = Partition::default();
+            p1.num_partitions = partitions.len();
+            p1.my_partition = j;
+            p1.start_ts = partition_start;
+            p1.end_ts = partition_end;
+            config.validator_configs[i].partition_cfg = Some(PartitionCfg::new(vec![p1]));
+        }
+        total += *pn;
+    }
+    info!(
+        "PARTITION_TEST starting cluster with {:?} partitions",
+        partitions
+    );
+    let cluster = LocalCluster::new(&config);
+    let now = timestamp();
+    let timeout = partition_start as i64 - now as i64;
+    info!(
+        "PARTITION_TEST sleeping until partition start timeout {}",
+        timeout
+    );
+    if timeout > 0 {
+        sleep(Duration::from_millis(timeout as u64));
+    }
+    info!("PARTITION_TEST done sleeping until partition start timeout");
+    let now = timestamp();
+    let timeout = partition_end as i64 - now as i64;
+    info!(
+        "PARTITION_TEST sleeping until partition end timeout {}",
+        timeout
+    );
+    if timeout > 0 {
+        sleep(Duration::from_millis(timeout as u64));
+    }
+    info!("PARTITION_TEST done sleeping until partition end timeout");
+    info!("PARTITION_TEST spending on all nodes");
+    cluster_tests::spend_and_verify_all_nodes(
+        &cluster.entry_point_info,
+        &cluster.funding_keypair,
+        num_nodes,
+        HashSet::new(),
+    );
+    info!("PARTITION_TEST done spending on all nodes");
+}
+
+#[allow(unused_attributes)]
+#[ignore]
+#[test]
+#[serial]
+fn test_network_partition_1_2() {
+    run_network_partition(&[1, 2])
+}
+
+#[allow(unused_attributes)]
+#[ignore]
+#[test]
+#[serial]
+fn test_network_partition_1_1() {
+    run_network_partition(&[1, 1])
+}
+
+#[test]
+#[serial]
+fn test_network_partition_1_1_1() {
+    run_network_partition(&[1, 1, 1])
+}
+
 #[test]
 #[serial]
 fn test_two_unbalanced_stakes() {