filters for recent contact-infos when checking for live stake (#19204)
Contact-infos are saved to disk: https://github.com/solana-labs/solana/blob/9dfeee299/gossip/src/cluster_info.rs#L1678-L1683 and restored on validator start-up: https://github.com/solana-labs/solana/blob/9dfeee299/core/src/validator.rs#L450 Staked-node entries will not expire until an epoch later. So when the validator checks for online stake, it erroneously picks up contact-infos restored from disk, which breaks the entire wait-for-supermajority logic: https://github.com/solana-labs/solana/blob/9dfeee299/core/src/validator.rs#L1515-L1561 This commit adds an extra check for the age of contact-info entries and filters out old ones.
This commit is contained in:
parent
0e0022d9ff
commit
7a789e0763
|
@ -1,6 +1,7 @@
|
|||
//! The `validator` module hosts all the validator microservices.
|
||||
|
||||
use crate::{
|
||||
use {
|
||||
crate::{
|
||||
broadcast_stage::BroadcastStageType,
|
||||
cache_block_meta_service::{CacheBlockMetaSender, CacheBlockMetaService},
|
||||
cluster_info_vote_listener::VoteTracker,
|
||||
|
@ -16,33 +17,34 @@ use crate::{
|
|||
tower_storage::TowerStorage,
|
||||
tpu::{Tpu, DEFAULT_TPU_COALESCE_MS},
|
||||
tvu::{Sockets, Tvu, TvuConfig},
|
||||
};
|
||||
use crossbeam_channel::{bounded, unbounded};
|
||||
use rand::{thread_rng, Rng};
|
||||
use solana_entry::poh::compute_hash_time_ns;
|
||||
use solana_gossip::{
|
||||
},
|
||||
crossbeam_channel::{bounded, unbounded},
|
||||
rand::{thread_rng, Rng},
|
||||
solana_entry::poh::compute_hash_time_ns,
|
||||
solana_gossip::{
|
||||
cluster_info::{
|
||||
ClusterInfo, Node, DEFAULT_CONTACT_DEBUG_INTERVAL_MILLIS,
|
||||
DEFAULT_CONTACT_SAVE_INTERVAL_MILLIS,
|
||||
},
|
||||
contact_info::ContactInfo,
|
||||
crds_gossip_pull::CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS,
|
||||
gossip_service::GossipService,
|
||||
};
|
||||
use solana_ledger::{
|
||||
},
|
||||
solana_ledger::{
|
||||
bank_forks_utils,
|
||||
blockstore::{Blockstore, BlockstoreSignals, CompletedSlotsReceiver, PurgeType},
|
||||
blockstore_db::BlockstoreRecoveryMode,
|
||||
blockstore_processor::{self, TransactionStatusSender},
|
||||
leader_schedule::FixedSchedule,
|
||||
leader_schedule_cache::LeaderScheduleCache,
|
||||
};
|
||||
use solana_measure::measure::Measure;
|
||||
use solana_metrics::datapoint_info;
|
||||
use solana_poh::{
|
||||
},
|
||||
solana_measure::measure::Measure,
|
||||
solana_metrics::datapoint_info,
|
||||
solana_poh::{
|
||||
poh_recorder::{PohRecorder, GRACE_TICKS_FACTOR, MAX_GRACE_SLOTS},
|
||||
poh_service::{self, PohService},
|
||||
};
|
||||
use solana_rpc::{
|
||||
},
|
||||
solana_rpc::{
|
||||
max_slots::MaxSlots,
|
||||
optimistically_confirmed_bank_tracker::{
|
||||
OptimisticallyConfirmedBank, OptimisticallyConfirmedBankTracker,
|
||||
|
@ -53,8 +55,8 @@ use solana_rpc::{
|
|||
rpc_service::JsonRpcService,
|
||||
rpc_subscriptions::RpcSubscriptions,
|
||||
transaction_status_service::TransactionStatusService,
|
||||
};
|
||||
use solana_runtime::{
|
||||
},
|
||||
solana_runtime::{
|
||||
accounts_db::AccountShrinkThreshold,
|
||||
accounts_index::AccountSecondaryIndexes,
|
||||
bank::Bank,
|
||||
|
@ -64,8 +66,8 @@ use solana_runtime::{
|
|||
snapshot_archive_info::SnapshotArchiveInfoGetter,
|
||||
snapshot_config::SnapshotConfig,
|
||||
snapshot_utils,
|
||||
};
|
||||
use solana_sdk::{
|
||||
},
|
||||
solana_sdk::{
|
||||
clock::Slot,
|
||||
epoch_schedule::MAX_LEADER_SCHEDULE_EPOCH_OFFSET,
|
||||
exit::Exit,
|
||||
|
@ -75,19 +77,22 @@ use solana_sdk::{
|
|||
shred_version::compute_shred_version,
|
||||
signature::{Keypair, Signer},
|
||||
timing::timestamp,
|
||||
};
|
||||
use solana_streamer::socket::SocketAddrSpace;
|
||||
use solana_vote_program::vote_state::VoteState;
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
},
|
||||
solana_streamer::socket::SocketAddrSpace,
|
||||
solana_vote_program::vote_state::VoteState,
|
||||
std::{
|
||||
collections::{HashMap, HashSet},
|
||||
net::SocketAddr,
|
||||
ops::Deref,
|
||||
path::{Path, PathBuf},
|
||||
sync::atomic::{AtomicBool, AtomicU64, Ordering},
|
||||
sync::mpsc::Receiver,
|
||||
sync::{Arc, Mutex, RwLock},
|
||||
sync::{
|
||||
atomic::{AtomicBool, AtomicU64, Ordering},
|
||||
mpsc::Receiver,
|
||||
Arc, Mutex, RwLock,
|
||||
},
|
||||
thread::{sleep, Builder, JoinHandle},
|
||||
time::{Duration, Instant},
|
||||
},
|
||||
};
|
||||
|
||||
const MAX_COMPLETED_DATA_SETS_IN_CHANNEL: usize = 100_000;
|
||||
|
@ -1521,7 +1526,20 @@ fn get_stake_percent_in_gossip(bank: &Bank, cluster_info: &ClusterInfo, log: boo
|
|||
let mut offline_nodes = vec![];
|
||||
|
||||
let mut total_activated_stake = 0;
|
||||
let all_tvu_peers = cluster_info.all_tvu_peers();
|
||||
let now = timestamp();
|
||||
// Nodes contact infos are saved to disk and restored on validator startup.
|
||||
// Staked nodes entries will not expire until an epoch after. So it
|
||||
// is necessary here to filter for recent entries to establish liveness.
|
||||
let peers: HashMap<_, _> = cluster_info
|
||||
.all_tvu_peers()
|
||||
.into_iter()
|
||||
.filter(|node| {
|
||||
let age = now.saturating_sub(node.wallclock);
|
||||
// Contact infos are refreshed twice during this period.
|
||||
age < CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS
|
||||
})
|
||||
.map(|node| (node.id, node))
|
||||
.collect();
|
||||
let my_shred_version = cluster_info.my_shred_version();
|
||||
let my_id = cluster_info.id();
|
||||
|
||||
|
@ -1537,10 +1555,7 @@ fn get_stake_percent_in_gossip(bank: &Bank, cluster_info: &ClusterInfo, log: boo
|
|||
.map(|vote_state| vote_state.node_pubkey)
|
||||
.unwrap_or_default();
|
||||
|
||||
if let Some(peer) = all_tvu_peers
|
||||
.iter()
|
||||
.find(|peer| peer.id == vote_state_node_pubkey)
|
||||
{
|
||||
if let Some(peer) = peers.get(&vote_state_node_pubkey) {
|
||||
if peer.shred_version == my_shred_version {
|
||||
trace!(
|
||||
"observed {} in gossip, (activated_stake={})",
|
||||
|
|
|
@ -265,7 +265,7 @@ fn spy(
|
|||
.into_iter()
|
||||
.map(|x| x.0)
|
||||
.collect::<Vec<_>>();
|
||||
tvu_peers = spy_ref.all_tvu_peers().into_iter().collect::<Vec<_>>();
|
||||
tvu_peers = spy_ref.all_tvu_peers();
|
||||
|
||||
let found_node_by_pubkey = if let Some(pubkey) = find_node_by_pubkey {
|
||||
all_peers.iter().any(|x| x.id == pubkey)
|
||||
|
|
Loading…
Reference in New Issue