215 lines
8.7 KiB
Rust
215 lines
8.7 KiB
Rust
use {
|
|
crate::optimistically_confirmed_bank_tracker::OptimisticallyConfirmedBank,
|
|
solana_ledger::blockstore::Blockstore,
|
|
solana_sdk::clock::Slot,
|
|
std::sync::{
|
|
atomic::{AtomicBool, Ordering},
|
|
Arc, RwLock,
|
|
},
|
|
};
|
|
|
|
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
|
|
pub enum RpcHealthStatus {
|
|
Ok,
|
|
Behind { num_slots: Slot }, // Validator is behind its known validators
|
|
Unknown,
|
|
}
|
|
|
|
pub struct RpcHealth {
|
|
optimistically_confirmed_bank: Arc<RwLock<OptimisticallyConfirmedBank>>,
|
|
blockstore: Arc<Blockstore>,
|
|
health_check_slot_distance: u64,
|
|
override_health_check: Arc<AtomicBool>,
|
|
startup_verification_complete: Arc<AtomicBool>,
|
|
#[cfg(test)]
|
|
stub_health_status: std::sync::RwLock<Option<RpcHealthStatus>>,
|
|
}
|
|
|
|
impl RpcHealth {
|
|
pub fn new(
|
|
optimistically_confirmed_bank: Arc<RwLock<OptimisticallyConfirmedBank>>,
|
|
blockstore: Arc<Blockstore>,
|
|
health_check_slot_distance: u64,
|
|
override_health_check: Arc<AtomicBool>,
|
|
startup_verification_complete: Arc<AtomicBool>,
|
|
) -> Self {
|
|
Self {
|
|
optimistically_confirmed_bank,
|
|
blockstore,
|
|
health_check_slot_distance,
|
|
override_health_check,
|
|
startup_verification_complete,
|
|
#[cfg(test)]
|
|
stub_health_status: std::sync::RwLock::new(None),
|
|
}
|
|
}
|
|
|
|
pub fn check(&self) -> RpcHealthStatus {
|
|
#[cfg(test)]
|
|
{
|
|
if let Some(stub_health_status) = *self.stub_health_status.read().unwrap() {
|
|
return stub_health_status;
|
|
}
|
|
}
|
|
|
|
if self.override_health_check.load(Ordering::Relaxed) {
|
|
return RpcHealthStatus::Ok;
|
|
}
|
|
if !self.startup_verification_complete.load(Ordering::Acquire) {
|
|
return RpcHealthStatus::Unknown;
|
|
}
|
|
|
|
// A node can observe votes by both replaying blocks and observing gossip.
|
|
//
|
|
// ClusterInfoVoteListener receives votes from both of these sources and then records
|
|
// optimistically confirmed slots in the Blockstore via OptimisticConfirmationVerifier.
|
|
// Thus, it is possible for a node to record an optimistically confirmed slot before the
|
|
// node has replayed and validated the slot for itself.
|
|
//
|
|
// OptimisticallyConfirmedBank holds a bank for the latest optimistically confirmed slot
|
|
// that the node has replayed. It is true that the node will have replayed that slot by
|
|
// virtue of having a bank available. Observing that the cluster has optimistically
|
|
// confirmed a slot through gossip is not enough to reconstruct the bank.
|
|
//
|
|
// So, comparing the latest optimistic slot from the Blockstore vs. the slot from the
|
|
// OptimisticallyConfirmedBank bank allows a node to see where it stands in relation to the
|
|
// tip of the cluster.
|
|
let my_latest_optimistically_confirmed_slot = self
|
|
.optimistically_confirmed_bank
|
|
.read()
|
|
.unwrap()
|
|
.bank
|
|
.slot();
|
|
|
|
let mut optimistic_slot_infos = match self.blockstore.get_latest_optimistic_slots(1) {
|
|
Ok(infos) => infos,
|
|
Err(err) => {
|
|
warn!("health check: blockstore error: {err}");
|
|
return RpcHealthStatus::Unknown;
|
|
}
|
|
};
|
|
let Some((cluster_latest_optimistically_confirmed_slot, _, _)) =
|
|
optimistic_slot_infos.pop()
|
|
else {
|
|
warn!("health check: blockstore does not contain any optimistically confirmed slots");
|
|
return RpcHealthStatus::Unknown;
|
|
};
|
|
|
|
if my_latest_optimistically_confirmed_slot
|
|
>= cluster_latest_optimistically_confirmed_slot
|
|
.saturating_sub(self.health_check_slot_distance)
|
|
{
|
|
RpcHealthStatus::Ok
|
|
} else {
|
|
let num_slots = cluster_latest_optimistically_confirmed_slot
|
|
.saturating_sub(my_latest_optimistically_confirmed_slot);
|
|
warn!(
|
|
"health check: behind by {num_slots} \
|
|
slots: me={my_latest_optimistically_confirmed_slot}, \
|
|
latest cluster={cluster_latest_optimistically_confirmed_slot}",
|
|
);
|
|
RpcHealthStatus::Behind { num_slots }
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
pub(crate) fn stub(
|
|
optimistically_confirmed_bank: Arc<RwLock<OptimisticallyConfirmedBank>>,
|
|
blockstore: Arc<Blockstore>,
|
|
) -> Arc<Self> {
|
|
Arc::new(Self::new(
|
|
optimistically_confirmed_bank,
|
|
blockstore,
|
|
42,
|
|
Arc::new(AtomicBool::new(false)),
|
|
Arc::new(AtomicBool::new(true)),
|
|
))
|
|
}
|
|
|
|
#[cfg(test)]
|
|
pub(crate) fn stub_set_health_status(&self, stub_health_status: Option<RpcHealthStatus>) {
|
|
*self.stub_health_status.write().unwrap() = stub_health_status;
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
pub mod tests {
    use {
        super::*,
        solana_ledger::{
            genesis_utils::{create_genesis_config, GenesisConfigInfo},
            get_tmp_ledger_path_auto_delete,
        },
        solana_runtime::{bank::Bank, bank_forks::BankForks},
        solana_sdk::{clock::UnixTimestamp, hash::Hash, pubkey::Pubkey},
    };

    /// Walks `RpcHealth::check` through each status it can report: override, incomplete
    /// startup verification, empty blockstore, behind, within distance, at tip and past tip.
    #[test]
    fn test_get_health() {
        let ledger_path = get_tmp_ledger_path_auto_delete!();
        let blockstore = Arc::new(Blockstore::open(ledger_path.path()).unwrap());
        let GenesisConfigInfo { genesis_config, .. } = create_genesis_config(100);
        let bank = Bank::new_for_tests(&genesis_config);
        let bank_forks = BankForks::new_rw_arc(bank);
        let optimistically_confirmed_bank =
            OptimisticallyConfirmedBank::locked_from_bank_forks_root(&bank_forks);
        let bank0 = bank_forks.read().unwrap().root_bank();
        // `assert_eq!` reports the actual slot on failure, unlike `assert!(a == b)`.
        assert_eq!(bank0.slot(), 0);

        let health_check_slot_distance = 10;
        let override_health_check = Arc::new(AtomicBool::new(true));
        let startup_verification_complete = Arc::clone(bank0.get_startup_verification_complete());
        let health = RpcHealth::new(
            optimistically_confirmed_bank.clone(),
            blockstore.clone(),
            health_check_slot_distance,
            override_health_check.clone(),
            startup_verification_complete,
        );

        // Override health check set to true - status is ok
        assert_eq!(health.check(), RpcHealthStatus::Ok);

        // Remove the override - status now unknown with incomplete startup verification
        override_health_check.store(false, Ordering::Relaxed);
        assert_eq!(health.check(), RpcHealthStatus::Unknown);

        // Mark startup verification complete - status still unknown as no slots have been
        // optimistically confirmed yet
        bank0.set_startup_verification_complete();
        assert_eq!(health.check(), RpcHealthStatus::Unknown);

        // Mark slot 15 as being optimistically confirmed in the Blockstore, this could
        // happen if the cluster confirmed the slot and this node became aware through gossip,
        // but this node has not yet replayed slot 15. The local view of the latest optimistic
        // slot is still slot 0 so status will be behind
        blockstore
            .insert_optimistic_slot(15, &Hash::default(), UnixTimestamp::default())
            .unwrap();
        assert_eq!(health.check(), RpcHealthStatus::Behind { num_slots: 15 });

        // Simulate this node observing slot 4 as optimistically confirmed - status still behind
        let bank4 = Arc::new(Bank::new_from_parent(bank0, &Pubkey::default(), 4));
        optimistically_confirmed_bank.write().unwrap().bank = bank4.clone();
        assert_eq!(health.check(), RpcHealthStatus::Behind { num_slots: 11 });

        // Simulate this node observing slot 5 as optimistically confirmed - status now ok
        // as distance is <= health_check_slot_distance
        let bank5 = Arc::new(Bank::new_from_parent(bank4, &Pubkey::default(), 5));
        optimistically_confirmed_bank.write().unwrap().bank = bank5.clone();
        assert_eq!(health.check(), RpcHealthStatus::Ok);

        // Node now caught up with the tip of the cluster
        let bank15 = Arc::new(Bank::new_from_parent(bank5, &Pubkey::default(), 15));
        optimistically_confirmed_bank.write().unwrap().bank = bank15.clone();
        assert_eq!(health.check(), RpcHealthStatus::Ok);

        // Node "beyond" tip of cluster - this technically isn't possible but could be
        // observed locally due to a race between updates to Blockstore and
        // OptimisticallyConfirmedBank. Either way, not a problem and status is ok.
        let bank16 = Arc::new(Bank::new_from_parent(bank15, &Pubkey::default(), 16));
        // Last use of `bank16`, so it is moved rather than cloned.
        optimistically_confirmed_bank.write().unwrap().bank = bank16;
        assert_eq!(health.check(), RpcHealthStatus::Ok);
    }
}
|