diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 2686956133..a70f763f6e 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -1580,6 +1580,11 @@ impl ReplayStage { assert!(parent.is_frozen()); + if !parent.is_startup_verification_complete() { + info!("startup verification incomplete, so skipping my leader slot"); + return; + } + if bank_forks.read().unwrap().get(poh_slot).is_some() { warn!("{} already have bank in forks at {}?", my_pubkey, poh_slot); return; @@ -1907,6 +1912,11 @@ impl ReplayStage { has_new_vote_been_rooted: bool, wait_to_vote_slot: Option, ) -> Option { + if !bank.is_startup_verification_complete() { + info!("startup verification incomplete, so unable to vote"); + return None; + } + if authorized_voter_keypairs.is_empty() { return None; } diff --git a/core/src/validator.rs b/core/src/validator.rs index bf269304c5..0a8245d326 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -736,8 +736,10 @@ impl Validator { ); let poh_config = Arc::new(genesis_config.poh_config.clone()); + let startup_verification_complete; let (poh_recorder, entry_receiver, record_receiver) = { let bank = &bank_forks.read().unwrap().working_bank(); + startup_verification_complete = Arc::clone(bank.get_startup_verification_complete()); PohRecorder::new_with_clear_signal( bank.tick_height(), bank.last_blockhash(), @@ -798,6 +800,7 @@ impl Validator { config.validator_exit.clone(), config.known_validators.clone(), rpc_override_health_check.clone(), + startup_verification_complete, optimistically_confirmed_bank.clone(), config.send_transaction_service_config.clone(), max_slots.clone(), diff --git a/rpc/src/rpc.rs b/rpc/src/rpc.rs index 9889ec404a..86245401d5 100644 --- a/rpc/src/rpc.rs +++ b/rpc/src/rpc.rs @@ -381,7 +381,13 @@ impl JsonRpcRequestProcessor { ))), blockstore, validator_exit: create_validator_exit(&exit), - health: Arc::new(RpcHealth::new(cluster_info.clone(), None, 0, exit.clone())), + health: Arc::new(RpcHealth::new( + cluster_info.clone(), + None, + 0, + exit.clone(), + Arc::clone(bank.get_startup_verification_complete()), + )), cluster_info, genesis_hash, transaction_sender: Arc::new(Mutex::new(sender)), diff --git a/rpc/src/rpc_health.rs b/rpc/src/rpc_health.rs index e68746a5d4..0b84b6282d 100644 --- a/rpc/src/rpc_health.rs +++ b/rpc/src/rpc_health.rs @@ -22,6 +22,7 @@ pub struct RpcHealth { known_validators: Option>, health_check_slot_distance: u64, override_health_check: Arc, + startup_verification_complete: Arc, #[cfg(test)] stub_health_status: std::sync::RwLock>, } @@ -32,12 +33,14 @@ impl RpcHealth { known_validators: Option>, health_check_slot_distance: u64, override_health_check: Arc, + startup_verification_complete: Arc, ) -> Self { Self { cluster_info, known_validators, health_check_slot_distance, override_health_check, + startup_verification_complete, #[cfg(test)] stub_health_status: std::sync::RwLock::new(None), } @@ -51,6 +54,10 @@ impl RpcHealth { } } + if !self.startup_verification_complete.load(Ordering::Relaxed) { + return RpcHealthStatus::Unknown; + } + if self.override_health_check.load(Ordering::Relaxed) { RpcHealthStatus::Ok } else if let Some(known_validators) = &self.known_validators { @@ -134,6 +141,7 @@ impl RpcHealth { None, 42, Arc::new(AtomicBool::new(false)), + Arc::new(AtomicBool::new(true)), )) } diff --git a/rpc/src/rpc_service.rs b/rpc/src/rpc_service.rs index 946ab6582f..ba17587012 100644 --- a/rpc/src/rpc_service.rs +++ b/rpc/src/rpc_service.rs @@ -348,6 +348,7 @@ impl JsonRpcService { validator_exit: Arc>, known_validators: Option>, override_health_check: Arc, + startup_verification_complete: Arc, optimistically_confirmed_bank: Arc>, send_transaction_service_config: send_transaction_service::Config, max_slots: Arc, @@ -365,6 +366,7 @@ impl JsonRpcService { known_validators, config.health_check_slot_distance, override_health_check, + startup_verification_complete, )); let largest_accounts_cache = Arc::new(RwLock::new(LargestAccountsCache::new( @@ -628,6 +630,7 @@ mod tests { validator_exit, None, Arc::new(AtomicBool::new(false)), + Arc::new(AtomicBool::new(true)), optimistically_confirmed_bank, send_transaction_service::Config { retry_rate_ms: 1000, @@ -826,6 +829,7 @@ mod tests { )); let health_check_slot_distance = 123; let override_health_check = Arc::new(AtomicBool::new(false)); + let startup_verification_complete = Arc::new(AtomicBool::new(true)); let known_validators = vec![ solana_sdk::pubkey::new_rand(), solana_sdk::pubkey::new_rand(), @@ -837,6 +841,7 @@ mod tests { Some(known_validators.clone().into_iter().collect()), health_check_slot_distance, override_health_check.clone(), + startup_verification_complete, )); let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health); diff --git a/runtime/src/accounts_background_service.rs b/runtime/src/accounts_background_service.rs index d158427e70..40ad5948c1 100644 --- a/runtime/src/accounts_background_service.rs +++ b/runtime/src/accounts_background_service.rs @@ -184,6 +184,9 @@ impl SnapshotRequestHandler { status_cache_slot_deltas, } = snapshot_request; + // we should not rely on the state of this validator until startup verification is complete + assert!(snapshot_root_bank.is_startup_verification_complete()); + let previous_hash = if test_hash_calculation { // We have to use the index version here. // We cannot calculate the non-index way because cache has not been flushed and stores don't match reality. This comment is out of date and can be re-evaluated. diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index 16f89a5994..dc2a8f44ec 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -1115,6 +1115,8 @@ pub struct AccountsDb { /// true if drop_callback is attached to the bank. is_bank_drop_callback_enabled: AtomicBool, + pub startup_verification_complete: Arc, + /// Set of slots currently being flushed by `flush_slot_cache()` or removed /// by `remove_unrooted_slot()`. Used to ensure `remove_unrooted_slots(slots)` /// can safely clear the set of unrooted slots `slots`. @@ -1902,7 +1904,12 @@ impl AccountsDb { // rayon needs a lot of stack const ACCOUNTS_STACK_SIZE: usize = 8 * 1024 * 1024; + // this will be live shortly + // for now, this check occurs at startup, so it must always be true + let startup_verification_complete = Arc::new(AtomicBool::new(true)); + AccountsDb { + startup_verification_complete, filler_accounts_per_slot: AtomicU64::default(), filler_account_slots_remaining: AtomicU64::default(), active_stats: ActiveStats::default(), diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 3e8a5065d5..d787166caf 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -3707,6 +3707,14 @@ impl Bank { )) } + pub fn get_startup_verification_complete(&self) -> &Arc { + &self.rc.accounts.accounts_db.startup_verification_complete + } + + pub fn is_startup_verification_complete(&self) -> bool { + self.get_startup_verification_complete().load(Relaxed) + } + pub fn get_fee_for_message_with_lamports_per_signature( &self, message: &SanitizedMessage, @@ -6727,6 +6735,15 @@ impl Bank { ) } + /// return true if bg hash verification is complete + /// return false if bg hash verification has not completed yet + /// if hash verification failed, a panic will occur + pub fn has_initial_accounts_hash_verification_completed(&self) -> bool { + // this will be live shortly + // for now, this check occurs at startup, so it must always be true + true + } + pub fn get_snapshot_storages(&self, base_slot: Option) -> SnapshotStorages { self.rc .accounts diff --git a/runtime/src/bank_forks.rs b/runtime/src/bank_forks.rs index 5f62bd385b..54e448ae40 100644 --- a/runtime/src/bank_forks.rs +++ b/runtime/src/bank_forks.rs @@ -278,23 +278,27 @@ impl BankForks { { let snapshot_root_bank = self.root_bank(); let root_slot = snapshot_root_bank.slot(); - // Save off the status cache because these may get pruned if another - // `set_root()` is called before the snapshots package can be generated - let status_cache_slot_deltas = snapshot_root_bank - .status_cache - .read() - .unwrap() - .root_slot_deltas(); - if let Err(e) = - accounts_background_request_sender.send_snapshot_request(SnapshotRequest { - snapshot_root_bank, - status_cache_slot_deltas, - }) - { - warn!( - "Error sending snapshot request for bank: {}, err: {:?}", - root_slot, e - ); + if snapshot_root_bank.is_startup_verification_complete() { + // Save off the status cache because these may get pruned if another + // `set_root()` is called before the snapshots package can be generated + let status_cache_slot_deltas = snapshot_root_bank + .status_cache + .read() + .unwrap() + .root_slot_deltas(); + if let Err(e) = accounts_background_request_sender.send_snapshot_request( + SnapshotRequest { + snapshot_root_bank, + status_cache_slot_deltas, + }, + ) { + warn!( + "Error sending snapshot request for bank: {}, err: {:?}", + root_slot, e + ); + } + } else { + info!("Not sending snapshot request for bank: {}, startup verification is incomplete", root_slot); } } snapshot_time.stop();