allow initial hash calc to occur in bg (#26271)

* allow initial hash calc to occur in bg

* validator_initialized -> startup_verification_complete

* add infos for leader and vote

* rework snapshot for startup verification

* change to assert
This commit is contained in:
Jeff Washington (jwash) 2022-06-29 16:48:33 -05:00 committed by GitHub
parent 608191352f
commit 557bf6e656
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 81 additions and 18 deletions

View File

@ -1580,6 +1580,11 @@ impl ReplayStage {
assert!(parent.is_frozen()); assert!(parent.is_frozen());
if !parent.is_startup_verification_complete() {
info!("startup verification incomplete, so skipping my leader slot");
return;
}
if bank_forks.read().unwrap().get(poh_slot).is_some() { if bank_forks.read().unwrap().get(poh_slot).is_some() {
warn!("{} already have bank in forks at {}?", my_pubkey, poh_slot); warn!("{} already have bank in forks at {}?", my_pubkey, poh_slot);
return; return;
@ -1907,6 +1912,11 @@ impl ReplayStage {
has_new_vote_been_rooted: bool, has_new_vote_been_rooted: bool,
wait_to_vote_slot: Option<Slot>, wait_to_vote_slot: Option<Slot>,
) -> Option<Transaction> { ) -> Option<Transaction> {
if !bank.is_startup_verification_complete() {
info!("startup verification incomplete, so unable to vote");
return None;
}
if authorized_voter_keypairs.is_empty() { if authorized_voter_keypairs.is_empty() {
return None; return None;
} }

View File

@ -736,8 +736,10 @@ impl Validator {
); );
let poh_config = Arc::new(genesis_config.poh_config.clone()); let poh_config = Arc::new(genesis_config.poh_config.clone());
let startup_verification_complete;
let (poh_recorder, entry_receiver, record_receiver) = { let (poh_recorder, entry_receiver, record_receiver) = {
let bank = &bank_forks.read().unwrap().working_bank(); let bank = &bank_forks.read().unwrap().working_bank();
startup_verification_complete = Arc::clone(bank.get_startup_verification_complete());
PohRecorder::new_with_clear_signal( PohRecorder::new_with_clear_signal(
bank.tick_height(), bank.tick_height(),
bank.last_blockhash(), bank.last_blockhash(),
@ -798,6 +800,7 @@ impl Validator {
config.validator_exit.clone(), config.validator_exit.clone(),
config.known_validators.clone(), config.known_validators.clone(),
rpc_override_health_check.clone(), rpc_override_health_check.clone(),
startup_verification_complete,
optimistically_confirmed_bank.clone(), optimistically_confirmed_bank.clone(),
config.send_transaction_service_config.clone(), config.send_transaction_service_config.clone(),
max_slots.clone(), max_slots.clone(),

View File

@ -381,7 +381,13 @@ impl JsonRpcRequestProcessor {
))), ))),
blockstore, blockstore,
validator_exit: create_validator_exit(&exit), validator_exit: create_validator_exit(&exit),
health: Arc::new(RpcHealth::new(cluster_info.clone(), None, 0, exit.clone())), health: Arc::new(RpcHealth::new(
cluster_info.clone(),
None,
0,
exit.clone(),
Arc::clone(bank.get_startup_verification_complete()),
)),
cluster_info, cluster_info,
genesis_hash, genesis_hash,
transaction_sender: Arc::new(Mutex::new(sender)), transaction_sender: Arc::new(Mutex::new(sender)),

View File

@ -22,6 +22,7 @@ pub struct RpcHealth {
known_validators: Option<HashSet<Pubkey>>, known_validators: Option<HashSet<Pubkey>>,
health_check_slot_distance: u64, health_check_slot_distance: u64,
override_health_check: Arc<AtomicBool>, override_health_check: Arc<AtomicBool>,
startup_verification_complete: Arc<AtomicBool>,
#[cfg(test)] #[cfg(test)]
stub_health_status: std::sync::RwLock<Option<RpcHealthStatus>>, stub_health_status: std::sync::RwLock<Option<RpcHealthStatus>>,
} }
@ -32,12 +33,14 @@ impl RpcHealth {
known_validators: Option<HashSet<Pubkey>>, known_validators: Option<HashSet<Pubkey>>,
health_check_slot_distance: u64, health_check_slot_distance: u64,
override_health_check: Arc<AtomicBool>, override_health_check: Arc<AtomicBool>,
startup_verification_complete: Arc<AtomicBool>,
) -> Self { ) -> Self {
Self { Self {
cluster_info, cluster_info,
known_validators, known_validators,
health_check_slot_distance, health_check_slot_distance,
override_health_check, override_health_check,
startup_verification_complete,
#[cfg(test)] #[cfg(test)]
stub_health_status: std::sync::RwLock::new(None), stub_health_status: std::sync::RwLock::new(None),
} }
@ -51,6 +54,10 @@ impl RpcHealth {
} }
} }
if !self.startup_verification_complete.load(Ordering::Relaxed) {
return RpcHealthStatus::Unknown;
}
if self.override_health_check.load(Ordering::Relaxed) { if self.override_health_check.load(Ordering::Relaxed) {
RpcHealthStatus::Ok RpcHealthStatus::Ok
} else if let Some(known_validators) = &self.known_validators { } else if let Some(known_validators) = &self.known_validators {
@ -134,6 +141,7 @@ impl RpcHealth {
None, None,
42, 42,
Arc::new(AtomicBool::new(false)), Arc::new(AtomicBool::new(false)),
Arc::new(AtomicBool::new(true)),
)) ))
} }

View File

@ -348,6 +348,7 @@ impl JsonRpcService {
validator_exit: Arc<RwLock<Exit>>, validator_exit: Arc<RwLock<Exit>>,
known_validators: Option<HashSet<Pubkey>>, known_validators: Option<HashSet<Pubkey>>,
override_health_check: Arc<AtomicBool>, override_health_check: Arc<AtomicBool>,
startup_verification_complete: Arc<AtomicBool>,
optimistically_confirmed_bank: Arc<RwLock<OptimisticallyConfirmedBank>>, optimistically_confirmed_bank: Arc<RwLock<OptimisticallyConfirmedBank>>,
send_transaction_service_config: send_transaction_service::Config, send_transaction_service_config: send_transaction_service::Config,
max_slots: Arc<MaxSlots>, max_slots: Arc<MaxSlots>,
@ -365,6 +366,7 @@ impl JsonRpcService {
known_validators, known_validators,
config.health_check_slot_distance, config.health_check_slot_distance,
override_health_check, override_health_check,
startup_verification_complete,
)); ));
let largest_accounts_cache = Arc::new(RwLock::new(LargestAccountsCache::new( let largest_accounts_cache = Arc::new(RwLock::new(LargestAccountsCache::new(
@ -628,6 +630,7 @@ mod tests {
validator_exit, validator_exit,
None, None,
Arc::new(AtomicBool::new(false)), Arc::new(AtomicBool::new(false)),
Arc::new(AtomicBool::new(true)),
optimistically_confirmed_bank, optimistically_confirmed_bank,
send_transaction_service::Config { send_transaction_service::Config {
retry_rate_ms: 1000, retry_rate_ms: 1000,
@ -826,6 +829,7 @@ mod tests {
)); ));
let health_check_slot_distance = 123; let health_check_slot_distance = 123;
let override_health_check = Arc::new(AtomicBool::new(false)); let override_health_check = Arc::new(AtomicBool::new(false));
let startup_verification_complete = Arc::new(AtomicBool::new(true));
let known_validators = vec![ let known_validators = vec![
solana_sdk::pubkey::new_rand(), solana_sdk::pubkey::new_rand(),
solana_sdk::pubkey::new_rand(), solana_sdk::pubkey::new_rand(),
@ -837,6 +841,7 @@ mod tests {
Some(known_validators.clone().into_iter().collect()), Some(known_validators.clone().into_iter().collect()),
health_check_slot_distance, health_check_slot_distance,
override_health_check.clone(), override_health_check.clone(),
startup_verification_complete,
)); ));
let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health); let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health);

View File

@ -184,6 +184,9 @@ impl SnapshotRequestHandler {
status_cache_slot_deltas, status_cache_slot_deltas,
} = snapshot_request; } = snapshot_request;
// we should not rely on the state of this validator until startup verification is complete
assert!(snapshot_root_bank.is_startup_verification_complete());
let previous_hash = if test_hash_calculation { let previous_hash = if test_hash_calculation {
// We have to use the index version here. // We have to use the index version here.
// We cannot calculate the non-index way because cache has not been flushed and stores don't match reality. This comment is out of date and can be re-evaluated. // We cannot calculate the non-index way because cache has not been flushed and stores don't match reality. This comment is out of date and can be re-evaluated.

View File

@ -1115,6 +1115,8 @@ pub struct AccountsDb {
/// true if drop_callback is attached to the bank. /// true if drop_callback is attached to the bank.
is_bank_drop_callback_enabled: AtomicBool, is_bank_drop_callback_enabled: AtomicBool,
pub startup_verification_complete: Arc<AtomicBool>,
/// Set of slots currently being flushed by `flush_slot_cache()` or removed /// Set of slots currently being flushed by `flush_slot_cache()` or removed
/// by `remove_unrooted_slot()`. Used to ensure `remove_unrooted_slots(slots)` /// by `remove_unrooted_slot()`. Used to ensure `remove_unrooted_slots(slots)`
/// can safely clear the set of unrooted slots `slots`. /// can safely clear the set of unrooted slots `slots`.
@ -1902,7 +1904,12 @@ impl AccountsDb {
// rayon needs a lot of stack // rayon needs a lot of stack
const ACCOUNTS_STACK_SIZE: usize = 8 * 1024 * 1024; const ACCOUNTS_STACK_SIZE: usize = 8 * 1024 * 1024;
// Placeholder until background startup verification goes live:
// for now the check still runs at startup, so this flag must always start out true.
let startup_verification_complete = Arc::new(AtomicBool::new(true));
AccountsDb { AccountsDb {
startup_verification_complete,
filler_accounts_per_slot: AtomicU64::default(), filler_accounts_per_slot: AtomicU64::default(),
filler_account_slots_remaining: AtomicU64::default(), filler_account_slots_remaining: AtomicU64::default(),
active_stats: ActiveStats::default(), active_stats: ActiveStats::default(),

View File

@ -3707,6 +3707,14 @@ impl Bank {
)) ))
} }
/// Returns a reference to the shared `startup_verification_complete` flag
/// stored on this bank's accounts db.
///
/// Callers that need to keep the flag beyond the borrow of `self` can
/// `Arc::clone` the returned reference.
pub fn get_startup_verification_complete(&self) -> &Arc<AtomicBool> {
    let accounts_db = &self.rc.accounts.accounts_db;
    &accounts_db.startup_verification_complete
}
/// Returns the current value of the startup-verification flag.
///
/// The flag is read with `Relaxed` ordering via
/// `get_startup_verification_complete()`.
pub fn is_startup_verification_complete(&self) -> bool {
    let flag = self.get_startup_verification_complete();
    flag.load(Relaxed)
}
pub fn get_fee_for_message_with_lamports_per_signature( pub fn get_fee_for_message_with_lamports_per_signature(
&self, &self,
message: &SanitizedMessage, message: &SanitizedMessage,
@ -6727,6 +6735,15 @@ impl Bank {
) )
} }
/// Reports whether the background verification of the initial accounts hash has finished.
///
/// Returns `true` once background hash verification is complete and `false`
/// while it has not completed yet. If hash verification failed, a panic will
/// occur rather than a value being returned.
///
/// NOTE: currently a placeholder that unconditionally returns `true` (see below).
pub fn has_initial_accounts_hash_verification_completed(&self) -> bool {
// Background verification is not live yet ("this will be live shortly").
// For now, this check occurs at startup, so it must always be true.
true
}
pub fn get_snapshot_storages(&self, base_slot: Option<Slot>) -> SnapshotStorages { pub fn get_snapshot_storages(&self, base_slot: Option<Slot>) -> SnapshotStorages {
self.rc self.rc
.accounts .accounts

View File

@ -278,23 +278,27 @@ impl BankForks {
{ {
let snapshot_root_bank = self.root_bank(); let snapshot_root_bank = self.root_bank();
let root_slot = snapshot_root_bank.slot(); let root_slot = snapshot_root_bank.slot();
// Save off the status cache because these may get pruned if another if snapshot_root_bank.is_startup_verification_complete() {
// `set_root()` is called before the snapshots package can be generated // Save off the status cache because these may get pruned if another
let status_cache_slot_deltas = snapshot_root_bank // `set_root()` is called before the snapshots package can be generated
.status_cache let status_cache_slot_deltas = snapshot_root_bank
.read() .status_cache
.unwrap() .read()
.root_slot_deltas(); .unwrap()
if let Err(e) = .root_slot_deltas();
accounts_background_request_sender.send_snapshot_request(SnapshotRequest { if let Err(e) = accounts_background_request_sender.send_snapshot_request(
snapshot_root_bank, SnapshotRequest {
status_cache_slot_deltas, snapshot_root_bank,
}) status_cache_slot_deltas,
{ },
warn!( ) {
"Error sending snapshot request for bank: {}, err: {:?}", warn!(
root_slot, e "Error sending snapshot request for bank: {}, err: {:?}",
); root_slot, e
);
}
} else {
info!("Not sending snapshot request for bank: {}, startup verification is incomplete", root_slot);
} }
} }
snapshot_time.stop(); snapshot_time.stop();