allow initial hash calc to occur in bg (#26271)
* allow initial hash calc to occur in bg
* validator_initialized -> startup_verification_complete
* add infos for leader and vote
* rework snapshot for startup verification
* change to assert
This commit is contained in:
parent
608191352f
commit
557bf6e656
|
@ -1580,6 +1580,11 @@ impl ReplayStage {
|
||||||
|
|
||||||
assert!(parent.is_frozen());
|
assert!(parent.is_frozen());
|
||||||
|
|
||||||
|
if !parent.is_startup_verification_complete() {
|
||||||
|
info!("startup verification incomplete, so skipping my leader slot");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if bank_forks.read().unwrap().get(poh_slot).is_some() {
|
if bank_forks.read().unwrap().get(poh_slot).is_some() {
|
||||||
warn!("{} already have bank in forks at {}?", my_pubkey, poh_slot);
|
warn!("{} already have bank in forks at {}?", my_pubkey, poh_slot);
|
||||||
return;
|
return;
|
||||||
|
@ -1907,6 +1912,11 @@ impl ReplayStage {
|
||||||
has_new_vote_been_rooted: bool,
|
has_new_vote_been_rooted: bool,
|
||||||
wait_to_vote_slot: Option<Slot>,
|
wait_to_vote_slot: Option<Slot>,
|
||||||
) -> Option<Transaction> {
|
) -> Option<Transaction> {
|
||||||
|
if !bank.is_startup_verification_complete() {
|
||||||
|
info!("startup verification incomplete, so unable to vote");
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
if authorized_voter_keypairs.is_empty() {
|
if authorized_voter_keypairs.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
|
@ -736,8 +736,10 @@ impl Validator {
|
||||||
);
|
);
|
||||||
|
|
||||||
let poh_config = Arc::new(genesis_config.poh_config.clone());
|
let poh_config = Arc::new(genesis_config.poh_config.clone());
|
||||||
|
let startup_verification_complete;
|
||||||
let (poh_recorder, entry_receiver, record_receiver) = {
|
let (poh_recorder, entry_receiver, record_receiver) = {
|
||||||
let bank = &bank_forks.read().unwrap().working_bank();
|
let bank = &bank_forks.read().unwrap().working_bank();
|
||||||
|
startup_verification_complete = Arc::clone(bank.get_startup_verification_complete());
|
||||||
PohRecorder::new_with_clear_signal(
|
PohRecorder::new_with_clear_signal(
|
||||||
bank.tick_height(),
|
bank.tick_height(),
|
||||||
bank.last_blockhash(),
|
bank.last_blockhash(),
|
||||||
|
@ -798,6 +800,7 @@ impl Validator {
|
||||||
config.validator_exit.clone(),
|
config.validator_exit.clone(),
|
||||||
config.known_validators.clone(),
|
config.known_validators.clone(),
|
||||||
rpc_override_health_check.clone(),
|
rpc_override_health_check.clone(),
|
||||||
|
startup_verification_complete,
|
||||||
optimistically_confirmed_bank.clone(),
|
optimistically_confirmed_bank.clone(),
|
||||||
config.send_transaction_service_config.clone(),
|
config.send_transaction_service_config.clone(),
|
||||||
max_slots.clone(),
|
max_slots.clone(),
|
||||||
|
|
|
@ -381,7 +381,13 @@ impl JsonRpcRequestProcessor {
|
||||||
))),
|
))),
|
||||||
blockstore,
|
blockstore,
|
||||||
validator_exit: create_validator_exit(&exit),
|
validator_exit: create_validator_exit(&exit),
|
||||||
health: Arc::new(RpcHealth::new(cluster_info.clone(), None, 0, exit.clone())),
|
health: Arc::new(RpcHealth::new(
|
||||||
|
cluster_info.clone(),
|
||||||
|
None,
|
||||||
|
0,
|
||||||
|
exit.clone(),
|
||||||
|
Arc::clone(bank.get_startup_verification_complete()),
|
||||||
|
)),
|
||||||
cluster_info,
|
cluster_info,
|
||||||
genesis_hash,
|
genesis_hash,
|
||||||
transaction_sender: Arc::new(Mutex::new(sender)),
|
transaction_sender: Arc::new(Mutex::new(sender)),
|
||||||
|
|
|
@ -22,6 +22,7 @@ pub struct RpcHealth {
|
||||||
known_validators: Option<HashSet<Pubkey>>,
|
known_validators: Option<HashSet<Pubkey>>,
|
||||||
health_check_slot_distance: u64,
|
health_check_slot_distance: u64,
|
||||||
override_health_check: Arc<AtomicBool>,
|
override_health_check: Arc<AtomicBool>,
|
||||||
|
startup_verification_complete: Arc<AtomicBool>,
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
stub_health_status: std::sync::RwLock<Option<RpcHealthStatus>>,
|
stub_health_status: std::sync::RwLock<Option<RpcHealthStatus>>,
|
||||||
}
|
}
|
||||||
|
@ -32,12 +33,14 @@ impl RpcHealth {
|
||||||
known_validators: Option<HashSet<Pubkey>>,
|
known_validators: Option<HashSet<Pubkey>>,
|
||||||
health_check_slot_distance: u64,
|
health_check_slot_distance: u64,
|
||||||
override_health_check: Arc<AtomicBool>,
|
override_health_check: Arc<AtomicBool>,
|
||||||
|
startup_verification_complete: Arc<AtomicBool>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
cluster_info,
|
cluster_info,
|
||||||
known_validators,
|
known_validators,
|
||||||
health_check_slot_distance,
|
health_check_slot_distance,
|
||||||
override_health_check,
|
override_health_check,
|
||||||
|
startup_verification_complete,
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
stub_health_status: std::sync::RwLock::new(None),
|
stub_health_status: std::sync::RwLock::new(None),
|
||||||
}
|
}
|
||||||
|
@ -51,6 +54,10 @@ impl RpcHealth {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !self.startup_verification_complete.load(Ordering::Relaxed) {
|
||||||
|
return RpcHealthStatus::Unknown;
|
||||||
|
}
|
||||||
|
|
||||||
if self.override_health_check.load(Ordering::Relaxed) {
|
if self.override_health_check.load(Ordering::Relaxed) {
|
||||||
RpcHealthStatus::Ok
|
RpcHealthStatus::Ok
|
||||||
} else if let Some(known_validators) = &self.known_validators {
|
} else if let Some(known_validators) = &self.known_validators {
|
||||||
|
@ -134,6 +141,7 @@ impl RpcHealth {
|
||||||
None,
|
None,
|
||||||
42,
|
42,
|
||||||
Arc::new(AtomicBool::new(false)),
|
Arc::new(AtomicBool::new(false)),
|
||||||
|
Arc::new(AtomicBool::new(true)),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -348,6 +348,7 @@ impl JsonRpcService {
|
||||||
validator_exit: Arc<RwLock<Exit>>,
|
validator_exit: Arc<RwLock<Exit>>,
|
||||||
known_validators: Option<HashSet<Pubkey>>,
|
known_validators: Option<HashSet<Pubkey>>,
|
||||||
override_health_check: Arc<AtomicBool>,
|
override_health_check: Arc<AtomicBool>,
|
||||||
|
startup_verification_complete: Arc<AtomicBool>,
|
||||||
optimistically_confirmed_bank: Arc<RwLock<OptimisticallyConfirmedBank>>,
|
optimistically_confirmed_bank: Arc<RwLock<OptimisticallyConfirmedBank>>,
|
||||||
send_transaction_service_config: send_transaction_service::Config,
|
send_transaction_service_config: send_transaction_service::Config,
|
||||||
max_slots: Arc<MaxSlots>,
|
max_slots: Arc<MaxSlots>,
|
||||||
|
@ -365,6 +366,7 @@ impl JsonRpcService {
|
||||||
known_validators,
|
known_validators,
|
||||||
config.health_check_slot_distance,
|
config.health_check_slot_distance,
|
||||||
override_health_check,
|
override_health_check,
|
||||||
|
startup_verification_complete,
|
||||||
));
|
));
|
||||||
|
|
||||||
let largest_accounts_cache = Arc::new(RwLock::new(LargestAccountsCache::new(
|
let largest_accounts_cache = Arc::new(RwLock::new(LargestAccountsCache::new(
|
||||||
|
@ -628,6 +630,7 @@ mod tests {
|
||||||
validator_exit,
|
validator_exit,
|
||||||
None,
|
None,
|
||||||
Arc::new(AtomicBool::new(false)),
|
Arc::new(AtomicBool::new(false)),
|
||||||
|
Arc::new(AtomicBool::new(true)),
|
||||||
optimistically_confirmed_bank,
|
optimistically_confirmed_bank,
|
||||||
send_transaction_service::Config {
|
send_transaction_service::Config {
|
||||||
retry_rate_ms: 1000,
|
retry_rate_ms: 1000,
|
||||||
|
@ -826,6 +829,7 @@ mod tests {
|
||||||
));
|
));
|
||||||
let health_check_slot_distance = 123;
|
let health_check_slot_distance = 123;
|
||||||
let override_health_check = Arc::new(AtomicBool::new(false));
|
let override_health_check = Arc::new(AtomicBool::new(false));
|
||||||
|
let startup_verification_complete = Arc::new(AtomicBool::new(true));
|
||||||
let known_validators = vec![
|
let known_validators = vec![
|
||||||
solana_sdk::pubkey::new_rand(),
|
solana_sdk::pubkey::new_rand(),
|
||||||
solana_sdk::pubkey::new_rand(),
|
solana_sdk::pubkey::new_rand(),
|
||||||
|
@ -837,6 +841,7 @@ mod tests {
|
||||||
Some(known_validators.clone().into_iter().collect()),
|
Some(known_validators.clone().into_iter().collect()),
|
||||||
health_check_slot_distance,
|
health_check_slot_distance,
|
||||||
override_health_check.clone(),
|
override_health_check.clone(),
|
||||||
|
startup_verification_complete,
|
||||||
));
|
));
|
||||||
|
|
||||||
let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health);
|
let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health);
|
||||||
|
|
|
@ -184,6 +184,9 @@ impl SnapshotRequestHandler {
|
||||||
status_cache_slot_deltas,
|
status_cache_slot_deltas,
|
||||||
} = snapshot_request;
|
} = snapshot_request;
|
||||||
|
|
||||||
|
// we should not rely on the state of this validator until startup verification is complete
|
||||||
|
assert!(snapshot_root_bank.is_startup_verification_complete());
|
||||||
|
|
||||||
let previous_hash = if test_hash_calculation {
|
let previous_hash = if test_hash_calculation {
|
||||||
// We have to use the index version here.
|
// We have to use the index version here.
|
||||||
// We cannot calculate the non-index way because cache has not been flushed and stores don't match reality. This comment is out of date and can be re-evaluated.
|
// We cannot calculate the non-index way because cache has not been flushed and stores don't match reality. This comment is out of date and can be re-evaluated.
|
||||||
|
|
|
@ -1115,6 +1115,8 @@ pub struct AccountsDb {
|
||||||
/// true if drop_callback is attached to the bank.
|
/// true if drop_callback is attached to the bank.
|
||||||
is_bank_drop_callback_enabled: AtomicBool,
|
is_bank_drop_callback_enabled: AtomicBool,
|
||||||
|
|
||||||
|
pub startup_verification_complete: Arc<AtomicBool>,
|
||||||
|
|
||||||
/// Set of slots currently being flushed by `flush_slot_cache()` or removed
|
/// Set of slots currently being flushed by `flush_slot_cache()` or removed
|
||||||
/// by `remove_unrooted_slot()`. Used to ensure `remove_unrooted_slots(slots)`
|
/// by `remove_unrooted_slot()`. Used to ensure `remove_unrooted_slots(slots)`
|
||||||
/// can safely clear the set of unrooted slots `slots`.
|
/// can safely clear the set of unrooted slots `slots`.
|
||||||
|
@ -1902,7 +1904,12 @@ impl AccountsDb {
|
||||||
// rayon needs a lot of stack
|
// rayon needs a lot of stack
|
||||||
const ACCOUNTS_STACK_SIZE: usize = 8 * 1024 * 1024;
|
const ACCOUNTS_STACK_SIZE: usize = 8 * 1024 * 1024;
|
||||||
|
|
||||||
|
// this will be live shortly
|
||||||
|
// for now, this check occurs at startup, so it must always be true
|
||||||
|
let startup_verification_complete = Arc::new(AtomicBool::new(true));
|
||||||
|
|
||||||
AccountsDb {
|
AccountsDb {
|
||||||
|
startup_verification_complete,
|
||||||
filler_accounts_per_slot: AtomicU64::default(),
|
filler_accounts_per_slot: AtomicU64::default(),
|
||||||
filler_account_slots_remaining: AtomicU64::default(),
|
filler_account_slots_remaining: AtomicU64::default(),
|
||||||
active_stats: ActiveStats::default(),
|
active_stats: ActiveStats::default(),
|
||||||
|
|
|
@ -3707,6 +3707,14 @@ impl Bank {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a reference to the shared `startup_verification_complete` flag
/// held by this bank's `accounts_db`, so callers can `Arc::clone` it.
pub fn get_startup_verification_complete(&self) -> &Arc<AtomicBool> {
|
||||||
|
&self.rc.accounts.accounts_db.startup_verification_complete
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the current value of the `startup_verification_complete` flag,
/// read with `Relaxed` ordering.
pub fn is_startup_verification_complete(&self) -> bool {
|
||||||
|
self.get_startup_verification_complete().load(Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn get_fee_for_message_with_lamports_per_signature(
|
pub fn get_fee_for_message_with_lamports_per_signature(
|
||||||
&self,
|
&self,
|
||||||
message: &SanitizedMessage,
|
message: &SanitizedMessage,
|
||||||
|
@ -6727,6 +6735,15 @@ impl Bank {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if background hash verification is complete.
|
||||||
|
/// Returns false if background hash verification has not completed yet.
|
||||||
|
/// If hash verification failed, a panic will occur.
/// NOTE: as written here, this is a placeholder that unconditionally returns `true`.
|
||||||
|
pub fn has_initial_accounts_hash_verification_completed(&self) -> bool {
|
||||||
|
// This will be live shortly.
|
||||||
|
// For now, this check occurs at startup, so it must always be true.
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
pub fn get_snapshot_storages(&self, base_slot: Option<Slot>) -> SnapshotStorages {
|
pub fn get_snapshot_storages(&self, base_slot: Option<Slot>) -> SnapshotStorages {
|
||||||
self.rc
|
self.rc
|
||||||
.accounts
|
.accounts
|
||||||
|
|
|
@ -278,6 +278,7 @@ impl BankForks {
|
||||||
{
|
{
|
||||||
let snapshot_root_bank = self.root_bank();
|
let snapshot_root_bank = self.root_bank();
|
||||||
let root_slot = snapshot_root_bank.slot();
|
let root_slot = snapshot_root_bank.slot();
|
||||||
|
if snapshot_root_bank.is_startup_verification_complete() {
|
||||||
// Save off the status cache because these may get pruned if another
|
// Save off the status cache because these may get pruned if another
|
||||||
// `set_root()` is called before the snapshots package can be generated
|
// `set_root()` is called before the snapshots package can be generated
|
||||||
let status_cache_slot_deltas = snapshot_root_bank
|
let status_cache_slot_deltas = snapshot_root_bank
|
||||||
|
@ -285,17 +286,20 @@ impl BankForks {
|
||||||
.read()
|
.read()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.root_slot_deltas();
|
.root_slot_deltas();
|
||||||
if let Err(e) =
|
if let Err(e) = accounts_background_request_sender.send_snapshot_request(
|
||||||
accounts_background_request_sender.send_snapshot_request(SnapshotRequest {
|
SnapshotRequest {
|
||||||
snapshot_root_bank,
|
snapshot_root_bank,
|
||||||
status_cache_slot_deltas,
|
status_cache_slot_deltas,
|
||||||
})
|
},
|
||||||
{
|
) {
|
||||||
warn!(
|
warn!(
|
||||||
"Error sending snapshot request for bank: {}, err: {:?}",
|
"Error sending snapshot request for bank: {}, err: {:?}",
|
||||||
root_slot, e
|
root_slot, e
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
info!("Not sending snapshot request for bank: {}, startup verification is incomplete", root_slot);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
snapshot_time.stop();
|
snapshot_time.stop();
|
||||||
total_snapshot_ms += snapshot_time.as_ms() as i64;
|
total_snapshot_ms += snapshot_time.as_ms() as i64;
|
||||||
|
|
Loading…
Reference in New Issue