add metrics for startup (#17913)
* add metrics for startup * roll timings up higher * fix test * fix duplicate
This commit is contained in:
parent
7fde9b6ff0
commit
471b34132e
|
@ -146,7 +146,7 @@ mod tests {
|
|||
|
||||
let old_last_bank = old_bank_forks.get(old_last_slot).unwrap();
|
||||
|
||||
let deserialized_bank = snapshot_utils::bank_from_archive(
|
||||
let (deserialized_bank, _timing) = snapshot_utils::bank_from_archive(
|
||||
&account_paths,
|
||||
&[],
|
||||
&old_bank_forks
|
||||
|
|
|
@ -130,7 +130,7 @@ fn load_from_snapshot(
|
|||
process::exit(1);
|
||||
}
|
||||
|
||||
let deserialized_bank = snapshot_utils::bank_from_archive(
|
||||
let (deserialized_bank, timings) = snapshot_utils::bank_from_archive(
|
||||
&account_paths,
|
||||
&process_options.frozen_accounts,
|
||||
&snapshot_config.snapshot_path,
|
||||
|
@ -174,6 +174,7 @@ fn load_from_snapshot(
|
|||
&VerifyRecyclers::default(),
|
||||
transaction_status_sender,
|
||||
cache_block_meta_sender,
|
||||
timings,
|
||||
),
|
||||
Some(deserialized_bank_slot_and_hash),
|
||||
)
|
||||
|
|
|
@ -25,6 +25,7 @@ use solana_runtime::{
|
|||
bank_forks::BankForks,
|
||||
bank_utils,
|
||||
commitment::VOTE_THRESHOLD_SIZE,
|
||||
snapshot_utils::BankFromArchiveTimings,
|
||||
transaction_batch::TransactionBatch,
|
||||
vote_account::ArcVoteAccount,
|
||||
vote_sender_types::ReplayVoteSender,
|
||||
|
@ -421,6 +422,7 @@ pub fn process_blockstore(
|
|||
&recyclers,
|
||||
None,
|
||||
cache_block_meta_sender,
|
||||
BankFromArchiveTimings::default(),
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -432,6 +434,7 @@ pub(crate) fn process_blockstore_from_root(
|
|||
recyclers: &VerifyRecyclers,
|
||||
transaction_status_sender: Option<&TransactionStatusSender>,
|
||||
cache_block_meta_sender: Option<&CacheBlockMetaSender>,
|
||||
timings: BankFromArchiveTimings,
|
||||
) -> BlockstoreProcessorResult {
|
||||
do_process_blockstore_from_root(
|
||||
blockstore,
|
||||
|
@ -440,6 +443,7 @@ pub(crate) fn process_blockstore_from_root(
|
|||
recyclers,
|
||||
transaction_status_sender,
|
||||
cache_block_meta_sender,
|
||||
timings,
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -450,6 +454,7 @@ fn do_process_blockstore_from_root(
|
|||
recyclers: &VerifyRecyclers,
|
||||
transaction_status_sender: Option<&TransactionStatusSender>,
|
||||
cache_block_meta_sender: Option<&CacheBlockMetaSender>,
|
||||
timings: BankFromArchiveTimings,
|
||||
) -> BlockstoreProcessorResult {
|
||||
info!("processing ledger from slot {}...", bank.slot());
|
||||
|
||||
|
@ -529,18 +534,45 @@ fn do_process_blockstore_from_root(
|
|||
}
|
||||
let bank_forks = BankForks::new_from_banks(&initial_forks, root);
|
||||
|
||||
let processing_time = now.elapsed();
|
||||
|
||||
let debug_verify = false;
|
||||
let mut time_cap = Measure::start("capitalization");
|
||||
// We might be promptly restarted after bad capitalization was detected while creating newer snapshot.
|
||||
// In that case, we're most likely restored from the last good snapshot and replayed up to this root.
|
||||
// So again check here for the bad capitalization to avoid to continue until the next snapshot creation.
|
||||
if !bank_forks
|
||||
.root_bank()
|
||||
.calculate_and_verify_capitalization(debug_verify)
|
||||
{
|
||||
return Err(BlockstoreProcessorError::RootBankWithMismatchedCapitalization(root));
|
||||
}
|
||||
time_cap.stop();
|
||||
|
||||
datapoint_info!(
|
||||
"process_blockstore_from_root",
|
||||
("total_time_us", now.elapsed().as_micros(), i64),
|
||||
("total_time_us", processing_time.as_micros(), i64),
|
||||
("frozen_banks", bank_forks.frozen_banks().len(), i64),
|
||||
("slot", bank_forks.root(), i64),
|
||||
("forks", initial_forks.len(), i64)
|
||||
("forks", initial_forks.len(), i64),
|
||||
("calculate_capitalization_us", time_cap.as_us(), i64),
|
||||
("untar_us", timings.untar_us, i64),
|
||||
(
|
||||
"rebuild_bank_from_snapshots_us",
|
||||
timings.rebuild_bank_from_snapshots_us,
|
||||
i64
|
||||
),
|
||||
(
|
||||
"verify_snapshot_bank_us",
|
||||
timings.verify_snapshot_bank_us,
|
||||
i64
|
||||
),
|
||||
);
|
||||
|
||||
info!("ledger processing timing: {:?}", timing);
|
||||
info!(
|
||||
"ledger processed in {}. root slot is {}, {} fork{} at {}, with {} frozen bank{}",
|
||||
HumanTime::from(chrono::Duration::from_std(now.elapsed()).unwrap())
|
||||
HumanTime::from(chrono::Duration::from_std(processing_time).unwrap())
|
||||
.to_text_en(Accuracy::Precise, Tense::Present),
|
||||
bank_forks.root(),
|
||||
initial_forks.len(),
|
||||
|
@ -558,17 +590,6 @@ fn do_process_blockstore_from_root(
|
|||
);
|
||||
assert!(bank_forks.active_banks().is_empty());
|
||||
|
||||
let debug_verify = false;
|
||||
// We might be promptly restarted after bad capitalization was detected while creating newer snapshot.
|
||||
// In that case, we're most likely restored from the last good snapshot and replayed up to this root.
|
||||
// So again check here for the bad capitalization to avoid to continue until the next snapshot creation.
|
||||
if !bank_forks
|
||||
.root_bank()
|
||||
.calculate_and_verify_capitalization(debug_verify)
|
||||
{
|
||||
return Err(BlockstoreProcessorError::RootBankWithMismatchedCapitalization(root));
|
||||
}
|
||||
|
||||
Ok((bank_forks, leader_schedule_cache))
|
||||
}
|
||||
|
||||
|
@ -2897,9 +2918,16 @@ pub mod tests {
|
|||
bank1.squash();
|
||||
|
||||
// Test process_blockstore_from_root() from slot 1 onwards
|
||||
let (bank_forks, _leader_schedule) =
|
||||
do_process_blockstore_from_root(&blockstore, bank1, &opts, &recyclers, None, None)
|
||||
.unwrap();
|
||||
let (bank_forks, _leader_schedule) = do_process_blockstore_from_root(
|
||||
&blockstore,
|
||||
bank1,
|
||||
&opts,
|
||||
&recyclers,
|
||||
None,
|
||||
None,
|
||||
BankFromArchiveTimings::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(frozen_bank_slots(&bank_forks), vec![5, 6]);
|
||||
assert_eq!(bank_forks.working_bank().slot(), 6);
|
||||
|
|
|
@ -592,6 +592,13 @@ pub fn remove_snapshot<P: AsRef<Path>>(slot: Slot, snapshot_path: P) -> Result<(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct BankFromArchiveTimings {
|
||||
pub rebuild_bank_from_snapshots_us: u64,
|
||||
pub untar_us: u64,
|
||||
pub verify_snapshot_bank_us: u64,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn bank_from_archive<P: AsRef<Path>>(
|
||||
account_paths: &[PathBuf],
|
||||
|
@ -606,17 +613,19 @@ pub fn bank_from_archive<P: AsRef<Path>>(
|
|||
accounts_db_caching_enabled: bool,
|
||||
limit_load_slot_count_from_snapshot: Option<usize>,
|
||||
shrink_ratio: AccountShrinkThreshold,
|
||||
) -> Result<Bank> {
|
||||
) -> Result<(Bank, BankFromArchiveTimings)> {
|
||||
let unpack_dir = tempfile::Builder::new()
|
||||
.prefix(TMP_SNAPSHOT_PREFIX)
|
||||
.tempdir_in(snapshot_path)?;
|
||||
|
||||
let mut untar = Measure::start("untar");
|
||||
let unpacked_append_vec_map = untar_snapshot_in(
|
||||
&snapshot_tar,
|
||||
&unpack_dir.as_ref(),
|
||||
account_paths,
|
||||
archive_format,
|
||||
)?;
|
||||
untar.stop();
|
||||
|
||||
let mut measure = Measure::start("bank rebuild from snapshot");
|
||||
let unpacked_snapshots_dir = unpack_dir.as_ref().join("snapshots");
|
||||
|
@ -639,14 +648,20 @@ pub fn bank_from_archive<P: AsRef<Path>>(
|
|||
limit_load_slot_count_from_snapshot,
|
||||
shrink_ratio,
|
||||
)?;
|
||||
measure.stop();
|
||||
|
||||
let mut verify = Measure::start("verify");
|
||||
if !bank.verify_snapshot_bank() {
|
||||
panic!("Snapshot bank for slot {} failed to verify", bank.slot());
|
||||
}
|
||||
measure.stop();
|
||||
info!("{}", measure);
|
||||
verify.stop();
|
||||
let timings = BankFromArchiveTimings {
|
||||
rebuild_bank_from_snapshots_us: measure.as_us(),
|
||||
untar_us: untar.as_us(),
|
||||
verify_snapshot_bank_us: verify.as_us(),
|
||||
};
|
||||
|
||||
Ok(bank)
|
||||
Ok((bank, timings))
|
||||
}
|
||||
|
||||
pub fn get_snapshot_archive_path(
|
||||
|
|
Loading…
Reference in New Issue