add metrics for startup (#17913)
* add metrics for startup * roll timings up higher * fix test * fix duplicate
This commit is contained in:
parent
7fde9b6ff0
commit
471b34132e
|
@ -146,7 +146,7 @@ mod tests {
|
||||||
|
|
||||||
let old_last_bank = old_bank_forks.get(old_last_slot).unwrap();
|
let old_last_bank = old_bank_forks.get(old_last_slot).unwrap();
|
||||||
|
|
||||||
let deserialized_bank = snapshot_utils::bank_from_archive(
|
let (deserialized_bank, _timing) = snapshot_utils::bank_from_archive(
|
||||||
&account_paths,
|
&account_paths,
|
||||||
&[],
|
&[],
|
||||||
&old_bank_forks
|
&old_bank_forks
|
||||||
|
|
|
@ -130,7 +130,7 @@ fn load_from_snapshot(
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let deserialized_bank = snapshot_utils::bank_from_archive(
|
let (deserialized_bank, timings) = snapshot_utils::bank_from_archive(
|
||||||
&account_paths,
|
&account_paths,
|
||||||
&process_options.frozen_accounts,
|
&process_options.frozen_accounts,
|
||||||
&snapshot_config.snapshot_path,
|
&snapshot_config.snapshot_path,
|
||||||
|
@ -174,6 +174,7 @@ fn load_from_snapshot(
|
||||||
&VerifyRecyclers::default(),
|
&VerifyRecyclers::default(),
|
||||||
transaction_status_sender,
|
transaction_status_sender,
|
||||||
cache_block_meta_sender,
|
cache_block_meta_sender,
|
||||||
|
timings,
|
||||||
),
|
),
|
||||||
Some(deserialized_bank_slot_and_hash),
|
Some(deserialized_bank_slot_and_hash),
|
||||||
)
|
)
|
||||||
|
|
|
@ -25,6 +25,7 @@ use solana_runtime::{
|
||||||
bank_forks::BankForks,
|
bank_forks::BankForks,
|
||||||
bank_utils,
|
bank_utils,
|
||||||
commitment::VOTE_THRESHOLD_SIZE,
|
commitment::VOTE_THRESHOLD_SIZE,
|
||||||
|
snapshot_utils::BankFromArchiveTimings,
|
||||||
transaction_batch::TransactionBatch,
|
transaction_batch::TransactionBatch,
|
||||||
vote_account::ArcVoteAccount,
|
vote_account::ArcVoteAccount,
|
||||||
vote_sender_types::ReplayVoteSender,
|
vote_sender_types::ReplayVoteSender,
|
||||||
|
@ -421,6 +422,7 @@ pub fn process_blockstore(
|
||||||
&recyclers,
|
&recyclers,
|
||||||
None,
|
None,
|
||||||
cache_block_meta_sender,
|
cache_block_meta_sender,
|
||||||
|
BankFromArchiveTimings::default(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -432,6 +434,7 @@ pub(crate) fn process_blockstore_from_root(
|
||||||
recyclers: &VerifyRecyclers,
|
recyclers: &VerifyRecyclers,
|
||||||
transaction_status_sender: Option<&TransactionStatusSender>,
|
transaction_status_sender: Option<&TransactionStatusSender>,
|
||||||
cache_block_meta_sender: Option<&CacheBlockMetaSender>,
|
cache_block_meta_sender: Option<&CacheBlockMetaSender>,
|
||||||
|
timings: BankFromArchiveTimings,
|
||||||
) -> BlockstoreProcessorResult {
|
) -> BlockstoreProcessorResult {
|
||||||
do_process_blockstore_from_root(
|
do_process_blockstore_from_root(
|
||||||
blockstore,
|
blockstore,
|
||||||
|
@ -440,6 +443,7 @@ pub(crate) fn process_blockstore_from_root(
|
||||||
recyclers,
|
recyclers,
|
||||||
transaction_status_sender,
|
transaction_status_sender,
|
||||||
cache_block_meta_sender,
|
cache_block_meta_sender,
|
||||||
|
timings,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -450,6 +454,7 @@ fn do_process_blockstore_from_root(
|
||||||
recyclers: &VerifyRecyclers,
|
recyclers: &VerifyRecyclers,
|
||||||
transaction_status_sender: Option<&TransactionStatusSender>,
|
transaction_status_sender: Option<&TransactionStatusSender>,
|
||||||
cache_block_meta_sender: Option<&CacheBlockMetaSender>,
|
cache_block_meta_sender: Option<&CacheBlockMetaSender>,
|
||||||
|
timings: BankFromArchiveTimings,
|
||||||
) -> BlockstoreProcessorResult {
|
) -> BlockstoreProcessorResult {
|
||||||
info!("processing ledger from slot {}...", bank.slot());
|
info!("processing ledger from slot {}...", bank.slot());
|
||||||
|
|
||||||
|
@ -529,18 +534,45 @@ fn do_process_blockstore_from_root(
|
||||||
}
|
}
|
||||||
let bank_forks = BankForks::new_from_banks(&initial_forks, root);
|
let bank_forks = BankForks::new_from_banks(&initial_forks, root);
|
||||||
|
|
||||||
|
let processing_time = now.elapsed();
|
||||||
|
|
||||||
|
let debug_verify = false;
|
||||||
|
let mut time_cap = Measure::start("capitalization");
|
||||||
|
// We might be promptly restarted after bad capitalization was detected while creating newer snapshot.
|
||||||
|
// In that case, we're most likely restored from the last good snapshot and replayed up to this root.
|
||||||
|
// So again check here for the bad capitalization to avoid to continue until the next snapshot creation.
|
||||||
|
if !bank_forks
|
||||||
|
.root_bank()
|
||||||
|
.calculate_and_verify_capitalization(debug_verify)
|
||||||
|
{
|
||||||
|
return Err(BlockstoreProcessorError::RootBankWithMismatchedCapitalization(root));
|
||||||
|
}
|
||||||
|
time_cap.stop();
|
||||||
|
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"process_blockstore_from_root",
|
"process_blockstore_from_root",
|
||||||
("total_time_us", now.elapsed().as_micros(), i64),
|
("total_time_us", processing_time.as_micros(), i64),
|
||||||
("frozen_banks", bank_forks.frozen_banks().len(), i64),
|
("frozen_banks", bank_forks.frozen_banks().len(), i64),
|
||||||
("slot", bank_forks.root(), i64),
|
("slot", bank_forks.root(), i64),
|
||||||
("forks", initial_forks.len(), i64)
|
("forks", initial_forks.len(), i64),
|
||||||
|
("calculate_capitalization_us", time_cap.as_us(), i64),
|
||||||
|
("untar_us", timings.untar_us, i64),
|
||||||
|
(
|
||||||
|
"rebuild_bank_from_snapshots_us",
|
||||||
|
timings.rebuild_bank_from_snapshots_us,
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"verify_snapshot_bank_us",
|
||||||
|
timings.verify_snapshot_bank_us,
|
||||||
|
i64
|
||||||
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
info!("ledger processing timing: {:?}", timing);
|
info!("ledger processing timing: {:?}", timing);
|
||||||
info!(
|
info!(
|
||||||
"ledger processed in {}. root slot is {}, {} fork{} at {}, with {} frozen bank{}",
|
"ledger processed in {}. root slot is {}, {} fork{} at {}, with {} frozen bank{}",
|
||||||
HumanTime::from(chrono::Duration::from_std(now.elapsed()).unwrap())
|
HumanTime::from(chrono::Duration::from_std(processing_time).unwrap())
|
||||||
.to_text_en(Accuracy::Precise, Tense::Present),
|
.to_text_en(Accuracy::Precise, Tense::Present),
|
||||||
bank_forks.root(),
|
bank_forks.root(),
|
||||||
initial_forks.len(),
|
initial_forks.len(),
|
||||||
|
@ -558,17 +590,6 @@ fn do_process_blockstore_from_root(
|
||||||
);
|
);
|
||||||
assert!(bank_forks.active_banks().is_empty());
|
assert!(bank_forks.active_banks().is_empty());
|
||||||
|
|
||||||
let debug_verify = false;
|
|
||||||
// We might be promptly restarted after bad capitalization was detected while creating newer snapshot.
|
|
||||||
// In that case, we're most likely restored from the last good snapshot and replayed up to this root.
|
|
||||||
// So again check here for the bad capitalization to avoid to continue until the next snapshot creation.
|
|
||||||
if !bank_forks
|
|
||||||
.root_bank()
|
|
||||||
.calculate_and_verify_capitalization(debug_verify)
|
|
||||||
{
|
|
||||||
return Err(BlockstoreProcessorError::RootBankWithMismatchedCapitalization(root));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok((bank_forks, leader_schedule_cache))
|
Ok((bank_forks, leader_schedule_cache))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2897,8 +2918,15 @@ pub mod tests {
|
||||||
bank1.squash();
|
bank1.squash();
|
||||||
|
|
||||||
// Test process_blockstore_from_root() from slot 1 onwards
|
// Test process_blockstore_from_root() from slot 1 onwards
|
||||||
let (bank_forks, _leader_schedule) =
|
let (bank_forks, _leader_schedule) = do_process_blockstore_from_root(
|
||||||
do_process_blockstore_from_root(&blockstore, bank1, &opts, &recyclers, None, None)
|
&blockstore,
|
||||||
|
bank1,
|
||||||
|
&opts,
|
||||||
|
&recyclers,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
BankFromArchiveTimings::default(),
|
||||||
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(frozen_bank_slots(&bank_forks), vec![5, 6]);
|
assert_eq!(frozen_bank_slots(&bank_forks), vec![5, 6]);
|
||||||
|
|
|
@ -592,6 +592,13 @@ pub fn remove_snapshot<P: AsRef<Path>>(slot: Slot, snapshot_path: P) -> Result<(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
pub struct BankFromArchiveTimings {
|
||||||
|
pub rebuild_bank_from_snapshots_us: u64,
|
||||||
|
pub untar_us: u64,
|
||||||
|
pub verify_snapshot_bank_us: u64,
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn bank_from_archive<P: AsRef<Path>>(
|
pub fn bank_from_archive<P: AsRef<Path>>(
|
||||||
account_paths: &[PathBuf],
|
account_paths: &[PathBuf],
|
||||||
|
@ -606,17 +613,19 @@ pub fn bank_from_archive<P: AsRef<Path>>(
|
||||||
accounts_db_caching_enabled: bool,
|
accounts_db_caching_enabled: bool,
|
||||||
limit_load_slot_count_from_snapshot: Option<usize>,
|
limit_load_slot_count_from_snapshot: Option<usize>,
|
||||||
shrink_ratio: AccountShrinkThreshold,
|
shrink_ratio: AccountShrinkThreshold,
|
||||||
) -> Result<Bank> {
|
) -> Result<(Bank, BankFromArchiveTimings)> {
|
||||||
let unpack_dir = tempfile::Builder::new()
|
let unpack_dir = tempfile::Builder::new()
|
||||||
.prefix(TMP_SNAPSHOT_PREFIX)
|
.prefix(TMP_SNAPSHOT_PREFIX)
|
||||||
.tempdir_in(snapshot_path)?;
|
.tempdir_in(snapshot_path)?;
|
||||||
|
|
||||||
|
let mut untar = Measure::start("untar");
|
||||||
let unpacked_append_vec_map = untar_snapshot_in(
|
let unpacked_append_vec_map = untar_snapshot_in(
|
||||||
&snapshot_tar,
|
&snapshot_tar,
|
||||||
&unpack_dir.as_ref(),
|
&unpack_dir.as_ref(),
|
||||||
account_paths,
|
account_paths,
|
||||||
archive_format,
|
archive_format,
|
||||||
)?;
|
)?;
|
||||||
|
untar.stop();
|
||||||
|
|
||||||
let mut measure = Measure::start("bank rebuild from snapshot");
|
let mut measure = Measure::start("bank rebuild from snapshot");
|
||||||
let unpacked_snapshots_dir = unpack_dir.as_ref().join("snapshots");
|
let unpacked_snapshots_dir = unpack_dir.as_ref().join("snapshots");
|
||||||
|
@ -639,14 +648,20 @@ pub fn bank_from_archive<P: AsRef<Path>>(
|
||||||
limit_load_slot_count_from_snapshot,
|
limit_load_slot_count_from_snapshot,
|
||||||
shrink_ratio,
|
shrink_ratio,
|
||||||
)?;
|
)?;
|
||||||
|
measure.stop();
|
||||||
|
|
||||||
|
let mut verify = Measure::start("verify");
|
||||||
if !bank.verify_snapshot_bank() {
|
if !bank.verify_snapshot_bank() {
|
||||||
panic!("Snapshot bank for slot {} failed to verify", bank.slot());
|
panic!("Snapshot bank for slot {} failed to verify", bank.slot());
|
||||||
}
|
}
|
||||||
measure.stop();
|
verify.stop();
|
||||||
info!("{}", measure);
|
let timings = BankFromArchiveTimings {
|
||||||
|
rebuild_bank_from_snapshots_us: measure.as_us(),
|
||||||
|
untar_us: untar.as_us(),
|
||||||
|
verify_snapshot_bank_us: verify.as_us(),
|
||||||
|
};
|
||||||
|
|
||||||
Ok(bank)
|
Ok((bank, timings))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_snapshot_archive_path(
|
pub fn get_snapshot_archive_path(
|
||||||
|
|
Loading…
Reference in New Issue