add metrics to handle_snapshot_requests (#17937)
This commit is contained in:
parent
a0872232d3
commit
e6bbd4b3f0
|
@ -222,7 +222,7 @@ mod tests {
|
||||||
// set_root should send a snapshot request
|
// set_root should send a snapshot request
|
||||||
bank_forks.set_root(bank.slot(), &request_sender, None);
|
bank_forks.set_root(bank.slot(), &request_sender, None);
|
||||||
bank.update_accounts_hash();
|
bank.update_accounts_hash();
|
||||||
snapshot_request_handler.handle_snapshot_requests(false, false, false);
|
snapshot_request_handler.handle_snapshot_requests(false, false, false, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -89,11 +89,13 @@ impl SnapshotRequestHandler {
|
||||||
accounts_db_caching_enabled: bool,
|
accounts_db_caching_enabled: bool,
|
||||||
test_hash_calculation: bool,
|
test_hash_calculation: bool,
|
||||||
use_index_hash_calculation: bool,
|
use_index_hash_calculation: bool,
|
||||||
|
non_snapshot_time_us: u128,
|
||||||
) -> Option<u64> {
|
) -> Option<u64> {
|
||||||
self.snapshot_request_receiver
|
self.snapshot_request_receiver
|
||||||
.try_iter()
|
.try_iter()
|
||||||
.last()
|
.last()
|
||||||
.map(|snapshot_request| {
|
.map(|snapshot_request| {
|
||||||
|
let mut total_time = Measure::start("wallclock time elapsed");
|
||||||
let SnapshotRequest {
|
let SnapshotRequest {
|
||||||
snapshot_root_bank,
|
snapshot_root_bank,
|
||||||
status_cache_slot_deltas,
|
status_cache_slot_deltas,
|
||||||
|
@ -188,6 +190,7 @@ impl SnapshotRequestHandler {
|
||||||
let mut purge_old_snapshots_time = Measure::start("purge_old_snapshots_time");
|
let mut purge_old_snapshots_time = Measure::start("purge_old_snapshots_time");
|
||||||
snapshot_utils::purge_old_snapshots(&self.snapshot_config.snapshot_path);
|
snapshot_utils::purge_old_snapshots(&self.snapshot_config.snapshot_path);
|
||||||
purge_old_snapshots_time.stop();
|
purge_old_snapshots_time.stop();
|
||||||
|
total_time.stop();
|
||||||
|
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"handle_snapshot_requests-timing",
|
"handle_snapshot_requests-timing",
|
||||||
|
@ -205,6 +208,8 @@ impl SnapshotRequestHandler {
|
||||||
purge_old_snapshots_time.as_us(),
|
purge_old_snapshots_time.as_us(),
|
||||||
i64
|
i64
|
||||||
),
|
),
|
||||||
|
("total_us", total_time.as_us(), i64),
|
||||||
|
("non_snapshot_time_us", non_snapshot_time_us, i64),
|
||||||
);
|
);
|
||||||
snapshot_root_bank.block_height()
|
snapshot_root_bank.block_height()
|
||||||
})
|
})
|
||||||
|
@ -251,6 +256,7 @@ impl AbsRequestHandler {
|
||||||
accounts_db_caching_enabled: bool,
|
accounts_db_caching_enabled: bool,
|
||||||
test_hash_calculation: bool,
|
test_hash_calculation: bool,
|
||||||
use_index_hash_calculation: bool,
|
use_index_hash_calculation: bool,
|
||||||
|
non_snapshot_time_us: u128,
|
||||||
) -> Option<u64> {
|
) -> Option<u64> {
|
||||||
self.snapshot_request_handler
|
self.snapshot_request_handler
|
||||||
.as_ref()
|
.as_ref()
|
||||||
|
@ -259,6 +265,7 @@ impl AbsRequestHandler {
|
||||||
accounts_db_caching_enabled,
|
accounts_db_caching_enabled,
|
||||||
test_hash_calculation,
|
test_hash_calculation,
|
||||||
use_index_hash_calculation,
|
use_index_hash_calculation,
|
||||||
|
non_snapshot_time_us,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -297,87 +304,101 @@ impl AccountsBackgroundService {
|
||||||
let mut last_expiration_check_time = Instant::now();
|
let mut last_expiration_check_time = Instant::now();
|
||||||
let t_background = Builder::new()
|
let t_background = Builder::new()
|
||||||
.name("solana-bg-accounts".to_string())
|
.name("solana-bg-accounts".to_string())
|
||||||
.spawn(move || loop {
|
.spawn(move || {
|
||||||
if exit.load(Ordering::Relaxed) {
|
let mut last_snapshot_end_time = None;
|
||||||
break;
|
loop {
|
||||||
}
|
if exit.load(Ordering::Relaxed) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// Grab the current root bank
|
// Grab the current root bank
|
||||||
let bank = bank_forks.read().unwrap().root_bank().clone();
|
let bank = bank_forks.read().unwrap().root_bank().clone();
|
||||||
|
|
||||||
// Purge accounts of any dead slots
|
// Purge accounts of any dead slots
|
||||||
Self::remove_dead_slots(
|
Self::remove_dead_slots(
|
||||||
&bank,
|
&bank,
|
||||||
&request_handler,
|
&request_handler,
|
||||||
&mut removed_slots_count,
|
&mut removed_slots_count,
|
||||||
&mut total_remove_slots_time,
|
&mut total_remove_slots_time,
|
||||||
);
|
);
|
||||||
|
|
||||||
Self::expire_old_recycle_stores(&bank, &mut last_expiration_check_time);
|
Self::expire_old_recycle_stores(&bank, &mut last_expiration_check_time);
|
||||||
|
|
||||||
// Check to see if there were any requests for snapshotting banks
|
let non_snapshot_time = last_snapshot_end_time
|
||||||
// < the current root bank `bank` above.
|
.map(|last_snapshot_end_time: Instant| {
|
||||||
|
last_snapshot_end_time.elapsed().as_micros()
|
||||||
|
})
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
// Claim: Any snapshot request for slot `N` found here implies that the last cleanup
|
// Check to see if there were any requests for snapshotting banks
|
||||||
// slot `M` satisfies `M < N`
|
// < the current root bank `bank` above.
|
||||||
//
|
|
||||||
// Proof: Assume for contradiction that we find a snapshot request for slot `N` here,
|
// Claim: Any snapshot request for slot `N` found here implies that the last cleanup
|
||||||
// but cleanup has already happened on some slot `M >= N`. Because the call to
|
// slot `M` satisfies `M < N`
|
||||||
// `bank.clean_accounts(true)` (in the code below) implies we only clean slots `<= bank - 1`,
|
//
|
||||||
// then that means in some *previous* iteration of this loop, we must have gotten a root
|
// Proof: Assume for contradiction that we find a snapshot request for slot `N` here,
|
||||||
// bank for slot some slot `R` where `R > N`, but did not see the snapshot for `N` in the
|
// but cleanup has already happened on some slot `M >= N`. Because the call to
|
||||||
// snapshot request channel.
|
// `bank.clean_accounts(true)` (in the code below) implies we only clean slots `<= bank - 1`,
|
||||||
//
|
// then that means in some *previous* iteration of this loop, we must have gotten a root
|
||||||
// However, this is impossible because BankForks.set_root() will always flush the snapshot
|
// bank for slot some slot `R` where `R > N`, but did not see the snapshot for `N` in the
|
||||||
// request for `N` to the snapshot request channel before setting a root `R > N`, and
|
// snapshot request channel.
|
||||||
// snapshot_request_handler.handle_requests() will always look for the latest
|
//
|
||||||
// available snapshot in the channel.
|
// However, this is impossible because BankForks.set_root() will always flush the snapshot
|
||||||
let snapshot_block_height = request_handler.handle_snapshot_requests(
|
// request for `N` to the snapshot request channel before setting a root `R > N`, and
|
||||||
accounts_db_caching_enabled,
|
// snapshot_request_handler.handle_requests() will always look for the latest
|
||||||
test_hash_calculation,
|
// available snapshot in the channel.
|
||||||
use_index_hash_calculation,
|
let snapshot_block_height = request_handler.handle_snapshot_requests(
|
||||||
);
|
accounts_db_caching_enabled,
|
||||||
if accounts_db_caching_enabled {
|
test_hash_calculation,
|
||||||
// Note that the flush will do an internal clean of the
|
use_index_hash_calculation,
|
||||||
// cache up to bank.slot(), so should be safe as long
|
non_snapshot_time,
|
||||||
// as any later snapshots that are taken are of
|
);
|
||||||
// slots >= bank.slot()
|
if snapshot_block_height.is_some() {
|
||||||
bank.flush_accounts_cache_if_needed();
|
last_snapshot_end_time = Some(Instant::now());
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(snapshot_block_height) = snapshot_block_height {
|
|
||||||
// Safe, see proof above
|
|
||||||
assert!(last_cleaned_block_height <= snapshot_block_height);
|
|
||||||
last_cleaned_block_height = snapshot_block_height;
|
|
||||||
} else {
|
|
||||||
if accounts_db_caching_enabled {
|
if accounts_db_caching_enabled {
|
||||||
bank.shrink_candidate_slots();
|
// Note that the flush will do an internal clean of the
|
||||||
|
// cache up to bank.slot(), so should be safe as long
|
||||||
|
// as any later snapshots that are taken are of
|
||||||
|
// slots >= bank.slot()
|
||||||
|
bank.flush_accounts_cache_if_needed();
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(snapshot_block_height) = snapshot_block_height {
|
||||||
|
// Safe, see proof above
|
||||||
|
assert!(last_cleaned_block_height <= snapshot_block_height);
|
||||||
|
last_cleaned_block_height = snapshot_block_height;
|
||||||
} else {
|
} else {
|
||||||
// under sustained writes, shrink can lag behind so cap to
|
|
||||||
// SHRUNKEN_ACCOUNT_PER_INTERVAL (which is based on INTERVAL_MS,
|
|
||||||
// which in turn roughly associated block time)
|
|
||||||
consumed_budget = bank
|
|
||||||
.process_stale_slot_with_budget(
|
|
||||||
consumed_budget,
|
|
||||||
SHRUNKEN_ACCOUNT_PER_INTERVAL,
|
|
||||||
)
|
|
||||||
.min(SHRUNKEN_ACCOUNT_PER_INTERVAL);
|
|
||||||
}
|
|
||||||
if bank.block_height() - last_cleaned_block_height
|
|
||||||
> (CLEAN_INTERVAL_BLOCKS + thread_rng().gen_range(0, 10))
|
|
||||||
{
|
|
||||||
if accounts_db_caching_enabled {
|
if accounts_db_caching_enabled {
|
||||||
// Note that the flush will do an internal clean of the
|
bank.shrink_candidate_slots();
|
||||||
// cache up to bank.slot(), so should be safe as long
|
} else {
|
||||||
// as any later snapshots that are taken are of
|
// under sustained writes, shrink can lag behind so cap to
|
||||||
// slots >= bank.slot()
|
// SHRUNKEN_ACCOUNT_PER_INTERVAL (which is based on INTERVAL_MS,
|
||||||
bank.force_flush_accounts_cache();
|
// which in turn roughly associated block time)
|
||||||
|
consumed_budget = bank
|
||||||
|
.process_stale_slot_with_budget(
|
||||||
|
consumed_budget,
|
||||||
|
SHRUNKEN_ACCOUNT_PER_INTERVAL,
|
||||||
|
)
|
||||||
|
.min(SHRUNKEN_ACCOUNT_PER_INTERVAL);
|
||||||
|
}
|
||||||
|
if bank.block_height() - last_cleaned_block_height
|
||||||
|
> (CLEAN_INTERVAL_BLOCKS + thread_rng().gen_range(0, 10))
|
||||||
|
{
|
||||||
|
if accounts_db_caching_enabled {
|
||||||
|
// Note that the flush will do an internal clean of the
|
||||||
|
// cache up to bank.slot(), so should be safe as long
|
||||||
|
// as any later snapshots that are taken are of
|
||||||
|
// slots >= bank.slot()
|
||||||
|
bank.force_flush_accounts_cache();
|
||||||
|
}
|
||||||
|
bank.clean_accounts(true, false);
|
||||||
|
last_cleaned_block_height = bank.block_height();
|
||||||
}
|
}
|
||||||
bank.clean_accounts(true, false);
|
|
||||||
last_cleaned_block_height = bank.block_height();
|
|
||||||
}
|
}
|
||||||
|
sleep(Duration::from_millis(INTERVAL_MS));
|
||||||
}
|
}
|
||||||
sleep(Duration::from_millis(INTERVAL_MS));
|
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
Self { t_background }
|
Self { t_background }
|
||||||
|
|
Loading…
Reference in New Issue