add active stats for pieces of hash calc (#32750)

Jeff Washington (jwash) authored on 2023-08-08 07:16:10 -07:00; committed by GitHub
parent 32cb381f69
commit 7c1cf298aa
3 changed files with 40 additions and 9 deletions

View File

@@ -7476,6 +7476,8 @@ impl AccountsDb {
config: &CalcAccountsHashConfig<'_>,
filler_account_suffix: Option<&Pubkey>,
) -> Result<Vec<CacheHashDataFile>, AccountsHashVerificationError> {
+let _ = self.active_stats.activate(ActiveStatItem::HashScan);
let bin_calculator = PubkeyBinCalculator24::new(bins);
assert!(bin_range.start < bins && bin_range.end <= bins && bin_range.start < bin_range.end);
let mut time = Measure::start("scan all accounts");
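
The statement added here leans on the drop-guard pattern that active_stats.rs is built around (see the `ActiveStatGuard` comment in the third file): `activate` bumps a counter and returns a value whose `Drop` impl undoes the bump when the phase ends. A tiny, self-contained illustration of the drop timing (sketch, not part of the commit; `Noisy` is a made-up type, not the real guard) — binding to `_` drops the value at the end of the statement, while a named binding such as `_guard` keeps it alive until the end of the scope:

struct Noisy(&'static str);

impl Drop for Noisy {
    fn drop(&mut self) {
        println!("dropped: {}", self.0);
    }
}

fn main() {
    let _ = Noisy("bound to `_`"); // dropped at the end of this statement
    let _guard = Noisy("bound to `_guard`");
    println!("still inside the scope");
    // `_guard` is dropped only here, when it goes out of scope
}
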
@@ -7650,6 +7652,7 @@ impl AccountsDb {
},
zero_lamport_accounts: flavor.zero_lamport_accounts(),
dir_for_temp_cache_files: self.transient_accounts_hash_cache_path.clone(),
+active_stats: &self.active_stats,
};
// get raw data by scanning

View File

@@ -1,6 +1,7 @@
use {
crate::{
accounts_db::{AccountStorageEntry, IncludeSlotInHash, PUBKEY_BINS_FOR_CALCULATING_HASHES},
+active_stats::{ActiveStatItem, ActiveStats},
ancestors::Ancestors,
pubkey_bins::PubkeyBinCalculator24,
rent_collector::RentCollector,
@@ -447,14 +448,15 @@ impl CumulativeOffsets {
}
#[derive(Debug)]
-pub struct AccountsHasher {
+pub struct AccountsHasher<'a> {
pub filler_account_suffix: Option<Pubkey>,
pub zero_lamport_accounts: ZeroLamportAccounts,
/// The directory where temporary cache files are put
pub dir_for_temp_cache_files: PathBuf,
+pub(crate) active_stats: &'a ActiveStats,
}
-impl AccountsHasher {
+impl<'a> AccountsHasher<'a> {
/// true if it is possible that there are filler accounts present
pub fn filler_accounts_enabled(&self) -> bool {
self.filler_account_suffix.is_some()
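
The new `active_stats` field is a reference, which is what forces the `'a` parameter onto `AccountsHasher`: the hasher now borrows the `ActiveStats` owned by `AccountsDb` (see `active_stats: &self.active_stats` in the first file) and therefore cannot outlive it. A condensed sketch of that relationship (not part of the commit; `Stats`, `DbLike`, and `HasherLike` are illustrative stand-ins, not the real structs):

use std::sync::atomic::{AtomicUsize, Ordering};

#[derive(Default)]
struct Stats {
    active: AtomicUsize,
}

struct DbLike {
    active_stats: Stats,
}

struct HasherLike<'a> {
    // mirrors `pub(crate) active_stats: &'a ActiveStats`
    active_stats: &'a Stats,
}

impl DbLike {
    fn make_hasher(&self) -> HasherLike<'_> {
        HasherLike {
            // mirrors `active_stats: &self.active_stats` from the first file
            active_stats: &self.active_stats,
        }
    }
}

fn main() {
    let db = DbLike { active_stats: Stats::default() };
    let hasher = db.make_hasher();
    // The hasher borrows db.active_stats; dropping `db` while `hasher` is
    // still alive would be rejected by the borrow checker.
    assert_eq!(hasher.active_stats.active.load(Ordering::Relaxed), 0);
}

Sharing one `ActiveStats` instance by reference means the scan, dedup, and merkle-tree phases all report into the same counters, whether the phase runs inside `AccountsDb` or inside the hasher.
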
@@ -561,7 +563,7 @@ impl AccountsHasher {
// This function is designed to allow hashes to be located in multiple, perhaps multiply deep vecs.
// The caller provides a function to return a slice from the source data.
-pub fn compute_merkle_root_from_slices<'a, F, T>(
+pub fn compute_merkle_root_from_slices<'b, F, T>(
total_hashes: usize,
fanout: usize,
max_levels_per_pass: Option<usize>,
@@ -570,8 +572,8 @@ impl AccountsHasher {
) -> (Hash, Vec<Hash>)
where
// returns a slice of hashes starting at the given overall index
-F: Fn(usize) -> &'a [T] + std::marker::Sync,
-T: Borrow<Hash> + std::marker::Sync + 'a,
+F: Fn(usize) -> &'b [T] + std::marker::Sync,
+T: Borrow<Hash> + std::marker::Sync + 'b,
{
if total_hashes == 0 {
return (Hasher::default().result(), vec![]);
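
The `'a` to `'b` renames in this hunk (and on `get_item` further down) are a mechanical consequence of the struct change: once the surrounding block is `impl<'a> AccountsHasher<'a>`, a method can no longer declare its own lifetime parameter named `'a`, because the compiler rejects shadowing of an in-scope lifetime name. A minimal illustration with a hypothetical `Holder` type (sketch, not part of the commit):

struct Holder<'a> {
    value: &'a u32,
}

impl<'a> Holder<'a> {
    // Declaring `fn show<'a>(...)` here is rejected (E0496: lifetime name
    // `'a` shadows a lifetime name that is already in scope), so a different
    // name is needed, exactly as the diff switches to `'b`.
    fn show<'b>(&self, other: &'b u32) -> &'b u32 {
        let _ = self.value; // touch the field so the example is warning-free
        other
    }
}

fn main() {
    let x = 1u32;
    let h = Holder { value: &x };
    let y = 7u32;
    assert_eq!(*h.show(&y), 7);
}
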
@@ -780,6 +782,8 @@ impl AccountsHasher {
// a. vec: PUBKEY_BINS_FOR_CALCULATING_HASHES in pubkey order
// vec: individual hashes in pubkey order, 1 hash per
// b. lamports
+let _ = self.active_stats.activate(ActiveStatItem::HashDeDup);
let mut zeros = Measure::start("eliminate zeros");
let sum = Mutex::new(0u64);
let hash_total = AtomicUsize::default();
@@ -810,15 +814,15 @@ impl AccountsHasher {
/// updates `first_items` to point to the next pubkey
/// or removes the entire pubkey division entries (for `min_index`) if the referenced pubkey is the last entry in the same `bin`
/// removed from: `first_items`, `indexes`, and `first_item_pubkey_division`
-fn get_item<'a>(
+fn get_item<'b>(
min_index: usize,
bin: usize,
first_items: &mut Vec<Pubkey>,
-sorted_data_by_pubkey: &[&'a [CalculateHashIntermediate]],
+sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
indexes: &mut Vec<usize>,
first_item_to_pubkey_division: &mut Vec<usize>,
binner: &PubkeyBinCalculator24,
-) -> &'a CalculateHashIntermediate {
+) -> &'b CalculateHashIntermediate {
let first_item = first_items[min_index];
let key = &first_item;
let division_index = first_item_to_pubkey_division[min_index];
@@ -1085,6 +1089,7 @@ impl AccountsHasher {
let cumulative = CumulativeHashesFromFiles::from_files(hashes);
+let _ = self.active_stats.activate(ActiveStatItem::HashMerkleTree);
let mut hash_time = Measure::start("hash");
let (hash, _) = Self::compute_merkle_root_from_slices(
cumulative.total_count(),
@@ -1147,12 +1152,17 @@ pub struct AccountsDeltaHash(pub Hash);
pub mod tests {
use {super::*, itertools::Itertools, std::str::FromStr, tempfile::tempdir};
-impl AccountsHasher {
+lazy_static! {
+static ref ACTIVE_STATS: ActiveStats = ActiveStats::default();
+}
+impl<'a> AccountsHasher<'a> {
fn new(dir_for_temp_cache_files: PathBuf) -> Self {
Self {
filler_account_suffix: None,
zero_lamport_accounts: ZeroLamportAccounts::Excluded,
dir_for_temp_cache_files,
+active_stats: &ACTIVE_STATS,
}
}
}
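
The test-only constructor keeps its zero-argument shape by pointing the new field at a process-wide static: a value declared in a `lazy_static!` block lives for `'static`, and `&'static ActiveStats` coerces to `&'a ActiveStats` for any `'a`. A reduced sketch of the same trick (not part of the commit), written against std's `OnceLock` instead of the `lazy_static` crate so it has no dependencies, with illustrative stand-in types:

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::OnceLock;

#[derive(Default)]
struct Stats {
    hash_scan: AtomicUsize,
}

struct Hasher<'a> {
    stats: &'a Stats,
}

// A process-wide static lives for 'static, like the lazy_static! block above.
fn shared_stats() -> &'static Stats {
    static STATS: OnceLock<Stats> = OnceLock::new();
    STATS.get_or_init(Stats::default)
}

impl<'a> Hasher<'a> {
    // No extra parameter needed: &'static Stats coerces to &'a Stats.
    fn new() -> Self {
        Self {
            stats: shared_stats(),
        }
    }
}

fn main() {
    let hasher = Hasher::new();
    assert_eq!(hasher.stats.hash_scan.load(Ordering::Relaxed), 0);
}
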

View File

@@ -9,6 +9,9 @@ pub struct ActiveStats {
shrink: AtomicUsize,
hash: AtomicUsize,
flush: AtomicUsize,
+hash_scan: AtomicUsize,
+hash_dedup: AtomicUsize,
+hash_merkle: AtomicUsize,
}
#[derive(Debug, Copy, Clone)]
@@ -18,6 +21,9 @@ pub enum ActiveStatItem {
SquashAncient,
Hash,
Flush,
+HashScan,
+HashDeDup,
+HashMerkleTree,
}
/// sole purpose is to handle 'drop' so that stat is decremented when self is dropped
@@ -54,6 +60,9 @@ impl ActiveStats {
ActiveStatItem::SquashAncient => &self.squash_ancient,
ActiveStatItem::Hash => &self.hash,
ActiveStatItem::Flush => &self.flush,
+ActiveStatItem::HashDeDup => &self.hash_dedup,
+ActiveStatItem::HashMerkleTree => &self.hash_merkle,
+ActiveStatItem::HashScan => &self.hash_scan,
};
let value = modify_stat(stat);
match item {
@@ -66,6 +75,15 @@ impl ActiveStats {
}
ActiveStatItem::Hash => datapoint_info!("accounts_db_active", ("hash", value, i64)),
ActiveStatItem::Flush => datapoint_info!("accounts_db_active", ("flush", value, i64)),
+ActiveStatItem::HashDeDup => {
+datapoint_info!("accounts_db_active", ("hash_dedup", value, i64))
+}
+ActiveStatItem::HashMerkleTree => {
+datapoint_info!("accounts_db_active", ("hash_merkle_tree", value, i64))
+}
+ActiveStatItem::HashScan => {
+datapoint_info!("accounts_db_active", ("hash_scan", value, i64))
+}
};
}
}
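
Taken together, the third file gives each new `ActiveStatItem` variant its own `AtomicUsize` and reports every change as an `accounts_db_active` datapoint, so operators can see how many threads are currently inside each phase of the accounts-hash calculation (scan, dedup, merkle tree). A compressed, dependency-free sketch of that shape (not part of the commit; names are illustrative and `println!` stands in for `datapoint_info!`):

use std::sync::atomic::{AtomicUsize, Ordering};

#[derive(Debug, Default)]
struct Stats {
    hash_scan: AtomicUsize,
    hash_dedup: AtomicUsize,
    hash_merkle: AtomicUsize,
}

#[derive(Debug, Clone, Copy)]
enum Item {
    HashScan,
    HashDeDup,
    HashMerkleTree,
}

// Drop guard: decrements (and re-reports) the stat when the phase ends.
struct Guard<'a> {
    stats: &'a Stats,
    item: Item,
}

impl Stats {
    fn counter(&self, item: Item) -> &AtomicUsize {
        match item {
            Item::HashScan => &self.hash_scan,
            Item::HashDeDup => &self.hash_dedup,
            Item::HashMerkleTree => &self.hash_merkle,
        }
    }

    fn report(&self, item: Item) {
        // Stand-in for `datapoint_info!("accounts_db_active", (<name>, value, i64))`.
        let value = self.counter(item).load(Ordering::Relaxed);
        println!("accounts_db_active {item:?}={value}");
    }

    fn activate(&self, item: Item) -> Guard<'_> {
        self.counter(item).fetch_add(1, Ordering::Relaxed);
        self.report(item);
        Guard { stats: self, item }
    }
}

impl Drop for Guard<'_> {
    fn drop(&mut self) {
        self.stats.counter(self.item).fetch_sub(1, Ordering::Relaxed);
        self.stats.report(self.item);
    }
}

fn main() {
    let stats = Stats::default();
    let _scan = stats.activate(Item::HashScan); // accounts_db_active HashScan=1
    {
        let _dedup = stats.activate(Item::HashDeDup); // HashDeDup=1
    } // HashDeDup drops back to 0 here
} // HashScan drops back to 0 here
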