generating index sets uncleaned_roots correctly (#26431)

* generating index sets uncleaned_roots correctly

* fix test failures

* rename

* update comments
Jeff Washington (jwash) 2022-07-06 16:12:47 -05:00 committed by GitHub
parent f2fada9f21
commit 16219e300e
1 changed file with 50 additions and 27 deletions


@@ -8281,7 +8281,7 @@ impl AccountsDb {
 m.stop();
 index_flush_us = m.as_us();
-// this has to happen before pubkeys_to_duplicate_accounts_data_len below
+// this has to happen before get_duplicate_accounts_slots_and_data_len below
 // get duplicate keys from acct idx. We have to wait until we've finished flushing.
 for (slot, key) in self
 .accounts_index
@@ -8300,6 +8300,7 @@ impl AccountsDb {
 // subtract data.len() from accounts_data_len for all old accounts that are in the index twice
 let mut accounts_data_len_dedup_timer =
 Measure::start("handle accounts data len duplicates");
+let uncleaned_roots = Mutex::new(HashSet::<Slot>::default());
 if pass == 0 {
 let mut unique_pubkeys = HashSet::<Pubkey>::default();
 self.uncleaned_pubkeys.iter().for_each(|entry| {
@@ -8311,7 +8312,15 @@
 .into_iter()
 .collect::<Vec<_>>()
 .par_chunks(4096)
-.map(|pubkeys| self.pubkeys_to_duplicate_accounts_data_len(pubkeys))
+.map(|pubkeys| {
+let (count, uncleaned_roots_this_group) =
+self.get_duplicate_accounts_slots_and_data_len(pubkeys);
+let mut uncleaned_roots = uncleaned_roots.lock().unwrap();
+uncleaned_roots_this_group.into_iter().for_each(|slot| {
+uncleaned_roots.insert(slot);
+});
+count
+})
 .sum();
 accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed);
 info!(
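
The merge step here has a simple shape: each 4096-pubkey chunk returns a (count, slots) pair, the counts are reduced with .sum(), and the slot sets are folded into a shared Mutex<HashSet> that is consumed once the parallel work finishes. A minimal standalone sketch of that pattern, assuming the rayon crate; the per-chunk closure and names below are illustrative stand-ins, not the real index lookup:

// Standalone sketch of the per-chunk accumulation pattern above, assuming
// the rayon crate. The closure body stands in for
// get_duplicate_accounts_slots_and_data_len(); names are illustrative.
use rayon::prelude::*;
use std::collections::HashSet;
use std::sync::Mutex;

type Slot = u64;

fn sum_counts_and_merge_slots(items: &[Slot]) -> (u64, HashSet<Slot>) {
    let merged = Mutex::new(HashSet::<Slot>::default());
    let total: u64 = items
        .par_chunks(4096)
        .map(|chunk| {
            // each chunk produces a (count, slots) pair...
            let (count, slots): (u64, HashSet<Slot>) =
                (chunk.len() as u64, chunk.iter().copied().collect());
            // ...the slots merge into the shared set under the lock,
            // while the counts flow through to sum()
            merged.lock().unwrap().extend(slots);
            count
        })
        .sum();
    // all parallel workers are done, so the Mutex can be consumed
    (total, merged.into_inner().unwrap())
}

Locking once per chunk rather than once per slot keeps contention low: each worker batches up to 4096 pubkeys of results before touching the shared set.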
@@ -8341,9 +8350,14 @@
 };
 if pass == 0 {
+let uncleaned_roots = uncleaned_roots.into_inner().unwrap();
 // Need to add these last, otherwise older updates will be cleaned
-for slot in &slots {
-self.accounts_index.add_root(*slot, false);
+for root in &slots {
+// passing 'false' to 'add_root' causes 'root' to be added to 'accounts_index.roots_tracker.uncleaned_roots'
+// passing 'true' to 'add_root' does NOT add 'root' to 'accounts_index.roots_tracker.uncleaned_roots'
+// So, don't add all slots to 'uncleaned_roots' here since we know which slots contain duplicate pubkeys.
+let uncleaned_root = uncleaned_roots.contains(root);
+self.accounts_index.add_root(*root, !uncleaned_root);
 }
 self.set_storage_count_and_alive_bytes(storage_info, &mut timings);
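
The comments above rely on one invariant of add_root: its boolean argument controls whether the new root is also recorded in roots_tracker.uncleaned_roots for a later clean pass. A toy model of just that observable behavior, with simplified field and parameter names (this is not the real AccountsIndex):

// Toy model of the invariant the loop above relies on; NOT the real
// AccountsIndex, just the behavior described in the comments.
use std::collections::HashSet;

type Slot = u64;

#[derive(Default)]
struct RootsTracker {
    roots: HashSet<Slot>,
    uncleaned_roots: HashSet<Slot>,
}

impl RootsTracker {
    // passing 'false' records the root in 'uncleaned_roots' so a later
    // clean pass will visit it; passing 'true' skips that bookkeeping
    fn add_root(&mut self, root: Slot, skip_uncleaned: bool) {
        self.roots.insert(root);
        if !skip_uncleaned {
            self.uncleaned_roots.insert(root);
        }
    }
}

With add_root(*root, !uncleaned_root), only the slots known to contain duplicate pubkeys are queued for cleaning, rather than every rooted slot.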
@ -8380,10 +8394,17 @@ impl AccountsDb {
}
}
/// Used during generate_index() to get the _duplicate_ accounts data len from the given pubkeys
/// Used during generate_index() to:
/// 1. get the _duplicate_ accounts data len from the given pubkeys
/// 2. get the slots that contained duplicate pubkeys
/// Note this should only be used when ALL entries in the accounts index are roots.
fn pubkeys_to_duplicate_accounts_data_len(&self, pubkeys: &[Pubkey]) -> u64 {
/// returns (data len sum of all older duplicates, slots that contained duplicate pubkeys)
fn get_duplicate_accounts_slots_and_data_len(
&self,
pubkeys: &[Pubkey],
) -> (u64, HashSet<Slot>) {
let mut accounts_data_len_from_duplicates = 0;
let mut uncleaned_slots = HashSet::<Slot>::default();
pubkeys.iter().for_each(|pubkey| {
if let Some(entry) = self.accounts_index.get_account_read_entry(pubkey) {
let slot_list = entry.slot_list();
@@ -8391,26 +8412,29 @@
 return;
 }
 // Only the account data len in the highest slot should be used, and the rest are
-// duplicates. So sort the slot list in descending slot order, skip the first
-// item, then sum up the remaining data len, which are the duplicates.
-let mut slot_list = slot_list.clone();
-slot_list
-.select_nth_unstable_by(0, |a, b| b.0.cmp(&a.0))
-.2
-.iter()
-.for_each(|(slot, account_info)| {
-let maybe_storage_entry = self
-.storage
-.get_account_storage_entry(*slot, account_info.store_id());
-let mut accessor = LoadedAccountAccessor::Stored(
-maybe_storage_entry.map(|entry| (entry, account_info.offset())),
-);
-let loaded_account = accessor.check_and_get_loaded_account();
-accounts_data_len_from_duplicates += loaded_account.data().len();
-});
+// duplicates. So find the max slot to keep.
+// Then sum up the remaining data len, which are the duplicates.
+// All of the slots need to go in the 'uncleaned_slots' list. For clean to work properly,
+// the slots where duplicate accounts are found in the index need to be in the 'uncleaned_slots' list, too.
+let max = slot_list.iter().map(|(slot, _)| slot).max().unwrap();
+slot_list.iter().for_each(|(slot, account_info)| {
+uncleaned_slots.insert(*slot);
+if slot == max {
+// the info in 'max' is the most recent, current info for this pubkey
+return;
+}
+let maybe_storage_entry = self
+.storage
+.get_account_storage_entry(*slot, account_info.store_id());
+let mut accessor = LoadedAccountAccessor::Stored(
+maybe_storage_entry.map(|entry| (entry, account_info.offset())),
+);
+let loaded_account = accessor.check_and_get_loaded_account();
+accounts_data_len_from_duplicates += loaded_account.data().len();
+});
 }
 });
-accounts_data_len_from_duplicates as u64
+(accounts_data_len_from_duplicates as u64, uncleaned_slots)
 }
 fn update_storage_info(
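
Stripped of the storage access, the new accounting reduces to: find the max slot, record every slot as uncleaned, and sum the data lengths of everything older than the max. A self-contained sketch under that reading, where a plain usize stands in for loaded_account.data().len() in the real code:

use std::collections::HashSet;

type Slot = u64;

// slot_list pairs each slot with that version's data length; the usize
// stands in for loaded_account.data().len()
fn duplicate_data_len(slot_list: &[(Slot, usize)]) -> (u64, HashSet<Slot>) {
    let mut uncleaned_slots = HashSet::new();
    let mut dup_len = 0u64;
    // a single entry means no duplicates, mirroring the early return above
    if slot_list.len() < 2 {
        return (dup_len, uncleaned_slots);
    }
    let max = *slot_list.iter().map(|(slot, _)| slot).max().unwrap();
    for (slot, data_len) in slot_list {
        uncleaned_slots.insert(*slot);
        if *slot == max {
            continue; // the max slot holds the current, live version
        }
        dup_len += *data_len as u64;
    }
    (dup_len, uncleaned_slots)
}

For example, duplicate_data_len(&[(3, 10), (5, 20), (7, 30)]) yields (30, {3, 5, 7}): slot 7 holds the live version, so only the 10 + 20 bytes from slots 3 and 5 count as duplicates.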
@@ -10783,9 +10807,8 @@ pub mod tests {
 fn assert_not_load_account(accounts: &AccountsDb, slot: Slot, pubkey: Pubkey) {
 let ancestors = vec![(slot, 0)].into_iter().collect();
-assert!(accounts
-.load_without_fixed_root(&ancestors, &pubkey)
-.is_none());
+let load = accounts.load_without_fixed_root(&ancestors, &pubkey);
+assert!(load.is_none(), "{:?}", load);
 }
 fn reconstruct_accounts_db_via_serialization(accounts: &AccountsDb, slot: Slot) -> AccountsDb {