generating index sets uncleaned_roots correctly (#26431)
* generating index sets uncleaned_roots correctly
* fix test failures
* rename
* update comments
This commit is contained in:
parent f2fada9f21
commit 16219e300e
@@ -8281,7 +8281,7 @@ impl AccountsDb {
         m.stop();
         index_flush_us = m.as_us();
 
-        // this has to happen before pubkeys_to_duplicate_accounts_data_len below
+        // this has to happen before get_duplicate_accounts_slots_and_data_len below
         // get duplicate keys from acct idx. We have to wait until we've finished flushing.
         for (slot, key) in self
             .accounts_index
@@ -8300,6 +8300,7 @@ impl AccountsDb {
         // subtract data.len() from accounts_data_len for all old accounts that are in the index twice
         let mut accounts_data_len_dedup_timer =
             Measure::start("handle accounts data len duplicates");
+        let uncleaned_roots = Mutex::new(HashSet::<Slot>::default());
         if pass == 0 {
             let mut unique_pubkeys = HashSet::<Pubkey>::default();
             self.uncleaned_pubkeys.iter().for_each(|entry| {
@@ -8311,7 +8312,15 @@ impl AccountsDb {
                 .into_iter()
                 .collect::<Vec<_>>()
                 .par_chunks(4096)
-                .map(|pubkeys| self.pubkeys_to_duplicate_accounts_data_len(pubkeys))
+                .map(|pubkeys| {
+                    let (count, uncleaned_roots_this_group) =
+                        self.get_duplicate_accounts_slots_and_data_len(pubkeys);
+                    let mut uncleaned_roots = uncleaned_roots.lock().unwrap();
+                    uncleaned_roots_this_group.into_iter().for_each(|slot| {
+                        uncleaned_roots.insert(slot);
+                    });
+                    count
+                })
                 .sum();
             accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed);
             info!(
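The two hunks above thread a shared `Mutex<HashSet<Slot>>` through the parallel scan: each 4096-pubkey chunk returns a `(count, set)` pair, the sets are folded into the shared accumulator, and the counts are summed. Below is a minimal, self-contained sketch of that accumulation pattern using rayon; `scan_chunk` and its placeholder slot/len math are illustrative stand-ins, not the real duplicate scan.

```rust
use rayon::prelude::*;
use std::collections::HashSet;
use std::sync::Mutex;

// Illustrative stand-in for the per-chunk duplicate scan:
// returns (duplicate data len, slots that held duplicates).
fn scan_chunk(chunk: &[u64]) -> (u64, HashSet<u64>) {
    let mut slots = HashSet::new();
    let mut len: u64 = 0;
    for key in chunk {
        len += key % 7; // placeholder "data len"
        slots.insert(key % 16); // placeholder "slot"
    }
    (len, slots)
}

fn main() {
    let pubkeys: Vec<u64> = (0..100_000).collect();
    let uncleaned: Mutex<HashSet<u64>> = Mutex::new(HashSet::new());
    // Same shape as the diff: each chunk yields a (count, set) pair;
    // the sets are merged into one shared set while the counts are summed.
    let total: u64 = pubkeys
        .par_chunks(4096)
        .map(|chunk| {
            let (count, slots_this_group) = scan_chunk(chunk);
            let mut uncleaned = uncleaned.lock().unwrap();
            uncleaned.extend(slots_this_group);
            count
        })
        .sum();
    println!("total = {total}, slots = {}", uncleaned.lock().unwrap().len());
}
```

Locking once per chunk rather than once per pubkey keeps mutex contention small relative to the scan work, which is presumably why the merge happens inside the chunk-level `map`.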
@@ -8341,9 +8350,14 @@ impl AccountsDb {
         };
 
         if pass == 0 {
+            let uncleaned_roots = uncleaned_roots.into_inner().unwrap();
             // Need to add these last, otherwise older updates will be cleaned
-            for slot in &slots {
-                self.accounts_index.add_root(*slot, false);
+            for root in &slots {
+                // passing 'false' to 'add_root' causes 'root' to be added to 'accounts_index.roots_tracker.uncleaned_roots'
+                // passing 'true' to 'add_root' does NOT add 'root' to 'accounts_index.roots_tracker.uncleaned_roots'
+                // So, don't add all slots to 'uncleaned_roots' here since we know which slots contain duplicate pubkeys.
+                let uncleaned_root = uncleaned_roots.contains(root);
+                self.accounts_index.add_root(*root, !uncleaned_root);
             }
 
             self.set_storage_count_and_alive_bytes(storage_info, &mut timings);
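The inverted flag in `add_root(*root, !uncleaned_root)` is the core of the fix: only slots known to contain duplicate pubkeys should end up in `accounts_index.roots_tracker.uncleaned_roots`. Here is a toy model of the contract the new comments describe; the real `AccountsIndex::add_root` is more involved, and the struct and parameter names below are invented for illustration.

```rust
use std::collections::HashSet;

type Slot = u64;

/// Toy model of the root tracking the diff's comments describe; hypothetical,
/// not the real AccountsIndex.
#[derive(Default)]
struct ToyIndex {
    roots: HashSet<Slot>,
    uncleaned_roots: HashSet<Slot>,
}

impl ToyIndex {
    fn add_root(&mut self, slot: Slot, skip_uncleaned: bool) {
        self.roots.insert(slot);
        // false => slot is also queued for cleaning; true => it is not
        if !skip_uncleaned {
            self.uncleaned_roots.insert(slot);
        }
    }
}

fn main() {
    let slots = [10, 11, 12, 13];
    // Suppose the duplicate scan found duplicates only in slots 11 and 13.
    let dup_slots: HashSet<Slot> = [11, 13].into_iter().collect();

    let mut index = ToyIndex::default();
    for root in &slots {
        let uncleaned_root = dup_slots.contains(root);
        // Same inversion as the diff: only duplicate-bearing slots
        // land in uncleaned_roots.
        index.add_root(*root, !uncleaned_root);
    }
    assert_eq!(index.uncleaned_roots, dup_slots);
    assert_eq!(index.roots.len(), 4);
}
```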
@@ -8380,10 +8394,17 @@ impl AccountsDb {
         }
     }
 
-    /// Used during generate_index() to get the _duplicate_ accounts data len from the given pubkeys
+    /// Used during generate_index() to:
+    /// 1. get the _duplicate_ accounts data len from the given pubkeys
+    /// 2. get the slots that contained duplicate pubkeys
     /// Note this should only be used when ALL entries in the accounts index are roots.
-    fn pubkeys_to_duplicate_accounts_data_len(&self, pubkeys: &[Pubkey]) -> u64 {
+    /// returns (data len sum of all older duplicates, slots that contained duplicate pubkeys)
+    fn get_duplicate_accounts_slots_and_data_len(
+        &self,
+        pubkeys: &[Pubkey],
+    ) -> (u64, HashSet<Slot>) {
         let mut accounts_data_len_from_duplicates = 0;
+        let mut uncleaned_slots = HashSet::<Slot>::default();
         pubkeys.iter().for_each(|pubkey| {
             if let Some(entry) = self.accounts_index.get_account_read_entry(pubkey) {
                 let slot_list = entry.slot_list();
@@ -8391,26 +8412,29 @@ impl AccountsDb {
                     return;
                 }
                 // Only the account data len in the highest slot should be used, and the rest are
-                // duplicates. So sort the slot list in descending slot order, skip the first
-                // item, then sum up the remaining data len, which are the duplicates.
-                let mut slot_list = slot_list.clone();
-                slot_list
-                    .select_nth_unstable_by(0, |a, b| b.0.cmp(&a.0))
-                    .2
-                    .iter()
-                    .for_each(|(slot, account_info)| {
-                        let maybe_storage_entry = self
-                            .storage
-                            .get_account_storage_entry(*slot, account_info.store_id());
-                        let mut accessor = LoadedAccountAccessor::Stored(
-                            maybe_storage_entry.map(|entry| (entry, account_info.offset())),
-                        );
-                        let loaded_account = accessor.check_and_get_loaded_account();
-                        accounts_data_len_from_duplicates += loaded_account.data().len();
-                    });
+                // duplicates. So find the max slot to keep.
+                // Then sum up the remaining data len, which are the duplicates.
+                // All of the slots need to go in the 'uncleaned_slots' list. For clean to work properly,
+                // the slot where duplicate accounts are found in the index need to be in 'uncleaned_slots' list, too.
+                let max = slot_list.iter().map(|(slot, _)| slot).max().unwrap();
+                slot_list.iter().for_each(|(slot, account_info)| {
+                    uncleaned_slots.insert(*slot);
+                    if slot == max {
+                        // the info in 'max' is the most recent, current info for this pubkey
+                        return;
+                    }
+                    let maybe_storage_entry = self
+                        .storage
+                        .get_account_storage_entry(*slot, account_info.store_id());
+                    let mut accessor = LoadedAccountAccessor::Stored(
+                        maybe_storage_entry.map(|entry| (entry, account_info.offset())),
+                    );
+                    let loaded_account = accessor.check_and_get_loaded_account();
+                    accounts_data_len_from_duplicates += loaded_account.data().len();
+                });
             }
         });
-        accounts_data_len_from_duplicates as u64
+        (accounts_data_len_from_duplicates as u64, uncleaned_slots)
     }
 
     fn update_storage_info(
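The rewritten body replaces the `select_nth_unstable_by` sort-and-skip with a single `max` scan, and it records every slot in `uncleaned_slots`, including the max slot, so that clean later visits all slots where a duplicate pubkey appears. A standalone sketch of that computation over made-up `(slot, data_len)` pairs:

```rust
use std::collections::HashSet;

type Slot = u64;

fn main() {
    // (slot, data_len) pairs standing in for the index's slot list;
    // illustrative data, not real account info.
    let slot_list: Vec<(Slot, u64)> = vec![(5, 100), (9, 250), (7, 40)];

    let mut uncleaned_slots = HashSet::new();
    let mut duplicate_data_len = 0u64;

    // Same idea as the diff: keep the entry at the max slot, count the rest
    // as duplicates, and record every slot so clean can visit all of them.
    let max = slot_list.iter().map(|(slot, _)| *slot).max().unwrap();
    for (slot, data_len) in &slot_list {
        uncleaned_slots.insert(*slot);
        if *slot == max {
            continue; // the newest entry is the live one, not a duplicate
        }
        duplicate_data_len += data_len;
    }

    assert_eq!(duplicate_data_len, 140); // 100 + 40
    assert_eq!(uncleaned_slots.len(), 3); // all slots, including the max
}
```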
@@ -10783,9 +10807,8 @@ pub mod tests {
 
     fn assert_not_load_account(accounts: &AccountsDb, slot: Slot, pubkey: Pubkey) {
         let ancestors = vec![(slot, 0)].into_iter().collect();
-        assert!(accounts
-            .load_without_fixed_root(&ancestors, &pubkey)
-            .is_none());
+        let load = accounts.load_without_fixed_root(&ancestors, &pubkey);
+        assert!(load.is_none(), "{:?}", load);
     }
 
     fn reconstruct_accounts_db_via_serialization(accounts: &AccountsDb, slot: Slot) -> AccountsDb {
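The test-helper hunk binds the load result before asserting, so a failing assert prints the value that was unexpectedly loaded rather than just the expression text. A tiny illustration with a hypothetical payload type:

```rust
// Binding first lets assert! include the offending value in the panic message.
fn assert_not_loaded(load: Option<(u64, String)>) {
    assert!(load.is_none(), "{:?}", load);
}

fn main() {
    assert_not_loaded(None); // passes silently
    // assert_not_loaded(Some((3, "acct".to_string()))) would panic with:
    // Some((3, "acct"))
}
```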