remove duplicate pubkey during shrink (#28689)

This commit is contained in:
Jeff Washington (jwash) 2022-11-01 07:45:52 -07:00 committed by GitHub
parent 17680fe837
commit 0b51b15af2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 71 additions and 34 deletions

View File

@ -321,7 +321,7 @@ struct ShrinkCollect<'a> {
store_ids: Vec<AppendVecId>,
aligned_total: u64,
unrefed_pubkeys: Vec<&'a Pubkey>,
alive_accounts: Vec<&'a (Pubkey, FoundStoredAccount<'a>)>,
alive_accounts: Vec<&'a FoundStoredAccount<'a>>,
alive_total: usize,
total_starting_accounts: usize,
/// true if all alive accounts are zero lamports
@ -362,7 +362,7 @@ struct LoadAccountsIndexForShrink<'a> {
/// number of alive accounts
alive_total: usize,
/// the specific alive accounts
alive_accounts: Vec<&'a (Pubkey, FoundStoredAccount<'a>)>,
alive_accounts: Vec<&'a FoundStoredAccount<'a>>,
/// pubkeys that were unref'd in the accounts index because they were dead
unrefed_pubkeys: Vec<&'a Pubkey>,
/// true if all alive accounts are zero lamport accounts
@ -370,7 +370,7 @@ struct LoadAccountsIndexForShrink<'a> {
}
/// A list of stored accounts together with size and storage-id bookkeeping.
/// NOTE(review): the field descriptions below are inferred from field names and from the
/// visible construction site, which sorts `stored_accounts` by `pubkey()` — confirm.
pub struct GetUniqueAccountsResult<'a> {
// the next two lines are the pre-change and post-change declarations of the same field,
// shown back to back by this diff view
pub stored_accounts: Vec<(Pubkey, FoundStoredAccount<'a>)>,
/// the accounts themselves; each entry exposes its own key through `pubkey()`
pub stored_accounts: Vec<FoundStoredAccount<'a>>,
/// presumably the total number of bytes the accounts occupied originally — TODO confirm
pub original_bytes: u64,
/// presumably the ids of the append vecs the accounts were found in — TODO confirm
store_ids: Vec<AppendVecId>,
}
@ -419,6 +419,12 @@ pub struct FoundStoredAccount<'a> {
pub store_id: AppendVecId,
}
impl<'a> FoundStoredAccount<'a> {
    /// Returns the pubkey of the wrapped stored account.
    ///
    /// This forwards to `account.pubkey()`, so the key does not have to be carried
    /// next to the account in a separate tuple element.
    pub fn pubkey(&self) -> &Pubkey {
        let Self { account, .. } = self;
        account.pubkey()
    }
}
#[cfg(not(test))]
const ABSURD_CONSECUTIVE_FAILED_ITERATIONS: usize = 100;
@ -3670,7 +3676,7 @@ impl AccountsDb {
/// return sum of account size for all alive accounts
fn load_accounts_index_for_shrink<'a>(
&'a self,
accounts: &'a [(Pubkey, FoundStoredAccount<'a>)],
accounts: &'a [FoundStoredAccount<'a>],
) -> LoadAccountsIndexForShrink<'a> {
let count = accounts.len();
let mut alive_accounts = Vec::with_capacity(count);
@ -3683,12 +3689,11 @@ impl AccountsDb {
let mut index = 0;
let mut all_are_zero_lamports = true;
self.accounts_index.scan(
accounts.iter().map(|(key, _)| key),
accounts.iter().map(|account| account.pubkey()),
|pubkey, slots_refs| {
let mut result = AccountsIndexScanResult::None;
if let Some((slot_list, _ref_count)) = slots_refs {
let pair = &accounts[index];
let stored_account = &pair.1;
let stored_account = &accounts[index];
let is_alive = slot_list.iter().any(|(_slot, acct_info)| {
acct_info.matches_storage_location(
stored_account.store_id,
@ -3705,7 +3710,7 @@ impl AccountsDb {
dead += 1;
} else {
all_are_zero_lamports &= stored_account.account.lamports() == 0;
alive_accounts.push(pair);
alive_accounts.push(stored_account);
alive_total += stored_account.account.stored_size;
alive += 1;
}
@ -3767,8 +3772,12 @@ impl AccountsDb {
.collect();
// sort by pubkey to keep account index lookups close
let mut stored_accounts = stored_accounts.into_iter().collect::<Vec<_>>();
stored_accounts.sort_unstable_by(|a, b| a.0.cmp(&b.0));
let mut stored_accounts = stored_accounts
.drain()
.into_iter()
.map(|(_k, v)| v)
.collect::<Vec<_>>();
stored_accounts.sort_unstable_by(|a, b| a.pubkey().cmp(b.pubkey()));
GetUniqueAccountsResult {
stored_accounts,
@ -3782,7 +3791,7 @@ impl AccountsDb {
fn shrink_collect<'a: 'b, 'b, I>(
&'a self,
stores: I,
stored_accounts: &'b mut Vec<(Pubkey, FoundStoredAccount<'b>)>,
stored_accounts: &'b mut Vec<FoundStoredAccount<'b>>,
stats: &ShrinkStats,
) -> ShrinkCollect<'b>
where
@ -3940,8 +3949,8 @@ impl AccountsDb {
let mut hashes = Vec::with_capacity(total_accounts_after_shrink);
let mut write_versions = Vec::with_capacity(total_accounts_after_shrink);
for (pubkey, alive_account) in &shrink_collect.alive_accounts {
accounts.push((pubkey, &alive_account.account));
for alive_account in &shrink_collect.alive_accounts {
accounts.push(&alive_account.account);
hashes.push(alive_account.account.hash);
write_versions.push(alive_account.account.meta.write_version);
}
@ -9863,8 +9872,7 @@ pub mod tests {
hash: &hash,
};
let found = FoundStoredAccount { account, store_id };
let item = (pubkey, found);
let map = vec![&item];
let map = vec![&found];
let to_store = AccountsToStore::new(available_bytes, &map, slot0);
// Done: setup 'to_store'
@ -17490,7 +17498,7 @@ pub mod tests {
shrink_collect
.alive_accounts
.iter()
.map(|(pubkey, _)| *pubkey)
.map(|account| *account.pubkey())
.sorted()
.collect::<Vec<_>>(),
expected_alive_accounts

View File

@ -8,7 +8,7 @@ use {
accounts_db::FoundStoredAccount,
append_vec::{AppendVec, StoredAccountMeta},
},
solana_sdk::{clock::Slot, hash::Hash, pubkey::Pubkey},
solana_sdk::{clock::Slot, hash::Hash},
};
/// a set of accounts that need to be stored.
@ -37,7 +37,7 @@ impl<'a> AccountsToStore<'a> {
/// available_bytes: how many bytes remain in the primary storage. Excess accounts will be directed to an overflow storage
pub fn new(
mut available_bytes: u64,
stored_accounts: &'a [&'a (Pubkey, FoundStoredAccount<'a>)],
stored_accounts: &'a [&'a FoundStoredAccount<'a>],
slot: Slot,
) -> Self {
let num_accounts = stored_accounts.len();
@ -46,7 +46,7 @@ impl<'a> AccountsToStore<'a> {
// index of the first account that doesn't fit in the current append vec
let mut index_first_item_overflow = num_accounts; // assume all fit
stored_accounts.iter().for_each(|account| {
let account_size = account.1.account.stored_size as u64;
let account_size = account.account.stored_size as u64;
if available_bytes >= account_size {
available_bytes = available_bytes.saturating_sub(account_size);
} else if index_first_item_overflow == num_accounts {
@ -54,10 +54,10 @@ impl<'a> AccountsToStore<'a> {
// the # of accounts we have so far seen is the most that will fit in the current ancient append vec
index_first_item_overflow = hashes.len();
}
hashes.push(account.1.account.hash);
hashes.push(account.account.hash);
// we have to specify 'slot' here because we are writing to an ancient append vec and squashing slots,
// so we need to update the previous accounts index entry for this account from 'slot' to 'ancient_slot'
accounts.push((&account.1.account, slot));
accounts.push((&account.account, slot));
});
Self {
hashes,
@ -106,7 +106,10 @@ pub mod tests {
accounts_db::{get_temp_accounts_paths, AppendVecId},
append_vec::{AccountMeta, StoredMeta},
},
solana_sdk::account::{AccountSharedData, ReadableAccount},
solana_sdk::{
account::{AccountSharedData, ReadableAccount},
pubkey::Pubkey,
},
};
#[test]
@ -155,8 +158,7 @@ pub mod tests {
hash: &hash,
};
let found = FoundStoredAccount { account, store_id };
let item = (pubkey, found);
let map = vec![&item];
let map = vec![&found];
for (selector, available_bytes) in [
(StorageSelector::Primary, account_size),
(StorageSelector::Overflow, account_size - 1),
@ -166,9 +168,7 @@ pub mod tests {
let (accounts, hashes) = accounts_to_store.get(selector);
assert_eq!(
accounts,
map.iter()
.map(|(_a, b)| (&b.account, slot))
.collect::<Vec<_>>(),
map.iter().map(|b| (&b.account, slot)).collect::<Vec<_>>(),
"mismatch"
);
assert_eq!(hashes, vec![&hash]);

View File

@ -318,17 +318,17 @@ impl<'a> SnapshotMinimizer<'a> {
let mut chunk_bytes = 0;
let mut keep_accounts = Vec::with_capacity(CHUNK_SIZE);
let mut purge_pubkeys = Vec::with_capacity(CHUNK_SIZE);
chunk.iter().for_each(|(pubkey, account)| {
if self.minimized_account_set.contains(pubkey) {
chunk.iter().for_each(|account| {
if self.minimized_account_set.contains(account.pubkey()) {
chunk_bytes += account.account.stored_size;
keep_accounts.push((pubkey, account));
keep_accounts.push(account);
} else if self
.accounts_db()
.accounts_index
.get_account_read_entry(pubkey)
.get_account_read_entry(account.pubkey())
.is_some()
{
purge_pubkeys.push(pubkey);
purge_pubkeys.push(account.pubkey());
}
});
@ -359,8 +359,8 @@ impl<'a> SnapshotMinimizer<'a> {
let mut hashes = Vec::with_capacity(keep_accounts.len());
let mut write_versions = Vec::with_capacity(keep_accounts.len());
for (pubkey, alive_account) in keep_accounts {
accounts.push((pubkey, &alive_account.account));
for alive_account in keep_accounts {
accounts.push(&alive_account.account);
hashes.push(alive_account.account.hash);
write_versions.push(alive_account.account.meta.write_version);
}

View File

@ -99,6 +99,35 @@ impl<'a, T: ReadableAccount + Sync> StorableAccounts<'a, T>
}
}
/// Accounts that all share one slot, given as `(slot, accounts, include_slot_in_hash)`.
///
/// Each pubkey is read from the stored account itself rather than from a separate key.
/// The last parameter exists until this feature is activated:
/// ignore slot when calculating an account hash #28420
impl<'a> StorableAccounts<'a, StoredAccountMeta<'a>>
    for (Slot, &'a [&'a StoredAccountMeta<'a>], IncludeSlotInHash)
{
    /// Pubkey of the account at `index`, taken from the stored account.
    fn pubkey(&self, index: usize) -> &Pubkey {
        let (_, stored, _) = self;
        stored[index].pubkey()
    }

    /// The stored account at `index`.
    fn account(&self, index: usize) -> &StoredAccountMeta<'a> {
        let (_, stored, _) = self;
        stored[index]
    }

    /// Slot of the account at `index`.
    fn slot(&self, _index: usize) -> Slot {
        // the source data carries no per-account slot, so every index reports the tuple's slot
        let (slot, ..) = self;
        *slot
    }

    /// Slot the accounts are being written to: the tuple's slot.
    fn target_slot(&self) -> Slot {
        let (slot, ..) = self;
        *slot
    }

    /// Number of accounts in the slice.
    fn len(&self) -> usize {
        let (_, stored, _) = self;
        stored.len()
    }

    /// Always `false`: this tuple holds exactly one slot value.
    fn contains_multiple_slots(&self) -> bool {
        false
    }

    /// The `IncludeSlotInHash` value supplied as the tuple's last element.
    fn include_slot_in_hash(&self) -> IncludeSlotInHash {
        let (.., include_slot_in_hash) = self;
        *include_slot_in_hash
    }
}
/// this tuple contains slot info PER account
impl<'a> StorableAccounts<'a, StoredAccountMeta<'a>>
for (