remove duplicate pubkey when shrinking or combining ancient (#28678)

This commit is contained in:
Jeff Washington (jwash) 2022-10-31 12:00:55 -07:00 committed by GitHub
parent 59bf1809fe
commit c47515d055
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 158 additions and 49 deletions

View File

@ -4312,15 +4312,16 @@ impl AccountsDb {
/// returns the pubkeys that are in 'accounts' that are already in 'existing_ancient_pubkeys'
/// Also updated 'existing_ancient_pubkeys' to include all pubkeys in 'accounts' since they will soon be written into the ancient slot.
fn get_keys_to_unref_ancient<'a>(
accounts: &'a [(&Pubkey, &StoredAccountMeta<'_>, u64)],
accounts: &'a [(&StoredAccountMeta<'_>, u64)],
existing_ancient_pubkeys: &mut HashSet<Pubkey>,
) -> HashSet<&'a Pubkey> {
let mut unref = HashSet::<&Pubkey>::default();
// for each key that we're about to add that already exists in this storage, we need to unref. The account was in a different storage.
// Now it is being put into an ancient storage again, but it is already there, so maintain max of 1 ref per storage in the accounts index.
// The slot that currently references the account is going away, so unref to maintain # slots that reference the pubkey = refcount.
accounts.iter().for_each(|(key, _, _)| {
if !existing_ancient_pubkeys.insert(**key) {
accounts.iter().for_each(|(account, _)| {
let key = account.pubkey();
if !existing_ancient_pubkeys.insert(*key) {
// this key exists BOTH in 'accounts' and already in the ancient append vec, so we need to unref it
unref.insert(key);
}
@ -4333,7 +4334,7 @@ impl AccountsDb {
/// As a side effect, on exit, 'existing_ancient_pubkeys' will now contain all pubkeys in 'accounts'.
fn unref_accounts_already_in_storage(
&self,
accounts: &[(&Pubkey, &StoredAccountMeta<'_>, u64)],
accounts: &[(&StoredAccountMeta<'_>, u64)],
existing_ancient_pubkeys: &mut HashSet<Pubkey>,
) {
let unref = Self::get_keys_to_unref_ancient(accounts, existing_ancient_pubkeys);
@ -9788,7 +9789,6 @@ pub mod tests {
}
/// this tuple contains slot info PER account
/// last bool is include_slot_in_hash
impl<'a, T: ReadableAccount + Sync> StorableAccounts<'a, T>
for (Slot, &'a [(&'a Pubkey, &'a T, Slot)])
{
@ -9909,9 +9909,30 @@ pub mod tests {
let executable = false;
let owner = Pubkey::default();
let data = Vec::new();
let pubkey = solana_sdk::pubkey::new_rand();
let pubkey2 = solana_sdk::pubkey::new_rand();
let pubkey3 = solana_sdk::pubkey::new_rand();
let pubkey4 = solana_sdk::pubkey::new_rand();
let meta = StoredMeta {
write_version: 5,
pubkey: Pubkey::new_unique(),
pubkey,
data_len: 7,
};
let meta2 = StoredMeta {
write_version: 5,
pubkey: pubkey2,
data_len: 7,
};
let meta3 = StoredMeta {
write_version: 5,
pubkey: pubkey3,
data_len: 7,
};
let meta4 = StoredMeta {
write_version: 5,
pubkey: pubkey4,
data_len: 7,
};
let account_meta = AccountMeta {
@ -9931,10 +9952,33 @@ pub mod tests {
stored_size,
hash: &hash,
};
let pubkey = solana_sdk::pubkey::new_rand();
let stored_account2 = StoredAccountMeta {
meta: &meta2,
account_meta: &account_meta,
data: &data,
offset,
stored_size,
hash: &hash,
};
let stored_account3 = StoredAccountMeta {
meta: &meta3,
account_meta: &account_meta,
data: &data,
offset,
stored_size,
hash: &hash,
};
let stored_account4 = StoredAccountMeta {
meta: &meta4,
account_meta: &account_meta,
data: &data,
offset,
stored_size,
hash: &hash,
};
let slot0 = 0;
let mut existing_ancient_pubkeys = HashSet::default();
let accounts = [(&pubkey, &stored_account, slot0)];
let accounts = [(&stored_account, slot0)];
// pubkey NOT in existing_ancient_pubkeys, so do NOT unref, but add to existing_ancient_pubkeys
let unrefs =
AccountsDb::get_keys_to_unref_ancient(&accounts, &mut existing_ancient_pubkeys);
@ -9951,9 +9995,8 @@ pub mod tests {
vec![&pubkey]
);
assert_eq!(unrefs.iter().cloned().collect::<Vec<_>>(), vec![&pubkey]);
let pubkey2 = solana_sdk::pubkey::new_rand();
// pubkey2 NOT in existing_ancient_pubkeys, so do NOT unref, but add to existing_ancient_pubkeys
let accounts = [(&pubkey2, &stored_account, slot0)];
let accounts = [(&stored_account2, slot0)];
let unrefs =
AccountsDb::get_keys_to_unref_ancient(&accounts, &mut existing_ancient_pubkeys);
assert!(unrefs.is_empty());
@ -9975,13 +10018,8 @@ pub mod tests {
.collect::<Vec<_>>()
);
assert_eq!(unrefs.iter().cloned().collect::<Vec<_>>(), vec![&pubkey2]);
let pubkey3 = solana_sdk::pubkey::new_rand();
let pubkey4 = solana_sdk::pubkey::new_rand();
// pubkey3/4 NOT in existing_ancient_pubkeys, so do NOT unref, but add to existing_ancient_pubkeys
let accounts = [
(&pubkey3, &stored_account, slot0),
(&pubkey4, &stored_account, slot0),
];
let accounts = [(&stored_account3, slot0), (&stored_account4, slot0)];
let unrefs =
AccountsDb::get_keys_to_unref_ancient(&accounts, &mut existing_ancient_pubkeys);
assert!(unrefs.is_empty());

View File

@ -26,7 +26,7 @@ pub enum StorageSelector {
/// The slice arithmetic accross both hashes and account data gets messy. So, this struct abstracts that.
pub struct AccountsToStore<'a> {
hashes: Vec<&'a Hash>,
accounts: Vec<(&'a Pubkey, &'a StoredAccountMeta<'a>, Slot)>,
accounts: Vec<(&'a StoredAccountMeta<'a>, Slot)>,
/// if 'accounts' contains more items than can be contained in the primary storage, then we have to split these accounts.
/// 'index_first_item_overflow' specifies the index of the first item in 'accounts' that will go into the overflow storage
index_first_item_overflow: usize,
@ -57,7 +57,7 @@ impl<'a> AccountsToStore<'a> {
hashes.push(account.1.account.hash);
// we have to specify 'slot' here because we are writing to an ancient append vec and squashing slots,
// so we need to update the previous accounts index entry for this account from 'slot' to 'ancient_slot'
accounts.push((&account.1.account.meta.pubkey, &account.1.account, slot));
accounts.push((&account.1.account, slot));
});
Self {
hashes,
@ -75,10 +75,7 @@ impl<'a> AccountsToStore<'a> {
pub fn get(
&self,
storage: StorageSelector,
) -> (
&[(&'a Pubkey, &'a StoredAccountMeta<'a>, Slot)],
&[&'a Hash],
) {
) -> (&[(&'a StoredAccountMeta<'a>, Slot)], &[&'a Hash]) {
let range = match storage {
StorageSelector::Primary => 0..self.index_first_item_overflow,
StorageSelector::Overflow => self.index_first_item_overflow..self.accounts.len(),
@ -170,7 +167,7 @@ pub mod tests {
assert_eq!(
accounts,
map.iter()
.map(|(a, b)| (a, &b.account, slot))
.map(|(_a, b)| (&b.account, slot))
.collect::<Vec<_>>(),
"mismatch"
);

View File

@ -136,6 +136,10 @@ impl<'a> StoredAccountMeta<'a> {
let executable_byte: &u8 = unsafe { &*(executable_bool as *const bool as *const u8) };
executable_byte
}
pub fn pubkey(&self) -> &Pubkey {
&self.meta.pubkey
}
}
pub struct AppendVecAccountsIter<'a> {

View File

@ -1,6 +1,6 @@
//! trait for abstracting underlying storage of pubkey and account pairs to be written
use {
crate::accounts_db::IncludeSlotInHash,
crate::{accounts_db::IncludeSlotInHash, append_vec::StoredAccountMeta},
solana_sdk::{account::ReadableAccount, clock::Slot, pubkey::Pubkey},
};
@ -100,18 +100,22 @@ impl<'a, T: ReadableAccount + Sync> StorableAccounts<'a, T>
}
/// this tuple contains slot info PER account
impl<'a, T: ReadableAccount + Sync> StorableAccounts<'a, T>
for (Slot, &'a [(&'a Pubkey, &'a T, Slot)], IncludeSlotInHash)
impl<'a> StorableAccounts<'a, StoredAccountMeta<'a>>
for (
Slot,
&'a [(&'a StoredAccountMeta<'a>, Slot)],
IncludeSlotInHash,
)
{
fn pubkey(&self, index: usize) -> &Pubkey {
self.1[index].0
self.1[index].0.pubkey()
}
fn account(&self, index: usize) -> &T {
self.1[index].1
fn account(&self, index: usize) -> &StoredAccountMeta<'a> {
self.1[index].0
}
fn slot(&self, index: usize) -> Slot {
// note that this could be different than 'target_slot()' PER account
self.1[index].2
self.1[index].1
}
fn target_slot(&self) -> Slot {
self.0
@ -133,42 +137,78 @@ impl<'a, T: ReadableAccount + Sync> StorableAccounts<'a, T>
self.2
}
}
#[cfg(test)]
pub mod tests {
use {
super::*,
crate::accounts_db::INCLUDE_SLOT_IN_HASH_TESTS,
solana_sdk::account::{AccountSharedData, WritableAccount},
crate::{
accounts_db::INCLUDE_SLOT_IN_HASH_TESTS,
append_vec::{AccountMeta, StoredAccountMeta, StoredMeta},
},
solana_sdk::{
account::{accounts_equal, AccountSharedData, WritableAccount},
hash::Hash,
},
};
fn compare<'a, T: ReadableAccount + Sync + PartialEq + std::fmt::Debug>(
fn compare<
'a,
T: ReadableAccount + Sync + PartialEq + std::fmt::Debug,
U: ReadableAccount + Sync + PartialEq + std::fmt::Debug,
>(
a: &impl StorableAccounts<'a, T>,
b: &impl StorableAccounts<'a, T>,
b: &impl StorableAccounts<'a, U>,
) {
assert_eq!(a.target_slot(), b.target_slot());
assert_eq!(a.len(), b.len());
assert_eq!(a.is_empty(), b.is_empty());
(0..a.len()).into_iter().for_each(|i| {
assert_eq!(a.pubkey(i), b.pubkey(i));
assert_eq!(a.account(i), b.account(i));
assert!(accounts_equal(a.account(i), b.account(i)));
})
}
#[test]
fn test_contains_multiple_slots() {
let pk = Pubkey::new(&[1; 32]);
let account = AccountSharedData::create(1, Vec::default(), Pubkey::default(), false, 0);
let slot = 0;
let lamports = 1;
let owner = Pubkey::default();
let executable = false;
let rent_epoch = 0;
let meta = StoredMeta {
write_version: 5,
pubkey: pk,
data_len: 7,
};
let account_meta = AccountMeta {
lamports,
owner,
executable,
rent_epoch,
};
let data = Vec::default();
let offset = 99;
let stored_size = 101;
let hash = Hash::new_unique();
let stored_account = StoredAccountMeta {
meta: &meta,
account_meta: &account_meta,
data: &data,
offset,
stored_size,
hash: &hash,
};
let test3 = (
slot,
&vec![(&pk, &account, slot), (&pk, &account, slot)][..],
&vec![(&stored_account, slot), (&stored_account, slot)][..],
INCLUDE_SLOT_IN_HASH_TESTS,
);
assert!(!test3.contains_multiple_slots());
let test3 = (
slot,
&vec![(&pk, &account, slot), (&pk, &account, slot + 1)][..],
&vec![(&stored_account, slot), (&stored_account, slot + 1)][..],
INCLUDE_SLOT_IN_HASH_TESTS,
);
assert!(test3.contains_multiple_slots());
@ -180,28 +220,58 @@ pub mod tests {
for target_slot in 0..max_slots {
for entries in 0..2 {
for starting_slot in 0..max_slots {
let data = Vec::default();
let hash = Hash::new_unique();
let mut raw = Vec::new();
let mut raw2 = Vec::new();
for entry in 0..entries {
let pk = Pubkey::new(&[entry; 32]);
let account = AccountSharedData::create(
((entry as u64) * starting_slot) as u64,
Vec::default(),
Pubkey::default(),
false,
0,
);
raw.push((
pk,
AccountSharedData::create(
((entry as u64) * starting_slot) as u64,
Vec::default(),
Pubkey::default(),
false,
0,
),
account.clone(),
starting_slot % max_slots,
StoredMeta {
write_version: 0, // just something
pubkey: pk,
data_len: u64::MAX, // just something
},
AccountMeta {
lamports: account.lamports(),
owner: *account.owner(),
executable: account.executable(),
rent_epoch: account.rent_epoch(),
},
));
}
for entry in 0..entries {
let offset = 99;
let stored_size = 101;
raw2.push(StoredAccountMeta {
meta: &raw[entry as usize].3,
account_meta: &raw[entry as usize].4,
data: &data,
offset,
stored_size,
hash: &hash,
});
}
let mut two = Vec::new();
let mut three = Vec::new();
raw.iter().for_each(|raw| {
raw.iter().zip(raw2.iter()).for_each(|(raw, raw2)| {
two.push((&raw.0, &raw.1)); // 2 item tuple
three.push((&raw.0, &raw.1, raw.2)); // 3 item tuple, including slot
three.push((raw2, raw.2)); // 2 item tuple, including slot
});
let test2 = (target_slot, &two[..], INCLUDE_SLOT_IN_HASH_TESTS);
let test3 = (target_slot, &three[..], INCLUDE_SLOT_IN_HASH_TESTS);
let old_slot = starting_slot;
let test_moving_slots = StorableAccountsMovingSlots {
@ -214,7 +284,7 @@ pub mod tests {
compare(&test2, &test_moving_slots);
for (i, raw) in raw.iter().enumerate() {
assert_eq!(raw.0, *test3.pubkey(i));
assert_eq!(raw.1, *test3.account(i));
assert!(accounts_equal(&raw.1, test3.account(i)));
assert_eq!(raw.2, test3.slot(i));
assert_eq!(target_slot, test2.slot(i));
assert_eq!(old_slot, test_moving_slots.slot(i));