avoid copies when writing to an ancient append vec (#28981)

* avoid copies when writing to an ancient append vec

* update comments
This commit is contained in:
Jeff Washington (jwash) 2022-11-30 12:09:23 -06:00 committed by GitHub
parent 337621d031
commit 58e12cf963
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 81 additions and 24 deletions

View File

@ -4352,7 +4352,7 @@ impl AccountsDb {
/// returns the pubkeys that are in 'accounts' that are already in 'existing_ancient_pubkeys'
/// Also updated 'existing_ancient_pubkeys' to include all pubkeys in 'accounts' since they will soon be written into the ancient slot.
fn get_keys_to_unref_ancient<'a>(
accounts: &'a [&StoredAccountMeta<'_>],
accounts: &'a [&FoundStoredAccount<'_>],
existing_ancient_pubkeys: &mut HashSet<Pubkey>,
) -> HashSet<&'a Pubkey> {
let mut unref = HashSet::<&Pubkey>::default();
@ -4374,7 +4374,7 @@ impl AccountsDb {
/// As a side effect, on exit, 'existing_ancient_pubkeys' will now contain all pubkeys in 'accounts'.
fn unref_accounts_already_in_storage(
&self,
accounts: &[&StoredAccountMeta<'_>],
accounts: &[&FoundStoredAccount<'_>],
existing_ancient_pubkeys: &mut HashSet<Pubkey>,
) {
let unref = Self::get_keys_to_unref_ancient(accounts, existing_ancient_pubkeys);
@ -10106,8 +10106,25 @@ pub mod tests {
stored_size,
hash: &hash,
};
let store_id = 0;
let found_account = FoundStoredAccount {
account: stored_account,
store_id,
};
let found_account2 = FoundStoredAccount {
account: stored_account2,
store_id,
};
let found_account3 = FoundStoredAccount {
account: stored_account3,
store_id,
};
let found_account4 = FoundStoredAccount {
account: stored_account4,
store_id,
};
let mut existing_ancient_pubkeys = HashSet::default();
let accounts = [&stored_account];
let accounts = [&found_account];
// pubkey NOT in existing_ancient_pubkeys, so do NOT unref, but add to existing_ancient_pubkeys
let unrefs =
AccountsDb::get_keys_to_unref_ancient(&accounts, &mut existing_ancient_pubkeys);
@ -10125,7 +10142,7 @@ pub mod tests {
);
assert_eq!(unrefs.iter().cloned().collect::<Vec<_>>(), vec![&pubkey]);
// pubkey2 NOT in existing_ancient_pubkeys, so do NOT unref, but add to existing_ancient_pubkeys
let accounts = [&stored_account2];
let accounts = [&found_account2];
let unrefs =
AccountsDb::get_keys_to_unref_ancient(&accounts, &mut existing_ancient_pubkeys);
assert!(unrefs.is_empty());
@ -10148,7 +10165,7 @@ pub mod tests {
);
assert_eq!(unrefs.iter().cloned().collect::<Vec<_>>(), vec![&pubkey2]);
// pubkey3/4 NOT in existing_ancient_pubkeys, so do NOT unref, but add to existing_ancient_pubkeys
let accounts = [&stored_account3, &stored_account4];
let accounts = [&found_account3, &found_account4];
let unrefs =
AccountsDb::get_keys_to_unref_ancient(&accounts, &mut existing_ancient_pubkeys);
assert!(unrefs.is_empty());

View File

@ -4,10 +4,7 @@
//! 2. multiple 'slots' squashed into a single older (ie. ancient) slot for convenience and performance
//! Otherwise, an ancient append vec is the same as any other append vec
use {
crate::{
accounts_db::FoundStoredAccount,
append_vec::{AppendVec, StoredAccountMeta},
},
crate::{accounts_db::FoundStoredAccount, append_vec::AppendVec},
solana_sdk::clock::Slot,
};
@ -25,7 +22,7 @@ pub enum StorageSelector {
/// We need 1-2 of these slices constructed based on available bytes and individual account sizes.
/// The slice arithmetic accross both hashes and account data gets messy. So, this struct abstracts that.
pub struct AccountsToStore<'a> {
accounts: Vec<&'a StoredAccountMeta<'a>>,
accounts: &'a [&'a FoundStoredAccount<'a>],
/// if 'accounts' contains more items than can be contained in the primary storage, then we have to split these accounts.
/// 'index_first_item_overflow' specifies the index of the first item in 'accounts' that will go into the overflow storage
index_first_item_overflow: usize,
@ -37,26 +34,22 @@ impl<'a> AccountsToStore<'a> {
/// available_bytes: how many bytes remain in the primary storage. Excess accounts will be directed to an overflow storage
pub fn new(
mut available_bytes: u64,
stored_accounts: &'a [&'a FoundStoredAccount<'a>],
accounts: &'a [&'a FoundStoredAccount<'a>],
slot: Slot,
) -> Self {
let num_accounts = stored_accounts.len();
let mut accounts = Vec::with_capacity(num_accounts);
let num_accounts = accounts.len();
// index of the first account that doesn't fit in the current append vec
let mut index_first_item_overflow = num_accounts; // assume all fit
stored_accounts.iter().for_each(|account| {
for (i, account) in accounts.iter().enumerate() {
let account_size = account.account.stored_size as u64;
if available_bytes >= account_size {
available_bytes = available_bytes.saturating_sub(account_size);
} else if index_first_item_overflow == num_accounts {
available_bytes = 0;
// the # of accounts we have so far seen is the most that will fit in the current ancient append vec
index_first_item_overflow = accounts.len();
index_first_item_overflow = i;
break;
}
// we have to specify 'slot' here because we are writing to an ancient append vec and squashing slots,
// so we need to update the previous accounts index entry for this account from 'slot' to 'ancient_slot'
accounts.push(&account.account);
});
}
Self {
accounts,
index_first_item_overflow,
@ -70,7 +63,7 @@ impl<'a> AccountsToStore<'a> {
}
/// get the accounts to store in the given 'storage'
pub fn get(&self, storage: StorageSelector) -> &[&'a StoredAccountMeta<'a>] {
pub fn get(&self, storage: StorageSelector) -> &[&'a FoundStoredAccount<'a>] {
let range = match storage {
StorageSelector::Primary => 0..self.index_first_item_overflow,
StorageSelector::Overflow => self.index_first_item_overflow..self.accounts.len(),
@ -99,7 +92,7 @@ pub mod tests {
super::*,
crate::{
accounts_db::{get_temp_accounts_paths, AppendVecId},
append_vec::{AccountMeta, StoredMeta},
append_vec::{AccountMeta, StoredAccountMeta, StoredMeta},
},
solana_sdk::{
account::{AccountSharedData, ReadableAccount},
@ -162,7 +155,7 @@ pub mod tests {
let accounts_to_store = AccountsToStore::new(available_bytes as u64, &map, slot);
let accounts = accounts_to_store.get(selector);
assert_eq!(
accounts,
accounts.iter().map(|b| &b.account).collect::<Vec<_>>(),
map.iter().map(|b| &b.account).collect::<Vec<_>>(),
"mismatch"
);

View File

@ -1,6 +1,9 @@
//! trait for abstracting underlying storage of pubkey and account pairs to be written
use {
crate::{accounts_db::IncludeSlotInHash, append_vec::StoredAccountMeta},
crate::{
accounts_db::{FoundStoredAccount, IncludeSlotInHash},
append_vec::StoredAccountMeta,
},
solana_sdk::{account::ReadableAccount, clock::Slot, hash::Hash, pubkey::Pubkey},
};
@ -154,6 +157,7 @@ impl<'a> StorableAccounts<'a, StoredAccountMeta<'a>>
}
/// this tuple contains a single different source slot that applies to all accounts
/// accounts are StoredAccountMeta
impl<'a> StorableAccounts<'a, StoredAccountMeta<'a>>
for (
Slot,
@ -194,6 +198,49 @@ impl<'a> StorableAccounts<'a, StoredAccountMeta<'a>>
self.1[index].meta.write_version
}
}
/// this tuple contains a single different source slot that applies to all accounts
/// accounts are FoundStoredAccount
impl<'a> StorableAccounts<'a, StoredAccountMeta<'a>>
for (
Slot,
&'a [&'a FoundStoredAccount<'a>],
IncludeSlotInHash,
Slot,
)
{
fn pubkey(&self, index: usize) -> &Pubkey {
self.1[index].pubkey()
}
fn account(&self, index: usize) -> &StoredAccountMeta<'a> {
&self.1[index].account
}
fn slot(&self, _index: usize) -> Slot {
// same other slot for all accounts
self.3
}
fn target_slot(&self) -> Slot {
self.0
}
fn len(&self) -> usize {
self.1.len()
}
fn contains_multiple_slots(&self) -> bool {
false
}
fn include_slot_in_hash(&self) -> IncludeSlotInHash {
self.2
}
fn has_hash_and_write_version(&self) -> bool {
true
}
fn hash(&self, index: usize) -> &Hash {
self.1[index].account.hash
}
fn write_version(&self, index: usize) -> u64 {
self.1[index].account.meta.write_version
}
}
#[cfg(test)]
pub mod tests {
use {