gen idx: refactor StorageSizeAndCount population (#33244)

This commit is contained in:
Jeff Washington (jwash) 2023-09-14 07:53:05 -07:00 committed by GitHub
parent 5d29ed196f
commit 886eabd74d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 54 additions and 14 deletions

View File

@ -8965,11 +8965,10 @@ impl AccountsDb {
let mut num_accounts_rent_paying = 0;
let num_accounts = accounts_map.len();
let mut amount_to_top_off_rent = 0;
// first collect into a local HashMap with no lock contention
let mut storage_info_local = StorageSizeAndCount::default();
let mut stored_size_alive = 0;
let items = accounts_map.into_iter().map(|(pubkey, stored_account)| {
storage_info_local.stored_size += stored_account.stored_size();
stored_size_alive += stored_account.stored_size();
if secondary {
self.accounts_index.update_secondary_indexes(
&pubkey,
@ -8999,15 +8998,15 @@ impl AccountsDb {
)
});
let (dirty_pubkeys, insert_time_us) = self
let (dirty_pubkeys, insert_time_us, generate_index_count) = self
.accounts_index
.insert_new_if_missing_into_primary_index(slot, num_accounts, items);
{
// second, collect into the shared DashMap once we've figured out all the info per store_id
let mut info = storage_info.entry(store_id).or_default();
info.stored_size += storage_info_local.stored_size;
info.count += num_accounts;
info.stored_size += stored_size_alive;
info.count += generate_index_count.count;
}
// dirty_pubkeys will contain a pubkey if an item has multiple rooted entries for
@ -9489,7 +9488,11 @@ impl AccountsDb {
entry.count,
store.count(),
);
store.count_and_status.write().unwrap().0 = entry.count;
{
let mut count_and_status = store.count_and_status.write().unwrap();
assert_eq!(count_and_status.0, 0);
count_and_status.0 = entry.count;
}
store.alive_bytes.store(entry.stored_size, Ordering::SeqCst);
store
.approx_store_count
@ -15853,6 +15856,8 @@ pub mod tests {
// fake out the store count to avoid the assert
for (_, store) in accounts.storage.iter() {
store.alive_bytes.store(0, Ordering::Release);
let mut count_and_status = store.count_and_status.write().unwrap();
count_and_status.0 = 0;
}
// populate based on made up hash data
@ -15864,6 +15869,7 @@ pub mod tests {
count: 3,
},
);
accounts.set_storage_count_and_alive_bytes(dashmap, &mut GenerateIndexTimings::default());
assert_eq!(accounts.storage.len(), 1);
for (_, store) in accounts.storage.iter() {

View File

@ -71,6 +71,12 @@ pub type SlotSlice<'s, T> = &'s [(Slot, T)];
pub type RefCount = u64;
pub type AccountMap<T, U> = Arc<InMemAccountsIndex<T, U>>;
/// Summary returned from a bulk primary-index insert during index generation.
#[derive(Debug, Default, Eq, PartialEq)]
pub(crate) struct GenerateIndexCount {
    /// How many accounts were inserted into the index.
    pub count: usize,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// how accounts index 'upsert' should handle reclaims
pub enum UpsertReclaim {
@ -1586,13 +1592,14 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndex<T, U> {
// Can save time when inserting lots of new keys.
// But, does NOT update secondary index
// This is designed to be called at startup time.
// returns (dirty_pubkeys, insertion_time_us, GenerateIndexCount)
#[allow(clippy::needless_collect)]
pub(crate) fn insert_new_if_missing_into_primary_index(
&self,
slot: Slot,
item_len: usize,
items: impl Iterator<Item = (Pubkey, T)>,
) -> (Vec<Pubkey>, u64) {
) -> (Vec<Pubkey>, u64, GenerateIndexCount) {
// big enough so not likely to re-allocate, small enough to not over-allocate by too much
// this assumes the largest bin contains twice the expected amount of the average size per bin
let bins = self.bins();
@ -1612,6 +1619,7 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndex<T, U> {
(pubkey_bin, Vec::with_capacity(expected_items_per_bin))
})
.collect::<Vec<_>>();
let mut count = 0;
let mut dirty_pubkeys = items
.filter_map(|(pubkey, account_info)| {
let pubkey_bin = self.bin_calculator.bin_from_pubkey(&pubkey);
@ -1631,6 +1639,7 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndex<T, U> {
binned.into_iter().for_each(|(pubkey_bin, items)| {
let r_account_maps = &self.account_maps[pubkey_bin];
let mut insert_time = Measure::start("insert_into_primary_index");
count += items.len();
if use_disk {
r_account_maps.startup_insert_only(items.into_iter());
} else {
@ -1660,7 +1669,11 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndex<T, U> {
insertion_time.fetch_add(insert_time.as_us(), Ordering::Relaxed);
});
(dirty_pubkeys, insertion_time.load(Ordering::Relaxed))
(
dirty_pubkeys,
insertion_time.load(Ordering::Relaxed),
GenerateIndexCount { count },
)
}
/// use Vec<> because the internal vecs are already allocated per bin
@ -2195,7 +2208,10 @@ pub mod tests {
let account_info = true;
let items = vec![(*pubkey, account_info)];
index.set_startup(Startup::Startup);
index.insert_new_if_missing_into_primary_index(slot, items.len(), items.into_iter());
let expected_len = items.len();
let (_, _, result) =
index.insert_new_if_missing_into_primary_index(slot, items.len(), items.into_iter());
assert_eq!(result.count, expected_len);
index.set_startup(Startup::Normal);
let mut ancestors = Ancestors::default();
@ -2230,7 +2246,10 @@ pub mod tests {
let account_info = false;
let items = vec![(*pubkey, account_info)];
index.set_startup(Startup::Startup);
index.insert_new_if_missing_into_primary_index(slot, items.len(), items.into_iter());
let expected_len = items.len();
let (_, _, result) =
index.insert_new_if_missing_into_primary_index(slot, items.len(), items.into_iter());
assert_eq!(result.count, expected_len);
index.set_startup(Startup::Normal);
let mut ancestors = Ancestors::default();
@ -2337,7 +2356,10 @@ pub mod tests {
index.set_startup(Startup::Startup);
let items = vec![(key0, account_infos[0]), (key1, account_infos[1])];
index.insert_new_if_missing_into_primary_index(slot0, items.len(), items.into_iter());
let expected_len = items.len();
let (_, _, result) =
index.insert_new_if_missing_into_primary_index(slot0, items.len(), items.into_iter());
assert_eq!(result.count, expected_len);
index.set_startup(Startup::Normal);
for (i, key) in [key0, key1].iter().enumerate() {
@ -2388,7 +2410,13 @@ pub mod tests {
} else {
let items = vec![(key, account_infos[0])];
index.set_startup(Startup::Startup);
index.insert_new_if_missing_into_primary_index(slot0, items.len(), items.into_iter());
let expected_len = items.len();
let (_, _, result) = index.insert_new_if_missing_into_primary_index(
slot0,
items.len(),
items.into_iter(),
);
assert_eq!(result.count, expected_len);
index.set_startup(Startup::Normal);
}
assert!(gc.is_empty());
@ -2433,7 +2461,13 @@ pub mod tests {
let items = vec![(key, account_infos[1])];
index.set_startup(Startup::Startup);
index.insert_new_if_missing_into_primary_index(slot1, items.len(), items.into_iter());
let expected_len = items.len();
let (_, _, result) = index.insert_new_if_missing_into_primary_index(
slot1,
items.len(),
items.into_iter(),
);
assert_eq!(result.count, expected_len);
index.set_startup(Startup::Normal);
}
assert!(gc.is_empty());