reorder generate_index info (#33077)

reorder generate_index info
Jeff Washington (jwash) 2023-08-30 16:16:40 -07:00 committed by GitHub
parent f0f75aff59
commit 1b9c9a313c
2 changed files with 28 additions and 20 deletions


@@ -1637,7 +1637,7 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndex<T, U> {
                     let is_zero_lamport = account_info.is_zero_lamport();
                     let result = if is_zero_lamport { Some(pubkey) } else { None };
-                    binned[binned_index].1.push((pubkey, account_info));
+                    binned[binned_index].1.push((pubkey, (slot, account_info)));
                     result
                 })
                 .collect::<Vec<_>>();
@@ -1649,25 +1649,29 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> AccountsIndex<T, U> {
             let r_account_maps = &self.account_maps[pubkey_bin];
             let mut insert_time = Measure::start("insert_into_primary_index");
             if use_disk {
-                r_account_maps.startup_insert_only(slot, items.into_iter());
+                r_account_maps.startup_insert_only(items.into_iter());
             } else {
                 // not using disk buckets, so just write to in-mem
                 // this is no longer the default case
-                items.into_iter().for_each(|(pubkey, account_info)| {
-                    let new_entry = PreAllocatedAccountMapEntry::new(
-                        slot,
-                        account_info,
-                        &self.storage.storage,
-                        use_disk,
-                    );
-                    match r_account_maps.insert_new_entry_if_missing_with_lock(pubkey, new_entry) {
-                        InsertNewEntryResults::DidNotExist => {}
-                        InsertNewEntryResults::ExistedNewEntryZeroLamports => {}
-                        InsertNewEntryResults::ExistedNewEntryNonZeroLamports => {
-                            dirty_pubkeys.push(pubkey);
-                        }
-                    }
-                });
+                items
+                    .into_iter()
+                    .for_each(|(pubkey, (slot, account_info))| {
+                        let new_entry = PreAllocatedAccountMapEntry::new(
+                            slot,
+                            account_info,
+                            &self.storage.storage,
+                            use_disk,
+                        );
+                        match r_account_maps
+                            .insert_new_entry_if_missing_with_lock(pubkey, new_entry)
+                        {
+                            InsertNewEntryResults::DidNotExist => {}
+                            InsertNewEntryResults::ExistedNewEntryZeroLamports => {}
+                            InsertNewEntryResults::ExistedNewEntryNonZeroLamports => {
+                                dirty_pubkeys.push(pubkey);
+                            }
+                        }
+                    });
             }
             insert_time.stop();
             insertion_time.fetch_add(insert_time.as_us(), Ordering::Relaxed);
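
The two hunks above make the slot travel with each binned item: generate_index now bins (pubkey, (slot, account_info)) pairs, so startup_insert_only no longer needs a separate slot parameter. Below is a minimal standalone sketch of the shape change, using simplified stand-in types (Slot, Pubkey, and AccountInfo here are illustrative, not the real accounts-db definitions):

type Slot = u64;
type Pubkey = [u8; 32];

// Hypothetical stand-in for the per-account payload `T`.
#[derive(Clone, Copy, Debug, PartialEq)]
struct AccountInfo(u64);

// Before: one slot argument applied to every item of (Pubkey, T).
fn insert_old(
    slot: Slot,
    items: impl Iterator<Item = (Pubkey, AccountInfo)>,
    out: &mut Vec<(Slot, Pubkey, AccountInfo)>,
) {
    items.for_each(|(k, v)| out.push((slot, k, v)));
}

// After: each item carries its own slot, so the parameter disappears and
// the stored layout becomes (Pubkey, (Slot, T)).
fn insert_new(
    items: impl Iterator<Item = (Pubkey, (Slot, AccountInfo))>,
    out: &mut Vec<(Pubkey, (Slot, AccountInfo))>,
) {
    items.for_each(|(k, (slot, v))| out.push((k, (slot, v))));
}

fn main() {
    let key = [0u8; 32];

    let mut old_layout = Vec::new();
    insert_old(42, std::iter::once((key, AccountInfo(1))), &mut old_layout);

    let mut new_layout = Vec::new();
    insert_new(std::iter::once((key, (42, AccountInfo(1)))), &mut new_layout);

    // Same information, reordered so the slot is embedded per item.
    let (k, (slot, v)) = new_layout[0];
    assert_eq!((slot, k, v), old_layout[0]);
}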


@@ -142,7 +142,7 @@ struct StartupInfoDuplicates<T: IndexValue> {
 #[derive(Default, Debug)]
 struct StartupInfo<T: IndexValue> {
     /// entries to add next time we are flushing to disk
-    insert: Mutex<Vec<(Slot, Pubkey, T)>>,
+    insert: Mutex<Vec<(Pubkey, (Slot, T))>>,
     /// pubkeys with more than 1 entry
     duplicates: Mutex<StartupInfoDuplicates<T>>,
 }
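
For context, a compilable miniature of the new StartupInfo layout (hypothetical, heavily simplified; T is shrunk to u64 and the duplicates side is omitted). Storing (Pubkey, (Slot, T)) keys the pending entries the same way the disk bucket will later consume them, and the queueing path in the next hunk fills this vector:

use std::sync::Mutex;

type Slot = u64;
type Pubkey = [u8; 32];

// Simplified stand-in for StartupInfo<T>, with T fixed to u64.
#[derive(Default)]
struct StartupInfo {
    /// entries to add next time we are flushing to disk
    insert: Mutex<Vec<(Pubkey, (Slot, u64))>>,
}

impl StartupInfo {
    /// Queue insertions; each item already carries its slot.
    fn startup_insert_only(&self, items: impl Iterator<Item = (Pubkey, (Slot, u64))>) {
        let mut insert = self.insert.lock().unwrap();
        insert.extend(items);
    }
}

fn main() {
    let info = StartupInfo::default();
    info.startup_insert_only(std::iter::once(([7u8; 32], (42, 1))));
    assert_eq!(info.insert.lock().unwrap().len(), 1);
}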
@@ -672,14 +672,16 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> InMemAccountsIndex<T, U> {
     /// Queue up these insertions for when the flush thread is dealing with this bin.
     /// This is very fast and requires no lookups or disk access.
-    pub fn startup_insert_only(&self, slot: Slot, items: impl Iterator<Item = (Pubkey, T)>) {
+    pub fn startup_insert_only(&self, items: impl Iterator<Item = (Pubkey, (Slot, T))>) {
         assert!(self.storage.get_startup());
         assert!(self.bucket.is_some());
         let mut insert = self.startup_info.insert.lock().unwrap();
         // todo: memcpy the new slice into our vector already
         // todo: avoid reallocs and just allocate another vec instead of likely resizing this one over and over
-        items
-            .into_iter()
-            .for_each(|(k, v)| insert.push((slot, k, v)));
+        items
+            .into_iter()
+            .for_each(|(k, (slot, v))| insert.push((k, (slot, v))));
     }

     pub fn insert_new_entry_if_missing_with_lock(
@@ -1069,7 +1071,9 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> InMemAccountsIndex<T, U> {
         let disk = self.bucket.as_ref().unwrap();
         let mut count = insert.len() as u64;
         for (k, entry, duplicate_entry) in disk.batch_insert_non_duplicates(
-            insert.into_iter().map(|(slot, k, v)| (k, (slot, v.into()))),
+            insert
+                .into_iter()
+                .map(|(k, (slot, v))| (k, (slot, v.into()))),
             count as usize,
         ) {
             duplicates.duplicates.push((entry.0, k, entry.1.into()));
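
End to end, the payoff of the reorder shows up at flush time: the stored pairs already have the (key, (slot, value)) shape the disk bucket's batch insert consumes, leaving only the T -> U conversion in the map. A hedged sketch follows; the batch_insert_non_duplicates stand-in below is hypothetical and greatly simplified, not the real bucket-map API:

type Slot = u64;
type Pubkey = [u8; 32];

// Stand-ins for the in-memory value T and the on-disk value U.
struct MemValue(u64);
struct DiskValue(u64);

impl From<MemValue> for DiskValue {
    fn from(v: MemValue) -> Self {
        DiskValue(v.0)
    }
}

// Hypothetical, simplified stand-in for the disk bucket's batch insert:
// it consumes (key, (slot, value)) pairs directly.
fn batch_insert_non_duplicates(items: impl Iterator<Item = (Pubkey, (Slot, DiskValue))>) {
    for (_key, (_slot, _value)) in items {
        // write into the disk bucket here
    }
}

fn main() {
    let insert: Vec<(Pubkey, (Slot, MemValue))> = vec![([1u8; 32], (42, MemValue(7)))];

    // With the reordered layout, no field shuffling is needed at flush time;
    // the map only performs the value conversion.
    batch_insert_non_duplicates(
        insert
            .into_iter()
            .map(|(k, (slot, v))| (k, (slot, v.into()))),
    );
}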