From a9aa533684e09a47532b10e123a52a6aa816c18f Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" <75863576+jeffwashington@users.noreply.github.com> Date: Thu, 20 May 2021 10:11:56 -0500 Subject: [PATCH] replace Ancestors HashMap for performance (#16477) * replace Ancestors HashMap for performance * add ancestors module --- accounts-bench/src/main.rs | 6 +- runtime/benches/accounts.rs | 6 +- runtime/src/ancestors.rs | 334 ++++++++++++++++++++++++++- runtime/src/bank.rs | 33 +-- runtime/src/serde_snapshot.rs | 5 +- runtime/src/serde_snapshot/future.rs | 12 +- runtime/tests/accounts.rs | 2 +- 7 files changed, 372 insertions(+), 26 deletions(-) diff --git a/accounts-bench/src/main.rs b/accounts-bench/src/main.rs index 01524b0713..1eecd987f9 100644 --- a/accounts-bench/src/main.rs +++ b/accounts-bench/src/main.rs @@ -88,11 +88,13 @@ fn main() { num_slots, create_time ); - let mut ancestors: Ancestors = vec![(0, 0)].into_iter().collect(); + let mut ancestors = Vec::with_capacity(num_slots); + ancestors.push((0, 0)); for i in 1..num_slots { - ancestors.insert(i as u64, i - 1); + ancestors.push((i as u64, i - 1)); accounts.add_root(i as u64); } + let ancestors = Ancestors::from(ancestors); let mut elapsed = vec![0; iterations]; let mut elapsed_store = vec![0; iterations]; for x in 0..iterations { diff --git a/runtime/benches/accounts.rs b/runtime/benches/accounts.rs index d7a27ad5a5..29b1561077 100644 --- a/runtime/benches/accounts.rs +++ b/runtime/benches/accounts.rs @@ -108,7 +108,7 @@ fn test_accounts_hash_bank_hash(bencher: &mut Bencher) { let num_accounts = 60_000; let slot = 0; create_test_accounts(&accounts, &mut pubkeys, num_accounts, slot); - let ancestors = vec![(0, 0)].into_iter().collect(); + let ancestors = Ancestors::from(vec![(0, 0)]); let (_, total_lamports) = accounts.accounts_db.update_accounts_hash(0, &ancestors); bencher.iter(|| assert!(accounts.verify_bank_hash_and_lamports(0, &ancestors, total_lamports))); } @@ -124,7 +124,7 @@ fn test_update_accounts_hash(bencher: &mut Bencher) { ); let mut pubkeys: Vec = vec![]; create_test_accounts(&accounts, &mut pubkeys, 50_000, 0); - let ancestors = vec![(0, 0)].into_iter().collect(); + let ancestors = Ancestors::from(vec![(0, 0)]); bencher.iter(|| { accounts.accounts_db.update_accounts_hash(0, &ancestors); }); @@ -378,7 +378,7 @@ fn bench_load_largest_accounts(b: &mut Bencher) { let account = AccountSharedData::new(lamports, 0, &Pubkey::default()); accounts.store_slow_uncached(0, &pubkey, &account); } - let ancestors = vec![(0, 0)].into_iter().collect(); + let ancestors = Ancestors::from(vec![(0, 0)]); b.iter(|| { accounts.load_largest_accounts( &ancestors, diff --git a/runtime/src/ancestors.rs b/runtime/src/ancestors.rs index d4c167206b..71b4927e69 100644 --- a/runtime/src/ancestors.rs +++ b/runtime/src/ancestors.rs @@ -1,4 +1,336 @@ use solana_sdk::clock::Slot; use std::collections::HashMap; -pub type Ancestors = HashMap; +pub type AncestorsForSerialization = HashMap; + +#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize, AbiExample)] +pub struct Ancestors { + min: Slot, + slots: Vec>, + count: usize, + max: Slot, + large_range_slots: HashMap, +} + +// some tests produce ancestors ranges that are too large such +// that we prefer to implement them in a sparse HashMap +const ANCESTORS_HASH_MAP_SIZE: u64 = 10_000; + +impl From> for Ancestors { + fn from(source: Vec<(Slot, usize)>) -> Ancestors { + let mut result = Ancestors::default(); + if !source.is_empty() { + result.min = Slot::MAX; + result.max = Slot::MIN; + source.iter().for_each(|(slot, _)| { + result.min = std::cmp::min(result.min, *slot); + result.max = std::cmp::max(result.max, *slot + 1); + }); + let range = result.range(); + if range > ANCESTORS_HASH_MAP_SIZE { + result.large_range_slots = source.into_iter().collect(); + result.min = 0; + result.max = 0; + } else { + result.slots = vec![None; range as usize]; + source.into_iter().for_each(|(slot, size)| { + let slot = result.slot_index(&slot); + if result.slots[slot].is_none() { + result.count += 1; + } + result.slots[slot] = Some(size); + }); + } + } + + result + } +} + +impl From<&HashMap> for Ancestors { + fn from(source: &HashMap) -> Ancestors { + let mut result = Ancestors::default(); + if !source.is_empty() { + result.min = Slot::MAX; + result.max = Slot::MIN; + source.iter().for_each(|(slot, _)| { + result.min = std::cmp::min(result.min, *slot); + result.max = std::cmp::max(result.max, *slot + 1); + }); + let range = result.range(); + if range > ANCESTORS_HASH_MAP_SIZE { + result.large_range_slots = + source.iter().map(|(slot, size)| (*slot, *size)).collect(); + result.min = 0; + result.max = 0; + } else { + result.slots = vec![None; range as usize]; + source.iter().for_each(|(slot, size)| { + let slot = result.slot_index(&slot); + if result.slots[slot].is_none() { + result.count += 1; + } + result.slots[slot] = Some(*size); + }); + } + } + + result + } +} + +impl From<&Ancestors> for HashMap { + fn from(source: &Ancestors) -> HashMap { + let mut result = HashMap::with_capacity(source.len()); + source.keys().iter().for_each(|slot| { + result.insert(*slot, *source.get(slot).unwrap()); + }); + result + } +} + +impl Ancestors { + pub fn keys(&self) -> Vec { + if self.large_range_slots.is_empty() { + self.slots + .iter() + .enumerate() + .filter_map(|(size, i)| i.map(|_| size as u64 + self.min)) + .collect::>() + } else { + self.large_range_slots.keys().copied().collect::>() + } + } + + pub fn get(&self, slot: &Slot) -> Option<&usize> { + if self.large_range_slots.is_empty() { + if slot < &self.min || slot >= &self.max { + return None; + } + let slot = self.slot_index(slot); + self.slots[slot].as_ref() + } else { + self.large_range_slots.get(slot) + } + } + + pub fn remove(&mut self, slot: &Slot) { + if self.large_range_slots.is_empty() { + if slot < &self.min || slot >= &self.max { + return; + } + let slot = self.slot_index(slot); + if self.slots[slot].is_some() { + self.count -= 1; + self.slots[slot] = None; + } + } else { + self.large_range_slots.remove(slot); + } + } + + pub fn contains_key(&self, slot: &Slot) -> bool { + if self.large_range_slots.is_empty() { + if slot < &self.min || slot >= &self.max { + return false; + } + let slot = self.slot_index(slot); + self.slots[slot].is_some() + } else { + self.large_range_slots.contains_key(slot) + } + } + + pub fn len(&self) -> usize { + if self.large_range_slots.is_empty() { + self.count + } else { + self.large_range_slots.len() + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn slot_index(&self, slot: &Slot) -> usize { + (slot - self.min) as usize + } + + fn range(&self) -> Slot { + self.max - self.min + } +} +#[cfg(test)] +pub mod tests { + use super::*; + use crate::contains::Contains; + use log::*; + use solana_measure::measure::Measure; + use std::collections::HashSet; + + impl std::iter::FromIterator<(Slot, usize)> for Ancestors { + fn from_iter(iter: I) -> Self + where + I: IntoIterator, + { + let mut data = Vec::new(); + for i in iter { + data.push(i); + } + Ancestors::from(data) + } + } + + impl Ancestors { + pub fn insert(&mut self, mut slot: Slot, size: usize) { + if self.large_range_slots.is_empty() { + if slot < self.min || slot >= self.max { + let new_min = std::cmp::min(self.min, slot); + let new_max = std::cmp::max(self.max, slot + 1); + let new_range = new_max - new_min; + if new_min == self.min { + self.max = slot + 1; + self.slots.resize(new_range as usize, None); + } else { + // min changed + let mut new_slots = vec![None; new_range as usize]; + self.slots.iter().enumerate().for_each(|(i, size)| { + new_slots[i as usize + self.min as usize - slot as usize] = *size + }); + self.slots = new_slots; + self.min = slot; + // fall through and set this value in + } + } + slot -= self.min; + if self.slots[slot as usize].is_none() { + self.count += 1; + } + self.slots[slot as usize] = Some(size); + } else { + self.large_range_slots.insert(slot, size); + } + } + } + + #[test] + fn test_ancestors_permutations() { + solana_logger::setup(); + let mut ancestors = Ancestors::default(); + let mut hash = HashMap::new(); + + let min = 101_000; + let width = 400_000; + let dead = 19; + + let mut slot = min; + while hash.len() < width { + slot += 1; + if slot % dead == 0 { + continue; + } + hash.insert(slot, 0); + ancestors.insert(slot, 0); + } + compare_ancestors(&hash, &ancestors); + + let max = slot + 1; + + let mut time = Measure::start(""); + let mut count = 0; + for slot in (min - 10)..max + 100 { + if hash.contains(&slot) { + count += 1; + } + } + time.stop(); + + let mut time2 = Measure::start(""); + let mut count2 = 0; + for slot in (min - 10)..max + 100 { + if ancestors.contains_key(&slot) { + count2 += 1; + } + } + time2.stop(); + info!( + "{}ms, {}ms, {} ratio", + time.as_ms(), + time2.as_ms(), + time.as_ns() / time2.as_ns() + ); + assert_eq!(count, count2); + } + + fn compare_ancestors(hashset: &HashMap, ancestors: &Ancestors) { + assert_eq!(hashset.len(), ancestors.len()); + assert_eq!(hashset.is_empty(), ancestors.is_empty()); + let mut min = u64::MAX; + let mut max = 0; + for item in hashset.iter() { + let key = item.0; + min = std::cmp::min(min, *key); + max = std::cmp::max(max, *key); + assert_eq!(ancestors.get(&key).unwrap(), item.1); + } + for slot in min - 1..max + 2 { + assert_eq!(ancestors.get(&slot), hashset.get(&slot)); + } + } + + #[test] + fn test_ancestors_smaller() { + solana_logger::setup(); + + for width in 0..34 { + let mut hash = HashSet::new(); + + let min = 1_010_000; + let dead = 19; + + let mut slot = min; + let mut slots = Vec::new(); + while hash.len() < width { + slot += 1; + if slot % dead == 0 { + continue; + } + hash.insert(slot); + slots.push((slot, 0)); + } + let ancestors = Ancestors::from(slots); + + let max = slot + 1; + let passes = 1; + let mut time = Measure::start(""); + let mut count = 0; + for _pass in 0..passes { + for slot in (min - 10)..max + 100 { + if hash.contains(&slot) { + count += 1; + } + } + } + time.stop(); + + let mut time2 = Measure::start(""); + let mut count2 = 0; + for _pass in 0..passes { + for slot in (min - 10)..max + 100 { + if ancestors.contains_key(&slot) { + count2 += 1; + } + } + } + time2.stop(); + info!( + "{}, {}, {}", + time.as_ms(), + time2.as_ms(), + time.as_ns() / time2.as_ns() + ); + assert_eq!(count, count2); + } + } +} diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 709d77c761..92dff13def 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -40,7 +40,7 @@ use crate::{ }, accounts_db::{ErrorCounters, SnapshotStorages}, accounts_index::{AccountSecondaryIndexes, IndexKey}, - ancestors::Ancestors, + ancestors::{Ancestors, AncestorsForSerialization}, blockhash_queue::BlockhashQueue, builtins::{self, ActivationType}, epoch_stakes::{EpochStakes, NodeVoteAccounts}, @@ -595,7 +595,7 @@ impl NonceRollbackInfo for NonceRollbackFull { #[derive(Clone, Debug, Default)] pub(crate) struct BankFieldsToDeserialize { pub(crate) blockhash_queue: BlockhashQueue, - pub(crate) ancestors: Ancestors, + pub(crate) ancestors: AncestorsForSerialization, pub(crate) hash: Hash, pub(crate) parent_hash: Hash, pub(crate) parent_slot: Slot, @@ -634,7 +634,7 @@ pub(crate) struct BankFieldsToDeserialize { #[derive(Debug)] pub(crate) struct BankFieldsToSerialize<'a> { pub(crate) blockhash_queue: &'a RwLock, - pub(crate) ancestors: &'a Ancestors, + pub(crate) ancestors: &'a AncestorsForSerialization, pub(crate) hash: Hash, pub(crate) parent_hash: Hash, pub(crate) parent_slot: Slot, @@ -980,7 +980,7 @@ impl Bank { accounts_db_caching_enabled: bool, ) -> Self { let mut bank = Self::default(); - bank.ancestors.insert(bank.slot(), 0); + bank.ancestors = Ancestors::from(vec![(bank.slot(), 0)]); bank.transaction_debug_keys = debug_keys; bank.cluster_type = Some(genesis_config.cluster_type); @@ -1130,10 +1130,12 @@ impl Bank { ("block_height", new.block_height, i64) ); - new.ancestors.insert(new.slot(), 0); + let mut ancestors = Vec::with_capacity(1 + new.parents().len()); + ancestors.push((new.slot(), 0)); new.parents().iter().enumerate().for_each(|(i, p)| { - new.ancestors.insert(p.slot(), i + 1); + ancestors.push((p.slot(), i + 1)); }); + new.ancestors = Ancestors::from(ancestors); // Following code may touch AccountsDb, requiring proper ancestors let parent_epoch = parent.epoch(); @@ -1166,7 +1168,7 @@ impl Bank { pub(crate) fn proper_ancestors(&self) -> impl Iterator + '_ { self.ancestors .keys() - .copied() + .into_iter() .filter(move |slot| *slot != self.slot) } @@ -1216,7 +1218,7 @@ impl Bank { rc: bank_rc, src: new(), blockhash_queue: RwLock::new(fields.blockhash_queue), - ancestors: fields.ancestors, + ancestors: Ancestors::from(&fields.ancestors), hash: RwLock::new(fields.hash), parent_hash: fields.parent_hash, parent_slot: fields.parent_slot, @@ -1308,10 +1310,13 @@ impl Bank { } /// Return subset of bank fields representing serializable state - pub(crate) fn get_fields_to_serialize(&self) -> BankFieldsToSerialize { + pub(crate) fn get_fields_to_serialize<'a>( + &'a self, + ancestors: &'a HashMap, + ) -> BankFieldsToSerialize<'a> { BankFieldsToSerialize { blockhash_queue: &self.blockhash_queue, - ancestors: &self.ancestors, + ancestors, hash: *self.hash.read().unwrap(), parent_hash: self.parent_hash, parent_slot: self.parent_slot, @@ -1380,8 +1385,8 @@ impl Bank { let mut roots = self.src.status_cache.read().unwrap().roots().clone(); let min = roots.iter().min().cloned().unwrap_or(0); for ancestor in self.ancestors.keys() { - if *ancestor >= min { - roots.insert(*ancestor); + if ancestor >= min { + roots.insert(ancestor); } } @@ -4293,7 +4298,7 @@ impl Bank { &self, pubkey: &Pubkey, ) -> Option<(AccountSharedData, Slot)> { - let just_self: Ancestors = vec![(self.slot(), 0)].into_iter().collect(); + let just_self: Ancestors = Ancestors::from(vec![(self.slot(), 0)]); if let Some((account, slot)) = self.load_slow_with_fixed_root(&just_self, pubkey) { if slot == self.slot() { return Some((account, slot)); @@ -10676,6 +10681,7 @@ pub(crate) mod tests { #[test] fn test_upgrade_epoch() { + solana_logger::setup(); let GenesisConfigInfo { mut genesis_config, mint_keypair, @@ -10683,7 +10689,6 @@ pub(crate) mod tests { } = create_genesis_config_with_leader(500, &solana_sdk::pubkey::new_rand(), 0); genesis_config.fee_rate_governor = FeeRateGovernor::new(1, 0); let bank = Arc::new(Bank::new(&genesis_config)); - // Jump to the test-only upgrade epoch -- see `Bank::upgrade_epoch()` let bank = Bank::new_from_parent( &bank, diff --git a/runtime/src/serde_snapshot.rs b/runtime/src/serde_snapshot.rs index 0cfc7b2b97..7b68c887d3 100644 --- a/runtime/src/serde_snapshot.rs +++ b/runtime/src/serde_snapshot.rs @@ -255,7 +255,10 @@ where account_indexes, caching_enabled, )?; - accounts_db.freeze_accounts(&bank_fields.ancestors, frozen_account_pubkeys); + accounts_db.freeze_accounts( + &Ancestors::from(&bank_fields.ancestors), + frozen_account_pubkeys, + ); let bank_rc = BankRc::new(Accounts::new_empty(accounts_db), bank_fields.slot); let bank = Bank::new_from_fields( diff --git a/runtime/src/serde_snapshot/future.rs b/runtime/src/serde_snapshot/future.rs index 2bf612fba1..b822da5825 100644 --- a/runtime/src/serde_snapshot/future.rs +++ b/runtime/src/serde_snapshot/future.rs @@ -3,6 +3,8 @@ use super::common::UnusedAccounts; use solana_frozen_abi::abi_example::IgnoreAsHelper; use {super::*, solana_measure::measure::Measure, std::cell::RefCell}; +use crate::ancestors::AncestorsForSerialization; + type AccountsDbFields = super::AccountsDbFields; // Serializable version of AccountStorageEntry for snapshot format @@ -45,7 +47,7 @@ use std::sync::RwLock; #[derive(Clone, Deserialize)] pub(crate) struct DeserializableVersionedBank { pub(crate) blockhash_queue: BlockhashQueue, - pub(crate) ancestors: Ancestors, + pub(crate) ancestors: AncestorsForSerialization, pub(crate) hash: Hash, pub(crate) parent_hash: Hash, pub(crate) parent_slot: Slot, @@ -122,7 +124,7 @@ impl From for BankFieldsToDeserialize { #[derive(Serialize)] pub(crate) struct SerializableVersionedBank<'a> { pub(crate) blockhash_queue: &'a RwLock, - pub(crate) ancestors: &'a Ancestors, + pub(crate) ancestors: &'a AncestorsForSerialization, pub(crate) hash: Hash, pub(crate) parent_hash: Hash, pub(crate) parent_slot: Slot, @@ -163,7 +165,7 @@ impl<'a> From> for SerializableVersionedB } Self { blockhash_queue: rhs.blockhash_queue, - ancestors: rhs.ancestors, + ancestors: &rhs.ancestors, hash: rhs.hash, parent_hash: rhs.parent_hash, parent_slot: rhs.parent_slot, @@ -213,8 +215,10 @@ impl<'a> TypeContext<'a> for Context { where Self: std::marker::Sized, { + let ancestors = HashMap::from(&serializable_bank.bank.ancestors); + let fields = serializable_bank.bank.get_fields_to_serialize(&ancestors); ( - SerializableVersionedBank::from(serializable_bank.bank.get_fields_to_serialize()), + SerializableVersionedBank::from(fields), SerializableAccountsDb::<'a, Self> { accounts_db: &*serializable_bank.bank.rc.accounts.accounts_db, slot: serializable_bank.bank.rc.slot, diff --git a/runtime/tests/accounts.rs b/runtime/tests/accounts.rs index f04021aff0..d7b7a6329d 100644 --- a/runtime/tests/accounts.rs +++ b/runtime/tests/accounts.rs @@ -76,7 +76,7 @@ fn test_bad_bank_hash() { let db = AccountsDb::new(Vec::new(), &ClusterType::Development); let some_slot: Slot = 0; - let ancestors: Ancestors = [(some_slot, 0)].iter().copied().collect(); + let ancestors = Ancestors::from([(some_slot, 0usize)].iter().copied().collect::>()); let max_accounts = 200; let mut accounts_keys: Vec<_> = (0..max_accounts)