replace Ancestors HashMap for performance (#16477)

* replace Ancestors HashMap for performance

* add ancestors module
This commit is contained in:
Jeff Washington (jwash) 2021-05-20 10:11:56 -05:00 committed by GitHub
parent ddfc15b9f2
commit a9aa533684
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 372 additions and 26 deletions

View File

@ -88,11 +88,13 @@ fn main() {
num_slots,
create_time
);
let mut ancestors: Ancestors = vec![(0, 0)].into_iter().collect();
let mut ancestors = Vec::with_capacity(num_slots);
ancestors.push((0, 0));
for i in 1..num_slots {
ancestors.insert(i as u64, i - 1);
ancestors.push((i as u64, i - 1));
accounts.add_root(i as u64);
}
let ancestors = Ancestors::from(ancestors);
let mut elapsed = vec![0; iterations];
let mut elapsed_store = vec![0; iterations];
for x in 0..iterations {

View File

@ -108,7 +108,7 @@ fn test_accounts_hash_bank_hash(bencher: &mut Bencher) {
let num_accounts = 60_000;
let slot = 0;
create_test_accounts(&accounts, &mut pubkeys, num_accounts, slot);
let ancestors = vec![(0, 0)].into_iter().collect();
let ancestors = Ancestors::from(vec![(0, 0)]);
let (_, total_lamports) = accounts.accounts_db.update_accounts_hash(0, &ancestors);
bencher.iter(|| assert!(accounts.verify_bank_hash_and_lamports(0, &ancestors, total_lamports)));
}
@ -124,7 +124,7 @@ fn test_update_accounts_hash(bencher: &mut Bencher) {
);
let mut pubkeys: Vec<Pubkey> = vec![];
create_test_accounts(&accounts, &mut pubkeys, 50_000, 0);
let ancestors = vec![(0, 0)].into_iter().collect();
let ancestors = Ancestors::from(vec![(0, 0)]);
bencher.iter(|| {
accounts.accounts_db.update_accounts_hash(0, &ancestors);
});
@ -378,7 +378,7 @@ fn bench_load_largest_accounts(b: &mut Bencher) {
let account = AccountSharedData::new(lamports, 0, &Pubkey::default());
accounts.store_slow_uncached(0, &pubkey, &account);
}
let ancestors = vec![(0, 0)].into_iter().collect();
let ancestors = Ancestors::from(vec![(0, 0)]);
b.iter(|| {
accounts.load_largest_accounts(
&ancestors,

View File

@ -1,4 +1,336 @@
use solana_sdk::clock::Slot;
use std::collections::HashMap;
pub type Ancestors = HashMap<Slot, usize>;
/// Plain `HashMap` form of the ancestors (slot -> usize). The bank field
/// structs that are serialized/deserialized use this alias instead of the
/// `Ancestors` struct.
pub type AncestorsForSerialization = HashMap<Slot, usize>;
/// A set of `slot -> usize` entries with two possible representations:
///
/// * dense: `slots` holds one `Option<usize>` per slot in `min..max`, and the
///   entry for `slot` lives at index `slot - min`;
/// * sparse: `large_range_slots` holds the entries in a `HashMap`. The `From`
///   conversions pick this form when the slot range exceeds
///   `ANCESTORS_HASH_MAP_SIZE`.
///
/// Every accessor treats a non-empty `large_range_slots` as "sparse form is
/// active" and otherwise falls back to the dense fields.
#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize, AbiExample)]
pub struct Ancestors {
// Lowest slot covered by `slots` (inclusive). Set to 0 by the `From`
// conversions when the sparse form is chosen.
min: Slot,
// Dense storage: `slots[slot - min]` is `Some(value)` when `slot` is present.
slots: Vec<Option<usize>>,
// Number of `Some` entries currently held in `slots`.
count: usize,
// One past the highest slot covered by `slots` (exclusive). Set to 0 by the
// `From` conversions when the sparse form is chosen.
max: Slot,
// Sparse storage; when non-empty it is used instead of the dense fields.
large_range_slots: HashMap<Slot, usize>,
}
// Largest slot range (`max - min`) that the `From` conversions store in the
// dense `slots` vector. Some tests produce ancestors ranges that are too large
// for that, so anything wider is kept in the sparse `large_range_slots`
// HashMap instead.
const ANCESTORS_HASH_MAP_SIZE: u64 = 10_000;
impl From<Vec<(Slot, usize)>> for Ancestors {
    /// Builds an `Ancestors` from a list of `(slot, value)` pairs.
    ///
    /// * An empty `source` yields `Ancestors::default()`.
    /// * When the slots span more than `ANCESTORS_HASH_MAP_SIZE` values, or the
    ///   highest slot is `Slot::MAX`, the pairs are stored in the sparse
    ///   `large_range_slots` map and `min`/`max` stay 0.
    /// * Otherwise the pairs are stored densely in `slots`, indexed by
    ///   `slot - min`. If a slot appears more than once the last value wins
    ///   and the slot is counted once.
    fn from(source: Vec<(Slot, usize)>) -> Ancestors {
        let mut result = Ancestors::default();

        // Inclusive bounds of the slots present; bail out early when empty.
        let lowest = match source.iter().map(|(slot, _)| *slot).min() {
            Some(slot) => slot,
            None => return result,
        };
        let highest = source
            .iter()
            .map(|(slot, _)| *slot)
            .max()
            .unwrap_or(lowest);

        // `max` is an exclusive bound, so the dense form cannot represent
        // `Slot::MAX` (`highest + 1` would overflow). Route that case to the
        // sparse map. `highest - lowest < SIZE` is the overflow-free
        // equivalent of `(highest + 1) - lowest <= SIZE`.
        let dense = highest != Slot::MAX && highest - lowest < ANCESTORS_HASH_MAP_SIZE;

        if dense {
            result.min = lowest;
            result.max = highest + 1;
            result.slots = vec![None; (highest - lowest + 1) as usize];
            for (slot, size) in source {
                let index = result.slot_index(&slot);
                // Only count a slot the first time it is seen.
                if result.slots[index].is_none() {
                    result.count += 1;
                }
                result.slots[index] = Some(size);
            }
        } else {
            // `min`/`max` keep their default of 0 in the sparse form.
            result.large_range_slots = source.into_iter().collect();
        }
        result
    }
}
impl From<&HashMap<Slot, usize>> for Ancestors {
    /// Builds an `Ancestors` from a borrowed `slot -> value` map.
    ///
    /// * An empty `source` yields `Ancestors::default()`.
    /// * When the slots span more than `ANCESTORS_HASH_MAP_SIZE` values, or the
    ///   highest slot is `Slot::MAX`, the map is copied into the sparse
    ///   `large_range_slots` field and `min`/`max` stay 0.
    /// * Otherwise the entries are stored densely in `slots`, indexed by
    ///   `slot - min`.
    fn from(source: &HashMap<Slot, usize>) -> Ancestors {
        let mut result = Ancestors::default();

        // Inclusive bounds of the slots present; bail out early when empty.
        let lowest = match source.keys().min() {
            Some(slot) => *slot,
            None => return result,
        };
        let highest = source.keys().max().copied().unwrap_or(lowest);

        // `max` is an exclusive bound, so the dense form cannot represent
        // `Slot::MAX` (`highest + 1` would overflow). Route that case to the
        // sparse map. `highest - lowest < SIZE` is the overflow-free
        // equivalent of `(highest + 1) - lowest <= SIZE`.
        let dense = highest != Slot::MAX && highest - lowest < ANCESTORS_HASH_MAP_SIZE;

        if dense {
            result.min = lowest;
            result.max = highest + 1;
            result.slots = vec![None; (highest - lowest + 1) as usize];
            for (slot, size) in source {
                let index = result.slot_index(slot);
                if result.slots[index].is_none() {
                    result.count += 1;
                }
                result.slots[index] = Some(*size);
            }
        } else {
            // `min`/`max` keep their default of 0 in the sparse form.
            result.large_range_slots = source.clone();
        }
        result
    }
}
impl From<&Ancestors> for HashMap<Slot, usize> {
    /// Copies every `slot -> value` entry of `source` into a new `HashMap`.
    fn from(source: &Ancestors) -> HashMap<Slot, usize> {
        let mut map = HashMap::with_capacity(source.len());
        for slot in source.keys() {
            // `keys()` only yields slots that are present, so `get` succeeds.
            let value = *source.get(&slot).unwrap();
            map.insert(slot, value);
        }
        map
    }
}
impl Ancestors {
    /// Returns every slot that is currently present.
    ///
    /// In the dense form the slots come out in ascending order; in the sparse
    /// form they come out in the `HashMap`'s iteration order.
    pub fn keys(&self) -> Vec<Slot> {
        if !self.large_range_slots.is_empty() {
            return self.large_range_slots.keys().copied().collect::<Vec<_>>();
        }
        self.slots
            .iter()
            .enumerate()
            .filter(|(_, entry)| entry.is_some())
            .map(|(offset, _)| offset as u64 + self.min)
            .collect::<Vec<_>>()
    }

    /// Returns a reference to the value stored for `slot`, if any.
    pub fn get(&self, slot: &Slot) -> Option<&usize> {
        if !self.large_range_slots.is_empty() {
            return self.large_range_slots.get(slot);
        }
        let index = self.dense_index(slot)?;
        self.slots[index].as_ref()
    }

    /// Removes `slot` if it is present; does nothing otherwise.
    pub fn remove(&mut self, slot: &Slot) {
        if !self.large_range_slots.is_empty() {
            self.large_range_slots.remove(slot);
            return;
        }
        if let Some(index) = self.dense_index(slot) {
            // `take` clears the entry; only adjust `count` if one was there.
            if self.slots[index].take().is_some() {
                self.count -= 1;
            }
        }
    }

    /// Returns `true` when `slot` is present.
    pub fn contains_key(&self, slot: &Slot) -> bool {
        if !self.large_range_slots.is_empty() {
            return self.large_range_slots.contains_key(slot);
        }
        match self.dense_index(slot) {
            Some(index) => self.slots[index].is_some(),
            None => false,
        }
    }

    /// Returns the number of slots that are present.
    pub fn len(&self) -> usize {
        if !self.large_range_slots.is_empty() {
            return self.large_range_slots.len();
        }
        self.count
    }

    /// Returns `true` when no slot is present.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Index into `slots` for `slot`, or `None` when `slot` lies outside the
    /// dense window `min..max`.
    fn dense_index(&self, slot: &Slot) -> Option<usize> {
        if *slot >= self.min && *slot < self.max {
            Some(self.slot_index(slot))
        } else {
            None
        }
    }

    /// Offset of `slot` from `min`; callers must ensure `slot >= min`.
    fn slot_index(&self, slot: &Slot) -> usize {
        (*slot - self.min) as usize
    }

    /// Width of the dense window, `max - min`.
    fn range(&self) -> Slot {
        self.max - self.min
    }
}
#[cfg(test)]
pub mod tests {
use super::*;
use crate::contains::Contains;
use log::*;
use solana_measure::measure::Measure;
use std::collections::HashSet;
impl std::iter::FromIterator<(Slot, usize)> for Ancestors {
    /// Gathers the `(slot, value)` pairs into a `Vec` and hands them to
    /// `Ancestors::from`.
    fn from_iter<I>(iter: I) -> Self
    where
        I: IntoIterator<Item = (Slot, usize)>,
    {
        let pairs: Vec<(Slot, usize)> = iter.into_iter().collect();
        Ancestors::from(pairs)
    }
}
impl Ancestors {
    /// Test-only insertion of `slot -> size`.
    ///
    /// In the sparse form this is a plain `HashMap` insert. In the dense form
    /// the `slots` window is first grown (upwards, or downwards by rebuilding
    /// the vector) so that it covers `slot`, then the entry is written.
    pub fn insert(&mut self, slot: Slot, size: usize) {
        if !self.large_range_slots.is_empty() {
            self.large_range_slots.insert(slot, size);
            return;
        }

        let outside_window = slot < self.min || slot >= self.max;
        if outside_window {
            let lower = std::cmp::min(self.min, slot);
            let upper = std::cmp::max(self.max, slot + 1);
            let span = (upper - lower) as usize;
            if lower == self.min {
                // Lower bound unchanged: extend the window upwards in place.
                self.max = slot + 1;
                self.slots.resize(span, None);
            } else {
                // Lower bound moved down: rebuild with the old entries
                // shifted up by the distance the lower bound moved.
                let mut shifted = vec![None; span];
                for (i, existing) in self.slots.iter().enumerate() {
                    shifted[i as usize + self.min as usize - slot as usize] = *existing;
                }
                self.slots = shifted;
                self.min = slot;
            }
        }

        // The window now covers `slot`; store the value and count new slots.
        let index = (slot - self.min) as usize;
        if self.slots[index].replace(size).is_none() {
            self.count += 1;
        }
    }
}
// Fills a HashMap and an Ancestors with the same slots (skipping multiples of
// `dead`), checks they agree, then times membership probes against both.
#[test]
fn test_ancestors_permutations() {
    solana_logger::setup();
    let min = 101_000;
    let width = 400_000;
    let dead = 19;

    let mut hash = HashMap::new();
    let mut ancestors = Ancestors::default();
    let mut slot = min;
    while hash.len() < width {
        slot += 1;
        if slot % dead != 0 {
            hash.insert(slot, 0);
            ancestors.insert(slot, 0);
        }
    }
    compare_ancestors(&hash, &ancestors);

    // Probe a little before the first slot and a little past the last one.
    let max = slot + 1;
    let probes = (min - 10)..max + 100;

    let mut time = Measure::start("");
    let mut count = 0;
    for probe in probes.clone() {
        if hash.contains(&probe) {
            count += 1;
        }
    }
    time.stop();

    let mut time2 = Measure::start("");
    let mut count2 = 0;
    for probe in probes {
        if ancestors.contains_key(&probe) {
            count2 += 1;
        }
    }
    time2.stop();

    info!(
        "{}ms, {}ms, {} ratio",
        time.as_ms(),
        time2.as_ms(),
        time.as_ns() / time2.as_ns()
    );
    assert_eq!(count, count2);
}
// Asserts that `ancestors` holds exactly the entries of `hashset`: same size,
// same value for every key, and the same answer for every slot from just
// below the smallest key to just above the largest one.
fn compare_ancestors(hashset: &HashMap<u64, usize>, ancestors: &Ancestors) {
    assert_eq!(hashset.len(), ancestors.len());
    assert_eq!(hashset.is_empty(), ancestors.is_empty());

    let mut lowest = u64::MAX;
    let mut highest = 0;
    for (key, expected) in hashset.iter() {
        lowest = std::cmp::min(lowest, *key);
        highest = std::cmp::max(highest, *key);
        assert_eq!(ancestors.get(key).unwrap(), expected);
    }

    for slot in lowest - 1..highest + 2 {
        assert_eq!(ancestors.get(&slot), hashset.get(&slot));
    }
}
// For widths 0..34, builds a HashSet and an Ancestors (via `From<Vec<_>>`)
// from the same slots (skipping multiples of `dead`), then checks that both
// report the same number of hits over a probe range that extends past the
// stored slots on either side. Timings are logged for comparison only.
#[test]
fn test_ancestors_smaller() {
    solana_logger::setup();
    for width in 0..34 {
        let mut hash = HashSet::new();
        let min = 1_010_000;
        let dead = 19;
        let mut slot = min;
        let mut slots = Vec::new();
        while hash.len() < width {
            slot += 1;
            if slot % dead == 0 {
                continue;
            }
            hash.insert(slot);
            slots.push((slot, 0));
        }
        let ancestors = Ancestors::from(slots);
        let max = slot + 1;
        let passes = 1;

        let mut time = Measure::start("");
        let mut count = 0;
        for _pass in 0..passes {
            for slot in (min - 10)..max + 100 {
                if hash.contains(&slot) {
                    count += 1;
                }
            }
        }
        time.stop();

        let mut time2 = Measure::start("");
        let mut count2 = 0;
        for _pass in 0..passes {
            for slot in (min - 10)..max + 100 {
                if ancestors.contains_key(&slot) {
                    count2 += 1;
                }
            }
        }
        time2.stop();

        info!(
            "{}, {}, {}",
            time.as_ms(),
            time2.as_ms(),
            // The probe loop is tiny, so the elapsed time can round down to
            // 0 ns; clamp the divisor so logging cannot panic the test.
            time.as_ns() / std::cmp::max(time2.as_ns(), 1)
        );
        assert_eq!(count, count2);
    }
}
}

View File

@ -40,7 +40,7 @@ use crate::{
},
accounts_db::{ErrorCounters, SnapshotStorages},
accounts_index::{AccountSecondaryIndexes, IndexKey},
ancestors::Ancestors,
ancestors::{Ancestors, AncestorsForSerialization},
blockhash_queue::BlockhashQueue,
builtins::{self, ActivationType},
epoch_stakes::{EpochStakes, NodeVoteAccounts},
@ -595,7 +595,7 @@ impl NonceRollbackInfo for NonceRollbackFull {
#[derive(Clone, Debug, Default)]
pub(crate) struct BankFieldsToDeserialize {
pub(crate) blockhash_queue: BlockhashQueue,
pub(crate) ancestors: Ancestors,
pub(crate) ancestors: AncestorsForSerialization,
pub(crate) hash: Hash,
pub(crate) parent_hash: Hash,
pub(crate) parent_slot: Slot,
@ -634,7 +634,7 @@ pub(crate) struct BankFieldsToDeserialize {
#[derive(Debug)]
pub(crate) struct BankFieldsToSerialize<'a> {
pub(crate) blockhash_queue: &'a RwLock<BlockhashQueue>,
pub(crate) ancestors: &'a Ancestors,
pub(crate) ancestors: &'a AncestorsForSerialization,
pub(crate) hash: Hash,
pub(crate) parent_hash: Hash,
pub(crate) parent_slot: Slot,
@ -980,7 +980,7 @@ impl Bank {
accounts_db_caching_enabled: bool,
) -> Self {
let mut bank = Self::default();
bank.ancestors.insert(bank.slot(), 0);
bank.ancestors = Ancestors::from(vec![(bank.slot(), 0)]);
bank.transaction_debug_keys = debug_keys;
bank.cluster_type = Some(genesis_config.cluster_type);
@ -1130,10 +1130,12 @@ impl Bank {
("block_height", new.block_height, i64)
);
new.ancestors.insert(new.slot(), 0);
let mut ancestors = Vec::with_capacity(1 + new.parents().len());
ancestors.push((new.slot(), 0));
new.parents().iter().enumerate().for_each(|(i, p)| {
new.ancestors.insert(p.slot(), i + 1);
ancestors.push((p.slot(), i + 1));
});
new.ancestors = Ancestors::from(ancestors);
// Following code may touch AccountsDb, requiring proper ancestors
let parent_epoch = parent.epoch();
@ -1166,7 +1168,7 @@ impl Bank {
pub(crate) fn proper_ancestors(&self) -> impl Iterator<Item = Slot> + '_ {
self.ancestors
.keys()
.copied()
.into_iter()
.filter(move |slot| *slot != self.slot)
}
@ -1216,7 +1218,7 @@ impl Bank {
rc: bank_rc,
src: new(),
blockhash_queue: RwLock::new(fields.blockhash_queue),
ancestors: fields.ancestors,
ancestors: Ancestors::from(&fields.ancestors),
hash: RwLock::new(fields.hash),
parent_hash: fields.parent_hash,
parent_slot: fields.parent_slot,
@ -1308,10 +1310,13 @@ impl Bank {
}
/// Return subset of bank fields representing serializable state
pub(crate) fn get_fields_to_serialize(&self) -> BankFieldsToSerialize {
pub(crate) fn get_fields_to_serialize<'a>(
&'a self,
ancestors: &'a HashMap<Slot, usize>,
) -> BankFieldsToSerialize<'a> {
BankFieldsToSerialize {
blockhash_queue: &self.blockhash_queue,
ancestors: &self.ancestors,
ancestors,
hash: *self.hash.read().unwrap(),
parent_hash: self.parent_hash,
parent_slot: self.parent_slot,
@ -1380,8 +1385,8 @@ impl Bank {
let mut roots = self.src.status_cache.read().unwrap().roots().clone();
let min = roots.iter().min().cloned().unwrap_or(0);
for ancestor in self.ancestors.keys() {
if *ancestor >= min {
roots.insert(*ancestor);
if ancestor >= min {
roots.insert(ancestor);
}
}
@ -4293,7 +4298,7 @@ impl Bank {
&self,
pubkey: &Pubkey,
) -> Option<(AccountSharedData, Slot)> {
let just_self: Ancestors = vec![(self.slot(), 0)].into_iter().collect();
let just_self: Ancestors = Ancestors::from(vec![(self.slot(), 0)]);
if let Some((account, slot)) = self.load_slow_with_fixed_root(&just_self, pubkey) {
if slot == self.slot() {
return Some((account, slot));
@ -10676,6 +10681,7 @@ pub(crate) mod tests {
#[test]
fn test_upgrade_epoch() {
solana_logger::setup();
let GenesisConfigInfo {
mut genesis_config,
mint_keypair,
@ -10683,7 +10689,6 @@ pub(crate) mod tests {
} = create_genesis_config_with_leader(500, &solana_sdk::pubkey::new_rand(), 0);
genesis_config.fee_rate_governor = FeeRateGovernor::new(1, 0);
let bank = Arc::new(Bank::new(&genesis_config));
// Jump to the test-only upgrade epoch -- see `Bank::upgrade_epoch()`
let bank = Bank::new_from_parent(
&bank,

View File

@ -255,7 +255,10 @@ where
account_indexes,
caching_enabled,
)?;
accounts_db.freeze_accounts(&bank_fields.ancestors, frozen_account_pubkeys);
accounts_db.freeze_accounts(
&Ancestors::from(&bank_fields.ancestors),
frozen_account_pubkeys,
);
let bank_rc = BankRc::new(Accounts::new_empty(accounts_db), bank_fields.slot);
let bank = Bank::new_from_fields(

View File

@ -3,6 +3,8 @@ use super::common::UnusedAccounts;
use solana_frozen_abi::abi_example::IgnoreAsHelper;
use {super::*, solana_measure::measure::Measure, std::cell::RefCell};
use crate::ancestors::AncestorsForSerialization;
type AccountsDbFields = super::AccountsDbFields<SerializableAccountStorageEntry>;
// Serializable version of AccountStorageEntry for snapshot format
@ -45,7 +47,7 @@ use std::sync::RwLock;
#[derive(Clone, Deserialize)]
pub(crate) struct DeserializableVersionedBank {
pub(crate) blockhash_queue: BlockhashQueue,
pub(crate) ancestors: Ancestors,
pub(crate) ancestors: AncestorsForSerialization,
pub(crate) hash: Hash,
pub(crate) parent_hash: Hash,
pub(crate) parent_slot: Slot,
@ -122,7 +124,7 @@ impl From<DeserializableVersionedBank> for BankFieldsToDeserialize {
#[derive(Serialize)]
pub(crate) struct SerializableVersionedBank<'a> {
pub(crate) blockhash_queue: &'a RwLock<BlockhashQueue>,
pub(crate) ancestors: &'a Ancestors,
pub(crate) ancestors: &'a AncestorsForSerialization,
pub(crate) hash: Hash,
pub(crate) parent_hash: Hash,
pub(crate) parent_slot: Slot,
@ -163,7 +165,7 @@ impl<'a> From<crate::bank::BankFieldsToSerialize<'a>> for SerializableVersionedB
}
Self {
blockhash_queue: rhs.blockhash_queue,
ancestors: rhs.ancestors,
ancestors: &rhs.ancestors,
hash: rhs.hash,
parent_hash: rhs.parent_hash,
parent_slot: rhs.parent_slot,
@ -213,8 +215,10 @@ impl<'a> TypeContext<'a> for Context {
where
Self: std::marker::Sized,
{
let ancestors = HashMap::from(&serializable_bank.bank.ancestors);
let fields = serializable_bank.bank.get_fields_to_serialize(&ancestors);
(
SerializableVersionedBank::from(serializable_bank.bank.get_fields_to_serialize()),
SerializableVersionedBank::from(fields),
SerializableAccountsDb::<'a, Self> {
accounts_db: &*serializable_bank.bank.rc.accounts.accounts_db,
slot: serializable_bank.bank.rc.slot,

View File

@ -76,7 +76,7 @@ fn test_bad_bank_hash() {
let db = AccountsDb::new(Vec::new(), &ClusterType::Development);
let some_slot: Slot = 0;
let ancestors: Ancestors = [(some_slot, 0)].iter().copied().collect();
let ancestors = Ancestors::from([(some_slot, 0usize)].iter().copied().collect::<Vec<_>>());
let max_accounts = 200;
let mut accounts_keys: Vec<_> = (0..max_accounts)