use slots returned from get_snapshot_storages to sort (#17638)

* use slots returned from get_snapshot_storages to sort

* add tests
This commit is contained in:
Jeff Washington (jwash) 2021-06-02 18:24:55 -05:00 committed by GitHub
parent 4bd32d891f
commit 9388aaca15
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 126 additions and 45 deletions

View File

@ -4378,17 +4378,22 @@ impl AccountsDb {
check_hash: bool, check_hash: bool,
) -> Result<(Hash, u64), BankHashVerificationError> { ) -> Result<(Hash, u64), BankHashVerificationError> {
if !use_index { if !use_index {
let mut time = Measure::start("collect"); let mut collect_time = Measure::start("collect");
let combined_maps = self.get_snapshot_storages(slot, Some(ancestors)).0; let (combined_maps, slots) = self.get_snapshot_storages(slot, Some(ancestors));
time.stop(); collect_time.stop();
let mut sort_time = Measure::start("sort_storages");
let storages = SortedStorages::new_with_slots(&combined_maps, &slots);
sort_time.stop();
let timings = HashStats { let timings = HashStats {
collect_snapshots_us: time.as_us(), collect_snapshots_us: collect_time.as_us(),
storage_sort_us: sort_time.as_us(),
..HashStats::default() ..HashStats::default()
}; };
Self::calculate_accounts_hash_without_index( Self::calculate_accounts_hash_without_index(
&combined_maps, &storages,
Some(&self.thread_pool_clean), Some(&self.thread_pool_clean),
timings, timings,
check_hash, check_hash,
@ -4437,7 +4442,7 @@ impl AccountsDb {
let bin_calculator = PubkeyBinCalculator16::new(bins); let bin_calculator = PubkeyBinCalculator16::new(bins);
assert!(bin_range.start < bins && bin_range.end <= bins && bin_range.start < bin_range.end); assert!(bin_range.start < bins && bin_range.end <= bins && bin_range.start < bin_range.end);
let mut time = Measure::start("scan all accounts"); let mut time = Measure::start("scan all accounts");
stats.num_snapshot_storage = storage.len(); stats.num_snapshot_storage = storage.slot_count();
let mismatch_found = AtomicU64::new(0); let mismatch_found = AtomicU64::new(0);
let result: Vec<Vec<Vec<CalculateHashIntermediate>>> = Self::scan_account_storage_no_bank( let result: Vec<Vec<Vec<CalculateHashIntermediate>>> = Self::scan_account_storage_no_bank(
@ -4504,7 +4509,7 @@ impl AccountsDb {
// modeled after get_accounts_delta_hash // modeled after get_accounts_delta_hash
// intended to be faster than calculate_accounts_hash // intended to be faster than calculate_accounts_hash
pub fn calculate_accounts_hash_without_index( pub fn calculate_accounts_hash_without_index(
storages: &[SnapshotStorage], storages: &SortedStorages,
thread_pool: Option<&ThreadPool>, thread_pool: Option<&ThreadPool>,
mut stats: HashStats, mut stats: HashStats,
check_hash: bool, check_hash: bool,
@ -4528,11 +4533,6 @@ impl AccountsDb {
let mut previous_pass = PreviousPass::default(); let mut previous_pass = PreviousPass::default();
let mut final_result = (Hash::default(), 0); let mut final_result = (Hash::default(), 0);
let mut sort_time = Measure::start("sort_storages");
let storages = SortedStorages::new(storages);
sort_time.stop();
stats.storage_sort_us = sort_time.as_us();
for pass in 0..num_scan_passes { for pass in 0..num_scan_passes {
let bounds = Range { let bounds = Range {
start: pass * bins_per_pass, start: pass * bins_per_pass,
@ -6182,7 +6182,7 @@ pub mod tests {
let (storages, _size, _slot_expected) = sample_storage(); let (storages, _size, _slot_expected) = sample_storage();
let result = AccountsDb::calculate_accounts_hash_without_index( let result = AccountsDb::calculate_accounts_hash_without_index(
&storages, &get_storage_refs(&storages),
None, None,
HashStats::default(), HashStats::default(),
false, false,
@ -6203,7 +6203,7 @@ pub mod tests {
}); });
let sum = raw_expected.iter().map(|item| item.lamports).sum(); let sum = raw_expected.iter().map(|item| item.lamports).sum();
let result = AccountsDb::calculate_accounts_hash_without_index( let result = AccountsDb::calculate_accounts_hash_without_index(
&storages, &get_storage_refs(&storages),
None, None,
HashStats::default(), HashStats::default(),
false, false,

View File

@ -11,6 +11,7 @@ use {
snapshot_package::{ snapshot_package::{
AccountsPackage, AccountsPackagePre, AccountsPackageSendError, AccountsPackageSender, AccountsPackage, AccountsPackagePre, AccountsPackageSendError, AccountsPackageSender,
}, },
sorted_storages::SortedStorages,
}, },
bincode::{config::Options, serialize_into}, bincode::{config::Options, serialize_into},
bzip2::bufread::BzDecoder, bzip2::bufread::BzDecoder,
@ -999,8 +1000,9 @@ pub fn process_accounts_package_pre(
let hash = accounts_package.hash; // temporarily remaining here let hash = accounts_package.hash; // temporarily remaining here
if let Some(expected_hash) = accounts_package.hash_for_testing { if let Some(expected_hash) = accounts_package.hash_for_testing {
let sorted_storages = SortedStorages::new(&accounts_package.storages);
let (hash, lamports) = AccountsDb::calculate_accounts_hash_without_index( let (hash, lamports) = AccountsDb::calculate_accounts_hash_without_index(
&accounts_package.storages, &sorted_storages,
thread_pool, thread_pool,
crate::accounts_hash::HashStats::default(), crate::accounts_hash::HashStats::default(),
false, false,

View File

@ -7,7 +7,7 @@ use std::ops::Range;
pub struct SortedStorages<'a> { pub struct SortedStorages<'a> {
range: Range<Slot>, range: Range<Slot>,
storages: Vec<Option<&'a SnapshotStorage>>, storages: Vec<Option<&'a SnapshotStorage>>,
count: usize, slot_count: usize,
} }
impl<'a> SortedStorages<'a> { impl<'a> SortedStorages<'a> {
@ -28,35 +28,45 @@ impl<'a> SortedStorages<'a> {
&self.range &self.range
} }
pub fn len(&self) -> usize { pub fn slot_count(&self) -> usize {
self.count self.slot_count
}
pub fn is_empty(&self) -> bool {
self.len() == 0
} }
// assumptions:
// 1. each SnapshotStorage.!is_empty()
// 2. SnapshotStorage.first().unwrap().get_slot() is unique from all other SnapshotStorage items.
pub fn new(source: &'a [SnapshotStorage]) -> Self { pub fn new(source: &'a [SnapshotStorage]) -> Self {
let mut min = Slot::MAX;
let mut max = Slot::MIN;
let mut count = 0;
let mut time = Measure::start("get slot");
let slots = source let slots = source
.iter() .iter()
.map(|storages| { .map(|storages| {
count += storages.len(); let first = storages.first();
if !storages.is_empty() { assert!(first.is_some(), "SnapshotStorage.is_empty()");
storages.first().map(|storage| { let storage = first.unwrap();
let slot = storage.slot(); storage.slot() // this must be unique. Will be enforced in new_with_slots
min = std::cmp::min(slot, min);
max = std::cmp::max(slot + 1, max);
slot
})
} else {
None
}
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
Self::new_with_slots(source, &slots)
}
// source[i] is in slot slots[i]
// assumptions:
// 1. slots vector contains unique slot #s.
// 2. slots and source are the same len
pub fn new_with_slots(source: &'a [SnapshotStorage], slots: &[Slot]) -> Self {
assert_eq!(
source.len(),
slots.len(),
"source and slots are different lengths"
);
let mut min = Slot::MAX;
let mut max = Slot::MIN;
let slot_count = source.len();
let mut time = Measure::start("get slot");
slots.iter().for_each(|slot| {
let slot = *slot;
min = std::cmp::min(slot, min);
max = std::cmp::max(slot + 1, max);
});
time.stop(); time.stop();
let mut time2 = Measure::start("sort"); let mut time2 = Measure::start("sort");
let range; let range;
@ -75,11 +85,9 @@ impl<'a> SortedStorages<'a> {
.iter() .iter()
.zip(slots) .zip(slots)
.for_each(|(original_storages, slot)| { .for_each(|(original_storages, slot)| {
if let Some(slot) = slot { let index = (slot - min) as usize;
let index = (slot - min) as usize; assert!(storages[index].is_none(), "slots are not unique"); // we should not encounter the same slot twice
assert!(storages[index].is_none()); storages[index] = Some(original_storages);
storages[index] = Some(original_storages);
}
}); });
} }
time2.stop(); time2.stop();
@ -87,7 +95,7 @@ impl<'a> SortedStorages<'a> {
Self { Self {
range, range,
storages, storages,
count, slot_count,
} }
} }
} }
@ -102,7 +110,7 @@ pub mod tests {
start: min, start: min,
end: min + len as Slot, end: min + len as Slot,
}; };
let count = source.len(); let slot_count = source.len();
for (storage, slot) in source { for (storage, slot) in source {
storages[*slot as usize] = Some(*storage); storages[*slot as usize] = Some(*storage);
} }
@ -110,8 +118,79 @@ pub mod tests {
Self { Self {
range, range,
storages, storages,
count, slot_count,
} }
} }
} }
#[test]
#[should_panic(expected = "SnapshotStorage.is_empty()")]
fn test_sorted_storages_empty() {
SortedStorages::new(&[Vec::new()]);
}
#[test]
#[should_panic(expected = "slots are not unique")]
fn test_sorted_storages_duplicate_slots() {
SortedStorages::new_with_slots(&[Vec::new(), Vec::new()], &[0, 0]);
}
#[test]
#[should_panic(expected = "source and slots are different lengths")]
fn test_sorted_storages_mismatched_lengths() {
SortedStorages::new_with_slots(&[Vec::new()], &[0, 0]);
}
#[test]
fn test_sorted_storages_none() {
let result = SortedStorages::new_with_slots(&[], &[]);
assert_eq!(result.range, Range::default());
assert_eq!(result.slot_count, 0);
assert_eq!(result.storages.len(), 0);
assert!(result.get(0).is_none());
}
#[test]
fn test_sorted_storages_1() {
let vec = vec![];
let vec_check = vec.clone();
let slot = 4;
let vecs = [vec];
let result = SortedStorages::new_with_slots(&vecs, &[slot]);
assert_eq!(
result.range,
Range {
start: slot,
end: slot + 1
}
);
assert_eq!(result.slot_count, 1);
assert_eq!(result.storages.len(), 1);
assert_eq!(result.get(slot).unwrap().len(), vec_check.len());
}
#[test]
fn test_sorted_storages_2() {
let vec = vec![];
let vec_check = vec.clone();
let slots = [4, 7];
let vecs = [vec.clone(), vec];
let result = SortedStorages::new_with_slots(&vecs, &slots);
assert_eq!(
result.range,
Range {
start: slots[0],
end: slots[1] + 1,
}
);
assert_eq!(result.slot_count, 2);
assert_eq!(result.storages.len() as Slot, slots[1] - slots[0] + 1);
assert!(result.get(0).is_none());
assert!(result.get(3).is_none());
assert!(result.get(5).is_none());
assert!(result.get(6).is_none());
assert!(result.get(8).is_none());
assert_eq!(result.get(slots[0]).unwrap().len(), vec_check.len());
assert_eq!(result.get(slots[1]).unwrap().len(), vec_check.len());
}
} }