From 7fee0bc69cb3af4870e3fdd29ce5d762ab1bd7c8 Mon Sep 17 00:00:00 2001
From: "Jeff Washington (jwash)" <75863576+jeffwashington@users.noreply.github.com>
Date: Tue, 16 Feb 2021 18:29:50 -0600
Subject: [PATCH] move hashing functionality to accounts_hash (#15353)

---
 merkle-root-bench/src/main.rs |    4 +-
 runtime/src/accounts_db.rs    | 1553 +-------------------------------
 runtime/src/accounts_hash.rs  | 1559 +++++++++++++++++++++++++++++++++
 runtime/src/lib.rs            |    1 +
 4 files changed, 1579 insertions(+), 1538 deletions(-)
 create mode 100644 runtime/src/accounts_hash.rs

diff --git a/merkle-root-bench/src/main.rs b/merkle-root-bench/src/main.rs
index 1c2db251a2..76ec3388dc 100644
--- a/merkle-root-bench/src/main.rs
+++ b/merkle-root-bench/src/main.rs
@@ -1,7 +1,7 @@
 extern crate log;
 use clap::{crate_description, crate_name, value_t, App, Arg};
 use solana_measure::measure::Measure;
-use solana_runtime::accounts_db::AccountsDB;
+use solana_runtime::accounts_hash::AccountsHash;
 use solana_sdk::{hash::Hash, pubkey::Pubkey};

 fn main() {
@@ -36,7 +36,7 @@ fn main() {
         let hashes = hashes.clone(); // done outside timing
         let mut time = Measure::start("compute_merkle_root");
         let fanout = 16;
-        AccountsDB::compute_merkle_root(hashes, fanout);
+        AccountsHash::compute_merkle_root(hashes, fanout);
         time.stop();
         time.as_us()
     })
diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs
index b08b418284..2e22f677d1 100644
--- a/runtime/src/accounts_db.rs
+++ b/runtime/src/accounts_db.rs
@@ -20,6 +20,7 @@ use crate::{
     accounts_cache::{AccountsCache, CachedAccount, SlotCache},
+    accounts_hash::{AccountsHash, CalculateHashIntermediate, HashStats},
     accounts_index::{
         AccountIndex, AccountsIndex, AccountsIndexRootsStats, Ancestors, IndexKey, IsCached,
         SlotList, SlotSlice, ZeroLamport,
@@ -51,7 +52,7 @@ use std::{
     borrow::Cow,
     boxed::Box,
     collections::{hash_map::Entry, BTreeMap, BTreeSet, HashMap, HashSet},
-    convert::{TryFrom, TryInto},
+    convert::TryFrom,
     io::{Error as IOError, Result as IOResult},
     ops::RangeBounds,
     path::{Path, PathBuf},
@@ -92,8 +93,6 @@ const CACHE_VIRTUAL_WRITE_VERSION: u64 = 0;
 const CACHE_VIRTUAL_OFFSET: usize = 0;
 const CACHE_VIRTUAL_STORED_SIZE: usize = 0;

-const MERKLE_FANOUT: usize = 16;
-
 type DashMapVersionHash = DashMap<Pubkey, (u64, Hash)>;

 lazy_static! {
@@ -167,179 +166,6 @@ type ReclaimResult = (AccountSlots, AppendVecOffsets);
 type StorageFinder<'a> = Box<dyn Fn(Slot, usize) -> Arc<AccountStorageEntry> + 'a>;
 type ShrinkCandidates = HashMap<Slot, HashMap<AppendVecId, Arc<AccountStorageEntry>>>;

-const ZERO_RAW_LAMPORTS_SENTINEL: u64 = std::u64::MAX;
-
-#[derive(Debug, Default)]
-pub struct HashStats {
-    pub scan_time_total_us: u64,
-    pub zeros_time_total_us: u64,
-    pub hash_time_total_us: u64,
-    pub sort_time_total_us: u64,
-    pub flatten_time_total_us: u64,
-    pub pre_scan_flatten_time_total_us: u64,
-    pub hash_total: usize,
-    pub unreduced_entries: usize,
-    pub num_snapshot_storage: usize,
-}
-impl HashStats {
-    fn log(&mut self) {
-        let total_time_us = self.scan_time_total_us
-            + self.zeros_time_total_us
-            + self.hash_time_total_us
-            + self.sort_time_total_us
-            + self.flatten_time_total_us
-            + self.pre_scan_flatten_time_total_us;
-        datapoint_info!(
-            "calculate_accounts_hash_without_index",
-            ("accounts_scan", self.scan_time_total_us, i64),
-            ("eliminate_zeros", self.zeros_time_total_us, i64),
-            ("hash", self.hash_time_total_us, i64),
-            ("sort", self.sort_time_total_us, i64),
-            ("hash_total", self.hash_total, i64),
-            ("flatten", self.flatten_time_total_us, i64),
-            ("unreduced_entries", self.unreduced_entries as i64, i64),
-            (
-                "num_snapshot_storage",
-                self.num_snapshot_storage as i64,
-                i64
-            ),
-            (
-                "pre_scan_flatten",
-                self.pre_scan_flatten_time_total_us as i64,
-                i64
-            ),
-            ("total", total_time_us as i64, i64),
-        );
-    }
-}
-
-#[derive(Default, Debug, PartialEq, Clone)]
-struct CalculateHashIntermediate {
-    pub version: u64,
-    pub hash: Hash,
-    pub lamports: u64,
-    pub slot: Slot,
-    pub pubkey: Pubkey,
-}
-
-impl CalculateHashIntermediate {
-    pub fn new(version: u64, hash: Hash, lamports: u64, slot: Slot, pubkey: Pubkey) -> Self {
-        Self {
-            version,
-            hash,
-            lamports,
-            slot,
-            pubkey,
-        }
-    }
-}
-
-#[derive(Default, Debug)]
-struct CumulativeOffset {
-    pub index: Vec<usize>,
-    pub start_offset: usize,
-}
-
-impl CumulativeOffset {
-    pub fn new(index: Vec<usize>, start_offset: usize) -> CumulativeOffset {
-        Self {
-            index,
-            start_offset,
-        }
-    }
-}
-
-// Allow retrieving &[start..end] from a logical src: Vec<T>, where src is really Vec<Vec<T>> (or later Vec<Vec<Vec<T>>>).
-// This model prevents callers from having to flatten, which saves both working memory and time.
-#[derive(Default, Debug)]
-struct CumulativeOffsets {
-    cumulative_offsets: Vec<CumulativeOffset>,
-    total_count: usize,
-}
-
-impl CumulativeOffsets {
-    pub fn from_raw<T>(raw: &[Vec<T>]) -> CumulativeOffsets {
-        let mut total_count: usize = 0;
-        let cumulative_offsets: Vec<_> = raw
-            .iter()
-            .enumerate()
-            .filter_map(|(i, v)| {
-                let len = v.len();
-                if len > 0 {
-                    let result = CumulativeOffset::new(vec![i], total_count);
-                    total_count += len;
-                    Some(result)
-                } else {
-                    None
-                }
-            })
-            .collect();
-
-        Self {
-            cumulative_offsets,
-            total_count,
-        }
-    }
-
-    pub fn from_raw_2d<T>(raw: &[Vec<Vec<T>>]) -> CumulativeOffsets {
-        let mut total_count: usize = 0;
-        let mut cumulative_offsets = Vec::with_capacity(0);
-        for (i, v_outer) in raw.iter().enumerate() {
-            for (j, v) in v_outer.iter().enumerate() {
-                let len = v.len();
-                if len > 0 {
-                    if cumulative_offsets.is_empty() {
-                        // the first inner, non-empty vector we find gives us an approximate rectangular shape
-                        cumulative_offsets = Vec::with_capacity(raw.len() * v_outer.len());
-                    }
-                    cumulative_offsets.push(CumulativeOffset::new(vec![i, j], total_count));
-                    total_count += len;
-                }
-            }
-        }
-
-        Self {
-            cumulative_offsets,
-            total_count,
-        }
-    }
-
-    // return the biggest slice possible that starts at 'start'
-    pub fn get_slice<'a, T>(&self, raw: &'a [Vec<T>], start: usize) -> &'a [T] {
-        // This could be binary search, but we expect a small number of vectors.
-        for i in (0..self.cumulative_offsets.len()).into_iter().rev() {
-            let index = &self.cumulative_offsets[i];
-            if start >= index.start_offset {
-                let start = start - index.start_offset;
-                const DIMENSION: usize = 0;
-                return &raw[index.index[DIMENSION]][start..];
-            }
-        }
-        panic!(
-            "get_slice didn't find: {}, len: {}",
-            start, self.total_count
-        );
-    }
-
-    // return the biggest slice possible that starts at 'start'
-    pub fn get_slice_2d<'a, T>(&self, raw: &'a [Vec<Vec<T>>], start: usize) -> &'a [T] {
-        // This could be binary search, but we expect a small number of vectors.
-        for i in (0..self.cumulative_offsets.len()).into_iter().rev() {
-            let index = &self.cumulative_offsets[i];
-            if start >= index.start_offset {
-                let start = start - index.start_offset;
-                const DIMENSION_0: usize = 0;
-                const DIMENSION_1: usize = 1;
-                return &raw[index.index[DIMENSION_0]][index.index[DIMENSION_1]][start..];
-            }
-        }
-        panic!(
-            "get_slice didn't find: {}, len: {}",
-            start, self.total_count
-        );
-    }
-}
-
 trait Versioned {
     fn version(&self) -> u64;
 }
@@ -3561,186 +3387,13 @@ impl AccountsDB {
         );
     }

-    pub fn compute_merkle_root(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
-        Self::compute_merkle_root_loop(hashes, fanout, |t| t.1)
-    }
-
-    // this function avoids an infinite recursion compiler error
-    fn compute_merkle_root_recurse(hashes: Vec<Hash>, fanout: usize) -> Hash {
-        Self::compute_merkle_root_loop(hashes, fanout, |t: &Hash| *t)
-    }
-
-    fn div_ceil(x: usize, y: usize) -> usize {
-        let mut result = x / y;
-        if x % y != 0 {
-            result += 1;
-        }
-        result
-    }
-
-    // For the first iteration, there could be more items in the tuple than just hash and lamports.
-    // Using extractor allows us to avoid an unnecessary array copy on the first iteration.
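The `CumulativeOffsets` machinery being moved here indexes several ragged vectors as one logical array, so callers can hash across them without a flattening copy. A minimal, self-contained sketch of the same model; the `Offset`/`Offsets` names and the `u32` payload are illustrative only, not part of this patch:

```rust
// Sketch of the CumulativeOffsets model: treat a Vec<Vec<T>> as one flat Vec<T>.
struct Offset {
    outer: usize,        // which inner vector owns this range
    start_offset: usize, // logical index of that vector's first element
}

struct Offsets {
    offsets: Vec<Offset>,
    total_count: usize,
}

impl Offsets {
    fn from_raw<T>(raw: &[Vec<T>]) -> Self {
        let mut total_count = 0;
        let mut offsets = Vec::new();
        for (i, v) in raw.iter().enumerate() {
            // only non-empty inner vectors get an entry
            if !v.is_empty() {
                offsets.push(Offset { outer: i, start_offset: total_count });
                total_count += v.len();
            }
        }
        Self { offsets, total_count }
    }

    // Largest slice beginning at logical index `start`.
    fn get_slice<'a, T>(&self, raw: &'a [Vec<T>], start: usize) -> &'a [T] {
        // start_offsets ascend, so the first entry (scanning from the end)
        // with start_offset <= start owns the logical index.
        for o in self.offsets.iter().rev() {
            if start >= o.start_offset {
                return &raw[o.outer][start - o.start_offset..];
            }
        }
        panic!("index {} out of {}", start, self.total_count);
    }
}

fn main() {
    let raw = vec![vec![1u32, 2], vec![], vec![3, 4, 5]];
    let offsets = Offsets::from_raw(&raw);
    assert_eq!(offsets.total_count, 5);
    assert_eq!(offsets.get_slice(&raw, 0), &[1, 2][..]);
    // logical index 3 lands inside the last inner vector
    assert_eq!(offsets.get_slice(&raw, 3), &[4, 5][..]);
    println!("ok");
}
```

Only non-empty inner vectors get an offset entry, which is why `test_accountsdb_cumulative_offsets1_d` below expects `cumulative_offsets.len()` of 2 for inputs padded with empty vectors.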
-    fn compute_merkle_root_loop<T, F>(hashes: Vec<T>, fanout: usize, extractor: F) -> Hash
-    where
-        F: Fn(&T) -> Hash + std::marker::Sync,
-        T: std::marker::Sync,
-    {
-        if hashes.is_empty() {
-            return Hasher::default().result();
-        }
-
-        let mut time = Measure::start("time");
-
-        let total_hashes = hashes.len();
-        let chunks = Self::div_ceil(total_hashes, fanout);
-
-        let result: Vec<_> = (0..chunks)
-            .into_par_iter()
-            .map(|i| {
-                let start_index = i * fanout;
-                let end_index = std::cmp::min(start_index + fanout, total_hashes);
-
-                let mut hasher = Hasher::default();
-                for item in hashes.iter().take(end_index).skip(start_index) {
-                    let h = extractor(&item);
-                    hasher.hash(h.as_ref());
-                }
-
-                hasher.result()
-            })
-            .collect();
-        time.stop();
-        debug!("hashing {} {}", total_hashes, time);
-
-        if result.len() == 1 {
-            result[0]
-        } else {
-            Self::compute_merkle_root_recurse(result, fanout)
-        }
-    }
-
-    // This function is designed to allow hashes to be located in multiple, perhaps multiply deep vecs.
-    // The caller provides a function to return a slice from the source data.
-    fn compute_merkle_root_from_slices<'a, F>(
-        total_hashes: usize,
-        fanout: usize,
-        max_levels_per_pass: Option<usize>,
-        get_hashes: F,
-    ) -> Hash
-    where
-        F: Fn(usize) -> &'a [Hash] + std::marker::Sync,
-    {
-        if total_hashes == 0 {
-            return Hasher::default().result();
-        }
-
-        let mut time = Measure::start("time");
-
-        const THREE_LEVEL_OPTIMIZATION: usize = 3; // this '3' is dependent on the code structure below where we manually unroll
-        let target = fanout.pow(THREE_LEVEL_OPTIMIZATION as u32);
-
-        // Only use the 3 level optimization if we have at least 4 levels of data.
-        // Otherwise, we'll be serializing a parallel operation.
-        let threshold = target * fanout;
-        let three_level = max_levels_per_pass.unwrap_or(usize::MAX) >= THREE_LEVEL_OPTIMIZATION
-            && total_hashes >= threshold;
-        let num_hashes_per_chunk = if three_level { target } else { fanout };
-
-        let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);
-
-        // initial fetch - could return entire slice
-        let data: &[Hash] = get_hashes(0);
-        let data_len = data.len();
-
-        let result: Vec<_> = (0..chunks)
-            .into_par_iter()
-            .map(|i| {
-                let start_index = i * num_hashes_per_chunk;
-                let end_index = std::cmp::min(start_index + num_hashes_per_chunk, total_hashes);
-
-                let mut hasher = Hasher::default();
-                let mut data_index = start_index;
-                let mut data = data;
-                let mut data_len = data_len;
-
-                if !three_level {
-                    // 1 group of fanout
-                    // The result of this loop is a single hash value from fanout input hashes.
-                    for i in start_index..end_index {
-                        if data_index >= data_len {
-                            // fetch next slice
-                            data = get_hashes(i);
-                            data_len = data.len();
-                            data_index = 0;
-                        }
-                        hasher.hash(data[data_index].as_ref());
-                        data_index += 1;
-                    }
-                } else {
-                    // hash 3 levels of fanout simultaneously.
-                    // The result of this loop is a single hash value from fanout^3 input hashes.
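-                    // (e.g., with fanout = 16, each parallel work item reduces
-                    // 16^3 = 4,096 input hashes to a single hash in one pass.)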
-                    let mut i = start_index;
-                    while i < end_index {
-                        let mut hasher_j = Hasher::default();
-                        for _j in 0..fanout {
-                            let mut hasher_k = Hasher::default();
-                            let end = std::cmp::min(end_index - i, fanout);
-                            for _k in 0..end {
-                                if data_index >= data_len {
-                                    // fetch next slice
-                                    data = get_hashes(i);
-                                    data_len = data.len();
-                                    data_index = 0;
-                                }
-                                hasher_k.hash(data[data_index].as_ref());
-                                data_index += 1;
-                                i += 1;
-                            }
-                            hasher_j.hash(hasher_k.result().as_ref());
-                            if i >= end_index {
-                                break;
-                            }
-                        }
-                        hasher.hash(hasher_j.result().as_ref());
-                    }
-                }
-
-                hasher.result()
-            })
-            .collect();
-        time.stop();
-        debug!("hashing {} {}", total_hashes, time);
-
-        if result.len() == 1 {
-            result[0]
-        } else {
-            Self::compute_merkle_root_recurse(result, fanout)
-        }
-    }
-
-    fn accumulate_account_hashes(mut hashes: Vec<(Pubkey, Hash)>) -> Hash {
-        Self::sort_hashes_by_pubkey(&mut hashes);
-
-        Self::compute_merkle_root_loop(hashes, MERKLE_FANOUT, |i| i.1)
-    }
-
-    fn sort_hashes_by_pubkey(hashes: &mut Vec<(Pubkey, Hash)>) {
-        hashes.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
-    }
-
-    pub fn checked_cast_for_capitalization(balance: u128) -> u64 {
-        balance
-            .try_into()
-            .expect("overflow is detected while summing capitalization")
-    }
-
     pub fn checked_iterative_sum_for_capitalization(total_cap: u64, new_cap: u64) -> u64 {
         let new_total = total_cap as u128 + new_cap as u128;
-        Self::checked_cast_for_capitalization(new_total)
+        AccountsHash::checked_cast_for_capitalization(new_total)
     }

     pub fn checked_sum_for_capitalization<T: Iterator<Item = u64>>(balances: T) -> u64 {
-        Self::checked_cast_for_capitalization(balances.map(|b| b as u128).sum::<u128>())
+        AccountsHash::checked_cast_for_capitalization(balances.map(|b| b as u128).sum::<u128>())
     }

     pub fn account_balance_for_capitalization(
@@ -3786,7 +3439,7 @@ impl AccountsDB {
         let mismatch_found = AtomicU64::new(0);
         // Pick a chunk size big enough to allow us to produce output vectors that are smaller than the overall size.
         // We'll also accumulate the lamports within each chunk and fewer chunks results in less contention to accumulate the sum.
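         // (Here that chunk size works out to MERKLE_FANOUT^4 = 16^4 = 65,536 keys per chunk.)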
-        let chunks = MERKLE_FANOUT.pow(4);
+        let chunks = crate::accounts_hash::MERKLE_FANOUT.pow(4);
         let total_lamports = Mutex::<u64>::new(0);
         let hashes: Vec<Vec<Hash>> = {
             self.thread_pool_clean.install(|| {
@@ -3844,7 +3497,8 @@ impl AccountsDB {
                     })
                     .collect();
                     let mut total = total_lamports.lock().unwrap();
-                    *total = Self::checked_cast_for_capitalization(*total as u128 + sum);
+                    *total =
+                        AccountsHash::checked_cast_for_capitalization(*total as u128 + sum);
                     result
                 })
                 .collect()
@@ -3858,18 +3512,11 @@ impl AccountsDB {
             return Err(MismatchedAccountHash);
         }

-        let cumulative_offsets = CumulativeOffsets::from_raw(&hashes);
-
         scan.stop();
-        let hash_total = cumulative_offsets.total_count;
         let total_lamports = *total_lamports.lock().unwrap();
+
         let mut hash_time = Measure::start("hash");
-        let accumulated_hash = Self::compute_merkle_root_from_slices(
-            hash_total,
-            MERKLE_FANOUT,
-            None,
-            |start: usize| cumulative_offsets.get_slice(&hashes, start),
-        );
+        let (accumulated_hash, hash_total) = AccountsHash::calculate_hash(hashes);
         hash_time.stop();
         datapoint_info!(
             "update_accounts_hash",
@@ -3921,7 +3568,7 @@ impl AccountsDB {
     /// Scan through all the account storage in parallel
     fn scan_account_storage_no_bank<F, B>(
         snapshot_storages: &[SnapshotStorage],
-        stats: &mut HashStats,
+        stats: &mut crate::accounts_hash::HashStats,
         scan_func: F,
     ) -> Vec<B>
     where
@@ -3956,243 +3603,6 @@ impl AccountsDB {
             .collect()
     }

-    fn flatten_hash_intermediate<T>(
-        data_sections_by_pubkey: Vec<Vec<Vec<T>>>,
-        stats: &mut HashStats,
-    ) -> Vec<Vec<T>>
-    where
-        T: Clone,
-    {
-        // flatten this:
-        // vec: just a level of hierarchy
-        //   vec: 1 vec per PUBKEY_BINS_FOR_CALCULATING_HASHES
-        //     vec: Intermediate data whose pubkey belongs in this division
-        // into this:
-        // vec: 1 vec per PUBKEY_BINS_FOR_CALCULATING_HASHES
-        //   vec: Intermediate data whose pubkey belongs in this division
-        let mut flatten_time = Measure::start("flatten");
-        let mut data_by_pubkey: Vec<Vec<T>> = vec![];
-        let mut raw_len = 0;
-        for mut outer in data_sections_by_pubkey {
-            let outer_len = outer.len();
-            for pubkey_index in 0..outer_len {
-                let this_len = outer[pubkey_index].len();
-                if this_len == 0 {
-                    continue;
-                }
-                raw_len += this_len;
-                let mut data = vec![];
-                std::mem::swap(&mut data, &mut outer[pubkey_index]);
-
-                if data_by_pubkey.len() <= pubkey_index {
-                    data_by_pubkey.extend(vec![vec![]; pubkey_index - data_by_pubkey.len() + 1]);
-                }
-
-                data_by_pubkey[pubkey_index].extend(data);
-            }
-        }
-        flatten_time.stop();
-        stats.flatten_time_total_us += flatten_time.as_us();
-        stats.unreduced_entries = raw_len;
-        data_by_pubkey
-    }
-
-    fn compare_two_hash_entries(
-        a: &CalculateHashIntermediate,
-        b: &CalculateHashIntermediate,
-    ) -> std::cmp::Ordering {
-        // note partial_cmp only returns None with floating point comparisons
-        match a.pubkey.partial_cmp(&b.pubkey).unwrap() {
-            std::cmp::Ordering::Equal => match b.slot.partial_cmp(&a.slot).unwrap() {
-                std::cmp::Ordering::Equal => b.version.partial_cmp(&a.version).unwrap(),
-                other => other,
-            },
-            other => other,
-        }
-    }
-
-    fn sort_hash_intermediate(
-        data_by_pubkey: Vec<Vec<CalculateHashIntermediate>>,
-        stats: &mut HashStats,
-    ) -> Vec<Vec<CalculateHashIntermediate>> {
-        // sort each PUBKEY_DIVISION vec
-        let mut sort_time = Measure::start("sort");
-        let sorted_data_by_pubkey: Vec<Vec<CalculateHashIntermediate>> = data_by_pubkey
-            .into_par_iter()
-            .map(|mut pk_range| {
-                pk_range.par_sort_unstable_by(Self::compare_two_hash_entries);
-                pk_range
-            })
-            .collect();
-        sort_time.stop();
-        stats.sort_time_total_us += sort_time.as_us();
-        sorted_data_by_pubkey
-    }
-
-    fn de_dup_and_eliminate_zeros(
-        sorted_data_by_pubkey: Vec<Vec<CalculateHashIntermediate>>,
-        stats: &mut HashStats,
-    ) -> (Vec<Vec<Vec<Hash>>>, u64) {
-        // 1. eliminate zero lamport accounts
-        // 2. pick the highest slot or (slot = and highest version) of each pubkey
-        // 3. produce this output:
-        //   vec: PUBKEY_BINS_FOR_CALCULATING_HASHES in pubkey order
-        //     vec: sorted sections from parallelism, in pubkey order
-        //       vec: individual hashes in pubkey order
-        let mut zeros = Measure::start("eliminate zeros");
-        let overall_sum = Mutex::new(0u64);
-        const CHUNKS: usize = 10;
-        let hashes: Vec<Vec<Vec<Hash>>> = sorted_data_by_pubkey
-            .into_par_iter()
-            .map(|pubkey_division| {
-                let (hashes, sum) = Self::de_dup_accounts_in_parallel(&pubkey_division, CHUNKS);
-                let mut overall = overall_sum.lock().unwrap();
-                *overall = Self::checked_cast_for_capitalization(sum as u128 + *overall as u128);
-                hashes
-            })
-            .collect();
-        zeros.stop();
-        stats.zeros_time_total_us += zeros.as_us();
-        let sum = *overall_sum.lock().unwrap();
-        (hashes, sum)
-    }
-
-    // 1. eliminate zero lamport accounts
-    // 2. pick the highest slot or (slot = and highest version) of each pubkey
-    // 3. produce this output:
-    //   vec: sorted sections from parallelism, in pubkey order
-    //     vec: individual hashes in pubkey order
-    fn de_dup_accounts_in_parallel(
-        pubkey_division: &[CalculateHashIntermediate],
-        chunk_count: usize,
-    ) -> (Vec<Vec<Hash>>, u64) {
-        let len = pubkey_division.len();
-        let max = if len > chunk_count {
-            std::cmp::max(chunk_count, 1)
-        } else {
-            1
-        };
-        let chunk_size = len / max;
-        let overall_sum = Mutex::new(0u64);
-        let hashes: Vec<Vec<Hash>> = (0..max)
-            .into_par_iter()
-            .map(|chunk_index| {
-                let mut start_index = chunk_index * chunk_size;
-                let mut end_index = start_index + chunk_size;
-                if chunk_index == max - 1 {
-                    end_index = len;
-                }
-
-                let is_first_slice = chunk_index == 0;
-                if !is_first_slice {
-                    // note that this causes all regions after region 0 to have 1 item that overlaps with the previous region
-                    start_index -= 1;
-                }
-
-                let (result, sum) = Self::de_dup_accounts_from_stores(
-                    chunk_index == 0,
-                    &pubkey_division[start_index..end_index],
-                );
-                let mut overall = overall_sum.lock().unwrap();
-                *overall = Self::checked_cast_for_capitalization(sum + *overall as u128);
-
-                result
-            })
-            .collect();
-
-        let sum = *overall_sum.lock().unwrap();
-        (hashes, sum)
-    }
-
-    fn de_dup_accounts_from_stores(
-        is_first_slice: bool,
-        slice: &[CalculateHashIntermediate],
-    ) -> (Vec<Hash>, u128) {
-        let len = slice.len();
-        let mut result: Vec<Hash> = Vec::with_capacity(len);
-
-        let mut sum: u128 = 0;
-        if len > 0 {
-            let mut i = 0;
-            // look_for_first_key means the first key we find in our slice may be a
-            // continuation of accounts belonging to a key that started in the last slice.
-            // so, look_for_first_key=true means we have to find the first key different than
-            // the first key we encounter in our slice. Note that if this is true,
-            // our slice begins one index prior to the 'actual' start of our logical range.
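-            // For example, over sorted entries [a, a, b | b, b, c] split into two
-            // slices, the second slice starts on the overlapping 'b': it skips the
-            // whole 'b' run (the first slice owns it) and begins emitting at 'c'.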
-            let mut look_for_first_key = !is_first_slice;
-            'outer: loop {
-                // at start of loop, item at 'i' is the first entry for a given pubkey - unless look_for_first
-                let now = &slice[i];
-                let last = now.pubkey;
-                if !look_for_first_key && now.lamports != ZERO_RAW_LAMPORTS_SENTINEL {
-                    // first entry for this key that starts in our slice
-                    result.push(now.hash);
-                    sum += now.lamports as u128;
-                }
-                for (k, now) in slice.iter().enumerate().skip(i + 1) {
-                    if now.pubkey != last {
-                        i = k;
-                        look_for_first_key = false;
-                        continue 'outer;
-                    } else {
-                        let prev = &slice[k - 1];
-                        assert!(
-                            !(prev.slot == now.slot
-                                && prev.version == now.version
-                                && (prev.hash != now.hash || prev.lamports != now.lamports)),
-                            "Conflicting store data. Pubkey: {}, Slot: {}, Version: {}, Hashes: {}, {}, Lamports: {}, {}", now.pubkey, now.slot, now.version, prev.hash, now.hash, prev.lamports, now.lamports
-                        );
-                    }
-                }
-
-                break; // ran out of items in our slice, so our slice is done
-            }
-        }
-        (result, sum)
-    }
-
-    fn flatten_hashes_and_hash(
-        hashes: Vec<Vec<Vec<Hash>>>,
-        fanout: usize,
-        stats: &mut HashStats,
-    ) -> Hash {
-        let mut hash_time = Measure::start("flat2");
-
-        let offsets = CumulativeOffsets::from_raw_2d(&hashes);
-
-        let get_slice = |start: usize| -> &[Hash] { offsets.get_slice_2d(&hashes, start) };
-        let hash =
-            Self::compute_merkle_root_from_slices(offsets.total_count, fanout, None, get_slice);
-        hash_time.stop();
-        stats.hash_time_total_us += hash_time.as_us();
-        stats.hash_total = offsets.total_count;
-
-        hash
-    }
-
-    // input:
-    // vec: unordered, created by parallelism
-    //   vec: [0..bins] - where bins are pubkey ranges
-    //     vec: [..] - items which fit in the containing bin, unordered within this vec
-    // so, assumption is middle vec is bins sorted by pubkey
-    fn rest_of_hash_calculation(
-        data_sections_by_pubkey: Vec<Vec<Vec<CalculateHashIntermediate>>>,
-        mut stats: &mut HashStats,
-    ) -> (Hash, u64) {
-        let outer = Self::flatten_hash_intermediate(data_sections_by_pubkey, &mut stats);
-
-        let sorted_data_by_pubkey = Self::sort_hash_intermediate(outer, &mut stats);
-
-        let (hashes, total_lamports) =
-            Self::de_dup_and_eliminate_zeros(sorted_data_by_pubkey, &mut stats);
-
-        let hash = Self::flatten_hashes_and_hash(hashes, MERKLE_FANOUT, &mut stats);
-
-        stats.log();
-
-        (hash, total_lamports)
-    }
-
     fn calculate_accounts_hash_helper(
         &self,
         use_index: bool,
@@ -4252,7 +3662,7 @@ impl AccountsDB {
     fn scan_snapshot_stores(
         storage: &[SnapshotStorage],
         simple_capitalization_enabled: bool,
-        mut stats: &mut HashStats,
+        mut stats: &mut crate::accounts_hash::HashStats,
         bins: usize,
     ) -> Vec<Vec<Vec<CalculateHashIntermediate>>> {
         let max_plus_1 = std::u8::MAX as usize + 1;
@@ -4269,7 +3679,7 @@ impl AccountsDB {
                 let raw_lamports = loaded_account.lamports();
                 let zero_raw_lamports = raw_lamports == 0;
                 let balance = if zero_raw_lamports {
-                    ZERO_RAW_LAMPORTS_SENTINEL
+                    crate::accounts_hash::ZERO_RAW_LAMPORTS_SENTINEL
                 } else {
                     Self::account_balance_for_capitalization(
                         raw_lamports,
@@ -4318,7 +3728,7 @@ impl AccountsDB {
                 PUBKEY_BINS_FOR_CALCULATING_HASHES,
             );

-            Self::rest_of_hash_calculation(result, &mut stats)
+            AccountsHash::rest_of_hash_calculation(result, &mut stats)
         };
         if let Some(thread_pool) = thread_pool {
             thread_pool.install(scan_and_hash)
@@ -4411,7 +3821,7 @@ impl AccountsDB {
         };

         let dirty_keys = hashes.iter().map(|(pubkey, _hash)| *pubkey).collect();
-        let ret = Self::accumulate_account_hashes(hashes);
+        let ret = AccountsHash::accumulate_account_hashes(hashes);
         accumulate.stop();
         let mut uncleaned_time = Measure::start("uncleaned_index");
         self.uncleaned_pubkeys.insert(slot, dirty_keys);
@@ -5536,8 +4946,8 @@ pub mod tests {
     // TODO: all the bank tests are bank specific, issue: 2194
     use super::*;
     use crate::{
-        accounts_index::tests::*, accounts_index::RefCount, append_vec::AccountMeta,
-        inline_spl_token_v2_0,
+        accounts_hash::MERKLE_FANOUT, accounts_index::tests::*, accounts_index::RefCount,
+        append_vec::AccountMeta, inline_spl_token_v2_0,
     };
     use assert_matches::assert_matches;
     use rand::{thread_rng, Rng};
@@ -5691,611 +5101,6 @@ pub mod tests {
         assert_eq!(result[1], vec![raw_expected]);
     }

-    #[test]
-    fn test_accountsdb_cumulative_offsets1_d() {
-        let input = vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]];
-        let cumulative = CumulativeOffsets::from_raw(&input);
-
-        let src: Vec<_> = input.clone().into_iter().flatten().collect();
-        let len = src.len();
-        assert_eq!(cumulative.total_count, len);
-        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
-
-        const DIMENSION: usize = 0;
-        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 0);
-        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 2);
-
-        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
-        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
-
-        for start in 0..len {
-            let slice = cumulative.get_slice(&input, start);
-            let len = slice.len();
-            assert!(len > 0);
-            assert_eq!(&src[start..(start + len)], slice);
-        }
-
-        let input = vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]];
-        let cumulative = CumulativeOffsets::from_raw(&input);
-
-        let src: Vec<_> = input.clone().into_iter().flatten().collect();
-        let len = src.len();
-        assert_eq!(cumulative.total_count, len);
-        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
-
-        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 1);
-        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 3);
-
-        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
-        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
-
-        for start in 0..len {
-            let slice = cumulative.get_slice(&input, start);
-            let len = slice.len();
-            assert!(len > 0);
-            assert_eq!(&src[start..(start + len)], slice);
-        }
-
-        let input: Vec<Vec<u32>> = vec![vec![]];
-        let cumulative = CumulativeOffsets::from_raw(&input);
-
-        let src: Vec<_> = input.into_iter().flatten().collect();
-        let len = src.len();
-        assert_eq!(cumulative.total_count, len);
-        assert_eq!(cumulative.cumulative_offsets.len(), 0); // 0 non-empty vectors
-    }
-
-    #[test]
-    fn test_accountsdb_cumulative_offsets2_d() {
-        let input = vec![vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
-        let cumulative = CumulativeOffsets::from_raw_2d(&input);
-
-        let src: Vec<_> = input
-            .clone()
-            .into_iter()
-            .flatten()
-            .into_iter()
-            .flatten()
-            .collect();
-        let len = src.len();
-        assert_eq!(cumulative.total_count, len);
-        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
-
-        const DIMENSION_0: usize = 0;
-        const DIMENSION_1: usize = 1;
-        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
-        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
-        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
-        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 2);
-
-        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
-        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
-
-        for start in 0..len {
-            let slice = cumulative.get_slice_2d(&input, start);
-            let len = slice.len();
-            assert!(len > 0);
-            assert_eq!(&src[start..(start + len)], slice);
-        }
-
-        let input = vec![vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
-        let cumulative = CumulativeOffsets::from_raw_2d(&input);
-
-        let src: Vec<_> = input
-            .clone()
-            .into_iter()
-            .flatten()
-            .into_iter()
-            .flatten()
-            .collect();
-        let len = src.len();
-        assert_eq!(cumulative.total_count, len);
-        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
-
-        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
-        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 1);
-        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
-        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 3);
-
-        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
-        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
-
-        for start in 0..len {
-            let slice = cumulative.get_slice_2d(&input, start);
-            let len = slice.len();
-            assert!(len > 0);
-            assert_eq!(&src[start..(start + len)], slice);
-        }
-
-        let input: Vec<Vec<Vec<u32>>> = vec![vec![]];
-        let cumulative = CumulativeOffsets::from_raw_2d(&input);
-
-        let src: Vec<_> = input.into_iter().flatten().collect();
-        let len = src.len();
-        assert_eq!(cumulative.total_count, len);
-        assert_eq!(cumulative.cumulative_offsets.len(), 0); // 0 non-empty vectors
-
-        let input = vec![
-            vec![vec![0, 1]],
-            vec![vec![]],
-            vec![vec![], vec![2, 3, 4], vec![]],
-        ];
-        let cumulative = CumulativeOffsets::from_raw_2d(&input);
-
-        let src: Vec<_> = input
-            .clone()
-            .into_iter()
-            .flatten()
-            .into_iter()
-            .flatten()
-            .collect();
-        let len = src.len();
-        assert_eq!(cumulative.total_count, len);
-        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
-
-        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
-        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
-        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 2);
-        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 1);
-
-        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
-        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
-
-        for start in 0..len {
-            let slice = cumulative.get_slice_2d(&input, start);
-            let len = slice.len();
-            assert!(len > 0);
-            assert_eq!(&src[start..(start + len)], slice);
-        }
-    }
-
-    #[test]
-    fn test_accountsdb_div_ceil() {
-        assert_eq!(AccountsDB::div_ceil(10, 3), 4);
-        assert_eq!(AccountsDB::div_ceil(0, 1), 0);
-        assert_eq!(AccountsDB::div_ceil(0, 5), 0);
-        assert_eq!(AccountsDB::div_ceil(9, 3), 3);
-        assert_eq!(AccountsDB::div_ceil(9, 9), 1);
-    }
-
-    #[test]
-    #[should_panic(expected = "attempt to divide by zero")]
-    fn test_accountsdb_div_ceil_fail() {
-        assert_eq!(AccountsDB::div_ceil(10, 0), 0);
-    }
-
-    #[test]
-    fn test_accountsdb_rest_of_hash_calculation() {
-        solana_logger::setup();
-
-        let mut account_maps: Vec<CalculateHashIntermediate> = Vec::new();
-
-        let key = Pubkey::new(&[11u8; 32]);
-        let hash = Hash::new(&[1u8; 32]);
-        let val = CalculateHashIntermediate::new(0, hash, 88, Slot::default(), key);
-        account_maps.push(val);
-
-        // 2nd key - zero lamports, so will be removed
-        let key = Pubkey::new(&[12u8; 32]);
-        let hash = Hash::new(&[2u8; 32]);
-        let val = CalculateHashIntermediate::new(
-            0,
-            hash,
-            ZERO_RAW_LAMPORTS_SENTINEL,
-            Slot::default(),
-            key,
-        );
-        account_maps.push(val);
-
-        let result = AccountsDB::rest_of_hash_calculation(
-            vec![vec![account_maps.clone()]],
-            &mut HashStats::default(),
-        );
-        let expected_hash =
Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap(); - assert_eq!((result.0, result.1), (expected_hash, 88)); - - // 3rd key - with pubkey value before 1st key so it will be sorted first - let key = Pubkey::new(&[10u8; 32]); - let hash = Hash::new(&[2u8; 32]); - let val = CalculateHashIntermediate::new(0, hash, 20, Slot::default(), key); - account_maps.push(val); - - let result = AccountsDB::rest_of_hash_calculation( - vec![vec![account_maps.clone()]], - &mut HashStats::default(), - ); - let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap(); - assert_eq!((result.0, result.1), (expected_hash, 108)); - - // 3rd key - with later slot - let key = Pubkey::new(&[10u8; 32]); - let hash = Hash::new(&[99u8; 32]); - let val = CalculateHashIntermediate::new(0, hash, 30, Slot::default() + 1, key); - account_maps.push(val); - - let result = AccountsDB::rest_of_hash_calculation( - vec![vec![account_maps]], - &mut HashStats::default(), - ); - let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap(); - assert_eq!((result.0, result.1), (expected_hash, 118)); - } - - #[test] - fn test_accountsdb_de_dup_accounts_zero_chunks() { - let (hashes, lamports) = - AccountsDB::de_dup_accounts_in_parallel(&[CalculateHashIntermediate::default()], 0); - assert_eq!(vec![vec![Hash::default()]], hashes); - assert_eq!(lamports, 0); - } - - #[test] - fn test_accountsdb_de_dup_accounts_empty() { - solana_logger::setup(); - - let (hashes, lamports) = - AccountsDB::de_dup_and_eliminate_zeros(vec![vec![], vec![]], &mut HashStats::default()); - assert_eq!( - vec![vec![Hash::default(); 0], vec![]], - hashes.into_iter().flatten().collect::>() - ); - assert_eq!(lamports, 0); - - let (hashes, lamports) = - AccountsDB::de_dup_and_eliminate_zeros(vec![], &mut HashStats::default()); - let empty: Vec>> = Vec::default(); - assert_eq!(empty, hashes); - assert_eq!(lamports, 0); - - let (hashes, lamports) = AccountsDB::de_dup_accounts_in_parallel(&[], 1); - assert_eq!( - vec![Hash::default(); 0], - hashes.into_iter().flatten().collect::>() - ); - assert_eq!(lamports, 0); - - let (hashes, lamports) = AccountsDB::de_dup_accounts_in_parallel(&[], 2); - assert_eq!( - vec![Hash::default(); 0], - hashes.into_iter().flatten().collect::>() - ); - assert_eq!(lamports, 0); - } - - #[test] - fn test_accountsdb_de_dup_accounts_from_stores() { - solana_logger::setup(); - - let key_a = Pubkey::new(&[1u8; 32]); - let key_b = Pubkey::new(&[2u8; 32]); - let key_c = Pubkey::new(&[3u8; 32]); - const COUNT: usize = 6; - const VERSION: u64 = 0; - let hashes: Vec<_> = (0..COUNT) - .into_iter() - .map(|i| Hash::new(&[i as u8; 32])) - .collect(); - // create this vector - // abbbcc - let keys = [key_a, key_b, key_b, key_b, key_c, key_c]; - - let accounts: Vec<_> = hashes - .into_iter() - .zip(keys.iter()) - .enumerate() - .map(|(i, (hash, key))| { - CalculateHashIntermediate::new( - VERSION, - hash, - (i + 1) as u64, - u64::MAX - i as u64, - *key, - ) - }) - .collect(); - - type ExpectedType = (String, bool, u64, String); - let expected:Vec = vec![ - // ("key/lamports key2/lamports ...", - // is_first_slice - // result lamports - // result hashes) - // "a5" = key_a, 5 lamports - ("a1", false, 0, "[]"), - ("a1b2", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), - ("a1b2b3", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), - ("a1b2b3b4", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), - ("a1b2b3b4c5", false, 7, 
"[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), - ("b2", false, 0, "[]"), - ("b2b3", false, 0, "[]"), - ("b2b3b4", false, 0, "[]"), - ("b2b3b4c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), - ("b3", false, 0, "[]"), - ("b3b4", false, 0, "[]"), - ("b3b4c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), - ("b4", false, 0, "[]"), - ("b4c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), - ("c5", false, 0, "[]"), - ("a1", true, 1, "[11111111111111111111111111111111]"), - ("a1b2", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), - ("a1b2b3", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), - ("a1b2b3b4", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), - ("a1b2b3b4c5", true, 8, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), - ("b2", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), - ("b2b3", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), - ("b2b3b4", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), - ("b2b3b4c5", true, 7, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), - ("b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"), - ("b3b4", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"), - ("b3b4c5", true, 8, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), - ("b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"), - ("b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), - ("c5", true, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), - ].into_iter().map(|item| { - let result: ExpectedType = ( - item.0.to_string(), - item.1, - item.2, - item.3.to_string(), - ); - result - }).collect(); - - let mut expected_index = 0; - for first_slice in 0..2 { - for start in 0..COUNT { - for end in start + 1..COUNT { - let is_first_slice = first_slice == 1; - let accounts = accounts.clone(); - let slice = &accounts[start..end]; - - let result = AccountsDB::de_dup_accounts_from_stores(is_first_slice, slice); - let (hashes2, lamports2) = AccountsDB::de_dup_accounts_in_parallel(slice, 1); - let (hashes3, lamports3) = AccountsDB::de_dup_accounts_in_parallel(slice, 2); - let (hashes4, lamports4) = AccountsDB::de_dup_and_eliminate_zeros( - vec![slice.to_vec()], - &mut HashStats::default(), - ); - let (hashes5, lamports5) = AccountsDB::de_dup_and_eliminate_zeros( - vec![slice.to_vec(), slice.to_vec()], - &mut HashStats::default(), - ); - let (hashes6, lamports6) = AccountsDB::de_dup_and_eliminate_zeros( - vec![vec![], slice.to_vec()], - &mut HashStats::default(), - ); - - assert_eq!( - hashes2.iter().flatten().collect::>(), - hashes3.iter().flatten().collect::>() - ); - let expected2 = hashes2.clone().into_iter().flatten().collect::>(); - assert_eq!( - expected2, - hashes4 - .into_iter() - .flatten() - .into_iter() - .flatten() - .collect::>() - ); - assert_eq!( - vec![expected2.clone(), expected2.clone()], - hashes5.into_iter().flatten().collect::>() - ); - assert_eq!( - vec![vec![], expected2.clone()], - hashes6.into_iter().flatten().collect::>() - ); - assert_eq!(lamports2, lamports3); - assert_eq!(lamports2, lamports4); - assert_eq!(lamports2 * 2, lamports5); - assert_eq!(lamports2, lamports6); - - let 
hashes: Vec<_> = hashes2.into_iter().flatten().collect(); - - let human_readable = slice - .iter() - .map(|v| { - let mut s = (if v.pubkey == key_a { - "a" - } else if v.pubkey == key_b { - "b" - } else { - "c" - }) - .to_string(); - - s.push_str(&v.lamports.to_string()); - s - }) - .collect::(); - - let hash_result_as_string = format!("{:?}", result.0); - - let packaged_result: ExpectedType = ( - human_readable, - is_first_slice, - result.1 as u64, - hash_result_as_string, - ); - - if is_first_slice { - // the parallel version always starts with 'first slice' - assert_eq!( - result.0, hashes, - "description: {:?}, expected index: {}", - packaged_result, expected_index - ); - assert_eq!( - result.1 as u64, lamports2, - "description: {:?}, expected index: {}", - packaged_result, expected_index - ); - } - - assert_eq!(expected[expected_index], packaged_result); - - // for generating expected results - // error!("{:?},", packaged_result); - expected_index += 1; - } - } - } - - for first_slice in 0..2 { - let result = AccountsDB::de_dup_accounts_from_stores(first_slice == 1, &[]); - assert_eq!((vec![Hash::default(); 0], 0), result); - } - } - - #[test] - fn test_accountsdb_flatten_hashes_and_hash() { - solana_logger::setup(); - const COUNT: usize = 4; - let hashes: Vec<_> = (0..COUNT) - .into_iter() - .map(|i| Hash::new(&[(i) as u8; 32])) - .collect(); - let expected = AccountsDB::compute_merkle_root_loop(hashes.clone(), MERKLE_FANOUT, |i| *i); - - assert_eq!( - AccountsDB::flatten_hashes_and_hash( - vec![vec![hashes.clone()]], - MERKLE_FANOUT, - &mut HashStats::default() - ), - expected, - ); - for in_first in 1..COUNT - 1 { - assert_eq!( - AccountsDB::flatten_hashes_and_hash( - vec![vec![ - hashes.clone()[0..in_first].to_vec(), - hashes.clone()[in_first..COUNT].to_vec() - ]], - MERKLE_FANOUT, - &mut HashStats::default() - ), - expected - ); - } - } - - #[test] - fn test_sort_hash_intermediate() { - solana_logger::setup(); - let mut stats = HashStats::default(); - let key = Pubkey::new_unique(); - let hash = Hash::new_unique(); - let val = CalculateHashIntermediate::new(1, hash, 1, 1, key); - - // slot same, version < - let hash2 = Hash::new_unique(); - let val2 = CalculateHashIntermediate::new(0, hash2, 4, 1, key); - let val3 = CalculateHashIntermediate::new(3, hash2, 4, 1, key); - let val4 = CalculateHashIntermediate::new(4, hash2, 4, 1, key); - - let src = vec![vec![val2.clone()], vec![val.clone()]]; - let result = AccountsDB::sort_hash_intermediate(src.clone(), &mut stats); - assert_eq!(result, src); - - let src = vec![ - vec![val2.clone(), val.clone()], - vec![val3.clone(), val4.clone()], - ]; - let sorted = vec![vec![val, val2], vec![val4, val3]]; - let result = AccountsDB::sort_hash_intermediate(src, &mut stats); - assert_eq!(result, sorted); - - let src = vec![vec![]]; - let result = AccountsDB::sort_hash_intermediate(src.clone(), &mut stats); - assert_eq!(result, src); - - let src = vec![]; - let result = AccountsDB::sort_hash_intermediate(src.clone(), &mut stats); - assert_eq!(result, src); - } - - #[test] - fn test_accountsdb_compare_two_hash_entries() { - solana_logger::setup(); - let key = Pubkey::new_unique(); - let hash = Hash::new_unique(); - let val = CalculateHashIntermediate::new(1, hash, 1, 1, key); - - // slot same, version < - let hash2 = Hash::new_unique(); - let val2 = CalculateHashIntermediate::new(0, hash2, 4, 1, key); - assert_eq!( - std::cmp::Ordering::Less, - AccountsDB::compare_two_hash_entries(&val, &val2) - ); - - let list = vec![val.clone(), val2.clone()]; - 
-        let mut list_bkup = list.clone();
-        list_bkup.sort_by(AccountsDB::compare_two_hash_entries);
-        let list = AccountsDB::sort_hash_intermediate(vec![list], &mut HashStats::default());
-        assert_eq!(list, vec![list_bkup]);
-
-        let list = vec![val2, val.clone()]; // reverse args
-        let mut list_bkup = list.clone();
-        list_bkup.sort_by(AccountsDB::compare_two_hash_entries);
-        let list = AccountsDB::sort_hash_intermediate(vec![list], &mut HashStats::default());
-        assert_eq!(list, vec![list_bkup]);
-
-        // slot same, vers =
-        let hash3 = Hash::new_unique();
-        let val3 = CalculateHashIntermediate::new(1, hash3, 2, 1, key);
-        assert_eq!(
-            std::cmp::Ordering::Equal,
-            AccountsDB::compare_two_hash_entries(&val, &val3)
-        );
-
-        // slot same, vers >
-        let hash4 = Hash::new_unique();
-        let val4 = CalculateHashIntermediate::new(2, hash4, 6, 1, key);
-        assert_eq!(
-            std::cmp::Ordering::Greater,
-            AccountsDB::compare_two_hash_entries(&val, &val4)
-        );
-
-        // slot >, version <
-        let hash5 = Hash::new_unique();
-        let val5 = CalculateHashIntermediate::new(0, hash5, 8, 2, key);
-        assert_eq!(
-            std::cmp::Ordering::Greater,
-            AccountsDB::compare_two_hash_entries(&val, &val5)
-        );
-    }
-
-    #[test]
-    fn test_accountsdb_remove_zero_balance_accounts() {
-        solana_logger::setup();
-
-        let key = Pubkey::new_unique();
-        let hash = Hash::new_unique();
-        let mut account_maps: Vec<CalculateHashIntermediate> = Vec::new();
-        let val = CalculateHashIntermediate::new(0, hash, 1, Slot::default(), key);
-        account_maps.push(val.clone());
-
-        let result = AccountsDB::de_dup_accounts_from_stores(true, &account_maps[..]);
-        assert_eq!(result, (vec![val.hash], val.lamports as u128));
-
-        // zero original lamports, higher version
-        let val = CalculateHashIntermediate::new(
-            1,
-            hash,
-            ZERO_RAW_LAMPORTS_SENTINEL,
-            Slot::default(),
-            key,
-        );
-        account_maps.insert(0, val); // has to be before other entry since sort order matters
-
-        let result = AccountsDB::de_dup_accounts_from_stores(true, &account_maps[..]);
-        assert_eq!(result, (vec![], 0));
-    }
-
     #[test]
     fn test_accountsdb_calculate_accounts_hash_without_index_simple() {
         solana_logger::setup();
@@ -6312,7 +5117,7 @@ pub mod tests {
         let (storages, raw_expected) = sample_storages_and_accounts();

         let expected_hash =
-            AccountsDB::compute_merkle_root_loop(raw_expected.clone(), MERKLE_FANOUT, |item| {
+            AccountsHash::compute_merkle_root_loop(raw_expected.clone(), MERKLE_FANOUT, |item| {
                 item.hash
             });
         let sum = raw_expected.iter().map(|item| item.lamports).sum();
@@ -6375,330 +5180,6 @@ pub mod tests {
         assert_eq!(result, vec![vec![expected]]);
     }

-    #[test]
-    fn test_accountsdb_flatten_hash_intermediate() {
-        solana_logger::setup();
-        let test = vec![vec![vec![CalculateHashIntermediate::new(
-            1,
-            Hash::new_unique(),
-            2,
-            3,
-            Pubkey::new_unique(),
-        )]]];
-        let mut stats = HashStats::default();
-        let result = AccountsDB::flatten_hash_intermediate(test.clone(), &mut stats);
-        assert_eq!(result, test[0]);
-        assert_eq!(stats.unreduced_entries, 1);
-
-        let mut stats = HashStats::default();
-        let result = AccountsDB::flatten_hash_intermediate(
-            vec![vec![vec![CalculateHashIntermediate::default(); 0]]],
-            &mut stats,
-        );
-        assert_eq!(result.iter().flatten().count(), 0);
-        assert_eq!(stats.unreduced_entries, 0);
-
-        let test = vec![
-            vec![vec![
-                CalculateHashIntermediate::new(1, Hash::new_unique(), 2, 3, Pubkey::new_unique()),
-                CalculateHashIntermediate::new(8, Hash::new_unique(), 9, 10, Pubkey::new_unique()),
-            ]],
-            vec![vec![CalculateHashIntermediate::new(
-                4,
-                Hash::new_unique(),
-                5,
-                6,
-                Pubkey::new_unique(),
-            )]],
-        ];
-        let mut stats = HashStats::default();
-        let result = AccountsDB::flatten_hash_intermediate(test.clone(), &mut stats);
-        let expected = test
-            .into_iter()
-            .flatten()
-            .into_iter()
-            .flatten()
-            .collect::<Vec<_>>();
-        assert_eq!(result.into_iter().flatten().collect::<Vec<_>>(), expected);
-        assert_eq!(stats.unreduced_entries, expected.len());
-    }
-
-    #[test]
-    fn test_accountsdb_flatten_hash_intermediate2() {
-        solana_logger::setup();
-        // data is ordered:
-        // vec: just a level of hierarchy
-        //   vec: 1 vec per PUBKEY_BINS_FOR_CALCULATING_HASHES
-        //     vec: Intermediate data whose pubkey belongs in this division
-        let binned_data = vec![
-            vec![vec![1, 2], vec![3, 4], vec![], vec![5]],
-            vec![vec![], vec![11, 12]],
-        ];
-        let mut combined: Vec<Vec<Vec<u64>>> = vec![vec![]];
-        binned_data.iter().enumerate().for_each(|(bin, v)| {
-            v.iter()
-                .enumerate()
-                .for_each(|(dimension0, v): (usize, &Vec<u64>)| {
-                    while combined.len() <= dimension0 {
-                        combined.push(vec![]);
-                    }
-                    let vec: &mut Vec<Vec<u64>> = &mut combined[dimension0];
-                    while vec.len() <= bin {
-                        vec.push(vec![]);
-                    }
-                    vec[bin].extend(v.clone());
-                });
-        });
-
-        let mut stats = HashStats::default();
-        let result = AccountsDB::flatten_hash_intermediate(combined, &mut stats);
-        assert_eq!(
-            result,
-            binned_data
-                .clone()
-                .into_iter()
-                .map(|x| x.into_iter().flatten().collect::<Vec<_>>())
-                .collect::<Vec<_>>()
-        );
-        assert_eq!(
-            stats.unreduced_entries,
-            binned_data
-                .into_iter()
-                .flatten()
-                .into_iter()
-                .flatten()
-                .count()
-        );
-
-        let src = vec![vec![vec![0]]];
-        let result = AccountsDB::flatten_hash_intermediate(src, &mut stats);
-        assert_eq!(result, vec![vec![0]]);
-
-        let src = vec![vec![vec![0], vec![1]]];
-        let result = AccountsDB::flatten_hash_intermediate(src, &mut stats);
-        assert_eq!(result, vec![vec![0], vec![1]]);
-
-        let src = vec![vec![vec![]], vec![vec![], vec![1]]];
-        let result = AccountsDB::flatten_hash_intermediate(src, &mut stats);
-        assert_eq!(result, vec![vec![], vec![1]]);
-
-        let src: Vec<Vec<Vec<u64>>> = vec![vec![vec![], vec![]]];
-        let result = AccountsDB::flatten_hash_intermediate(src, &mut stats);
-        let expected: Vec<Vec<u64>> = vec![];
-        assert_eq!(result, expected);
-
-        let src: Vec<Vec<Vec<u64>>> = vec![vec![vec![], vec![]], vec![vec![], vec![]]];
-        let result = AccountsDB::flatten_hash_intermediate(src, &mut stats);
-        assert_eq!(result, expected);
-
-        let src: Vec<Vec<Vec<u64>>> = vec![vec![vec![], vec![]], vec![vec![]]];
-        let result = AccountsDB::flatten_hash_intermediate(src, &mut stats);
-        assert_eq!(result, expected);
-
-        let src: Vec<Vec<Vec<u64>>> = vec![vec![], vec![vec![]]];
-        let result = AccountsDB::flatten_hash_intermediate(src, &mut stats);
-        let expected: Vec<Vec<u64>> = vec![];
-        assert_eq!(result, expected);
-    }
-
-    fn test_hashing_larger(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
-        let result = AccountsDB::compute_merkle_root(hashes.clone(), fanout);
-        let reduced: Vec<_> = hashes.iter().map(|x| x.1).collect();
-        let result2 = test_hashing(reduced, fanout);
-        assert_eq!(result, result2, "len: {}", hashes.len());
-        result
-    }
-
-    fn test_hashing(hashes: Vec<Hash>, fanout: usize) -> Hash {
-        let temp: Vec<_> = hashes.iter().map(|h| (Pubkey::default(), *h)).collect();
-        let result = AccountsDB::compute_merkle_root(temp, fanout);
-        let reduced: Vec<_> = hashes.clone();
-        let result2 =
-            AccountsDB::compute_merkle_root_from_slices(hashes.len(), fanout, None, |start| {
-                &reduced[start..]
-            });
-        assert_eq!(result, result2, "len: {}", hashes.len());
-
-        let result2 =
-            AccountsDB::compute_merkle_root_from_slices(hashes.len(), fanout, Some(1), |start| {
-                &reduced[start..]
-            });
-        assert_eq!(result, result2, "len: {}", hashes.len());
-
-        let reduced2: Vec<_> = hashes.iter().map(|x| vec![*x]).collect();
-        let result2 =
-            AccountsDB::flatten_hashes_and_hash(vec![reduced2], fanout, &mut HashStats::default());
-        assert_eq!(result, result2, "len: {}", hashes.len());
-
-        let max = std::cmp::min(reduced.len(), fanout * 2);
-        for left in 0..max {
-            for right in left + 1..max {
-                let src = vec![
-                    vec![reduced[0..left].to_vec(), reduced[left..right].to_vec()],
-                    vec![reduced[right..].to_vec()],
-                ];
-                let result2 =
-                    AccountsDB::flatten_hashes_and_hash(src, fanout, &mut HashStats::default());
-                assert_eq!(result, result2);
-            }
-        }
-        result
-    }
-
-    #[test]
-    fn test_accountsdb_compute_merkle_root_large() {
-        solana_logger::setup();
-
-        // handle fanout^x -1, +0, +1 for a few 'x's
-        const FANOUT: usize = 3;
-        let mut hash_counts: Vec<_> = (1..6)
-            .map(|x| {
-                let mark = FANOUT.pow(x);
-                vec![mark - 1, mark, mark + 1]
-            })
-            .flatten()
-            .collect();
-
-        // saturate the test space for threshold to threshold + target
-        // this hits right before we use the 3 deep optimization and all the way through all possible partial last chunks
-        let target = FANOUT.pow(3);
-        let threshold = target * FANOUT;
-        hash_counts.extend(threshold - 1..=threshold + target);
-
-        for hash_count in hash_counts {
-            let hashes: Vec<_> = (0..hash_count)
-                .into_iter()
-                .map(|_| Hash::new_unique())
-                .collect();
-
-            test_hashing(hashes, FANOUT);
-        }
-    }
-
-    #[test]
-    fn test_accountsdb_compute_merkle_root() {
-        solana_logger::setup();
-
-        let expected_results = vec![
-            (0, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
-            (0, 1, "8unXKJYTxrR423HgQxbDmx29mFri1QNrzVKKDxEfc6bj", 0),
-            (0, 2, "6QfkevXLLqbfAaR1kVjvMLFtEXvNUVrpmkwXqgsYtCFW", 1),
-            (0, 3, "G3FrJd9JrXcMiqChTSfvEdBL2sCPny3ebiUy9Xxbn7a2", 3),
-            (0, 4, "G3sZXHhwoCFuNyWy7Efffr47RBW33ibEp7b2hqNDmXdu", 6),
-            (0, 5, "78atJJYpokAPKMJwHxUW8SBDvPkkSpTBV7GiB27HwosJ", 10),
-            (0, 6, "7c9SM2BmCRVVXdrEdKcMK91MviPqXqQMd8QAb77tgLEy", 15),
-            (0, 7, "3hsmnZPhf22UvBLiZ4dVa21Qsdh65CCrtYXsb8MxoVAa", 21),
-            (0, 8, "5bwXUiC6RCRhb8fqvjvUXT6waU25str3UXA3a6Aq1jux", 28),
-            (0, 9, "3NNtQKH6PaYpCnFBtyi2icK9eYX3YM5pqA3SKaXtUNzu", 36),
-            (1, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
-            (1, 1, "4GWVCsnEu1iRyxjAB3F7J7C4MMvcoxFWtP9ihvwvDgxY", 0),
-            (1, 2, "8ML8Te6Uw2mipFr2v9sMZDcziXzhVqJo2qeMJohg1CJx", 1),
-            (1, 3, "AMEuC3AgqAeRBGBhSfTmuMdfbAiXJnGmKv99kHmcAE1H", 3),
-            (1, 4, "HEnDuJLHpsQfrApimGrovTqPEF6Vkrx2dKFr3BDtYzWx", 6),
-            (1, 5, "6rH69iP2yM1o565noZN1EqjySW4PhYUskz3c5tXePUfV", 10),
-            (1, 6, "7qEQMEXdfSPjbZ3q4cuuZwebDMvTvuaQ3dBiHoDUKo9a", 15),
-            (1, 7, "GDJz7LSKYjqqz6ujCaaQRJRmQ7TLNCwYJhdT84qT4qwk", 21),
-            (1, 8, "HT9krPLVTo3rr5WZQBQFrbqWs8SbYScXfnt8EVuobboM", 28),
-            (1, 9, "8y2pMgqMdRsvqw6BQXm6wtz3qxGPss72i6H6gVpPyeda", 36),
-        ];
-
-        let mut expected_index = 0;
-        let start = 0;
-        let default_fanout = 2;
-        // test 0..3 recursions (at fanout = 2) and 1 item remainder. The internals have 1 special case first loop and subsequent loops are the same types.
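-        // (default_fanout^3 + 2 = 10 input sizes, i.e. counts 0..=9, matching the
-        // ten expected_results rows per pass above.)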
-        let iterations = default_fanout * default_fanout * default_fanout + 2;
-        for pass in 0..2 {
-            let fanout = if pass == 0 {
-                default_fanout
-            } else {
-                MERKLE_FANOUT
-            };
-            for count in start..iterations {
-                let mut input: Vec<_> = (0..count)
-                    .map(|i| {
-                        let key = Pubkey::new(&[(pass * iterations + count) as u8; 32]);
-                        let hash = Hash::new(&[(pass * iterations + count + i + 1) as u8; 32]);
-                        (key, hash)
-                    })
-                    .collect();
-
-                let result = if pass == 0 {
-                    test_hashing_larger(input.clone(), fanout)
-                } else {
-                    // this sorts inside
-                    let early_result = AccountsDB::accumulate_account_hashes(
-                        input.iter().map(|i| (i.0, i.1)).collect::<Vec<_>>(),
-                    );
-                    AccountsDB::sort_hashes_by_pubkey(&mut input);
-                    let result = AccountsDB::compute_merkle_root(input.clone(), fanout);
-                    assert_eq!(early_result, result);
-                    result
-                };
-                // compare against captured, expected results for hash (and lamports)
-                assert_eq!(
-                    (
-                        pass,
-                        count,
-                        &*(result.to_string()),
-                        expected_results[expected_index].3
-                    ), // we no longer calculate lamports
-                    expected_results[expected_index]
-                );
-                expected_index += 1;
-            }
-        }
-    }
-
-    #[test]
-    #[should_panic(expected = "overflow is detected while summing capitalization")]
-    fn test_accountsdb_lamport_overflow() {
-        solana_logger::setup();
-
-        let offset = 2;
-        let input = vec![
-            CalculateHashIntermediate::new(
-                0,
-                Hash::new_unique(),
-                u64::MAX - offset,
-                0,
-                Pubkey::new_unique(),
-            ),
-            CalculateHashIntermediate::new(
-                0,
-                Hash::new_unique(),
-                offset + 1,
-                0,
-                Pubkey::new_unique(),
-            ),
-        ];
-        AccountsDB::de_dup_accounts_in_parallel(&input, 1);
-    }
-
-    #[test]
-    #[should_panic(expected = "overflow is detected while summing capitalization")]
-    fn test_accountsdb_lamport_overflow2() {
-        solana_logger::setup();
-
-        let offset = 2;
-        let input = vec![
-            vec![CalculateHashIntermediate::new(
-                0,
-                Hash::new_unique(),
-                u64::MAX - offset,
-                0,
-                Pubkey::new_unique(),
-            )],
-            vec![CalculateHashIntermediate::new(
-                0,
-                Hash::new_unique(),
-                offset + 1,
-                0,
-                Pubkey::new_unique(),
-            )],
-        ];
-        AccountsDB::de_dup_and_eliminate_zeros(input, &mut HashStats::default());
-    }
-
     #[test]
     fn test_accountsdb_add_root() {
         solana_logger::setup();
diff --git a/runtime/src/accounts_hash.rs b/runtime/src/accounts_hash.rs
new file mode 100644
index 0000000000..2959c4ff57
--- /dev/null
+++ b/runtime/src/accounts_hash.rs
@@ -0,0 +1,1559 @@
+use log::*;
+use rayon::prelude::*;
+use solana_measure::measure::Measure;
+use solana_sdk::{
+    clock::Slot,
+    hash::{Hash, Hasher},
+    pubkey::Pubkey,
+};
+use std::{convert::TryInto, sync::Mutex};
+
+pub const ZERO_RAW_LAMPORTS_SENTINEL: u64 = std::u64::MAX;
+pub const MERKLE_FANOUT: usize = 16;
+
+#[derive(Debug, Default)]
+pub struct HashStats {
+    pub scan_time_total_us: u64,
+    pub zeros_time_total_us: u64,
+    pub hash_time_total_us: u64,
+    pub sort_time_total_us: u64,
+    pub flatten_time_total_us: u64,
+    pub pre_scan_flatten_time_total_us: u64,
+    pub hash_total: usize,
+    pub unreduced_entries: usize,
+    pub num_snapshot_storage: usize,
+}
+impl HashStats {
+    fn log(&mut self) {
+        let total_time_us = self.scan_time_total_us
+            + self.zeros_time_total_us
+            + self.hash_time_total_us
+            + self.sort_time_total_us
+            + self.flatten_time_total_us
+            + self.pre_scan_flatten_time_total_us;
+        datapoint_info!(
+            "calculate_accounts_hash_without_index",
+            ("accounts_scan", self.scan_time_total_us, i64),
+            ("eliminate_zeros", self.zeros_time_total_us, i64),
+            ("hash", self.hash_time_total_us, i64),
+            ("sort", self.sort_time_total_us, i64),
+            ("hash_total", self.hash_total, i64),
+            ("flatten", self.flatten_time_total_us, i64),
+            ("unreduced_entries", self.unreduced_entries as i64, i64),
+            (
+                "num_snapshot_storage",
+                self.num_snapshot_storage as i64,
+                i64
+            ),
+            (
+                "pre_scan_flatten",
+                self.pre_scan_flatten_time_total_us as i64,
+                i64
+            ),
+            ("total", total_time_us as i64, i64),
+        );
+    }
+}
+
+#[derive(Default, Debug, PartialEq, Clone)]
+pub struct CalculateHashIntermediate {
+    pub version: u64,
+    pub hash: Hash,
+    pub lamports: u64,
+    pub slot: Slot,
+    pub pubkey: Pubkey,
+}
+
+impl CalculateHashIntermediate {
+    pub fn new(version: u64, hash: Hash, lamports: u64, slot: Slot, pubkey: Pubkey) -> Self {
+        Self {
+            version,
+            hash,
+            lamports,
+            slot,
+            pubkey,
+        }
+    }
+}
+
+#[derive(Default, Debug)]
+pub struct CumulativeOffset {
+    pub index: Vec<usize>,
+    pub start_offset: usize,
+}
+
+impl CumulativeOffset {
+    pub fn new(index: Vec<usize>, start_offset: usize) -> CumulativeOffset {
+        Self {
+            index,
+            start_offset,
+        }
+    }
+}
+
+// Allow retrieving &[start..end] from a logical src: Vec<T>, where src is really Vec<Vec<T>> (or later Vec<Vec<Vec<T>>>).
+// This model prevents callers from having to flatten, which saves both working memory and time.
+#[derive(Default, Debug)]
+pub struct CumulativeOffsets {
+    cumulative_offsets: Vec<CumulativeOffset>,
+    total_count: usize,
+}
+
+impl CumulativeOffsets {
+    pub fn from_raw<T>(raw: &[Vec<T>]) -> CumulativeOffsets {
+        let mut total_count: usize = 0;
+        let cumulative_offsets: Vec<_> = raw
+            .iter()
+            .enumerate()
+            .filter_map(|(i, v)| {
+                let len = v.len();
+                if len > 0 {
+                    let result = CumulativeOffset::new(vec![i], total_count);
+                    total_count += len;
+                    Some(result)
+                } else {
+                    None
+                }
+            })
+            .collect();
+
+        Self {
+            cumulative_offsets,
+            total_count,
+        }
+    }
+
+    pub fn from_raw_2d<T>(raw: &[Vec<Vec<T>>]) -> CumulativeOffsets {
+        let mut total_count: usize = 0;
+        let mut cumulative_offsets = Vec::with_capacity(0);
+        for (i, v_outer) in raw.iter().enumerate() {
+            for (j, v) in v_outer.iter().enumerate() {
+                let len = v.len();
+                if len > 0 {
+                    if cumulative_offsets.is_empty() {
+                        // the first inner, non-empty vector we find gives us an approximate rectangular shape
+                        cumulative_offsets = Vec::with_capacity(raw.len() * v_outer.len());
+                    }
+                    cumulative_offsets.push(CumulativeOffset::new(vec![i, j], total_count));
+                    total_count += len;
+                }
+            }
+        }
+
+        Self {
+            cumulative_offsets,
+            total_count,
+        }
+    }
+
+    // return the biggest slice possible that starts at 'start'
+    pub fn get_slice<'a, T>(&self, raw: &'a [Vec<T>], start: usize) -> &'a [T] {
+        // This could be binary search, but we expect a small number of vectors.
+        for i in (0..self.cumulative_offsets.len()).into_iter().rev() {
+            let index = &self.cumulative_offsets[i];
+            if start >= index.start_offset {
+                let start = start - index.start_offset;
+                const DIMENSION: usize = 0;
+                return &raw[index.index[DIMENSION]][start..];
+            }
+        }
+        panic!(
+            "get_slice didn't find: {}, len: {}",
+            start, self.total_count
+        );
+    }
+
+    // return the biggest slice possible that starts at 'start'
+    pub fn get_slice_2d<'a, T>(&self, raw: &'a [Vec<Vec<T>>], start: usize) -> &'a [T] {
+        // This could be binary search, but we expect a small number of vectors.
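+        // (A reverse scan is correct because entries are stored in ascending
+        // start_offset order: the first entry, scanning from the end, whose
+        // start_offset <= start is the one that owns logical index 'start'.)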
+        for i in (0..self.cumulative_offsets.len()).into_iter().rev() {
+            let index = &self.cumulative_offsets[i];
+            if start >= index.start_offset {
+                let start = start - index.start_offset;
+                const DIMENSION_0: usize = 0;
+                const DIMENSION_1: usize = 1;
+                return &raw[index.index[DIMENSION_0]][index.index[DIMENSION_1]][start..];
+            }
+        }
+        panic!(
+            "get_slice didn't find: {}, len: {}",
+            start, self.total_count
+        );
+    }
+}
+
+#[derive(Debug)]
+pub struct AccountsHash {
+    pub dummy: i32,
+}
+
+impl AccountsHash {
+    pub fn calculate_hash(hashes: Vec<Vec<Hash>>) -> (Hash, usize) {
+        let cumulative_offsets = CumulativeOffsets::from_raw(&hashes);
+
+        let hash_total = cumulative_offsets.total_count;
+        let result = AccountsHash::compute_merkle_root_from_slices(
+            hash_total,
+            MERKLE_FANOUT,
+            None,
+            |start: usize| cumulative_offsets.get_slice(&hashes, start),
+        );
+        (result, hash_total)
+    }
+
+    pub fn compute_merkle_root(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
+        Self::compute_merkle_root_loop(hashes, fanout, |t| t.1)
+    }
+
+    // this function avoids an infinite recursion compiler error
+    pub fn compute_merkle_root_recurse(hashes: Vec<Hash>, fanout: usize) -> Hash {
+        Self::compute_merkle_root_loop(hashes, fanout, |t: &Hash| *t)
+    }
+
+    pub fn div_ceil(x: usize, y: usize) -> usize {
+        let mut result = x / y;
+        if x % y != 0 {
+            result += 1;
+        }
+        result
+    }
+
+    // For the first iteration, there could be more items in the tuple than just hash and lamports.
+    // Using extractor allows us to avoid an unnecessary array copy on the first iteration.
+    pub fn compute_merkle_root_loop<T, F>(hashes: Vec<T>, fanout: usize, extractor: F) -> Hash
+    where
+        F: Fn(&T) -> Hash + std::marker::Sync,
+        T: std::marker::Sync,
+    {
+        if hashes.is_empty() {
+            return Hasher::default().result();
+        }
+
+        let mut time = Measure::start("time");
+
+        let total_hashes = hashes.len();
+        let chunks = Self::div_ceil(total_hashes, fanout);
+
+        let result: Vec<_> = (0..chunks)
+            .into_par_iter()
+            .map(|i| {
+                let start_index = i * fanout;
+                let end_index = std::cmp::min(start_index + fanout, total_hashes);
+
+                let mut hasher = Hasher::default();
+                for item in hashes.iter().take(end_index).skip(start_index) {
+                    let h = extractor(&item);
+                    hasher.hash(h.as_ref());
+                }
+
+                hasher.result()
+            })
+            .collect();
+        time.stop();
+        debug!("hashing {} {}", total_hashes, time);
+
+        if result.len() == 1 {
+            result[0]
+        } else {
+            Self::compute_merkle_root_recurse(result, fanout)
+        }
+    }
+
+    // This function is designed to allow hashes to be located in multiple, perhaps multiply deep vecs.
+    // The caller provides a function to return a slice from the source data.
+    pub fn compute_merkle_root_from_slices<'a, F>(
+        total_hashes: usize,
+        fanout: usize,
+        max_levels_per_pass: Option<usize>,
+        get_hashes: F,
+    ) -> Hash
+    where
+        F: Fn(usize) -> &'a [Hash] + std::marker::Sync,
+    {
+        if total_hashes == 0 {
+            return Hasher::default().result();
+        }
+
+        let mut time = Measure::start("time");
+
+        const THREE_LEVEL_OPTIMIZATION: usize = 3; // this '3' is dependent on the code structure below where we manually unroll
+        let target = fanout.pow(THREE_LEVEL_OPTIMIZATION as u32);
+
+        // Only use the 3 level optimization if we have at least 4 levels of data.
+        // Otherwise, we'll be serializing a parallel operation.
+        let threshold = target * fanout;
+        let three_level = max_levels_per_pass.unwrap_or(usize::MAX) >= THREE_LEVEL_OPTIMIZATION
+            && total_hashes >= threshold;
+        let num_hashes_per_chunk = if three_level { target } else { fanout };
+
+        let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);
+
+        // initial fetch - could return entire slice
+        let data: &[Hash] = get_hashes(0);
+        let data_len = data.len();
+
+        let result: Vec<_> = (0..chunks)
+            .into_par_iter()
+            .map(|i| {
+                let start_index = i * num_hashes_per_chunk;
+                let end_index = std::cmp::min(start_index + num_hashes_per_chunk, total_hashes);
+
+                let mut hasher = Hasher::default();
+                let mut data_index = start_index;
+                let mut data = data;
+                let mut data_len = data_len;
+
+                if !three_level {
+                    // 1 group of fanout
+                    // The result of this loop is a single hash value from fanout input hashes.
+                    for i in start_index..end_index {
+                        if data_index >= data_len {
+                            // fetch next slice
+                            data = get_hashes(i);
+                            data_len = data.len();
+                            data_index = 0;
+                        }
+                        hasher.hash(data[data_index].as_ref());
+                        data_index += 1;
+                    }
+                } else {
+                    // hash 3 levels of fanout simultaneously.
+                    // The result of this loop is a single hash value from fanout^3 input hashes.
+                    let mut i = start_index;
+                    while i < end_index {
+                        let mut hasher_j = Hasher::default();
+                        for _j in 0..fanout {
+                            let mut hasher_k = Hasher::default();
+                            let end = std::cmp::min(end_index - i, fanout);
+                            for _k in 0..end {
+                                if data_index >= data_len {
+                                    // fetch next slice
+                                    data = get_hashes(i);
+                                    data_len = data.len();
+                                    data_index = 0;
+                                }
+                                hasher_k.hash(data[data_index].as_ref());
+                                data_index += 1;
+                                i += 1;
+                            }
+                            hasher_j.hash(hasher_k.result().as_ref());
+                            if i >= end_index {
+                                break;
+                            }
+                        }
+                        hasher.hash(hasher_j.result().as_ref());
+                    }
+                }
+
+                hasher.result()
+            })
+            .collect();
+        time.stop();
+        debug!("hashing {} {}", total_hashes, time);
+
+        if result.len() == 1 {
+            result[0]
+        } else {
+            Self::compute_merkle_root_recurse(result, fanout)
+        }
+    }
+
+    pub fn accumulate_account_hashes(mut hashes: Vec<(Pubkey, Hash)>) -> Hash {
+        Self::sort_hashes_by_pubkey(&mut hashes);
+
+        Self::compute_merkle_root_loop(hashes, MERKLE_FANOUT, |i| i.1)
+    }
+
+    pub fn sort_hashes_by_pubkey(hashes: &mut Vec<(Pubkey, Hash)>) {
+        hashes.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
+    }
+
+    fn flatten_hash_intermediate<T>(
+        data_sections_by_pubkey: Vec<Vec<Vec<T>>>,
+        stats: &mut HashStats,
+    ) -> Vec<Vec<T>>
+    where
+        T: Clone,
+    {
+        // flatten this:
+        // vec: just a level of hierarchy
+        //   vec: 1 vec per PUBKEY_BINS_FOR_CALCULATING_HASHES
+        //     vec: Intermediate data whose pubkey belongs in this division
+        // into this:
+        // vec: 1 vec per PUBKEY_BINS_FOR_CALCULATING_HASHES
+        //   vec: Intermediate data whose pubkey belongs in this division
+        let mut flatten_time = Measure::start("flatten");
+        let mut data_by_pubkey: Vec<Vec<T>> = vec![];
+        let mut raw_len = 0;
+        for mut outer in data_sections_by_pubkey {
+            let outer_len = outer.len();
+            for pubkey_index in 0..outer_len {
+                let this_len = outer[pubkey_index].len();
+                if this_len == 0 {
+                    continue;
+                }
+                raw_len += this_len;
+                let mut data = vec![];
+                std::mem::swap(&mut data, &mut outer[pubkey_index]);
+
+                if data_by_pubkey.len() <= pubkey_index {
+                    data_by_pubkey.extend(vec![vec![]; pubkey_index - data_by_pubkey.len() + 1]);
+                }
+
+                data_by_pubkey[pubkey_index].extend(data);
+            }
+        }
+        flatten_time.stop();
+        stats.flatten_time_total_us += flatten_time.as_us();
+        stats.unreduced_entries = raw_len;
+        data_by_pubkey
+    }
+
+    pub fn compare_two_hash_entries(
+        a: 
&CalculateHashIntermediate,
+        b: &CalculateHashIntermediate,
+    ) -> std::cmp::Ordering {
+        // note partial_cmp only returns None with floating point comparisons
+        match a.pubkey.partial_cmp(&b.pubkey).unwrap() {
+            std::cmp::Ordering::Equal => match b.slot.partial_cmp(&a.slot).unwrap() {
+                std::cmp::Ordering::Equal => b.version.partial_cmp(&a.version).unwrap(),
+                other => other,
+            },
+            other => other,
+        }
+    }
+
+    fn sort_hash_intermediate(
+        data_by_pubkey: Vec<Vec<CalculateHashIntermediate>>,
+        stats: &mut HashStats,
+    ) -> Vec<Vec<CalculateHashIntermediate>> {
+        // sort each PUBKEY_DIVISION vec
+        let mut sort_time = Measure::start("sort");
+        let sorted_data_by_pubkey: Vec<Vec<CalculateHashIntermediate>> = data_by_pubkey
+            .into_par_iter()
+            .map(|mut pk_range| {
+                pk_range.par_sort_unstable_by(Self::compare_two_hash_entries);
+                pk_range
+            })
+            .collect();
+        sort_time.stop();
+        stats.sort_time_total_us += sort_time.as_us();
+        sorted_data_by_pubkey
+    }
+
+    pub fn checked_cast_for_capitalization(balance: u128) -> u64 {
+        balance
+            .try_into()
+            .expect("overflow is detected while summing capitalization")
+    }
+
+    fn de_dup_and_eliminate_zeros(
+        sorted_data_by_pubkey: Vec<Vec<CalculateHashIntermediate>>,
+        stats: &mut HashStats,
+    ) -> (Vec<Vec<Vec<Hash>>>, u64) {
+        // 1. eliminate zero lamport accounts
+        // 2. pick the highest slot or (slot = and highest version) of each pubkey
+        // 3. produce this output:
+        //   vec: PUBKEY_BINS_FOR_CALCULATING_HASHES in pubkey order
+        //     vec: sorted sections from parallelism, in pubkey order
+        //       vec: individual hashes in pubkey order
+        let mut zeros = Measure::start("eliminate zeros");
+        let overall_sum = Mutex::new(0u64);
+        const CHUNKS: usize = 10;
+        let hashes: Vec<Vec<Vec<Hash>>> = sorted_data_by_pubkey
+            .into_par_iter()
+            .map(|pubkey_division| {
+                let (hashes, sum) = Self::de_dup_accounts_in_parallel(&pubkey_division, CHUNKS);
+                let mut overall = overall_sum.lock().unwrap();
+                *overall = Self::checked_cast_for_capitalization(sum as u128 + *overall as u128);
+                hashes
+            })
+            .collect();
+        zeros.stop();
+        stats.zeros_time_total_us += zeros.as_us();
+        let sum = *overall_sum.lock().unwrap();
+        (hashes, sum)
+    }
+
+    // 1. eliminate zero lamport accounts
+    // 2. pick the highest slot or (slot = and highest version) of each pubkey
+    // 3. 
produce this output:
+    //   vec: sorted sections from parallelism, in pubkey order
+    //     vec: individual hashes in pubkey order
+    fn de_dup_accounts_in_parallel(
+        pubkey_division: &[CalculateHashIntermediate],
+        chunk_count: usize,
+    ) -> (Vec<Vec<Hash>>, u64) {
+        let len = pubkey_division.len();
+        let max = if len > chunk_count {
+            std::cmp::max(chunk_count, 1)
+        } else {
+            1
+        };
+        let chunk_size = len / max;
+        let overall_sum = Mutex::new(0u64);
+        let hashes: Vec<Vec<Hash>> = (0..max)
+            .into_par_iter()
+            .map(|chunk_index| {
+                let mut start_index = chunk_index * chunk_size;
+                let mut end_index = start_index + chunk_size;
+                if chunk_index == max - 1 {
+                    end_index = len;
+                }
+
+                let is_first_slice = chunk_index == 0;
+                if !is_first_slice {
+                    // note that this causes all regions after region 0 to have 1 item that overlaps with the previous region
+                    start_index -= 1;
+                }
+
+                let (result, sum) = Self::de_dup_accounts_from_stores(
+                    chunk_index == 0,
+                    &pubkey_division[start_index..end_index],
+                );
+                let mut overall = overall_sum.lock().unwrap();
+                *overall = Self::checked_cast_for_capitalization(sum + *overall as u128);
+
+                result
+            })
+            .collect();
+
+        let sum = *overall_sum.lock().unwrap();
+        (hashes, sum)
+    }
+
+    fn de_dup_accounts_from_stores(
+        is_first_slice: bool,
+        slice: &[CalculateHashIntermediate],
+    ) -> (Vec<Hash>, u128) {
+        let len = slice.len();
+        let mut result: Vec<Hash> = Vec::with_capacity(len);
+
+        let mut sum: u128 = 0;
+        if len > 0 {
+            let mut i = 0;
+            // look_for_first_key means the first key we find in our slice may be a
+            // continuation of accounts belonging to a key that started in the last slice.
+            // so, look_for_first_key=true means we have to find the first key different than
+            // the first key we encounter in our slice. Note that if this is true,
+            // our slice begins one index prior to the 'actual' start of our logical range.
+            let mut look_for_first_key = !is_first_slice;
+            'outer: loop {
+                // at start of loop, item at 'i' is the first entry for a given pubkey - unless look_for_first
+                let now = &slice[i];
+                let last = now.pubkey;
+                if !look_for_first_key && now.lamports != ZERO_RAW_LAMPORTS_SENTINEL {
+                    // first entry for this key that starts in our slice
+                    result.push(now.hash);
+                    sum += now.lamports as u128;
+                }
+                for (k, now) in slice.iter().enumerate().skip(i + 1) {
+                    if now.pubkey != last {
+                        i = k;
+                        look_for_first_key = false;
+                        continue 'outer;
+                    } else {
+                        let prev = &slice[k - 1];
+                        assert!(
+                            !(prev.slot == now.slot
+                                && prev.version == now.version
+                                && (prev.hash != now.hash || prev.lamports != now.lamports)),
+                            "Conflicting store data. Pubkey: {}, Slot: {}, Version: {}, Hashes: {}, {}, Lamports: {}, {}", now.pubkey, now.slot, now.version, prev.hash, now.hash, prev.lamports, now.lamports
+                        );
+                    }
+                }
+
+                break; // ran out of items in our slice, so our slice is done
+            }
+        }
+        (result, sum)
+    }
+
+    fn flatten_hashes_and_hash(
+        hashes: Vec<Vec<Vec<Hash>>>,
+        fanout: usize,
+        stats: &mut HashStats,
+    ) -> Hash {
+        let mut hash_time = Measure::start("flat2");
+
+        let offsets = CumulativeOffsets::from_raw_2d(&hashes);
+
+        let get_slice = |start: usize| -> &[Hash] { offsets.get_slice_2d(&hashes, start) };
+        let hash = AccountsHash::compute_merkle_root_from_slices(
+            offsets.total_count,
+            fanout,
+            None,
+            get_slice,
+        );
+        hash_time.stop();
+        stats.hash_time_total_us += hash_time.as_us();
+        stats.hash_total = offsets.total_count;
+
+        hash
+    }
+
+    // input:
+    //  vec: unordered, created by parallelism
+    //    vec: [0..bins] - where bins are pubkey ranges
+    //      vec: [..] 
- items which fit in the containing bin, unordered within this vec
+    // so, the assumption is that the middle vec is bins sorted by pubkey
+    pub fn rest_of_hash_calculation(
+        data_sections_by_pubkey: Vec<Vec<Vec<CalculateHashIntermediate>>>,
+        mut stats: &mut HashStats,
+    ) -> (Hash, u64) {
+        let outer = Self::flatten_hash_intermediate(data_sections_by_pubkey, &mut stats);
+
+        let sorted_data_by_pubkey = Self::sort_hash_intermediate(outer, &mut stats);
+
+        let (hashes, total_lamports) =
+            Self::de_dup_and_eliminate_zeros(sorted_data_by_pubkey, &mut stats);
+
+        let hash = Self::flatten_hashes_and_hash(hashes, MERKLE_FANOUT, &mut stats);
+
+        stats.log();
+
+        (hash, total_lamports)
+    }
+}
+
+#[cfg(test)]
+pub mod tests {
+    // TODO: all the bank tests are bank specific, issue: 2194
+    use super::*;
+    use std::str::FromStr;
+
+    #[test]
+    fn test_accountsdb_div_ceil() {
+        assert_eq!(AccountsHash::div_ceil(10, 3), 4);
+        assert_eq!(AccountsHash::div_ceil(0, 1), 0);
+        assert_eq!(AccountsHash::div_ceil(0, 5), 0);
+        assert_eq!(AccountsHash::div_ceil(9, 3), 3);
+        assert_eq!(AccountsHash::div_ceil(9, 9), 1);
+    }
+
+    #[test]
+    #[should_panic(expected = "attempt to divide by zero")]
+    fn test_accountsdb_div_ceil_fail() {
+        assert_eq!(AccountsHash::div_ceil(10, 0), 0);
+    }
+
+    #[test]
+    fn test_accountsdb_rest_of_hash_calculation() {
+        solana_logger::setup();
+
+        let mut account_maps: Vec<CalculateHashIntermediate> = Vec::new();
+
+        let key = Pubkey::new(&[11u8; 32]);
+        let hash = Hash::new(&[1u8; 32]);
+        let val = CalculateHashIntermediate::new(0, hash, 88, Slot::default(), key);
+        account_maps.push(val);
+
+        // 2nd key - zero lamports, so will be removed
+        let key = Pubkey::new(&[12u8; 32]);
+        let hash = Hash::new(&[2u8; 32]);
+        let val = CalculateHashIntermediate::new(
+            0,
+            hash,
+            ZERO_RAW_LAMPORTS_SENTINEL,
+            Slot::default(),
+            key,
+        );
+        account_maps.push(val);
+
+        let result = AccountsHash::rest_of_hash_calculation(
+            vec![vec![account_maps.clone()]],
+            &mut HashStats::default(),
+        );
+        let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
+        assert_eq!((result.0, result.1), (expected_hash, 88));
+
+        // 3rd key - with pubkey value before 1st key so it will be sorted first
+        let key = Pubkey::new(&[10u8; 32]);
+        let hash = Hash::new(&[2u8; 32]);
+        let val = CalculateHashIntermediate::new(0, hash, 20, Slot::default(), key);
+        account_maps.push(val);
+
+        let result = AccountsHash::rest_of_hash_calculation(
+            vec![vec![account_maps.clone()]],
+            &mut HashStats::default(),
+        );
+        let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap();
+        assert_eq!((result.0, result.1), (expected_hash, 108));
+
+        // 3rd key - with later slot
+        let key = Pubkey::new(&[10u8; 32]);
+        let hash = Hash::new(&[99u8; 32]);
+        let val = CalculateHashIntermediate::new(0, hash, 30, Slot::default() + 1, key);
+        account_maps.push(val);
+
+        let result = AccountsHash::rest_of_hash_calculation(
+            vec![vec![account_maps]],
+            &mut HashStats::default(),
+        );
+        let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap();
+        assert_eq!((result.0, result.1), (expected_hash, 118));
+    }
+
+    #[test]
+    fn test_accountsdb_de_dup_accounts_zero_chunks() {
+        let (hashes, lamports) =
+            AccountsHash::de_dup_accounts_in_parallel(&[CalculateHashIntermediate::default()], 0);
+        assert_eq!(vec![vec![Hash::default()]], hashes);
+        assert_eq!(lamports, 0);
+    }
+
+    #[test]
+    fn test_accountsdb_de_dup_accounts_empty() {
+        solana_logger::setup();
+
+        let (hashes, lamports) = AccountsHash::de_dup_and_eliminate_zeros(
+            vec![vec![], 
vec![]], + &mut HashStats::default(), + ); + assert_eq!( + vec![vec![Hash::default(); 0], vec![]], + hashes.into_iter().flatten().collect::>() + ); + assert_eq!(lamports, 0); + + let (hashes, lamports) = + AccountsHash::de_dup_and_eliminate_zeros(vec![], &mut HashStats::default()); + let empty: Vec>> = Vec::default(); + assert_eq!(empty, hashes); + assert_eq!(lamports, 0); + + let (hashes, lamports) = AccountsHash::de_dup_accounts_in_parallel(&[], 1); + assert_eq!( + vec![Hash::default(); 0], + hashes.into_iter().flatten().collect::>() + ); + assert_eq!(lamports, 0); + + let (hashes, lamports) = AccountsHash::de_dup_accounts_in_parallel(&[], 2); + assert_eq!( + vec![Hash::default(); 0], + hashes.into_iter().flatten().collect::>() + ); + assert_eq!(lamports, 0); + } + + #[test] + fn test_accountsdb_de_dup_accounts_from_stores() { + solana_logger::setup(); + + let key_a = Pubkey::new(&[1u8; 32]); + let key_b = Pubkey::new(&[2u8; 32]); + let key_c = Pubkey::new(&[3u8; 32]); + const COUNT: usize = 6; + const VERSION: u64 = 0; + let hashes: Vec<_> = (0..COUNT) + .into_iter() + .map(|i| Hash::new(&[i as u8; 32])) + .collect(); + // create this vector + // abbbcc + let keys = [key_a, key_b, key_b, key_b, key_c, key_c]; + + let accounts: Vec<_> = hashes + .into_iter() + .zip(keys.iter()) + .enumerate() + .map(|(i, (hash, key))| { + CalculateHashIntermediate::new( + VERSION, + hash, + (i + 1) as u64, + u64::MAX - i as u64, + *key, + ) + }) + .collect(); + + type ExpectedType = (String, bool, u64, String); + let expected:Vec = vec![ + // ("key/lamports key2/lamports ...", + // is_first_slice + // result lamports + // result hashes) + // "a5" = key_a, 5 lamports + ("a1", false, 0, "[]"), + ("a1b2", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), + ("a1b2b3", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), + ("a1b2b3b4", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), + ("a1b2b3b4c5", false, 7, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), + ("b2", false, 0, "[]"), + ("b2b3", false, 0, "[]"), + ("b2b3b4", false, 0, "[]"), + ("b2b3b4c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), + ("b3", false, 0, "[]"), + ("b3b4", false, 0, "[]"), + ("b3b4c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), + ("b4", false, 0, "[]"), + ("b4c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), + ("c5", false, 0, "[]"), + ("a1", true, 1, "[11111111111111111111111111111111]"), + ("a1b2", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), + ("a1b2b3", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), + ("a1b2b3b4", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), + ("a1b2b3b4c5", true, 8, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), + ("b2", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), + ("b2b3", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), + ("b2b3b4", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"), + ("b2b3b4c5", true, 7, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), + ("b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"), + ("b3b4", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"), + ("b3b4c5", true, 8, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR, 
GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), + ("b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"), + ("b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), + ("c5", true, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"), + ].into_iter().map(|item| { + let result: ExpectedType = ( + item.0.to_string(), + item.1, + item.2, + item.3.to_string(), + ); + result + }).collect(); + + let mut expected_index = 0; + for first_slice in 0..2 { + for start in 0..COUNT { + for end in start + 1..COUNT { + let is_first_slice = first_slice == 1; + let accounts = accounts.clone(); + let slice = &accounts[start..end]; + + let result = AccountsHash::de_dup_accounts_from_stores(is_first_slice, slice); + let (hashes2, lamports2) = AccountsHash::de_dup_accounts_in_parallel(slice, 1); + let (hashes3, lamports3) = AccountsHash::de_dup_accounts_in_parallel(slice, 2); + let (hashes4, lamports4) = AccountsHash::de_dup_and_eliminate_zeros( + vec![slice.to_vec()], + &mut HashStats::default(), + ); + let (hashes5, lamports5) = AccountsHash::de_dup_and_eliminate_zeros( + vec![slice.to_vec(), slice.to_vec()], + &mut HashStats::default(), + ); + let (hashes6, lamports6) = AccountsHash::de_dup_and_eliminate_zeros( + vec![vec![], slice.to_vec()], + &mut HashStats::default(), + ); + + assert_eq!( + hashes2.iter().flatten().collect::>(), + hashes3.iter().flatten().collect::>() + ); + let expected2 = hashes2.clone().into_iter().flatten().collect::>(); + assert_eq!( + expected2, + hashes4 + .into_iter() + .flatten() + .into_iter() + .flatten() + .collect::>() + ); + assert_eq!( + vec![expected2.clone(), expected2.clone()], + hashes5.into_iter().flatten().collect::>() + ); + assert_eq!( + vec![vec![], expected2.clone()], + hashes6.into_iter().flatten().collect::>() + ); + assert_eq!(lamports2, lamports3); + assert_eq!(lamports2, lamports4); + assert_eq!(lamports2 * 2, lamports5); + assert_eq!(lamports2, lamports6); + + let hashes: Vec<_> = hashes2.into_iter().flatten().collect(); + + let human_readable = slice + .iter() + .map(|v| { + let mut s = (if v.pubkey == key_a { + "a" + } else if v.pubkey == key_b { + "b" + } else { + "c" + }) + .to_string(); + + s.push_str(&v.lamports.to_string()); + s + }) + .collect::(); + + let hash_result_as_string = format!("{:?}", result.0); + + let packaged_result: ExpectedType = ( + human_readable, + is_first_slice, + result.1 as u64, + hash_result_as_string, + ); + + if is_first_slice { + // the parallel version always starts with 'first slice' + assert_eq!( + result.0, hashes, + "description: {:?}, expected index: {}", + packaged_result, expected_index + ); + assert_eq!( + result.1 as u64, lamports2, + "description: {:?}, expected index: {}", + packaged_result, expected_index + ); + } + + assert_eq!(expected[expected_index], packaged_result); + + // for generating expected results + // error!("{:?},", packaged_result); + expected_index += 1; + } + } + } + + for first_slice in 0..2 { + let result = AccountsHash::de_dup_accounts_from_stores(first_slice == 1, &[]); + assert_eq!((vec![Hash::default(); 0], 0), result); + } + } + + #[test] + fn test_accountsdb_flatten_hashes_and_hash() { + solana_logger::setup(); + const COUNT: usize = 4; + let hashes: Vec<_> = (0..COUNT) + .into_iter() + .map(|i| Hash::new(&[(i) as u8; 32])) + .collect(); + let expected = + AccountsHash::compute_merkle_root_loop(hashes.clone(), MERKLE_FANOUT, |i| *i); + + assert_eq!( + AccountsHash::flatten_hashes_and_hash( + vec![vec![hashes.clone()]], + 
MERKLE_FANOUT, + &mut HashStats::default() + ), + expected, + ); + for in_first in 1..COUNT - 1 { + assert_eq!( + AccountsHash::flatten_hashes_and_hash( + vec![vec![ + hashes.clone()[0..in_first].to_vec(), + hashes.clone()[in_first..COUNT].to_vec() + ]], + MERKLE_FANOUT, + &mut HashStats::default() + ), + expected + ); + } + } + + #[test] + fn test_sort_hash_intermediate() { + solana_logger::setup(); + let mut stats = HashStats::default(); + let key = Pubkey::new_unique(); + let hash = Hash::new_unique(); + let val = CalculateHashIntermediate::new(1, hash, 1, 1, key); + + // slot same, version < + let hash2 = Hash::new_unique(); + let val2 = CalculateHashIntermediate::new(0, hash2, 4, 1, key); + let val3 = CalculateHashIntermediate::new(3, hash2, 4, 1, key); + let val4 = CalculateHashIntermediate::new(4, hash2, 4, 1, key); + + let src = vec![vec![val2.clone()], vec![val.clone()]]; + let result = AccountsHash::sort_hash_intermediate(src.clone(), &mut stats); + assert_eq!(result, src); + + let src = vec![ + vec![val2.clone(), val.clone()], + vec![val3.clone(), val4.clone()], + ]; + let sorted = vec![vec![val, val2], vec![val4, val3]]; + let result = AccountsHash::sort_hash_intermediate(src, &mut stats); + assert_eq!(result, sorted); + + let src = vec![vec![]]; + let result = AccountsHash::sort_hash_intermediate(src.clone(), &mut stats); + assert_eq!(result, src); + + let src = vec![]; + let result = AccountsHash::sort_hash_intermediate(src.clone(), &mut stats); + assert_eq!(result, src); + } + + #[test] + fn test_accountsdb_compare_two_hash_entries() { + solana_logger::setup(); + let key = Pubkey::new_unique(); + let hash = Hash::new_unique(); + let val = CalculateHashIntermediate::new(1, hash, 1, 1, key); + + // slot same, version < + let hash2 = Hash::new_unique(); + let val2 = CalculateHashIntermediate::new(0, hash2, 4, 1, key); + assert_eq!( + std::cmp::Ordering::Less, + AccountsHash::compare_two_hash_entries(&val, &val2) + ); + + let list = vec![val.clone(), val2.clone()]; + let mut list_bkup = list.clone(); + list_bkup.sort_by(AccountsHash::compare_two_hash_entries); + let list = AccountsHash::sort_hash_intermediate(vec![list], &mut HashStats::default()); + assert_eq!(list, vec![list_bkup]); + + let list = vec![val2, val.clone()]; // reverse args + let mut list_bkup = list.clone(); + list_bkup.sort_by(AccountsHash::compare_two_hash_entries); + let list = AccountsHash::sort_hash_intermediate(vec![list], &mut HashStats::default()); + assert_eq!(list, vec![list_bkup]); + + // slot same, vers = + let hash3 = Hash::new_unique(); + let val3 = CalculateHashIntermediate::new(1, hash3, 2, 1, key); + assert_eq!( + std::cmp::Ordering::Equal, + AccountsHash::compare_two_hash_entries(&val, &val3) + ); + + // slot same, vers > + let hash4 = Hash::new_unique(); + let val4 = CalculateHashIntermediate::new(2, hash4, 6, 1, key); + assert_eq!( + std::cmp::Ordering::Greater, + AccountsHash::compare_two_hash_entries(&val, &val4) + ); + + // slot >, version < + let hash5 = Hash::new_unique(); + let val5 = CalculateHashIntermediate::new(0, hash5, 8, 2, key); + assert_eq!( + std::cmp::Ordering::Greater, + AccountsHash::compare_two_hash_entries(&val, &val5) + ); + } + + #[test] + fn test_accountsdb_remove_zero_balance_accounts() { + solana_logger::setup(); + + let key = Pubkey::new_unique(); + let hash = Hash::new_unique(); + let mut account_maps: Vec = Vec::new(); + let val = CalculateHashIntermediate::new(0, hash, 1, Slot::default(), key); + account_maps.push(val.clone()); + + let result = 
AccountsHash::de_dup_accounts_from_stores(true, &account_maps[..]); + assert_eq!(result, (vec![val.hash], val.lamports as u128)); + + // zero original lamports, higher version + let val = CalculateHashIntermediate::new( + 1, + hash, + ZERO_RAW_LAMPORTS_SENTINEL, + Slot::default(), + key, + ); + account_maps.insert(0, val); // has to be before other entry since sort order matters + + let result = AccountsHash::de_dup_accounts_from_stores(true, &account_maps[..]); + assert_eq!(result, (vec![], 0)); + } + + #[test] + fn test_accountsdb_cumulative_offsets1_d() { + let input = vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]; + let cumulative = CumulativeOffsets::from_raw(&input); + + let src: Vec<_> = input.clone().into_iter().flatten().collect(); + let len = src.len(); + assert_eq!(cumulative.total_count, len); + assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors + + const DIMENSION: usize = 0; + assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 0); + assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 2); + + assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0); + assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2); + + for start in 0..len { + let slice = cumulative.get_slice(&input, start); + let len = slice.len(); + assert!(len > 0); + assert_eq!(&src[start..(start + len)], slice); + } + + let input = vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]; + let cumulative = CumulativeOffsets::from_raw(&input); + + let src: Vec<_> = input.clone().into_iter().flatten().collect(); + let len = src.len(); + assert_eq!(cumulative.total_count, len); + assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors + + assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 1); + assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 3); + + assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0); + assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2); + + for start in 0..len { + let slice = cumulative.get_slice(&input, start); + let len = slice.len(); + assert!(len > 0); + assert_eq!(&src[start..(start + len)], slice); + } + + let input: Vec> = vec![vec![]]; + let cumulative = CumulativeOffsets::from_raw(&input); + + let src: Vec<_> = input.into_iter().flatten().collect(); + let len = src.len(); + assert_eq!(cumulative.total_count, len); + assert_eq!(cumulative.cumulative_offsets.len(), 0); // 2 non-empty vectors + } + + #[test] + fn test_accountsdb_cumulative_offsets2_d() { + let input = vec![vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]]; + let cumulative = CumulativeOffsets::from_raw_2d(&input); + + let src: Vec<_> = input + .clone() + .into_iter() + .flatten() + .into_iter() + .flatten() + .collect(); + let len = src.len(); + assert_eq!(cumulative.total_count, len); + assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors + + const DIMENSION_0: usize = 0; + const DIMENSION_1: usize = 1; + assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0); + assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0); + assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0); + assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 2); + + assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0); + assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2); + + for start in 0..len { + let slice = cumulative.get_slice_2d(&input, start); + let len = slice.len(); + assert!(len > 0); + assert_eq!(&src[start..(start + len)], slice); + } + + let 
input = vec![vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]]; + let cumulative = CumulativeOffsets::from_raw_2d(&input); + + let src: Vec<_> = input + .clone() + .into_iter() + .flatten() + .into_iter() + .flatten() + .collect(); + let len = src.len(); + assert_eq!(cumulative.total_count, len); + assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors + + assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0); + assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 1); + assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0); + assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 3); + + assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0); + assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2); + + for start in 0..len { + let slice = cumulative.get_slice_2d(&input, start); + let len = slice.len(); + assert!(len > 0); + assert_eq!(&src[start..(start + len)], slice); + } + + let input: Vec>> = vec![vec![]]; + let cumulative = CumulativeOffsets::from_raw_2d(&input); + + let src: Vec<_> = input.into_iter().flatten().collect(); + let len = src.len(); + assert_eq!(cumulative.total_count, len); + assert_eq!(cumulative.cumulative_offsets.len(), 0); // 2 non-empty vectors + + let input = vec![ + vec![vec![0, 1]], + vec![vec![]], + vec![vec![], vec![2, 3, 4], vec![]], + ]; + let cumulative = CumulativeOffsets::from_raw_2d(&input); + + let src: Vec<_> = input + .clone() + .into_iter() + .flatten() + .into_iter() + .flatten() + .collect(); + let len = src.len(); + assert_eq!(cumulative.total_count, len); + assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors + + assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0); + assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0); + assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 2); + assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 1); + + assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0); + assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2); + + for start in 0..len { + let slice = cumulative.get_slice_2d(&input, start); + let len = slice.len(); + assert!(len > 0); + assert_eq!(&src[start..(start + len)], slice); + } + } + + #[test] + fn test_accountsdb_flatten_hash_intermediate() { + solana_logger::setup(); + let test = vec![vec![vec![CalculateHashIntermediate::new( + 1, + Hash::new_unique(), + 2, + 3, + Pubkey::new_unique(), + )]]]; + let mut stats = HashStats::default(); + let result = AccountsHash::flatten_hash_intermediate(test.clone(), &mut stats); + assert_eq!(result, test[0]); + assert_eq!(stats.unreduced_entries, 1); + + let mut stats = HashStats::default(); + let result = AccountsHash::flatten_hash_intermediate( + vec![vec![vec![CalculateHashIntermediate::default(); 0]]], + &mut stats, + ); + assert_eq!(result.iter().flatten().count(), 0); + assert_eq!(stats.unreduced_entries, 0); + + let test = vec![ + vec![vec![ + CalculateHashIntermediate::new(1, Hash::new_unique(), 2, 3, Pubkey::new_unique()), + CalculateHashIntermediate::new(8, Hash::new_unique(), 9, 10, Pubkey::new_unique()), + ]], + vec![vec![CalculateHashIntermediate::new( + 4, + Hash::new_unique(), + 5, + 6, + Pubkey::new_unique(), + )]], + ]; + let mut stats = HashStats::default(); + let result = AccountsHash::flatten_hash_intermediate(test.clone(), &mut stats); + let expected = test + .into_iter() + .flatten() + .into_iter() + .flatten() + .collect::>(); + 
assert_eq!(result.into_iter().flatten().collect::>(), expected); + assert_eq!(stats.unreduced_entries, expected.len()); + } + + #[test] + fn test_accountsdb_flatten_hash_intermediate2() { + solana_logger::setup(); + // data is ordered: + // vec: just a level of hierarchy + // vec: 1 vec per PUBKEY_BINS_FOR_CALCULATING_HASHES + // vec: Intermediate data whose pubkey belongs in this division + let binned_data = vec![ + vec![vec![1, 2], vec![3, 4], vec![], vec![5]], + vec![vec![], vec![11, 12]], + ]; + let mut combined: Vec>> = vec![vec![]]; + binned_data.iter().enumerate().for_each(|(bin, v)| { + v.iter() + .enumerate() + .for_each(|(dimension0, v): (usize, &Vec)| { + while combined.len() <= dimension0 { + combined.push(vec![]); + } + let vec: &mut Vec> = &mut combined[dimension0]; + while vec.len() <= bin { + vec.push(vec![]); + } + vec[bin].extend(v.clone()); + }); + }); + + let mut stats = HashStats::default(); + let result = AccountsHash::flatten_hash_intermediate(combined, &mut stats); + assert_eq!( + result, + binned_data + .clone() + .into_iter() + .map(|x| x.into_iter().flatten().collect::>()) + .collect::>() + ); + assert_eq!( + stats.unreduced_entries, + binned_data + .into_iter() + .flatten() + .into_iter() + .flatten() + .count() + ); + + let src = vec![vec![vec![0]]]; + let result = AccountsHash::flatten_hash_intermediate(src, &mut stats); + assert_eq!(result, vec![vec![0]]); + + let src = vec![vec![vec![0], vec![1]]]; + let result = AccountsHash::flatten_hash_intermediate(src, &mut stats); + assert_eq!(result, vec![vec![0], vec![1]]); + + let src = vec![vec![vec![]], vec![vec![], vec![1]]]; + let result = AccountsHash::flatten_hash_intermediate(src, &mut stats); + assert_eq!(result, vec![vec![], vec![1]]); + + let src: Vec>> = vec![vec![vec![], vec![]]]; + let result = AccountsHash::flatten_hash_intermediate(src, &mut stats); + let expected: Vec> = vec![]; + assert_eq!(result, expected); + + let src: Vec>> = vec![vec![vec![], vec![]], vec![vec![], vec![]]]; + let result = AccountsHash::flatten_hash_intermediate(src, &mut stats); + assert_eq!(result, expected); + + let src: Vec>> = vec![vec![vec![], vec![]], vec![vec![]]]; + let result = AccountsHash::flatten_hash_intermediate(src, &mut stats); + assert_eq!(result, expected); + + let src: Vec>> = vec![vec![], vec![vec![]]]; + let result = AccountsHash::flatten_hash_intermediate(src, &mut stats); + let expected: Vec> = vec![]; + assert_eq!(result, expected); + } + + fn test_hashing_larger(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash { + let result = AccountsHash::compute_merkle_root(hashes.clone(), fanout); + let reduced: Vec<_> = hashes.iter().map(|x| x.1).collect(); + let result2 = test_hashing(reduced, fanout); + assert_eq!(result, result2, "len: {}", hashes.len()); + result + } + + fn test_hashing(hashes: Vec, fanout: usize) -> Hash { + let temp: Vec<_> = hashes.iter().map(|h| (Pubkey::default(), *h)).collect(); + let result = AccountsHash::compute_merkle_root(temp, fanout); + let reduced: Vec<_> = hashes.clone(); + let result2 = + AccountsHash::compute_merkle_root_from_slices(hashes.len(), fanout, None, |start| { + &reduced[start..] + }); + assert_eq!(result, result2, "len: {}", hashes.len()); + + let result2 = + AccountsHash::compute_merkle_root_from_slices(hashes.len(), fanout, Some(1), |start| { + &reduced[start..] 
+ }); + assert_eq!(result, result2, "len: {}", hashes.len()); + + let reduced2: Vec<_> = hashes.iter().map(|x| vec![*x]).collect(); + let result2 = AccountsHash::flatten_hashes_and_hash( + vec![reduced2], + fanout, + &mut HashStats::default(), + ); + assert_eq!(result, result2, "len: {}", hashes.len()); + + let max = std::cmp::min(reduced.len(), fanout * 2); + for left in 0..max { + for right in left + 1..max { + let src = vec![ + vec![reduced[0..left].to_vec(), reduced[left..right].to_vec()], + vec![reduced[right..].to_vec()], + ]; + let result2 = + AccountsHash::flatten_hashes_and_hash(src, fanout, &mut HashStats::default()); + assert_eq!(result, result2); + } + } + result + } + + #[test] + fn test_accountsdb_compute_merkle_root_large() { + solana_logger::setup(); + + // handle fanout^x -1, +0, +1 for a few 'x's + const FANOUT: usize = 3; + let mut hash_counts: Vec<_> = (1..6) + .map(|x| { + let mark = FANOUT.pow(x); + vec![mark - 1, mark, mark + 1] + }) + .flatten() + .collect(); + + // saturate the test space for threshold to threshold + target + // this hits right before we use the 3 deep optimization and all the way through all possible partial last chunks + let target = FANOUT.pow(3); + let threshold = target * FANOUT; + hash_counts.extend(threshold - 1..=threshold + target); + + for hash_count in hash_counts { + let hashes: Vec<_> = (0..hash_count) + .into_iter() + .map(|_| Hash::new_unique()) + .collect(); + + test_hashing(hashes, FANOUT); + } + } + + #[test] + fn test_accountsdb_compute_merkle_root() { + solana_logger::setup(); + + let expected_results = vec![ + (0, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0), + (0, 1, "8unXKJYTxrR423HgQxbDmx29mFri1QNrzVKKDxEfc6bj", 0), + (0, 2, "6QfkevXLLqbfAaR1kVjvMLFtEXvNUVrpmkwXqgsYtCFW", 1), + (0, 3, "G3FrJd9JrXcMiqChTSfvEdBL2sCPny3ebiUy9Xxbn7a2", 3), + (0, 4, "G3sZXHhwoCFuNyWy7Efffr47RBW33ibEp7b2hqNDmXdu", 6), + (0, 5, "78atJJYpokAPKMJwHxUW8SBDvPkkSpTBV7GiB27HwosJ", 10), + (0, 6, "7c9SM2BmCRVVXdrEdKcMK91MviPqXqQMd8QAb77tgLEy", 15), + (0, 7, "3hsmnZPhf22UvBLiZ4dVa21Qsdh65CCrtYXsb8MxoVAa", 21), + (0, 8, "5bwXUiC6RCRhb8fqvjvUXT6waU25str3UXA3a6Aq1jux", 28), + (0, 9, "3NNtQKH6PaYpCnFBtyi2icK9eYX3YM5pqA3SKaXtUNzu", 36), + (1, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0), + (1, 1, "4GWVCsnEu1iRyxjAB3F7J7C4MMvcoxFWtP9ihvwvDgxY", 0), + (1, 2, "8ML8Te6Uw2mipFr2v9sMZDcziXzhVqJo2qeMJohg1CJx", 1), + (1, 3, "AMEuC3AgqAeRBGBhSfTmuMdfbAiXJnGmKv99kHmcAE1H", 3), + (1, 4, "HEnDuJLHpsQfrApimGrovTqPEF6Vkrx2dKFr3BDtYzWx", 6), + (1, 5, "6rH69iP2yM1o565noZN1EqjySW4PhYUskz3c5tXePUfV", 10), + (1, 6, "7qEQMEXdfSPjbZ3q4cuuZwebDMvTvuaQ3dBiHoDUKo9a", 15), + (1, 7, "GDJz7LSKYjqqz6ujCaaQRJRmQ7TLNCwYJhdT84qT4qwk", 21), + (1, 8, "HT9krPLVTo3rr5WZQBQFrbqWs8SbYScXfnt8EVuobboM", 28), + (1, 9, "8y2pMgqMdRsvqw6BQXm6wtz3qxGPss72i6H6gVpPyeda", 36), + ]; + + let mut expected_index = 0; + let start = 0; + let default_fanout = 2; + // test 0..3 recursions (at fanout = 2) and 1 item remainder. The internals have 1 special case first loop and subsequent loops are the same types. 
+ let iterations = default_fanout * default_fanout * default_fanout + 2; + for pass in 0..2 { + let fanout = if pass == 0 { + default_fanout + } else { + MERKLE_FANOUT + }; + for count in start..iterations { + let mut input: Vec<_> = (0..count) + .map(|i| { + let key = Pubkey::new(&[(pass * iterations + count) as u8; 32]); + let hash = Hash::new(&[(pass * iterations + count + i + 1) as u8; 32]); + (key, hash) + }) + .collect(); + + let result = if pass == 0 { + test_hashing_larger(input.clone(), fanout) + } else { + // this sorts inside + let early_result = AccountsHash::accumulate_account_hashes( + input.iter().map(|i| (i.0, i.1)).collect::>(), + ); + AccountsHash::sort_hashes_by_pubkey(&mut input); + let result = AccountsHash::compute_merkle_root(input.clone(), fanout); + assert_eq!(early_result, result); + result + }; + // compare against captured, expected results for hash (and lamports) + assert_eq!( + ( + pass, + count, + &*(result.to_string()), + expected_results[expected_index].3 + ), // we no longer calculate lamports + expected_results[expected_index] + ); + expected_index += 1; + } + } + } + + #[test] + #[should_panic(expected = "overflow is detected while summing capitalization")] + fn test_accountsdb_lamport_overflow() { + solana_logger::setup(); + + let offset = 2; + let input = vec![ + CalculateHashIntermediate::new( + 0, + Hash::new_unique(), + u64::MAX - offset, + 0, + Pubkey::new_unique(), + ), + CalculateHashIntermediate::new( + 0, + Hash::new_unique(), + offset + 1, + 0, + Pubkey::new_unique(), + ), + ]; + AccountsHash::de_dup_accounts_in_parallel(&input, 1); + } + + #[test] + #[should_panic(expected = "overflow is detected while summing capitalization")] + fn test_accountsdb_lamport_overflow2() { + solana_logger::setup(); + + let offset = 2; + let input = vec![ + vec![CalculateHashIntermediate::new( + 0, + Hash::new_unique(), + u64::MAX - offset, + 0, + Pubkey::new_unique(), + )], + vec![CalculateHashIntermediate::new( + 0, + Hash::new_unique(), + offset + 1, + 0, + Pubkey::new_unique(), + )], + ]; + AccountsHash::de_dup_and_eliminate_zeros(input, &mut HashStats::default()); + } +} diff --git a/runtime/src/lib.rs b/runtime/src/lib.rs index aa31d3b851..9ee87d6ce2 100644 --- a/runtime/src/lib.rs +++ b/runtime/src/lib.rs @@ -3,6 +3,7 @@ pub mod accounts; pub mod accounts_background_service; pub mod accounts_cache; pub mod accounts_db; +pub mod accounts_hash; pub mod accounts_index; pub mod append_vec; pub mod bank;
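
A note for reviewers on the public surface this move exposes: `compute_merkle_root`, `accumulate_account_hashes`, and `sort_hashes_by_pubkey` are now reachable through `solana_runtime::accounts_hash`. Below is a minimal sketch of hypothetical caller code (not part of this patch) showing the equivalence the tests above rely on: `accumulate_account_hashes` is simply sort-by-pubkey followed by `compute_merkle_root` at `MERKLE_FANOUT`.

```rust
use solana_runtime::accounts_hash::{AccountsHash, MERKLE_FANOUT};
use solana_sdk::{hash::Hash, pubkey::Pubkey};

fn main() {
    // Unsorted (pubkey, hash) pairs, as an account scan would produce them.
    let mut hashes: Vec<(Pubkey, Hash)> = (0..100)
        .map(|_| (Pubkey::new_unique(), Hash::new_unique()))
        .collect();

    // accumulate_account_hashes sorts by pubkey internally ...
    let root_a = AccountsHash::accumulate_account_hashes(hashes.clone());

    // ... so sorting first and hashing directly must produce the same root.
    AccountsHash::sort_hashes_by_pubkey(&mut hashes);
    let root_b = AccountsHash::compute_merkle_root(hashes, MERKLE_FANOUT);
    assert_eq!(root_a, root_b);
}
```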
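The chunking arithmetic in `compute_merkle_root_from_slices` can be checked by hand. This standalone sketch restates it; the `div_ceil` definition and the fanout/threshold constants match the patch, while the sample hash counts are illustrative only.

```rust
fn div_ceil(x: usize, y: usize) -> usize {
    // same definition as AccountsHash::div_ceil above
    let mut result = x / y;
    if x % y != 0 {
        result += 1;
    }
    result
}

fn main() {
    const FANOUT: usize = 16;
    // The 3-level unroll hashes fanout^3 leaves per chunk, and only kicks in
    // once at least fanout^4 hashes (4 full tree levels) are present.
    let target = FANOUT.pow(3); // 4096 leaves per chunk
    let threshold = target * FANOUT; // 65536 leaves
    assert_eq!(div_ceil(threshold, target), FANOUT); // exactly fanout chunks at the threshold

    // Below the threshold, each chunk is a single fanout group instead:
    assert_eq!(div_ceil(100, FANOUT), 7); // 6 full groups + 1 partial group
}
```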
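The `CumulativeOffsets` contract that callers of `get_slice` depend on is narrow: a logical index maps to the inner vector that owns it, and the returned slice runs only to the end of that inner vector, never across vector boundaries. A small sketch of that contract, mirroring the values used in `test_accountsdb_cumulative_offsets1_d` above:

```rust
use solana_runtime::accounts_hash::CumulativeOffsets;

fn main() {
    let input = vec![vec![0, 1], vec![], vec![2, 3, 4]];
    let cumulative = CumulativeOffsets::from_raw(&input);

    // Logical index 1 is still inside the first inner vec: the slice stops
    // at that vec's end rather than spanning into the next one.
    assert_eq!(cumulative.get_slice(&input, 1), &[1]);

    // Logical index 2 starts the next non-empty vec; the empty vec in the
    // middle never receives an offset entry.
    assert_eq!(cumulative.get_slice(&input, 2), &[2, 3, 4]);
}
```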
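On the `ZERO_RAW_LAMPORTS_SENTINEL` convention: accounts whose raw lamports are zero enter the intermediate stream carrying `u64::MAX` as a marker so that de-dup can drop them entirely, as `test_accountsdb_remove_zero_balance_accounts` above exercises. A test-style sketch of the same behavior; it would have to live inside the `accounts_hash::tests` module, since `de_dup_accounts_from_stores` is private.

```rust
#[test]
fn test_sentinel_drops_zero_lamport_account() {
    let dead = CalculateHashIntermediate::new(
        0,
        Hash::new_unique(),
        ZERO_RAW_LAMPORTS_SENTINEL, // marks a zero-lamport account
        Slot::default(),
        Pubkey::new_unique(),
    );
    // The entry contributes neither a hash nor lamports to the result.
    let (hashes, lamports) = AccountsHash::de_dup_accounts_from_stores(true, &[dead]);
    assert!(hashes.is_empty());
    assert_eq!(lamports, 0);
}
```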
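Finally, on `compare_two_hash_entries`: the ordering is pubkey ascending, then slot descending, then version descending, so after `sort_hash_intermediate` the first entry seen for each pubkey is the most recent one, and that is the entry de-dup keeps. Another illustrative test-style sketch, with made-up slot and lamport values:

```rust
#[test]
fn test_newest_entry_sorts_first() {
    let key = Pubkey::new_unique();
    let newer = CalculateHashIntermediate::new(0, Hash::new_unique(), 10, 2, key);
    let older = CalculateHashIntermediate::new(0, Hash::new_unique(), 5, 1, key);
    // Same pubkey: the entry with the higher slot compares Less,
    // i.e. it sorts to the front and wins de-dup.
    assert_eq!(
        std::cmp::Ordering::Less,
        AccountsHash::compare_two_hash_entries(&newer, &older)
    );
}
```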