use {
    crate::{
        accounts_db::{AccountStorageEntry, IncludeSlotInHash, PUBKEY_BINS_FOR_CALCULATING_HASHES},
        active_stats::{ActiveStatItem, ActiveStats},
        ancestors::Ancestors,
        pubkey_bins::PubkeyBinCalculator24,
        rent_collector::RentCollector,
    },
    bytemuck::{Pod, Zeroable},
    log::*,
    memmap2::MmapMut,
    rayon::prelude::*,
    solana_measure::{measure::Measure, measure_us},
    solana_sdk::{
        hash::{Hash, Hasher},
        pubkey::Pubkey,
        slot_history::Slot,
        sysvar::epoch_schedule::EpochSchedule,
    },
    std::{
        borrow::Borrow,
        convert::TryInto,
        io::{Seek, SeekFrom, Write},
        path::PathBuf,
        sync::{
            atomic::{AtomicU64, AtomicUsize, Ordering},
            Arc,
        },
    },
    tempfile::tempfile_in,
};

pub const MERKLE_FANOUT: usize = 16;

/// 1 file containing account hashes sorted by pubkey, mapped into memory
struct MmapAccountHashesFile {
    /// raw slice of `Hash` values. Can be a larger slice than `count`
    mmap: MmapMut,
    /// # of valid Hash entries in `mmap`
    count: usize,
}

impl MmapAccountHashesFile {
    /// return a slice of account hashes starting at 'index'
    fn read(&self, index: usize) -> &[Hash] {
        let start = std::mem::size_of::<Hash>() * index;
        let end = std::mem::size_of::<Hash>() * self.count;
        let bytes = &self.mmap[start..end];
        bytemuck::cast_slice(bytes)
    }

    /// write a hash to the end of mmap file.
    fn write(&mut self, hash: &Hash) {
        let start = self.count * std::mem::size_of::<Hash>();
        let end = start + std::mem::size_of::<Hash>();
        self.mmap[start..end].copy_from_slice(hash.as_ref());
        self.count += 1;
    }
}
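
// Illustrative reader/writer round trip (not part of the implementation): after two
// write() calls, read(0) returns both hashes and read(1) returns only the second,
// since read() returns the tail slice [index..count].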

/// 1 file containing account hashes sorted by pubkey
struct AccountHashesFile {
    /// # hashes and an open file that will be deleted on drop. None if there are zero hashes to represent, and thus, no file.
    writer: Option<MmapAccountHashesFile>,
    /// The directory where temporary cache files are put
    dir_for_temp_cache_files: PathBuf,
    /// # bytes allocated
    capacity: usize,
}

impl AccountHashesFile {
    /// return a mmap reader that can be accessed by slice
    fn get_reader(&mut self) -> Option<MmapAccountHashesFile> {
        std::mem::take(&mut self.writer)
    }

    /// # hashes stored in this file
    fn count(&self) -> usize {
        self.writer
            .as_ref()
            .map(|writer| writer.count)
            .unwrap_or_default()
    }

    /// write 'hash' to the file
    /// If the file isn't open, create it first.
    fn write(&mut self, hash: &Hash) {
        if self.writer.is_none() {
            // we have hashes to write but no file yet, so create a file that will auto-delete on drop
            let mut data = tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| {
                panic!(
                    "Unable to create file within {}: {err}",
                    self.dir_for_temp_cache_files.display()
                )
            });

            // Theoretical performance optimization: write a zero to the end of
            // the file so that we won't have to resize it later, which may be
            // expensive.
            assert!(self.capacity > 0);
            data.seek(SeekFrom::Start((self.capacity - 1) as u64))
                .unwrap();
            data.write_all(&[0]).unwrap();
            data.rewind().unwrap();
            data.flush().unwrap();

            //UNSAFE: Required to create a Mmap
            let map = unsafe { MmapMut::map_mut(&data) };
            let map = map.unwrap_or_else(|e| {
                error!(
                    "Failed to map the data file (size: {}): {}.\n\
                     Please increase sysctl vm.max_map_count or equivalent for your platform.",
                    self.capacity, e
                );
                std::process::exit(1);
            });

            self.writer = Some(MmapAccountHashesFile {
                mmap: map,
                count: 0,
            });
        }
        self.writer.as_mut().unwrap().write(hash);
    }
}

/// parameters to calculate accounts hash
#[derive(Debug)]
pub struct CalcAccountsHashConfig<'a> {
    /// true to use a thread pool dedicated to bg operations
    pub use_bg_thread_pool: bool,
    /// verify every hash in append vec/write cache with a recalculated hash
    pub check_hash: bool,
    /// 'ancestors' is used to get storages
    pub ancestors: Option<&'a Ancestors>,
    /// does hash calc need to consider account data that exists in the write cache?
    /// if so, 'ancestors' will be used for this purpose as well as storages.
    pub epoch_schedule: &'a EpochSchedule,
    pub rent_collector: &'a RentCollector,
    /// used for tracking down hash mismatches after the fact
    pub store_detailed_debug_info_on_failure: bool,
    pub include_slot_in_hash: IncludeSlotInHash,
}

// smallest, 3 quartiles, largest, average
pub type StorageSizeQuartileStats = [usize; 6];

#[derive(Debug, Default)]
pub struct HashStats {
    pub total_us: u64,
    pub mark_time_us: u64,
    pub cache_hash_data_us: u64,
    pub scan_time_total_us: u64,
    pub zeros_time_total_us: u64,
    pub hash_time_total_us: u64,
    pub sort_time_total_us: u64,
    pub hash_total: usize,
    pub num_snapshot_storage: usize,
    pub scan_chunks: usize,
    pub num_slots: usize,
    pub num_dirty_slots: usize,
    pub collect_snapshots_us: u64,
    pub storage_sort_us: u64,
    pub storage_size_quartiles: StorageSizeQuartileStats,
    pub oldest_root: Slot,
    pub roots_older_than_epoch: AtomicUsize,
    pub accounts_in_roots_older_than_epoch: AtomicUsize,
    pub append_vec_sizes_older_than_epoch: AtomicUsize,
    pub longest_ancient_scan_us: AtomicU64,
    pub sum_ancient_scans_us: AtomicU64,
    pub count_ancient_scans: AtomicU64,
    pub pubkey_bin_search_us: AtomicU64,
}

impl HashStats {
    pub fn calc_storage_size_quartiles(&mut self, storages: &[Arc<AccountStorageEntry>]) {
        let mut sum = 0;
        let mut sizes = storages
            .iter()
            .map(|storage| {
                let cap = storage.accounts.capacity() as usize;
                sum += cap;
                cap
            })
            .collect::<Vec<_>>();
        sizes.sort_unstable();
        let len = sizes.len();
        self.storage_size_quartiles = if len == 0 {
            StorageSizeQuartileStats::default()
        } else {
            [
                *sizes.first().unwrap(),
                sizes[len / 4],
                sizes[len * 2 / 4],
                sizes[len * 3 / 4],
                *sizes.last().unwrap(),
                sum / len,
            ]
        };
    }
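
    // Worked example (hypothetical capacities): storages sized [10, 20, 30, 40] yield
    // [min, q1, q2, q3, max, avg] = [10, 20, 30, 40, 40, 25], i.e. indices
    // 0, len / 4, len * 2 / 4, len * 3 / 4, the last element, and sum / len.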

    pub fn log(&self) {
        datapoint_info!(
            "calculate_accounts_hash_from_storages",
            ("total_us", self.total_us, i64),
            ("mark_time_us", self.mark_time_us, i64),
            ("cache_hash_data_us", self.cache_hash_data_us, i64),
            ("accounts_scan_us", self.scan_time_total_us, i64),
            ("eliminate_zeros_us", self.zeros_time_total_us, i64),
            ("hash_us", self.hash_time_total_us, i64),
            ("sort_us", self.sort_time_total_us, i64),
            ("hash_total", self.hash_total, i64),
            ("storage_sort_us", self.storage_sort_us, i64),
            ("collect_snapshots_us", self.collect_snapshots_us, i64),
            ("num_snapshot_storage", self.num_snapshot_storage, i64),
            ("scan_chunks", self.scan_chunks, i64),
            ("num_slots", self.num_slots, i64),
            ("num_dirty_slots", self.num_dirty_slots, i64),
            ("storage_size_min", self.storage_size_quartiles[0], i64),
            (
                "storage_size_quartile_1",
                self.storage_size_quartiles[1],
                i64
            ),
            (
                "storage_size_quartile_2",
                self.storage_size_quartiles[2],
                i64
            ),
            (
                "storage_size_quartile_3",
                self.storage_size_quartiles[3],
                i64
            ),
            ("storage_size_max", self.storage_size_quartiles[4], i64),
            ("storage_size_avg", self.storage_size_quartiles[5], i64),
            (
                "roots_older_than_epoch",
                self.roots_older_than_epoch.load(Ordering::Relaxed),
                i64
            ),
            ("oldest_root", self.oldest_root, i64),
            (
                "longest_ancient_scan_us",
                self.longest_ancient_scan_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "sum_ancient_scans_us",
                self.sum_ancient_scans_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "count_ancient_scans",
                self.count_ancient_scans.load(Ordering::Relaxed),
                i64
            ),
            (
                "append_vec_sizes_older_than_epoch",
                self.append_vec_sizes_older_than_epoch
                    .load(Ordering::Relaxed),
                i64
            ),
            (
                "accounts_in_roots_older_than_epoch",
                self.accounts_in_roots_older_than_epoch
                    .load(Ordering::Relaxed),
                i64
            ),
            (
                "pubkey_bin_search_us",
                self.pubkey_bin_search_us.load(Ordering::Relaxed),
                i64
            ),
        );
    }
}

/// While scanning appendvecs, this is the info that needs to be extracted, de-duped, and sorted from what is stored in an append vec.
/// Note this can be saved/loaded during hash calculation to a memory mapped file whose contents are
/// [CalculateHashIntermediate]
#[repr(C)]
#[derive(Default, Debug, PartialEq, Eq, Clone, Copy, Pod, Zeroable)]
pub struct CalculateHashIntermediate {
    pub hash: Hash,
    pub lamports: u64,
    pub pubkey: Pubkey,
}

// In order to safely guarantee CalculateHashIntermediate is Pod, it cannot have any padding
const _: () = assert!(
    std::mem::size_of::<CalculateHashIntermediate>()
        == std::mem::size_of::<Hash>() + std::mem::size_of::<u64>() + std::mem::size_of::<Pubkey>(),
    "CalculateHashIntermediate cannot have any padding"
);
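
// Illustrative arithmetic behind the assert above: Hash and Pubkey are both 32 bytes and
// lamports is an 8-byte u64, so the expected size is 32 + 8 + 32 = 72 bytes, which the
// #[repr(C)] layout only matches when no compiler-inserted padding exists.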

#[derive(Default, Debug, PartialEq, Eq)]
pub struct CumulativeOffset {
    pub index: Vec<usize>,
    pub start_offset: usize,
}

pub trait ExtractSliceFromRawData<'b, T: 'b> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T];
}

impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<T>> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
        &self[offset.index[0]][start..]
    }
}

impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<Vec<T>>> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
        &self[offset.index[0]][offset.index[1]][start..]
    }
}

// Allow retrieving &[start..end] from a logical src: Vec<T>, where src is really Vec<Vec<T>> (or later Vec<Vec<Vec<T>>>)
// This model prevents callers from having to flatten which saves both working memory and time.
#[derive(Default, Debug)]
struct CumulativeOffsets {
    cumulative_offsets: Vec<CumulativeOffset>,
    total_count: usize,
}
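
// Illustrative (hypothetical data): for a logical [[a, b], [], [c, d, e]],
// cumulative_offsets is [{index: [0], start_offset: 0}, {index: [2], start_offset: 2}]
// and total_count is 5; find(3) then maps the overall index 3 to offset 1 within the
// data source at index [2], i.e. 'd'.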

/// used by merkle tree calculation to lookup account hashes by overall index
#[derive(Default)]
struct CumulativeHashesFromFiles {
    /// source of hashes in order
    readers: Vec<MmapAccountHashesFile>,
    /// look up reader index and offset by overall index
    cumulative: CumulativeOffsets,
}

impl CumulativeHashesFromFiles {
    /// Calculate offset from overall index to which file and offset within that file based on the length of each hash file.
    /// Also collect readers to access the data.
    fn from_files(hashes: Vec<AccountHashesFile>) -> Self {
        let mut readers = Vec::with_capacity(hashes.len());
        let cumulative = CumulativeOffsets::new(hashes.into_iter().filter_map(|mut hash_file| {
            // ignores all hashfiles that have zero entries
            hash_file.get_reader().map(|reader| {
                let count = reader.count;
                readers.push(reader);
                count
            })
        }));
        Self {
            cumulative,
            readers,
        }
    }

    /// total # of items referenced
    fn total_count(&self) -> usize {
        self.cumulative.total_count
    }

    // return the biggest slice possible that starts at the overall index 'start'
    fn get_slice(&self, start: usize) -> &[Hash] {
        let (start, offset) = self.cumulative.find(start);
        let data_source_index = offset.index[0];
        let data = &self.readers[data_source_index];
        // indexing panics if we ask for data that doesn't exist. We should never do that; if we do, then cumulative was calculated incorrectly.
        data.read(start)
    }
}

impl CumulativeOffsets {
    fn new<I>(iter: I) -> Self
    where
        I: Iterator<Item = usize>,
    {
        let mut total_count: usize = 0;
        let cumulative_offsets: Vec<_> = iter
            .enumerate()
            .filter_map(|(i, len)| {
                if len > 0 {
                    let result = CumulativeOffset {
                        index: vec![i],
                        start_offset: total_count,
                    };
                    total_count += len;
                    Some(result)
                } else {
                    None
                }
            })
            .collect();

        Self {
            cumulative_offsets,
            total_count,
        }
    }

    fn from_raw<T>(raw: &[Vec<T>]) -> Self {
        Self::new(raw.iter().map(|v| v.len()))
    }

    /// find the index of the data source that contains 'start'
    fn find_index(&self, start: usize) -> usize {
        assert!(!self.cumulative_offsets.is_empty());
        match self.cumulative_offsets[..].binary_search_by(|index| index.start_offset.cmp(&start)) {
            Ok(index) => index,
            Err(index) => index - 1, // we would insert at index so we are before the item at index
        }
    }

    /// given overall start index 'start'
    /// return ('start', which is the offset into the data source at 'index',
    ///  and 'index', which is the data source to use)
    fn find(&self, start: usize) -> (usize, &CumulativeOffset) {
        let index = self.find_index(start);
        let index = &self.cumulative_offsets[index];
        let start = start - index.start_offset;
        (start, index)
    }

    // return the biggest slice possible that starts at 'start'
    fn get_slice<'a, 'b, T, U>(&'a self, raw: &'b U, start: usize) -> &'b [T]
    where
        U: ExtractSliceFromRawData<'b, T> + 'b,
    {
        let (start, index) = self.find(start);
        raw.extract(index, start)
    }
}

#[derive(Debug)]
pub struct AccountsHasher<'a> {
    pub filler_account_suffix: Option<Pubkey>,
    pub zero_lamport_accounts: ZeroLamportAccounts,
    /// The directory where temporary cache files are put
    pub dir_for_temp_cache_files: PathBuf,
    pub(crate) active_stats: &'a ActiveStats,
}

/// Pointer to a specific item in chunked accounts hash slices.
#[derive(Debug, Clone, Copy)]
struct SlotGroupPointer {
    /// slot group index
    slot_group_index: usize,
    /// offset within a slot group
    offset: usize,
}

/// A struct for the location of an account hash item inside chunked accounts hash slices.
#[derive(Debug)]
struct ItemLocation<'a> {
    /// account's pubkey
    key: &'a Pubkey,
    /// pointer to the item in slot group slices
    pointer: SlotGroupPointer,
}

impl<'a> AccountsHasher<'a> {
    /// true if it is possible that there are filler accounts present
    pub fn filler_accounts_enabled(&self) -> bool {
        self.filler_account_suffix.is_some()
    }

    pub fn calculate_hash(hashes: Vec<Vec<Hash>>) -> (Hash, usize) {
        let cumulative_offsets = CumulativeOffsets::from_raw(&hashes);

        let hash_total = cumulative_offsets.total_count;
        let result = AccountsHasher::compute_merkle_root_from_slices(
            hash_total,
            MERKLE_FANOUT,
            None,
            |start: usize| cumulative_offsets.get_slice(&hashes, start),
            None,
        );
        (result.0, hash_total)
    }

    pub fn compute_merkle_root(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
        Self::compute_merkle_root_loop(hashes, fanout, |t| &t.1)
    }

    // this function avoids an infinite recursion compiler error
    pub fn compute_merkle_root_recurse(hashes: Vec<Hash>, fanout: usize) -> Hash {
        Self::compute_merkle_root_loop(hashes, fanout, |t| t)
    }
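
    /// e.g. div_ceil(16, 16) == 1 and div_ceil(17, 16) == 2: the number of
    /// fanout-sized chunks needed to cover `x` hashes.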
    pub fn div_ceil(x: usize, y: usize) -> usize {
        let mut result = x / y;
        if x % y != 0 {
            result += 1;
        }
        result
    }

    // For the first iteration, there could be more items in the tuple than just hash and lamports.
    // Using extractor allows us to avoid an unnecessary array copy on the first iteration.
    pub fn compute_merkle_root_loop<T, F>(hashes: Vec<T>, fanout: usize, extractor: F) -> Hash
    where
        F: Fn(&T) -> &Hash + std::marker::Sync,
        T: std::marker::Sync,
    {
        if hashes.is_empty() {
            return Hasher::default().result();
        }

        let mut time = Measure::start("time");

        let total_hashes = hashes.len();
        let chunks = Self::div_ceil(total_hashes, fanout);

        let result: Vec<_> = (0..chunks)
            .into_par_iter()
            .map(|i| {
                let start_index = i * fanout;
                let end_index = std::cmp::min(start_index + fanout, total_hashes);

                let mut hasher = Hasher::default();
                for item in hashes.iter().take(end_index).skip(start_index) {
                    let h = extractor(item);
                    hasher.hash(h.as_ref());
                }

                hasher.result()
            })
            .collect();
        time.stop();
        debug!("hashing {} {}", total_hashes, time);

        if result.len() == 1 {
            result[0]
        } else {
            Self::compute_merkle_root_recurse(result, fanout)
        }
    }

    fn calculate_three_level_chunks(
        total_hashes: usize,
        fanout: usize,
        max_levels_per_pass: Option<usize>,
        specific_level_count: Option<usize>,
    ) -> (usize, usize, bool) {
        const THREE_LEVEL_OPTIMIZATION: usize = 3; // this '3' is dependent on the code structure below where we manually unroll
        let target = fanout.pow(THREE_LEVEL_OPTIMIZATION as u32);

        // Only use the 3 level optimization if we have at least 4 levels of data.
        // Otherwise, we'll be serializing a parallel operation.
        let threshold = target * fanout;
        let mut three_level = max_levels_per_pass.unwrap_or(usize::MAX) >= THREE_LEVEL_OPTIMIZATION
            && total_hashes >= threshold;
        if three_level {
            if let Some(specific_level_count_value) = specific_level_count {
                three_level = specific_level_count_value >= THREE_LEVEL_OPTIMIZATION;
            }
        }
        let (num_hashes_per_chunk, levels_hashed) = if three_level {
            (target, THREE_LEVEL_OPTIMIZATION)
        } else {
            (fanout, 1)
        };
        (num_hashes_per_chunk, levels_hashed, three_level)
    }
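
    // Worked example (fanout = 16): target = 16^3 = 4096 and threshold = 16^4 = 65536.
    // With 16^5 = 1,048,576 input hashes, each parallel chunk consumes 4096 hashes and
    // emits 1, so a single pass hashes 3 merkle levels and leaves 256 intermediate hashes.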

    // This function is designed to allow hashes to be located in multiple, perhaps multiply deep vecs.
    // The caller provides a function to return a slice from the source data.
    fn compute_merkle_root_from_slices<'b, F, T>(
        total_hashes: usize,
        fanout: usize,
        max_levels_per_pass: Option<usize>,
        get_hash_slice_starting_at_index: F,
        specific_level_count: Option<usize>,
    ) -> (Hash, Vec<Hash>)
    where
        // returns a slice of hashes starting at the given overall index
        F: Fn(usize) -> &'b [T] + std::marker::Sync,
        T: Borrow<Hash> + std::marker::Sync + 'b,
    {
        if total_hashes == 0 {
            return (Hasher::default().result(), vec![]);
        }

        let mut time = Measure::start("time");

        let (num_hashes_per_chunk, levels_hashed, three_level) = Self::calculate_three_level_chunks(
            total_hashes,
            fanout,
            max_levels_per_pass,
            specific_level_count,
        );

        let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);

        // initial fetch - could return entire slice
        let data = get_hash_slice_starting_at_index(0);
        let data_len = data.len();

        let result: Vec<_> = (0..chunks)
            .into_par_iter()
            .map(|i| {
                // summary:
                // this closure computes 1 or 3 levels of merkle tree (all chunks will be 1 or all will be 3)
                // for a subset (our chunk) of the input data [start_index..end_index]

                // index into get_hash_slice_starting_at_index where this chunk's range begins
                let start_index = i * num_hashes_per_chunk;
                // index into get_hash_slice_starting_at_index where this chunk's range ends
                let end_index = std::cmp::min(start_index + num_hashes_per_chunk, total_hashes);

                // will compute the final result for this closure
                let mut hasher = Hasher::default();

                // index into 'data' where we are currently pulling data
                // if we exhaust our data, then we will request a new slice, and data_index resets to 0, the beginning of the new slice
                let mut data_index = start_index;
                // source data, which we may refresh when we exhaust
                let mut data = data;
                // len of the source data
                let mut data_len = data_len;

                if !three_level {
                    // 1 group of fanout
                    // The result of this loop is a single hash value from fanout input hashes.
                    for i in start_index..end_index {
                        if data_index >= data_len {
                            // we exhausted our data, fetch next slice starting at i
                            data = get_hash_slice_starting_at_index(i);
                            data_len = data.len();
                            data_index = 0;
                        }
                        hasher.hash(data[data_index].borrow().as_ref());
                        data_index += 1;
                    }
                } else {
                    // hash 3 levels of fanout simultaneously.
                    // This codepath produces 1 hash value for between 1..=fanout^3 input hashes.
                    // It is equivalent to running the normal merkle tree calculation 3 iterations on the input.
                    //
                    // big idea:
                    // merkle trees usually reduce the input vector by a factor of fanout with each iteration
                    // example with fanout 2:
                    // start:     [0,1,2,3,4,5,6,7]      in our case: [...16M...] or really, 1B
                    // iteration0 [.5, 2.5, 4.5, 6.5]                 [... 1M...]
                    // iteration1 [1.5, 5.5]                          [...65k...]
                    // iteration2 3.5                                 [...4k... ]
                    // So iteration 0 consumes N elements, hashes them in groups of 'fanout' and produces a vector of N/fanout elements
                    // and the process repeats until there is only 1 hash left.
                    //
                    // With the three_level code path, we make each chunk we iterate of size fanout^3 (4096)
                    // So, the input could be 16M hashes and the output will be 4k hashes, or N/fanout^3
                    // The goal is to reduce the amount of data that has to be constructed and held in memory.
                    // When we know we have enough hashes, then, in 1 pass, we hash 3 levels simultaneously, storing far fewer intermediate hashes.
                    //
                    // Now, some details:
                    // The result of this loop is a single hash value from fanout^3 input hashes.
                    // concepts:
                    // what we're conceptually hashing: "raw_hashes"[start_index..end_index]
                    // example: [a,b,c,d,e,f]
                    // but... hashes[] may really be multiple vectors that are pieced together.
                    // example: [[a,b],[c],[d,e,f]]
                    // get_hash_slice_starting_at_index(any_index) abstracts that and returns a slice starting at raw_hashes[any_index..]
                    // such that the end of get_hash_slice_starting_at_index may be <, >, or = end_index
                    // example: get_hash_slice_starting_at_index(1) returns [b]
                    //          get_hash_slice_starting_at_index(3) returns [d,e,f]
                    // This code is basically 3 iterations of merkle tree hashing occurring simultaneously.
                    // The first fanout raw hashes are hashed in hasher_k. This is iteration0
                    // Once hasher_k has hashed fanout hashes, hasher_k's result hash is hashed in hasher_j and then discarded
                    // hasher_k then starts over fresh and hashes the next fanout raw hashes. This is iteration0 again for a new set of data.
                    // Once hasher_j has hashed fanout hashes (from k), hasher_j's result hash is hashed in hasher and then discarded
                    // Once hasher has hashed fanout hashes (from j), then the result of hasher is the hash for fanout^3 raw hashes.
                    // If there are < fanout^3 hashes, then this code stops when it runs out of raw hashes and returns whatever it hashed.
                    // This is always how the very last elements work in a merkle tree.
                    let mut i = start_index;
                    while i < end_index {
                        let mut hasher_j = Hasher::default();
                        for _j in 0..fanout {
                            let mut hasher_k = Hasher::default();
                            let end = std::cmp::min(end_index - i, fanout);
                            for _k in 0..end {
                                if data_index >= data_len {
                                    // we exhausted our data, fetch next slice starting at i
                                    data = get_hash_slice_starting_at_index(i);
                                    data_len = data.len();
                                    data_index = 0;
                                }
                                hasher_k.hash(data[data_index].borrow().as_ref());
                                data_index += 1;
                                i += 1;
                            }
                            hasher_j.hash(hasher_k.result().as_ref());
                            if i >= end_index {
                                break;
                            }
                        }
                        hasher.hash(hasher_j.result().as_ref());
                    }
                }
                hasher.result()
            })
            .collect();
        time.stop();
        debug!("hashing {} {}", total_hashes, time);

        if let Some(mut specific_level_count_value) = specific_level_count {
            specific_level_count_value -= levels_hashed;
            if specific_level_count_value == 0 {
                (Hash::default(), result)
            } else {
                assert!(specific_level_count_value > 0);
                // We did not hash the number of levels required by 'specific_level_count', so repeat
                Self::compute_merkle_root_from_slices_recurse(
                    result,
                    fanout,
                    max_levels_per_pass,
                    Some(specific_level_count_value),
                )
            }
        } else {
            (
                if result.len() == 1 {
                    result[0]
                } else {
                    Self::compute_merkle_root_recurse(result, fanout)
                },
                vec![], // no intermediate results needed by caller
            )
        }
    }
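
    // Illustrative use of `specific_level_count` (hypothetical numbers): with fanout = 16,
    // >= 65536 total hashes, and specific_level_count = Some(3), the single three-level
    // pass satisfies the request, so the per-chunk results (1 hash per 4096 inputs) are
    // returned in the Vec and the Hash in the tuple is Hash::default().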

    fn compute_merkle_root_from_slices_recurse(
        hashes: Vec<Hash>,
        fanout: usize,
        max_levels_per_pass: Option<usize>,
        specific_level_count: Option<usize>,
    ) -> (Hash, Vec<Hash>) {
        Self::compute_merkle_root_from_slices(
            hashes.len(),
            fanout,
            max_levels_per_pass,
            |start| &hashes[start..],
            specific_level_count,
        )
    }

    pub fn accumulate_account_hashes(mut hashes: Vec<(Pubkey, AccountHash)>) -> Hash {
        hashes.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
        Self::compute_merkle_root_loop(hashes, MERKLE_FANOUT, |i| &i.1 .0)
    }

    pub fn compare_two_hash_entries(
        a: &CalculateHashIntermediate,
        b: &CalculateHashIntermediate,
    ) -> std::cmp::Ordering {
        // note partial_cmp only returns None with floating point comparisons
        a.pubkey.partial_cmp(&b.pubkey).unwrap()
    }

    pub fn checked_cast_for_capitalization(balance: u128) -> u64 {
        balance.try_into().unwrap_or_else(|_| {
            panic!("overflow is detected while summing capitalization: {balance}")
        })
    }
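
    // Illustrative: checked_cast_for_capitalization(u64::MAX as u128) == u64::MAX, while
    // one more lamport (u64::MAX as u128 + 1) panics rather than silently truncating.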

    /// returns:
    /// Vec, with one entry per bin
    ///  for each entry, Vec<Hash> in pubkey order
    /// If the returned Vec<AccountHashesFile> were flattened, it would be all hashes, in pubkey order.
    fn de_dup_accounts(
        &self,
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        stats: &mut HashStats,
        max_bin: usize,
    ) -> (Vec<AccountHashesFile>, u64) {
        // 1. eliminate zero lamport accounts
        // 2. pick the highest slot or (slot = and highest version) of each pubkey
        // 3. produce this output:
        //   a. vec: PUBKEY_BINS_FOR_CALCULATING_HASHES in pubkey order
        //      vec: individual hashes in pubkey order, 1 hash per
        //   b. lamports
        let _guard = self.active_stats.activate(ActiveStatItem::HashDeDup);

        #[derive(Default)]
        struct DedupResult {
            hashes_files: Vec<AccountHashesFile>,
            hashes_count: usize,
            lamports_sum: u64,
        }

        let mut zeros = Measure::start("eliminate zeros");
        let DedupResult {
            hashes_files: hashes,
            hashes_count: hash_total,
            lamports_sum: lamports_total,
        } = (0..max_bin)
            .into_par_iter()
            .fold(DedupResult::default, |mut accum, bin| {
                let (hashes_file, lamports_bin) =
                    self.de_dup_accounts_in_parallel(sorted_data_by_pubkey, bin, max_bin, stats);

                accum.lamports_sum = accum
                    .lamports_sum
                    .checked_add(lamports_bin)
                    .expect("summing capitalization cannot overflow");
                accum.hashes_count += hashes_file.count();
                accum.hashes_files.push(hashes_file);
                accum
            })
            .reduce(
                || DedupResult {
                    hashes_files: Vec::with_capacity(max_bin),
                    ..Default::default()
                },
                |mut a, mut b| {
                    a.lamports_sum = a
                        .lamports_sum
                        .checked_add(b.lamports_sum)
                        .expect("summing capitalization cannot overflow");
                    a.hashes_count += b.hashes_count;
                    a.hashes_files.append(&mut b.hashes_files);
                    a
                },
            );
        zeros.stop();
        stats.zeros_time_total_us += zeros.as_us();
        stats.hash_total += hash_total;
        (hashes, lamports_total)
    }

    /// Given the item location, return the item in the `CalculateHashIntermediate` slices and the next item location in the same bin.
    /// If the end of the `CalculateHashIntermediate` slice is reached or all the accounts in the current bin have been exhausted, return `None` for the next item location.
    fn get_item<'b>(
        sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
        bin: usize,
        binner: &PubkeyBinCalculator24,
        item_loc: &ItemLocation<'b>,
    ) -> (&'b CalculateHashIntermediate, Option<ItemLocation<'b>>) {
        let division_data = &sorted_data_by_pubkey[item_loc.pointer.slot_group_index];
        let mut index = item_loc.pointer.offset;
        index += 1;
        let mut next = None;

        while index < division_data.len() {
            // still more items where we found the previous key, so just increment the index for that slot group, skipping all pubkeys that are equal
            let next_key = &division_data[index].pubkey;
            if next_key == item_loc.key {
                index += 1;
                continue; // duplicate entries of same pubkey, so keep skipping
            }

            if binner.bin_from_pubkey(next_key) > bin {
                // the next pubkey is not in our bin
                break;
            }

            // point to the next pubkey > key
            next = Some(ItemLocation {
                key: next_key,
                pointer: SlotGroupPointer {
                    slot_group_index: item_loc.pointer.slot_group_index,
                    offset: index,
                },
            });
            break;
        }

        // this is the previous first item that was requested
        (&division_data[index - 1], next)
    }

    /// `hash_data` must be sorted by `binner.bin_from_pubkey()`
    /// return index in `hash_data` of first pubkey that is in `bin`, based on `binner`
    fn binary_search_for_first_pubkey_in_bin(
        hash_data: &[CalculateHashIntermediate],
        bin: usize,
        binner: &PubkeyBinCalculator24,
    ) -> Option<usize> {
        let potential_index = if bin == 0 {
            // `bin` == 0 is special because there cannot be `bin` - 1
            // so either element[0] is in bin 0 or there is nothing in bin 0.
            0
        } else {
            // search for the first pubkey that is in `bin`
            // There could be many keys in a row with the same `bin`.
            // So, for each pubkey, use calculated_bin * 2 + 1 as the bin of a given pubkey for binary search.
            // And compare the bin of each pubkey with `bin` * 2.
            // So all keys that are in `bin` will compare as `bin` * 2 + 1
            // all keys that are in `bin` - 1 will compare as ((`bin` - 1) * 2 + 1), which is (`bin` * 2 - 1)
            // NO keys will compare as `bin` * 2 because we add 1.
            // So, the binary search will NEVER return Ok(found_index), but will always return Err(index of first key in `bin`).
            // Note that if NO key is in `bin`, then the key at the found index will be in a bin > `bin`, so return None.
            let just_prior_to_desired_bin = bin * 2;
            let search = hash_data.binary_search_by(|data| {
                (1 + 2 * binner.bin_from_pubkey(&data.pubkey)).cmp(&just_prior_to_desired_bin)
            });
            // returns Err(index where item should be) since the desired item will never exist
            search.expect_err("it is impossible to find a matching bin")
        };
        // note that `potential_index` could be == hash_data.len(). This indicates the first key in `bin` would be
        // after the data we have. Thus, no key is in `bin`.
        // This also handles the case where `hash_data` is empty, since len() will be 0 and `get` will return None.
        hash_data.get(potential_index).and_then(|potential_data| {
            (binner.bin_from_pubkey(&potential_data.pubkey) == bin).then_some(potential_index)
        })
    }
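
    // Worked example of the 2x + 1 mapping above (hypothetical data): for pubkeys whose
    // bins are [0, 0, 1, 1, 2] and `bin` = 1, the mapped search keys are [1, 1, 3, 3, 5]
    // and the target is `bin` * 2 = 2, so binary_search returns Err(2): exactly the index
    // of the first pubkey in bin 1.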

    /// `hash_data` must be sorted by `binner.bin_from_pubkey()`
    /// return index in `hash_data` of first pubkey that is in `bin`, based on `binner`
    fn find_first_pubkey_in_bin(
        hash_data: &[CalculateHashIntermediate],
        bin: usize,
        bins: usize,
        binner: &PubkeyBinCalculator24,
        stats: &HashStats,
    ) -> Option<usize> {
        if hash_data.is_empty() {
            return None;
        }
        let (result, us) = measure_us!({
            // assume uniform distribution of pubkeys and choose first guess based on bin we're looking for
            let i = hash_data.len() * bin / bins;
            let estimate = &hash_data[i];

            let pubkey_bin = binner.bin_from_pubkey(&estimate.pubkey);
            let range = if pubkey_bin >= bin {
                // i pubkey matches or is too large, so look <= i for the first pubkey in the right bin
                // i+1 could be the first pubkey in the right bin
                0..(i + 1)
            } else {
                // i pubkey is too small, so look after i
                (i + 1)..hash_data.len()
            };
            Some(
                range.start +
                // binary search the subset
                Self::binary_search_for_first_pubkey_in_bin(
                    &hash_data[range],
                    bin,
                    binner,
                )?,
            )
        });
        stats.pubkey_bin_search_us.fetch_add(us, Ordering::Relaxed);
        result
    }
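
    // Illustrative (assuming uniformly distributed keys): with 1000 entries and bins = 100,
    // looking for bin 42 starts the guess at index 1000 * 42 / 100 = 420; the binary search
    // then only covers the side of index 420 that can contain the first key in bin 42.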

    /// Return the working_set and max number of pubkeys for hash dedup.
    /// `working_set` holds SlotGroupPointer {slot_group_index, offset} for items in account's pubkey descending order.
    fn initialize_dedup_working_set(
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        pubkey_bin: usize,
        bins: usize,
        binner: &PubkeyBinCalculator24,
        stats: &HashStats,
    ) -> (
        Vec<SlotGroupPointer>, /* working_set */
        usize,                 /* max_inclusive_num_pubkeys */
    ) {
        // working_set holds the lowest items for each slot_group sorted by pubkey descending (min_key is the last)
        let mut working_set: Vec<SlotGroupPointer> = Vec::default();

        // Initialize 'working_set', which holds the current lowest item in each slot group.
        // `working_set` should be initialized in reverse order of slot_groups. Later slot_groups are
        // processed first. For each slot_group, if the lowest item for current slot group is
        // already in working_set (i.e. inserted by a later slot group), the next lowest item
        // in this slot group is searched and checked, until either one that is `not` in the
        // working_set is found, which will then be inserted, or no next lowest item is found.
        // Iterating in reverse order of slot_group will guarantee that each slot group will be
        // scanned only once and scanned continuously. Therefore, it can achieve better data
        // locality during the scan.
        let max_inclusive_num_pubkeys = sorted_data_by_pubkey
            .iter()
            .enumerate()
            .rev()
            .map(|(i, hash_data)| {
                let first_pubkey_in_bin =
                    Self::find_first_pubkey_in_bin(hash_data, pubkey_bin, bins, binner, stats);

                if let Some(first_pubkey_in_bin) = first_pubkey_in_bin {
                    let mut next = Some(ItemLocation {
                        key: &hash_data[first_pubkey_in_bin].pubkey,
                        pointer: SlotGroupPointer {
                            slot_group_index: i,
                            offset: first_pubkey_in_bin,
                        },
                    });

                    Self::add_next_item(
                        &mut next,
                        &mut working_set,
                        sorted_data_by_pubkey,
                        pubkey_bin,
                        binner,
                    );

                    let mut first_pubkey_in_next_bin = first_pubkey_in_bin + 1;
                    while first_pubkey_in_next_bin < hash_data.len() {
                        if binner.bin_from_pubkey(&hash_data[first_pubkey_in_next_bin].pubkey)
                            != pubkey_bin
                        {
                            break;
                        }
                        first_pubkey_in_next_bin += 1;
                    }
                    first_pubkey_in_next_bin - first_pubkey_in_bin
                } else {
                    0
                }
            })
            .sum::<usize>();

        (working_set, max_inclusive_num_pubkeys)
    }
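
    // Illustrative walk-through (hypothetical pubkeys, one bin): for slot groups
    // [[A, C], [A, B]], group 1 is scanned first and contributes A; group 0's A is already
    // present from a later slot group, so its next item C is inserted instead. The working
    // set ends up [C, A] (pubkey descending) and max_inclusive_num_pubkeys = 2 + 2 = 4.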
    /// Add next item into hash dedup working set
    fn add_next_item<'b>(
        next: &mut Option<ItemLocation<'b>>,
        working_set: &mut Vec<SlotGroupPointer>,
        sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
        pubkey_bin: usize,
        binner: &PubkeyBinCalculator24,
    ) {
        // looping to add next item to working set
        while let Some(ItemLocation { key, pointer }) = std::mem::take(next) {
            // if `new key` is less than the min key in the working set, skip binary search and
            // insert item to the end vec directly
            if let Some(SlotGroupPointer {
                slot_group_index: current_min_slot_group_index,
                offset: current_min_offset,
            }) = working_set.last()
            {
                let current_min_key = &sorted_data_by_pubkey[*current_min_slot_group_index]
                    [*current_min_offset]
                    .pubkey;
                if key < current_min_key {
                    working_set.push(pointer);
                    break;
                }
            }

            let found = working_set.binary_search_by(|pointer| {
                let prob = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;
                (*key).cmp(prob)
            });

            match found {
                Err(index) => {
                    // found a new key, insert into the working_set. This is O(n/2) on
                    // average. Theoretically, this operation could be expensive and may be further
                    // optimized in the future.
                    working_set.insert(index, pointer);
                    break;
                }
                Ok(index) => {
                    let found = &mut working_set[index];
                    if found.slot_group_index > pointer.slot_group_index {
                        // There is already a later slot group that contains this key in the working_set,
                        // look up again.
                        let (_item, new_next) = Self::get_item(
                            sorted_data_by_pubkey,
                            pubkey_bin,
                            binner,
                            &ItemLocation { key, pointer },
                        );
                        *next = new_next;
                    } else {
                        // A previous slot contains this key, replace it, and look for next item in the previous slot group.
                        let (_item, new_next) = Self::get_item(
                            sorted_data_by_pubkey,
                            pubkey_bin,
                            binner,
                            &ItemLocation {
                                key,
                                pointer: *found,
                            },
                        );
                        *found = pointer;
                        *next = new_next;
                    }
                }
            }
        }
    }

    // go through: [..][pubkey_bin][..] and return hashes and lamport sum
    //   slot groups^                ^accounts found in a slot group, sorted by pubkey, higher slot, write_version
    // 1. handle zero lamport accounts
    // 2. pick the highest slot or (slot = and highest version) of each pubkey
    // 3. produce this output:
    //   a. AccountHashesFile: individual account hashes in pubkey order
    //   b. lamport sum
    fn de_dup_accounts_in_parallel(
        &self,
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        pubkey_bin: usize,
        bins: usize,
        stats: &HashStats,
    ) -> (AccountHashesFile, u64) {
        let binner = PubkeyBinCalculator24::new(bins);

        // working_set holds the lowest items for each slot_group sorted by pubkey descending (min_key is the last)
        let (mut working_set, max_inclusive_num_pubkeys) = Self::initialize_dedup_working_set(
            sorted_data_by_pubkey,
            pubkey_bin,
            bins,
            &binner,
            stats,
        );

        let mut hashes = AccountHashesFile {
            writer: None,
            dir_for_temp_cache_files: self.dir_for_temp_cache_files.clone(),
            capacity: max_inclusive_num_pubkeys * std::mem::size_of::<Hash>(),
        };

        let mut overall_sum = 0;
        let filler_accounts_enabled = self.filler_accounts_enabled();

        while let Some(pointer) = working_set.pop() {
            let key = &sorted_data_by_pubkey[pointer.slot_group_index][pointer.offset].pubkey;

            // get the min item, add lamports, get hash
            let (item, mut next) = Self::get_item(
                sorted_data_by_pubkey,
                pubkey_bin,
                &binner,
                &ItemLocation { key, pointer },
            );

            // add lamports and get hash
            if item.lamports != 0 {
                // do not include filler accounts in the hash
                if !(filler_accounts_enabled && self.is_filler_account(&item.pubkey)) {
                    overall_sum = Self::checked_cast_for_capitalization(
                        item.lamports as u128 + overall_sum as u128,
                    );
                    hashes.write(&item.hash);
                }
            } else {
                // if lamports == 0, check if they should be included
                if self.zero_lamport_accounts == ZeroLamportAccounts::Included {
                    // For incremental accounts hash, the hash of a zero lamport account is
                    // the hash of its pubkey
                    let hash = blake3::hash(bytemuck::bytes_of(&item.pubkey));
                    let hash = Hash::new_from_array(hash.into());
                    hashes.write(&hash);
                }
            }

            Self::add_next_item(
                &mut next,
                &mut working_set,
                sorted_data_by_pubkey,
                pubkey_bin,
                &binner,
            );
        }

        (hashes, overall_sum)
    }
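
    // Illustrative merge order for the loop above (hypothetical pubkeys, one bin): with
    // slot groups [[A, C], [A, B]], popping the working set emits A (taken from the later
    // slot group, deduping group 0's A), then B, then C, so hashes land in pubkey order
    // with exactly one hash per pubkey.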

    fn is_filler_account(&self, pubkey: &Pubkey) -> bool {
        crate::accounts_db::AccountsDb::is_filler_account_helper(
            pubkey,
            self.filler_account_suffix.as_ref(),
        )
    }

    /// input:
    /// vec: group of slot data, ordered by Slot (low to high)
    ///   vec: [..] - items found in that slot range Sorted by: Pubkey, higher Slot, higher Write version (if pubkey =)
    pub fn rest_of_hash_calculation(
        &self,
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        stats: &mut HashStats,
    ) -> (Hash, u64) {
        let (hashes, total_lamports) = self.de_dup_accounts(
            sorted_data_by_pubkey,
            stats,
            PUBKEY_BINS_FOR_CALCULATING_HASHES,
        );

        let cumulative = CumulativeHashesFromFiles::from_files(hashes);

        let _guard = self.active_stats.activate(ActiveStatItem::HashMerkleTree);
        let mut hash_time = Measure::start("hash");
        let (hash, _) = Self::compute_merkle_root_from_slices(
            cumulative.total_count(),
            MERKLE_FANOUT,
            None,
            |start| cumulative.get_slice(start),
            None,
        );
        hash_time.stop();
        stats.hash_time_total_us += hash_time.as_us();
        (hash, total_lamports)
    }
}

/// How should zero-lamport accounts be treated by the accounts hasher?
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ZeroLamportAccounts {
    Excluded,
    Included,
}

/// Hash of an account
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Pod, Zeroable)]
pub struct AccountHash(pub Hash);

// Ensure the newtype wrapper never changes size from the underlying Hash
// This also ensures there are no padding bytes, which is required to safely implement Pod
const _: () = assert!(std::mem::size_of::<AccountHash>() == std::mem::size_of::<Hash>());

impl Borrow<Hash> for AccountHash {
    fn borrow(&self) -> &Hash {
        &self.0
    }
}

/// Hash of accounts
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum AccountsHashKind {
    Full(AccountsHash),
    Incremental(IncrementalAccountsHash),
}

impl AccountsHashKind {
    pub fn as_hash(&self) -> &Hash {
        match self {
            AccountsHashKind::Full(AccountsHash(hash))
            | AccountsHashKind::Incremental(IncrementalAccountsHash(hash)) => hash,
        }
    }
}

impl From<AccountsHash> for AccountsHashKind {
    fn from(accounts_hash: AccountsHash) -> Self {
        AccountsHashKind::Full(accounts_hash)
    }
}

impl From<IncrementalAccountsHash> for AccountsHashKind {
    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
        AccountsHashKind::Incremental(incremental_accounts_hash)
    }
}

/// Hash of accounts
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct AccountsHash(pub Hash);

/// Hash of accounts that includes zero-lamport accounts
/// Used with incremental snapshots
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct IncrementalAccountsHash(pub Hash);

/// Hash of accounts written in a single slot
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct AccountsDeltaHash(pub Hash);

/// Snapshot serde-safe accounts delta hash
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq, AbiExample)]
pub struct SerdeAccountsDeltaHash(pub Hash);
impl From<SerdeAccountsDeltaHash> for AccountsDeltaHash {
    fn from(accounts_delta_hash: SerdeAccountsDeltaHash) -> Self {
        Self(accounts_delta_hash.0)
    }
}
impl From<AccountsDeltaHash> for SerdeAccountsDeltaHash {
    fn from(accounts_delta_hash: AccountsDeltaHash) -> Self {
        Self(accounts_delta_hash.0)
    }
}

/// Snapshot serde-safe accounts hash
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq, AbiExample)]
pub struct SerdeAccountsHash(pub Hash);
impl From<SerdeAccountsHash> for AccountsHash {
    fn from(accounts_hash: SerdeAccountsHash) -> Self {
        Self(accounts_hash.0)
    }
}
impl From<AccountsHash> for SerdeAccountsHash {
    fn from(accounts_hash: AccountsHash) -> Self {
        Self(accounts_hash.0)
    }
}

/// Snapshot serde-safe incremental accounts hash
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq, AbiExample)]
pub struct SerdeIncrementalAccountsHash(pub Hash);
impl From<SerdeIncrementalAccountsHash> for IncrementalAccountsHash {
    fn from(incremental_accounts_hash: SerdeIncrementalAccountsHash) -> Self {
        Self(incremental_accounts_hash.0)
    }
}
impl From<IncrementalAccountsHash> for SerdeIncrementalAccountsHash {
    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
        Self(incremental_accounts_hash.0)
    }
}

#[cfg(test)]
mod tests {
    use {super::*, itertools::Itertools, std::str::FromStr, tempfile::tempdir};

    lazy_static! {
        static ref ACTIVE_STATS: ActiveStats = ActiveStats::default();
    }

    impl<'a> AccountsHasher<'a> {
        fn new(dir_for_temp_cache_files: PathBuf) -> Self {
            Self {
                filler_account_suffix: None,
                zero_lamport_accounts: ZeroLamportAccounts::Excluded,
                dir_for_temp_cache_files,
                active_stats: &ACTIVE_STATS,
            }
        }
    }

    impl AccountHashesFile {
        fn new(dir_for_temp_cache_files: PathBuf) -> Self {
            Self {
                writer: None,
                dir_for_temp_cache_files,
                capacity: 1024, /* default 1k for tests */
            }
        }
    }

    impl CumulativeOffsets {
        fn from_raw_2d<T>(raw: &[Vec<Vec<T>>]) -> Self {
            let mut total_count: usize = 0;
            let mut cumulative_offsets = Vec::with_capacity(0);
            for (i, v_outer) in raw.iter().enumerate() {
                for (j, v) in v_outer.iter().enumerate() {
                    let len = v.len();
                    if len > 0 {
                        if cumulative_offsets.is_empty() {
                            // the first inner, non-empty vector we find gives us an approximate rectangular shape
                            cumulative_offsets = Vec::with_capacity(raw.len() * v_outer.len());
                        }
                        cumulative_offsets.push(CumulativeOffset {
                            index: vec![i, j],
                            start_offset: total_count,
                        });
                        total_count += len;
                    }
                }
            }

            Self {
                cumulative_offsets,
                total_count,
            }
        }
    }

    #[test]
    fn test_find_first_pubkey_in_bin() {
        let stats = HashStats::default();
        for (bins, expected_count) in [1, 2, 4].into_iter().zip([5, 20, 120]) {
            let bins: usize = bins;
            let binner = PubkeyBinCalculator24::new(bins);

            let mut count = 0usize;
            // # pubkeys in each bin are permutations of these
            // 0 means none in this bin
            // large number (20) means the found key will be well before or after the expected index based on an assumption of uniform distribution
            for counts in [0, 1, 2, 20, 0].into_iter().permutations(bins) {
                count += 1;
                let hash_data = counts
                    .iter()
                    .enumerate()
                    .flat_map(|(bin, count)| {
                        (0..*count).map(move |_| {
                            let binner = PubkeyBinCalculator24::new(bins);
                            CalculateHashIntermediate {
                                hash: Hash::default(),
                                lamports: 0,
                                pubkey: binner.lowest_pubkey_from_bin(bin, bins),
                            }
                        })
                    })
                    .collect::<Vec<_>>();
                // look for the first pubkey in each bin
                for (bin, count_in_bin) in counts.iter().enumerate().take(bins) {
                    let first = AccountsHasher::find_first_pubkey_in_bin(
                        &hash_data, bin, bins, &binner, &stats,
                    );
                    // test both functions
                    let first_again = AccountsHasher::binary_search_for_first_pubkey_in_bin(
                        &hash_data, bin, &binner,
                    );
                    assert_eq!(first, first_again);
                    assert_eq!(first.is_none(), count_in_bin == &0);
                    if let Some(first) = first {
                        assert_eq!(binner.bin_from_pubkey(&hash_data[first].pubkey), bin);
                        if first > 0 {
                            assert!(binner.bin_from_pubkey(&hash_data[first - 1].pubkey) < bin);
                        }
                    }
                }
            }
            assert_eq!(
                count, expected_count,
                "too few iterations in test. bins: {bins}"
            );
        }
    }

    #[test]
    fn test_account_hashes_file() {
        let dir_for_temp_cache_files = tempdir().unwrap();
        // 0 hashes
        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
        assert!(file.get_reader().is_none());
        let hashes = (0..2).map(|i| Hash::new(&[i; 32])).collect::<Vec<_>>();

        // 1 hash
        file.write(&hashes[0]);
        let reader = file.get_reader().unwrap();
        assert_eq!(&[hashes[0]][..], reader.read(0));
        assert!(reader.read(1).is_empty());

        // multiple hashes
        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
        assert!(file.get_reader().is_none());
        hashes.iter().for_each(|hash| file.write(hash));
        let reader = file.get_reader().unwrap();
        (0..2).for_each(|i| assert_eq!(&hashes[i..], reader.read(i)));
        assert!(reader.read(2).is_empty());
    }
#[test]
fn test_cumulative_hashes_from_files() {
    let dir_for_temp_cache_files = tempdir().unwrap();
    (0..4).for_each(|permutation| {
        let hashes = (0..2).map(|i| Hash::new(&[i + 1; 32])).collect::<Vec<_>>();

        let mut combined = Vec::default();

        // 0 hashes
        let file0 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());

        // 1 hash
        let mut file1 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
        file1.write(&hashes[0]);
        combined.push(hashes[0]);

        // multiple hashes
        let mut file2 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
        hashes.iter().for_each(|hash| {
            file2.write(hash);
            combined.push(*hash);
        });

        let hashes = if permutation == 0 {
            vec![file0, file1, file2]
        } else if permutation == 1 {
            // include more empty files
            vec![
                file0,
                file1,
                AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                file2,
                AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
            ]
        } else if permutation == 2 {
            vec![file1, file2]
        } else {
            // swap file2 and file1
            let one = combined.remove(0);
            combined.push(one);
            vec![
                file2,
                AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                file1,
            ]
        };

        let cumulative = CumulativeHashesFromFiles::from_files(hashes);
        let len = combined.len();
        assert_eq!(cumulative.total_count(), len);
        (0..combined.len()).for_each(|start| {
            let mut retrieved = Vec::default();
            let mut cumulative_start = start;
            // read all data
            while retrieved.len() < (len - start) {
                let this_one = cumulative.get_slice(cumulative_start);
                retrieved.extend(this_one.iter());
                cumulative_start += this_one.len();
                assert_ne!(0, this_one.len());
            }
            assert_eq!(
                &combined[start..],
                &retrieved[..],
                "permutation: {permutation}"
            );
        });
    });
}
#[test]
fn test_accountsdb_div_ceil() {
    assert_eq!(AccountsHasher::div_ceil(10, 3), 4);
    assert_eq!(AccountsHasher::div_ceil(0, 1), 0);
    assert_eq!(AccountsHasher::div_ceil(0, 5), 0);
    assert_eq!(AccountsHasher::div_ceil(9, 3), 3);
    assert_eq!(AccountsHasher::div_ceil(9, 9), 1);
}
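// A small usage sketch (an inference from the checks above): `div_ceil` is
// round-up division, answering "how many fixed-size chunks cover N items"
// while still counting a final partial chunk.
#[test]
fn test_div_ceil_partial_chunk_sketch() {
    let items = 10;
    let chunk_size = 3;
    // 3 full chunks of 3 plus a final chunk holding the 1 leftover item
    assert_eq!(AccountsHasher::div_ceil(items, chunk_size), 4);
}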
#[test]
#[should_panic(expected = "attempt to divide by zero")]
fn test_accountsdb_div_ceil_fail() {
    assert_eq!(AccountsHasher::div_ceil(10, 0), 0);
}
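// `for_rest` wraps the full accumulation in a single chunk, matching the
// one-slice-per-storage-chunk shape that `rest_of_hash_calculation` takes.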
fn for_rest(original: &[CalculateHashIntermediate]) -> Vec<&[CalculateHashIntermediate]> {
    vec![original]
}
#[test]
fn test_accountsdb_rest_of_hash_calculation() {
    solana_logger::setup();

    let mut account_maps = Vec::new();

    let pubkey = Pubkey::from([11u8; 32]);
    let hash = Hash::new(&[1u8; 32]);
    let val = CalculateHashIntermediate {
        hash,
        lamports: 88,
        pubkey,
    };
    account_maps.push(val);

    // 2nd key - zero lamports, so will be removed
    let pubkey = Pubkey::from([12u8; 32]);
    let hash = Hash::new(&[2u8; 32]);
    let val = CalculateHashIntermediate {
        hash,
        lamports: 0,
        pubkey,
    };
    account_maps.push(val);

    let dir_for_temp_cache_files = tempdir().unwrap();
    let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
    let result = accounts_hash
        .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
    let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
    assert_eq!((result.0, result.1), (expected_hash, 88));

    // 3rd key - with pubkey value before 1st key so it will be sorted first
    let pubkey = Pubkey::from([10u8; 32]);
    let hash = Hash::new(&[2u8; 32]);
    let val = CalculateHashIntermediate {
        hash,
        lamports: 20,
        pubkey,
    };
    account_maps.insert(0, val);

    let result = accounts_hash
        .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
    let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap();
    assert_eq!((result.0, result.1), (expected_hash, 108));

    // 4th entry - same pubkey as the 3rd key, but a later slot, so it wins the de-dup
    let pubkey = Pubkey::from([10u8; 32]);
    let hash = Hash::new(&[99u8; 32]);
    let val = CalculateHashIntermediate {
        hash,
        lamports: 30,
        pubkey,
    };
    account_maps.insert(1, val);

    let result = accounts_hash
        .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
    let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap();
    assert_eq!((result.0, result.1), (expected_hash, 118));
}
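// Helpers supplying the `max_bin` argument to `de_dup_accounts` below: the
// number of pubkey bins the deduplicated output is spread across (zero
// meaning no bins at all).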
fn one_range() -> usize {
    1
}
fn zero_range() -> usize {
    0
}
#[test]
fn test_accountsdb_de_dup_accounts_zero_chunks() {
    let vec = vec![vec![CalculateHashIntermediate {
        lamports: 1,
        ..CalculateHashIntermediate::default()
    }]];
    let temp_vec = vec.to_vec();
    let slice = convert_to_slice(&temp_vec);
    let dir_for_temp_cache_files = tempdir().unwrap();
    let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
    let (mut hashes, lamports) =
        accounts_hasher.de_dup_accounts_in_parallel(&slice, 0, 1, &HashStats::default());
    assert_eq!(&[Hash::default()], hashes.get_reader().unwrap().read(0));
    assert_eq!(lamports, 1);
}
fn get_vec_vec(hashes: Vec<AccountHashesFile>) -> Vec<Vec<Hash>> {
    hashes.into_iter().map(get_vec).collect()
}
fn get_vec(mut hashes: AccountHashesFile) -> Vec<Hash> {
    hashes
        .get_reader()
        .map(|r| r.read(0).to_vec())
        .unwrap_or_default()
}
#[test]
fn test_accountsdb_de_dup_accounts_empty() {
    solana_logger::setup();
    let dir_for_temp_cache_files = tempdir().unwrap();
    let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());

    let empty = [];
    let vec = &empty;
    let (hashes, lamports) =
        accounts_hash.de_dup_accounts(vec, &mut HashStats::default(), one_range());
    assert_eq!(
        vec![Hash::default(); 0],
        get_vec_vec(hashes)
            .into_iter()
            .flatten()
            .collect::<Vec<_>>(),
    );
    assert_eq!(lamports, 0);

    let vec = vec![];
    let (hashes, lamports) =
        accounts_hash.de_dup_accounts(&vec, &mut HashStats::default(), zero_range());
    let empty: Vec<Vec<Hash>> = Vec::default();
    assert_eq!(empty, get_vec_vec(hashes));
    assert_eq!(lamports, 0);

    let (hashes, lamports) =
        accounts_hash.de_dup_accounts_in_parallel(&[], 1, 1, &HashStats::default());
    assert_eq!(vec![Hash::default(); 0], get_vec(hashes));
    assert_eq!(lamports, 0);

    let (hashes, lamports) =
        accounts_hash.de_dup_accounts_in_parallel(&[], 2, 1, &HashStats::default());
    assert_eq!(vec![Hash::default(); 0], get_vec(hashes));
    assert_eq!(lamports, 0);
}
#[test]
fn test_accountsdb_de_dup_accounts_from_stores() {
    solana_logger::setup();

    let key_a = Pubkey::from([1u8; 32]);
    let key_b = Pubkey::from([2u8; 32]);
    let key_c = Pubkey::from([3u8; 32]);
    const COUNT: usize = 6;
    let hashes = (0..COUNT).map(|i| Hash::new(&[i as u8; 32]));
    // create this vector
    // abbbcc
    let keys = [key_a, key_b, key_b, key_b, key_c, key_c];

    let accounts: Vec<_> = hashes
        .zip(keys.iter())
        .enumerate()
        .map(|(i, (hash, &pubkey))| CalculateHashIntermediate {
            hash,
            lamports: (i + 1) as u64,
            pubkey,
        })
        .collect();

    type ExpectedType = (String, bool, u64, String);
    let expected: Vec<ExpectedType> = vec![
        // ("key/lamports key2/lamports ...",
        // is_last_slice,
        // result lamports,
        // result hashes)
        // "a5" = key_a, 5 lamports
        ("a1", false, 1, "[11111111111111111111111111111111]"),
        ("a1b2", false, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
        ("a1b2b3", false, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
        ("a1b2b3b4", false, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
        ("a1b2b3b4c5", false, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ("b2", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
        ("b2b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
        ("b2b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
        ("b2b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ("b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
        ("b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
        ("b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ("b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
        ("b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ("c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ("a1", true, 1, "[11111111111111111111111111111111]"),
        ("a1b2", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
        ("a1b2b3", true, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
        ("a1b2b3b4", true, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
        ("a1b2b3b4c5", true, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ("b2", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
        ("b2b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
        ("b2b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
        ("b2b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ("b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
        ("b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
        ("b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ("b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
        ("b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ("c5", true, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
    ]
    .into_iter()
    .map(|item| {
        let result: ExpectedType = (
            item.0.to_string(),
            item.1,
            item.2,
            item.3.to_string(),
        );
        result
    })
    .collect();

    let dir_for_temp_cache_files = tempdir().unwrap();
    let hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
    let mut expected_index = 0;
    for last_slice in 0..2 {
        for start in 0..COUNT {
            for end in start + 1..COUNT {
                let is_last_slice = last_slice == 1;
                let accounts = accounts.clone();
                let slice = &accounts[start..end];

                let slice2 = vec![slice.to_vec()];
                let slice = &slice2[..];
                let slice_temp = convert_to_slice(&slice2);
                let (hashes2, lamports2) =
                    hash.de_dup_accounts_in_parallel(&slice_temp, 0, 1, &HashStats::default());
                let slice3 = convert_to_slice(&slice2);
                let (hashes3, lamports3) =
                    hash.de_dup_accounts_in_parallel(&slice3, 0, 1, &HashStats::default());
                let vec = slice.to_vec();
                let slice4 = convert_to_slice(&vec);
                let mut max_bin = end - start;
                if !max_bin.is_power_of_two() {
                    max_bin = 1;
                }
                let (hashes4, lamports4) =
                    hash.de_dup_accounts(&slice4, &mut HashStats::default(), max_bin);
                let vec = slice.to_vec();
                let slice5 = convert_to_slice(&vec);
                let (hashes5, lamports5) =
                    hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);
                let vec = slice.to_vec();
                let slice5 = convert_to_slice(&vec);
                let (hashes6, lamports6) =
                    hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);

                let hashes2 = get_vec(hashes2);
                let hashes3 = get_vec(hashes3);
                let hashes4 = get_vec_vec(hashes4);
                let hashes5 = get_vec_vec(hashes5);
                let hashes6 = get_vec_vec(hashes6);

                assert_eq!(hashes2, hashes3);
                let expected2 = hashes2.clone();
                assert_eq!(
                    expected2,
                    hashes4.into_iter().flatten().collect::<Vec<_>>(),
                    "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
                );
                assert_eq!(
                    expected2.clone(),
                    hashes5.iter().flatten().copied().collect::<Vec<_>>(),
                    "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
                );
                assert_eq!(
                    expected2.clone(),
                    hashes6.iter().flatten().copied().collect::<Vec<_>>()
                );
                assert_eq!(lamports2, lamports3);
                assert_eq!(lamports2, lamports4);
                assert_eq!(lamports2, lamports5);
                assert_eq!(lamports2, lamports6);

                let human_readable = slice[0]
                    .iter()
                    .map(|v| {
                        let mut s = (if v.pubkey == key_a {
                            "a"
                        } else if v.pubkey == key_b {
                            "b"
                        } else {
                            "c"
                        })
                        .to_string();
                        s.push_str(&v.lamports.to_string());
                        s
                    })
                    .collect::<String>();

                let hash_result_as_string = format!("{hashes2:?}");

                let packaged_result: ExpectedType = (
                    human_readable,
                    is_last_slice,
                    lamports2,
                    hash_result_as_string,
                );
                assert_eq!(expected[expected_index], packaged_result);

                // for generating expected results
                // error!("{:?},", packaged_result);
                expected_index += 1;
            }
        }
    }
}
#[test]
fn test_accountsdb_compare_two_hash_entries() {
    solana_logger::setup();
    let pubkey = Pubkey::new_unique();
    let hash = Hash::new_unique();
    let val = CalculateHashIntermediate {
        hash,
        lamports: 1,
        pubkey,
    };

    // slot same, version <
    let hash2 = Hash::new_unique();
    let val2 = CalculateHashIntermediate {
        hash: hash2,
        lamports: 4,
        pubkey,
    };
    assert_eq!(
        std::cmp::Ordering::Equal, // no longer comparing slots or versions
        AccountsHasher::compare_two_hash_entries(&val, &val2)
    );

    // slot same, version =
    let hash3 = Hash::new_unique();
    let val3 = CalculateHashIntermediate {
        hash: hash3,
        lamports: 2,
        pubkey,
    };
    assert_eq!(
        std::cmp::Ordering::Equal,
        AccountsHasher::compare_two_hash_entries(&val, &val3)
    );

    // slot same, version >
    let hash4 = Hash::new_unique();
    let val4 = CalculateHashIntermediate {
        hash: hash4,
        lamports: 6,
        pubkey,
    };
    assert_eq!(
        std::cmp::Ordering::Equal, // no longer comparing slots or versions
        AccountsHasher::compare_two_hash_entries(&val, &val4)
    );

    // slot >, version <
    let hash5 = Hash::new_unique();
    let val5 = CalculateHashIntermediate {
        hash: hash5,
        lamports: 8,
        pubkey,
    };
    assert_eq!(
        std::cmp::Ordering::Equal, // no longer comparing slots or versions
        AccountsHasher::compare_two_hash_entries(&val, &val5)
    );
}
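// Convenience wrapper: runs `de_dup_accounts_in_parallel` over a single
// pubkey bin (bin 0 of 1) with a throwaway temp dir for the hash cache file.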
fn test_de_dup_accounts_in_parallel<'a>(
    account_maps: &'a [&'a [CalculateHashIntermediate]],
) -> (AccountHashesFile, u64) {
    let dir_for_temp_cache_files = tempdir().unwrap();
    let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
    accounts_hasher.de_dup_accounts_in_parallel(account_maps, 0, 1, &HashStats::default())
}
#[test]
fn test_accountsdb_remove_zero_balance_accounts() {
    solana_logger::setup();
    let pubkey = Pubkey::new_unique();
    let hash = Hash::new_unique();
    let mut account_maps = Vec::new();
    let val = CalculateHashIntermediate {
        hash,
        lamports: 1,
        pubkey,
    };
    account_maps.push(val);

    let vecs = vec![account_maps.to_vec()];
    let slice = convert_to_slice(&vecs);
    let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
    assert_eq!(
        (get_vec(hashfile), lamports),
        (vec![val.hash], val.lamports)
    );

    // zero original lamports, higher version
    let val = CalculateHashIntermediate {
        hash,
        lamports: 0,
        pubkey,
    };
    account_maps.push(val); // has to be after previous entry since account_maps are in slot order

    let vecs = vec![account_maps.to_vec()];
    let slice = convert_to_slice(&vecs);
    let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
    assert_eq!((get_vec(hashfile), lamports), (vec![], 0));
}
#[test]
fn test_accountsdb_dup_pubkey_2_chunks() {
    // 2 chunks, a dup pubkey in each chunk
    for reverse in [false, true] {
        let key = Pubkey::new_from_array([1; 32]); // key is BEFORE key2
        let key2 = Pubkey::new_from_array([2; 32]);
        let hash = Hash::new_unique();
        let mut account_maps = Vec::new();
        let mut account_maps2 = Vec::new();
        let val = CalculateHashIntermediate {
            hash,
            lamports: 1,
            pubkey: key,
        };
        account_maps.push(val);
        let val2 = CalculateHashIntermediate {
            hash,
            lamports: 2,
            pubkey: key2,
        };
        account_maps.push(val2);
        let val3 = CalculateHashIntermediate {
            hash,
            lamports: 3,
            pubkey: key2,
        };
        account_maps2.push(val3);

        let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
        if reverse {
            vecs = vecs.into_iter().rev().collect();
        }
        let slice = convert_to_slice(&vecs);
        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
        assert_eq!(
            (get_vec(hashfile), lamports),
            (
                vec![val.hash, if reverse { val2.hash } else { val3.hash }],
                val.lamports
                    + if reverse {
                        val2.lamports
                    } else {
                        val3.lamports
                    }
            ),
            "reverse: {reverse}"
        );
    }
}
#[test]
fn test_accountsdb_dup_pubkey_2_chunks_backwards() {
    // 2 chunks, a dup pubkey in each chunk
    for reverse in [false, true] {
        let key = Pubkey::new_from_array([3; 32]); // key is AFTER key2
        let key2 = Pubkey::new_from_array([2; 32]);
        let hash = Hash::new_unique();
        let mut account_maps = Vec::new();
        let mut account_maps2 = Vec::new();
        let val2 = CalculateHashIntermediate {
            hash,
            lamports: 2,
            pubkey: key2,
        };
        account_maps.push(val2);
        let val = CalculateHashIntermediate {
            hash,
            lamports: 1,
            pubkey: key,
        };
        account_maps.push(val);
        let val3 = CalculateHashIntermediate {
            hash,
            lamports: 3,
            pubkey: key2,
        };
        account_maps2.push(val3);

        let mut vecs = vec![account_maps.to_vec(), account_maps2.to_vec()];
        if reverse {
            vecs = vecs.into_iter().rev().collect();
        }
        let slice = convert_to_slice(&vecs);
        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
        assert_eq!(
            (get_vec(hashfile), lamports),
            (
                vec![if reverse { val2.hash } else { val3.hash }, val.hash],
                val.lamports
                    + if reverse {
                        val2.lamports
                    } else {
                        val3.lamports
                    }
            ),
            "reverse: {reverse}"
        );
    }
}
#[test]
fn test_accountsdb_cumulative_offsets1_d() {
    let input = vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]];
    let cumulative = CumulativeOffsets::from_raw(&input);

    let src: Vec<_> = input.clone().into_iter().flatten().collect();
    let len = src.len();
    assert_eq!(cumulative.total_count, len);
    assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
    const DIMENSION: usize = 0;
    assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 0);
    assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 2);
    assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
    assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
    for start in 0..len {
        let slice = cumulative.get_slice(&input, start);
        let len = slice.len();
        assert!(len > 0);
        assert_eq!(&src[start..(start + len)], slice);
    }

    let input = vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]];
    let cumulative = CumulativeOffsets::from_raw(&input);

    let src: Vec<_> = input.clone().into_iter().flatten().collect();
    let len = src.len();
    assert_eq!(cumulative.total_count, len);
    assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
    assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 1);
    assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 3);
    assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
    assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
    for start in 0..len {
        let slice = cumulative.get_slice(&input, start);
        let len = slice.len();
        assert!(len > 0);
        assert_eq!(&src[start..(start + len)], slice);
    }

    let input: Vec<Vec<u32>> = vec![vec![]];
    let cumulative = CumulativeOffsets::from_raw(&input);
    let len = input.into_iter().flatten().count();
    assert_eq!(cumulative.total_count, len);
    assert_eq!(cumulative.cumulative_offsets.len(), 0); // no non-empty vectors
}
#[should_panic(expected = "is_empty")]
#[test]
fn test_accountsdb_cumulative_find_empty() {
    let input = CumulativeOffsets {
        cumulative_offsets: vec![],
        total_count: 0,
    };
    input.find(0);
}

#[test]
fn test_accountsdb_cumulative_find() {
    let input = CumulativeOffsets {
        cumulative_offsets: vec![CumulativeOffset {
            index: vec![0],
            start_offset: 0,
        }],
        total_count: 0,
    };
    assert_eq!(input.find(0), (0, &input.cumulative_offsets[0]));

    let input = CumulativeOffsets {
        cumulative_offsets: vec![
            CumulativeOffset {
                index: vec![0],
                start_offset: 0,
            },
            CumulativeOffset {
                index: vec![1],
                start_offset: 2,
            },
        ],
        total_count: 0,
    };
    assert_eq!(input.find(0), (0, &input.cumulative_offsets[0])); // = first start_offset
    assert_eq!(input.find(1), (1, &input.cumulative_offsets[0])); // > first start_offset
    assert_eq!(input.find(2), (0, &input.cumulative_offsets[1])); // = last start_offset
    assert_eq!(input.find(3), (1, &input.cumulative_offsets[1])); // > last start_offset
}
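// As the assertions above show, `find(index)` returns how far `index` sits
// past the containing run's `start_offset`, plus a reference to that run.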
#[test]
fn test_accountsdb_cumulative_offsets2_d() {
    let input: Vec<Vec<Vec<u64>>> = vec![vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
    let cumulative = CumulativeOffsets::from_raw_2d(&input);

    let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
    let len = src.len();
    assert_eq!(cumulative.total_count, len);
    assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
    const DIMENSION_0: usize = 0;
    const DIMENSION_1: usize = 1;
    assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
    assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
    assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
    assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 2);
    assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
    assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
    for start in 0..len {
        let slice: &[u64] = cumulative.get_slice(&input, start);
        let len = slice.len();
        assert!(len > 0);
        assert_eq!(&src[start..(start + len)], slice);
    }

    let input = vec![vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
    let cumulative = CumulativeOffsets::from_raw_2d(&input);

    let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
    let len = src.len();
    assert_eq!(cumulative.total_count, len);
    assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
    assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
    assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 1);
    assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
    assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 3);
    assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
    assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
    for start in 0..len {
        let slice: &[u64] = cumulative.get_slice(&input, start);
        let len = slice.len();
        assert!(len > 0);
        assert_eq!(&src[start..(start + len)], slice);
    }

    let input: Vec<Vec<Vec<u32>>> = vec![vec![]];
    let cumulative = CumulativeOffsets::from_raw_2d(&input);
    let len = input.into_iter().flatten().count();
    assert_eq!(cumulative.total_count, len);
    assert_eq!(cumulative.cumulative_offsets.len(), 0); // no non-empty vectors

    let input = vec![
        vec![vec![0, 1]],
        vec![vec![]],
        vec![vec![], vec![2, 3, 4], vec![]],
    ];
    let cumulative = CumulativeOffsets::from_raw_2d(&input);

    let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
    let len = src.len();
    assert_eq!(cumulative.total_count, len);
    assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors
    assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
    assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
    assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 2);
    assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 1);
    assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
    assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);
    for start in 0..len {
        let slice: &[u64] = cumulative.get_slice(&input, start);
        let len = slice.len();
        assert!(len > 0);
        assert_eq!(&src[start..(start + len)], slice);
    }
}
fn test_hashing_larger(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
    let result = AccountsHasher::compute_merkle_root(hashes.clone(), fanout);
    let reduced: Vec<_> = hashes.iter().map(|x| x.1).collect();
    let result2 = test_hashing(reduced, fanout);
    assert_eq!(result, result2, "len: {}", hashes.len());
    result
}

fn test_hashing(hashes: Vec<Hash>, fanout: usize) -> Hash {
    let temp: Vec<_> = hashes.iter().map(|h| (Pubkey::default(), *h)).collect();
    let result = AccountsHasher::compute_merkle_root(temp, fanout);
    let reduced: Vec<_> = hashes.clone();
    let result2 = AccountsHasher::compute_merkle_root_from_slices(
        hashes.len(),
        fanout,
        None,
        |start| &reduced[start..],
        None,
    );
    assert_eq!(result, result2.0, "len: {}", hashes.len());

    let result2 = AccountsHasher::compute_merkle_root_from_slices(
        hashes.len(),
        fanout,
        Some(1),
        |start| &reduced[start..],
        None,
    );
    assert_eq!(result, result2.0, "len: {}", hashes.len());

    let max = std::cmp::min(reduced.len(), fanout * 2);
    for left in 0..max {
        for right in left + 1..max {
            let src = vec![
                vec![reduced[0..left].to_vec(), reduced[left..right].to_vec()],
                vec![reduced[right..].to_vec()],
            ];
            let offsets = CumulativeOffsets::from_raw_2d(&src);
            let get_slice = |start: usize| -> &[Hash] { offsets.get_slice(&src, start) };
            let result2 = AccountsHasher::compute_merkle_root_from_slices(
                offsets.total_count,
                fanout,
                None,
                get_slice,
                None,
            );
            assert_eq!(result, result2.0);
        }
    }
    result
}
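// `test_hashing` checks that chunked and contiguous merkle computations
// agree: the same root must come out of `compute_merkle_root` and
// `compute_merkle_root_from_slices` no matter how the hash list is split.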
#[test]
fn test_accountsdb_compute_merkle_root_large() {
    solana_logger::setup();

    // handle fanout^x -1, +0, +1 for a few 'x's
    const FANOUT: usize = 3;
    let mut hash_counts: Vec<_> = (1..6)
        .flat_map(|x| {
            let mark = FANOUT.pow(x);
            vec![mark - 1, mark, mark + 1]
        })
        .collect();

    // saturate the test space for threshold to threshold + target
    // this hits right before we use the 3 deep optimization and all the way through all possible partial last chunks
    let target = FANOUT.pow(3);
    let threshold = target * FANOUT;
    hash_counts.extend(threshold - 1..=threshold + target);

    for hash_count in hash_counts {
        let hashes: Vec<_> = (0..hash_count).map(|_| Hash::new_unique()).collect();
        test_hashing(hashes, FANOUT);
    }
}
#[test]
fn test_accountsdb_compute_merkle_root() {
    solana_logger::setup();

    let expected_results = vec![
        (0, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
        (0, 1, "8unXKJYTxrR423HgQxbDmx29mFri1QNrzVKKDxEfc6bj", 0),
        (0, 2, "6QfkevXLLqbfAaR1kVjvMLFtEXvNUVrpmkwXqgsYtCFW", 1),
        (0, 3, "G3FrJd9JrXcMiqChTSfvEdBL2sCPny3ebiUy9Xxbn7a2", 3),
        (0, 4, "G3sZXHhwoCFuNyWy7Efffr47RBW33ibEp7b2hqNDmXdu", 6),
        (0, 5, "78atJJYpokAPKMJwHxUW8SBDvPkkSpTBV7GiB27HwosJ", 10),
        (0, 6, "7c9SM2BmCRVVXdrEdKcMK91MviPqXqQMd8QAb77tgLEy", 15),
        (0, 7, "3hsmnZPhf22UvBLiZ4dVa21Qsdh65CCrtYXsb8MxoVAa", 21),
        (0, 8, "5bwXUiC6RCRhb8fqvjvUXT6waU25str3UXA3a6Aq1jux", 28),
        (0, 9, "3NNtQKH6PaYpCnFBtyi2icK9eYX3YM5pqA3SKaXtUNzu", 36),
        (1, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
        (1, 1, "4GWVCsnEu1iRyxjAB3F7J7C4MMvcoxFWtP9ihvwvDgxY", 0),
        (1, 2, "8ML8Te6Uw2mipFr2v9sMZDcziXzhVqJo2qeMJohg1CJx", 1),
        (1, 3, "AMEuC3AgqAeRBGBhSfTmuMdfbAiXJnGmKv99kHmcAE1H", 3),
        (1, 4, "HEnDuJLHpsQfrApimGrovTqPEF6Vkrx2dKFr3BDtYzWx", 6),
        (1, 5, "6rH69iP2yM1o565noZN1EqjySW4PhYUskz3c5tXePUfV", 10),
        (1, 6, "7qEQMEXdfSPjbZ3q4cuuZwebDMvTvuaQ3dBiHoDUKo9a", 15),
        (1, 7, "GDJz7LSKYjqqz6ujCaaQRJRmQ7TLNCwYJhdT84qT4qwk", 21),
        (1, 8, "HT9krPLVTo3rr5WZQBQFrbqWs8SbYScXfnt8EVuobboM", 28),
        (1, 9, "8y2pMgqMdRsvqw6BQXm6wtz3qxGPss72i6H6gVpPyeda", 36),
    ];

    let mut expected_index = 0;
    let start = 0;
    let default_fanout = 2;
    // test 0..3 recursions (at fanout = 2) and 1 item remainder. The internals have 1 special case first loop and subsequent loops are the same types.
    let iterations = default_fanout * default_fanout * default_fanout + 2;
    for pass in 0..2 {
        let fanout = if pass == 0 {
            default_fanout
        } else {
            MERKLE_FANOUT
        };
        for count in start..iterations {
            let mut input: Vec<_> = (0..count)
                .map(|i| {
                    let key = Pubkey::from([(pass * iterations + count) as u8; 32]);
                    let hash = Hash::new(&[(pass * iterations + count + i + 1) as u8; 32]);
                    (key, hash)
                })
                .collect();
            let result = if pass == 0 {
                test_hashing_larger(input, fanout)
            } else {
                // this sorts inside
                let early_result = AccountsHasher::accumulate_account_hashes(
                    input
                        .iter()
                        .map(|i| (i.0, AccountHash(i.1)))
                        .collect::<Vec<_>>(),
                );
                input.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
                let result = AccountsHasher::compute_merkle_root(input, fanout);
                assert_eq!(early_result, result);
                result
            };
            // compare against captured, expected results for hash (and lamports)
            assert_eq!(
                (
                    pass,
                    count,
                    &*(result.to_string()),
                    expected_results[expected_index].3
                ), // we no longer calculate lamports
                expected_results[expected_index]
            );
            expected_index += 1;
        }
    }
}
#[test]
#[should_panic(expected = "overflow is detected while summing capitalization")]
fn test_accountsdb_lamport_overflow() {
    solana_logger::setup();
    let offset = 2;
    let input = vec![
        CalculateHashIntermediate {
            hash: Hash::new(&[1u8; 32]),
            lamports: u64::MAX - offset,
            pubkey: Pubkey::new_unique(),
        },
        CalculateHashIntermediate {
            hash: Hash::new(&[2u8; 32]),
            lamports: offset + 1,
            pubkey: Pubkey::new_unique(),
        },
    ];
    let dir_for_temp_cache_files = tempdir().unwrap();
    let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
    accounts_hasher.de_dup_accounts_in_parallel(
        &convert_to_slice(&[input]),
        0,
        1,
        &HashStats::default(),
    );
}

fn convert_to_slice(
    input: &[Vec<CalculateHashIntermediate>],
) -> Vec<&[CalculateHashIntermediate]> {
    input.iter().map(|v| &v[..]).collect::<Vec<_>>()
}
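// `convert_to_slice` reborrows each owned chunk as a slice so the tests can
// hand the hasher the `&[&[CalculateHashIntermediate]]` shape it expects.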
#[test]
#[should_panic(expected = "overflow is detected while summing capitalization")]
fn test_accountsdb_lamport_overflow2() {
    solana_logger::setup();
    let offset = 2;
    let input = vec![
        vec![CalculateHashIntermediate {
            hash: Hash::new(&[1u8; 32]),
            lamports: u64::MAX - offset,
            pubkey: Pubkey::new_unique(),
        }],
        vec![CalculateHashIntermediate {
            hash: Hash::new(&[2u8; 32]),
            lamports: offset + 1,
            pubkey: Pubkey::new_unique(),
        }],
    ];
    let dir_for_temp_cache_files = tempdir().unwrap();
    let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
    accounts_hasher.de_dup_accounts(
        &convert_to_slice(&input),
        &mut HashStats::default(),
        2, // accounts above are in 2 groups
    );
}
}