use {
    crate::{
        accounts_db::{AccountStorageEntry, IncludeSlotInHash, PUBKEY_BINS_FOR_CALCULATING_HASHES},
        active_stats::{ActiveStatItem, ActiveStats},
        ancestors::Ancestors,
        pubkey_bins::PubkeyBinCalculator24,
        rent_collector::RentCollector,
    },
    log::*,
    memmap2::MmapMut,
    rayon::prelude::*,
    solana_measure::{measure::Measure, measure_us},
    solana_sdk::{
        hash::{Hash, Hasher},
        pubkey::Pubkey,
        slot_history::Slot,
        sysvar::epoch_schedule::EpochSchedule,
    },
    std::{
        borrow::Borrow,
        convert::TryInto,
        fs::File,
        io::{BufWriter, Write},
        path::PathBuf,
        sync::{
            atomic::{AtomicU64, AtomicUsize, Ordering},
            Arc,
        },
    },
    tempfile::tempfile_in,
};

pub const MERKLE_FANOUT: usize = 16;

/// 1 file containing account hashes sorted by pubkey, mapped into memory
struct MmapAccountHashesFile {
    mmap: MmapMut,
}

impl MmapAccountHashesFile {
    /// return a slice of account hashes starting at 'index'
    fn read(&self, index: usize) -> &[Hash] {
        let start = std::mem::size_of::<Hash>() * index;
        let item_slice: &[u8] = &self.mmap[start..];
        let remaining_elements = item_slice.len() / std::mem::size_of::<Hash>();
        // SAFETY: `Hash` is a plain 32-byte array with alignment 1, so reinterpreting
        // the mapped bytes as `Hash` values cannot be misaligned, and
        // `remaining_elements` only counts whole `Hash`-sized items.
        unsafe {
            let item = item_slice.as_ptr() as *const Hash;
            std::slice::from_raw_parts(item, remaining_elements)
        }
    }
}

/// 1 file containing account hashes sorted by pubkey
pub struct AccountHashesFile {
    /// # hashes and an open file that will be deleted on drop. None if there are zero hashes to represent, and thus, no file.
    count_and_writer: Option<(usize, BufWriter<File>)>,
    /// The directory where temporary cache files are put
    dir_for_temp_cache_files: PathBuf,
}

impl AccountHashesFile {
    /// map the file into memory and return a reader that can access it by slice
    fn get_reader(&mut self) -> Option<(usize, MmapAccountHashesFile)> {
        std::mem::take(&mut self.count_and_writer).map(|(count, writer)| {
            let file = Some(writer.into_inner().unwrap());
            (
                count,
                MmapAccountHashesFile {
                    mmap: unsafe { MmapMut::map_mut(file.as_ref().unwrap()).unwrap() },
                },
            )
        })
    }

    /// # hashes stored in this file
    pub fn count(&self) -> usize {
        self.count_and_writer
            .as_ref()
            .map(|(count, _)| *count)
            .unwrap_or_default()
    }

    /// write 'hash' to the file
    /// If the file isn't open, create it first.
    pub fn write(&mut self, hash: &Hash) {
        if self.count_and_writer.is_none() {
            // we have hashes to write but no file yet, so create a file that will auto-delete on drop
            self.count_and_writer = Some((
                0,
                BufWriter::new(
                    tempfile_in(&self.dir_for_temp_cache_files).unwrap_or_else(|err| {
                        panic!(
                            "Unable to create file within {}: {err}",
                            self.dir_for_temp_cache_files.display()
                        )
                    }),
                ),
            ));
        }
        let count_and_writer = self.count_and_writer.as_mut().unwrap();
        assert_eq!(
            std::mem::size_of::<Hash>(),
            count_and_writer
                .1
                .write(hash.as_ref())
                .unwrap_or_else(|err| {
                    panic!(
                        "Unable to write file within {}: {err}",
                        self.dir_for_temp_cache_files.display()
                    )
                })
        );
        count_and_writer.0 += 1;
    }
}
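
// A minimal usage sketch of the lazy write-then-read round trip above (the
// `tests` module at the bottom of this file exercises the same flow):
//
//     let mut file = AccountHashesFile { count_and_writer: None, dir_for_temp_cache_files: dir };
//     file.write(&hash); // first write lazily creates the auto-deleting temp file
//     let (count, reader) = file.get_reader().unwrap();
//     assert_eq!(count, 1);
//     assert_eq!(reader.read(0), &[hash][..]);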

/// parameters to calculate accounts hash
#[derive(Debug)]
pub struct CalcAccountsHashConfig<'a> {
    /// true to use a thread pool dedicated to bg operations
    pub use_bg_thread_pool: bool,
    /// verify every hash in append vec/write cache with a recalculated hash
    pub check_hash: bool,
    /// 'ancestors' is used to get storages
    pub ancestors: Option<&'a Ancestors>,
    /// does hash calc need to consider account data that exists in the write cache?
    /// if so, 'ancestors' will be used for this purpose as well as storages.
    pub epoch_schedule: &'a EpochSchedule,
    pub rent_collector: &'a RentCollector,
    /// used for tracking down hash mismatches after the fact
    pub store_detailed_debug_info_on_failure: bool,
    pub include_slot_in_hash: IncludeSlotInHash,
}

// [smallest, quartile 1, quartile 2 (median), quartile 3, largest, average]
pub type StorageSizeQuartileStats = [usize; 6];

#[derive(Debug, Default)]
pub struct HashStats {
    pub total_us: u64,
    pub mark_time_us: u64,
    pub cache_hash_data_us: u64,
    pub scan_time_total_us: u64,
    pub zeros_time_total_us: u64,
    pub hash_time_total_us: u64,
    pub sort_time_total_us: u64,
    pub hash_total: usize,
    pub num_snapshot_storage: usize,
    pub scan_chunks: usize,
    pub num_slots: usize,
    pub num_dirty_slots: usize,
    pub collect_snapshots_us: u64,
    pub storage_sort_us: u64,
    pub storage_size_quartiles: StorageSizeQuartileStats,
    pub oldest_root: Slot,
    pub roots_older_than_epoch: AtomicUsize,
    pub accounts_in_roots_older_than_epoch: AtomicUsize,
    pub append_vec_sizes_older_than_epoch: AtomicUsize,
    pub longest_ancient_scan_us: AtomicU64,
    pub sum_ancient_scans_us: AtomicU64,
    pub count_ancient_scans: AtomicU64,
    pub pubkey_bin_search_us: AtomicU64,
}

impl HashStats {
    pub fn calc_storage_size_quartiles(&mut self, storages: &[Arc<AccountStorageEntry>]) {
        let mut sum = 0;
        let mut sizes = storages
            .iter()
            .map(|storage| {
                let cap = storage.accounts.capacity() as usize;
                sum += cap;
                cap
            })
            .collect::<Vec<_>>();
        sizes.sort_unstable();
        let len = sizes.len();
        self.storage_size_quartiles = if len == 0 {
            StorageSizeQuartileStats::default()
        } else {
            [
                *sizes.first().unwrap(),
                sizes[len / 4],
                sizes[len * 2 / 4],
                sizes[len * 3 / 4],
                *sizes.last().unwrap(),
                sum / len,
            ]
        };
    }
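
    // Worked example for `calc_storage_size_quartiles` above: sorted capacities
    // [10, 20, 30, 40] yield [min, q1, q2, q3, max, avg] = [10, 20, 30, 40, 40, 25]
    // (len = 4, so the quartile indexes are 1, 2, and 3, and avg = 100 / 4).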

    pub fn log(&self) {
        datapoint_info!(
            "calculate_accounts_hash_from_storages",
            ("total_us", self.total_us, i64),
            ("mark_time_us", self.mark_time_us, i64),
            ("cache_hash_data_us", self.cache_hash_data_us, i64),
            ("accounts_scan_us", self.scan_time_total_us, i64),
            ("eliminate_zeros_us", self.zeros_time_total_us, i64),
            ("hash_us", self.hash_time_total_us, i64),
            ("sort_us", self.sort_time_total_us, i64),
            ("hash_total", self.hash_total, i64),
            ("storage_sort_us", self.storage_sort_us, i64),
            ("collect_snapshots_us", self.collect_snapshots_us, i64),
            ("num_snapshot_storage", self.num_snapshot_storage, i64),
            ("scan_chunks", self.scan_chunks, i64),
            ("num_slots", self.num_slots, i64),
            ("num_dirty_slots", self.num_dirty_slots, i64),
            ("storage_size_min", self.storage_size_quartiles[0], i64),
            (
                "storage_size_quartile_1",
                self.storage_size_quartiles[1],
                i64
            ),
            (
                "storage_size_quartile_2",
                self.storage_size_quartiles[2],
                i64
            ),
            (
                "storage_size_quartile_3",
                self.storage_size_quartiles[3],
                i64
            ),
            ("storage_size_max", self.storage_size_quartiles[4], i64),
            ("storage_size_avg", self.storage_size_quartiles[5], i64),
            (
                "roots_older_than_epoch",
                self.roots_older_than_epoch.load(Ordering::Relaxed),
                i64
            ),
            ("oldest_root", self.oldest_root, i64),
            (
                "longest_ancient_scan_us",
                self.longest_ancient_scan_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "sum_ancient_scans_us",
                self.sum_ancient_scans_us.load(Ordering::Relaxed),
                i64
            ),
            (
                "count_ancient_scans",
                self.count_ancient_scans.load(Ordering::Relaxed),
                i64
            ),
            (
                "append_vec_sizes_older_than_epoch",
                self.append_vec_sizes_older_than_epoch
                    .load(Ordering::Relaxed),
                i64
            ),
            (
                "accounts_in_roots_older_than_epoch",
                self.accounts_in_roots_older_than_epoch
                    .load(Ordering::Relaxed),
                i64
            ),
            (
                "pubkey_bin_search_us",
                self.pubkey_bin_search_us.load(Ordering::Relaxed),
                i64
            ),
        );
    }
}

/// While scanning append vecs, this is the info that needs to be extracted, de-duped, and sorted from their stored accounts.
/// Note this can be saved/loaded during hash calculation to a memory mapped file whose contents are
/// [CalculateHashIntermediate]
#[repr(C)]
#[derive(Default, Debug, PartialEq, Eq, Clone)]
pub struct CalculateHashIntermediate {
    pub hash: Hash,
    pub lamports: u64,
    pub pubkey: Pubkey,
}

impl CalculateHashIntermediate {
    pub fn new(hash: Hash, lamports: u64, pubkey: Pubkey) -> Self {
        Self {
            hash,
            lamports,
            pubkey,
        }
    }
}

#[derive(Default, Debug, PartialEq, Eq)]
pub struct CumulativeOffset {
    pub index: Vec<usize>,
    pub start_offset: usize,
}

impl CumulativeOffset {
    pub fn new(index: Vec<usize>, start_offset: usize) -> CumulativeOffset {
        Self {
            index,
            start_offset,
        }
    }
}

pub trait ExtractSliceFromRawData<'b, T: 'b> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T];
}

impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<T>> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
        &self[offset.index[0]][start..]
    }
}

impl<'b, T: 'b> ExtractSliceFromRawData<'b, T> for Vec<Vec<Vec<T>>> {
    fn extract<'a>(&'b self, offset: &'a CumulativeOffset, start: usize) -> &'b [T] {
        &self[offset.index[0]][offset.index[1]][start..]
    }
}

// Allow retrieving &[start..end] from a logical src: Vec<T>, where src is really Vec<Vec<T>> (or later Vec<Vec<Vec<T>>>)
// This model prevents callers from having to flatten which saves both working memory and time.
#[derive(Default, Debug)]
pub struct CumulativeOffsets {
    cumulative_offsets: Vec<CumulativeOffset>,
    total_count: usize,
}

/// used by merkle tree calculation to lookup account hashes by overall index
#[derive(Default)]
pub struct CumulativeHashesFromFiles {
    /// source of hashes in order
    readers: Vec<MmapAccountHashesFile>,
    /// look up reader index and offset by overall index
    cumulative: CumulativeOffsets,
}

impl CumulativeHashesFromFiles {
    /// Calculate offset from overall index to which file and offset within that file based on the length of each hash file.
    /// Also collect readers to access the data.
    pub fn from_files(hashes: Vec<AccountHashesFile>) -> Self {
        let mut readers = Vec::with_capacity(hashes.len());
        let cumulative = CumulativeOffsets::new(hashes.into_iter().filter_map(|mut hash_file| {
            // ignores all hash files that have zero entries
            hash_file.get_reader().map(|(count, reader)| {
                readers.push(reader);
                count
            })
        }));
        Self {
            cumulative,
            readers,
        }
    }

    /// total # of items referenced
    pub fn total_count(&self) -> usize {
        self.cumulative.total_count
    }

    // return the biggest slice possible that starts at the overall index 'start'
    pub fn get_slice(&self, start: usize) -> &[Hash] {
        let (start, offset) = self.cumulative.find(start);
        let data_source_index = offset.index[0];
        let data = &self.readers[data_source_index];
        // this read cannot fail: we should never ask for data that doesn't exist.
        // If we do, then 'cumulative' was calculated incorrectly.
        data.read(start)
    }
}

impl CumulativeOffsets {
    pub fn new<I>(iter: I) -> Self
    where
        I: Iterator<Item = usize>,
    {
        let mut total_count: usize = 0;
        let cumulative_offsets: Vec<_> = iter
            .enumerate()
            .filter_map(|(i, len)| {
                if len > 0 {
                    let result = CumulativeOffset::new(vec![i], total_count);
                    total_count += len;
                    Some(result)
                } else {
                    None
                }
            })
            .collect();

        Self {
            cumulative_offsets,
            total_count,
        }
    }
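
    // Worked example for `new` above: source lengths [2, 0, 3] produce
    //   cumulative_offsets = [{index: [0], start_offset: 0}, {index: [2], start_offset: 2}]
    //   total_count = 5
    // (the empty source at index 1 is skipped entirely).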

    pub fn from_raw<T>(raw: &[Vec<T>]) -> Self {
        Self::new(raw.iter().map(|v| v.len()))
    }

    pub fn from_raw_2d<T>(raw: &[Vec<Vec<T>>]) -> Self {
        let mut total_count: usize = 0;
        let mut cumulative_offsets = Vec::with_capacity(0);
        for (i, v_outer) in raw.iter().enumerate() {
            for (j, v) in v_outer.iter().enumerate() {
                let len = v.len();
                if len > 0 {
                    if cumulative_offsets.is_empty() {
                        // the first inner, non-empty vector we find gives us an approximate rectangular shape
                        cumulative_offsets = Vec::with_capacity(raw.len() * v_outer.len());
                    }
                    cumulative_offsets.push(CumulativeOffset::new(vec![i, j], total_count));
                    total_count += len;
                }
            }
        }

        Self {
            cumulative_offsets,
            total_count,
        }
    }

    /// find the index of the data source that contains 'start'
    fn find_index(&self, start: usize) -> usize {
        assert!(!self.cumulative_offsets.is_empty());
        match self.cumulative_offsets[..].binary_search_by(|index| index.start_offset.cmp(&start)) {
            Ok(index) => index,
            Err(index) => index - 1, // we would insert at index so we are before the item at index
        }
    }

    /// given overall start index 'start'
    /// return ('start', which is the offset into the data source at 'index',
    /// and 'index', which is the data source to use)
    fn find(&self, start: usize) -> (usize, &CumulativeOffset) {
        let index = self.find_index(start);
        let index = &self.cumulative_offsets[index];
        let start = start - index.start_offset;
        (start, index)
    }
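
    // Continuing the example after `new`: find(3) binary-searches the start_offsets
    // [0, 2], lands on the entry with start_offset 2, and returns
    // (3 - 2, &{index: [2], start_offset: 2}), i.e. offset 1 into the third source.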

    // return the biggest slice possible that starts at 'start'
    pub fn get_slice<'a, 'b, T, U>(&'a self, raw: &'b U, start: usize) -> &'b [T]
    where
        U: ExtractSliceFromRawData<'b, T> + 'b,
    {
        let (start, index) = self.find(start);
        raw.extract(index, start)
    }
}

#[derive(Debug)]
pub struct AccountsHasher<'a> {
    pub filler_account_suffix: Option<Pubkey>,
    pub zero_lamport_accounts: ZeroLamportAccounts,
    /// The directory where temporary cache files are put
    pub dir_for_temp_cache_files: PathBuf,
    pub(crate) active_stats: &'a ActiveStats,
}

impl<'a> AccountsHasher<'a> {
    /// true if it is possible that there are filler accounts present
    pub fn filler_accounts_enabled(&self) -> bool {
        self.filler_account_suffix.is_some()
    }

    pub fn calculate_hash(hashes: Vec<Vec<Hash>>) -> (Hash, usize) {
        let cumulative_offsets = CumulativeOffsets::from_raw(&hashes);

        let hash_total = cumulative_offsets.total_count;
        let result = AccountsHasher::compute_merkle_root_from_slices(
            hash_total,
            MERKLE_FANOUT,
            None,
            |start: usize| cumulative_offsets.get_slice(&hashes, start),
            None,
        );
        (result.0, hash_total)
    }

    pub fn compute_merkle_root(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
        Self::compute_merkle_root_loop(hashes, fanout, |t| &t.1)
    }

    // this function avoids an infinite recursion compiler error
    pub fn compute_merkle_root_recurse(hashes: Vec<Hash>, fanout: usize) -> Hash {
        Self::compute_merkle_root_loop(hashes, fanout, |t| t)
    }

    pub fn div_ceil(x: usize, y: usize) -> usize {
        let mut result = x / y;
        if x % y != 0 {
            result += 1;
        }
        result
    }
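
    // e.g. div_ceil(9, 3) == 3 and div_ceil(10, 3) == 4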

    // For the first iteration, there could be more items in the tuple than just hash and lamports.
    // Using extractor allows us to avoid an unnecessary array copy on the first iteration.
    pub fn compute_merkle_root_loop<T, F>(hashes: Vec<T>, fanout: usize, extractor: F) -> Hash
    where
        F: Fn(&T) -> &Hash + std::marker::Sync,
        T: std::marker::Sync,
    {
        if hashes.is_empty() {
            return Hasher::default().result();
        }

        let mut time = Measure::start("time");

        let total_hashes = hashes.len();
        let chunks = Self::div_ceil(total_hashes, fanout);

        let result: Vec<_> = (0..chunks)
            .into_par_iter()
            .map(|i| {
                let start_index = i * fanout;
                let end_index = std::cmp::min(start_index + fanout, total_hashes);

                let mut hasher = Hasher::default();
                for item in hashes.iter().take(end_index).skip(start_index) {
                    let h = extractor(item);
                    hasher.hash(h.as_ref());
                }

                hasher.result()
            })
            .collect();
        time.stop();
        debug!("hashing {} {}", total_hashes, time);

        if result.len() == 1 {
            result[0]
        } else {
            Self::compute_merkle_root_recurse(result, fanout)
        }
    }
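
    // Sketch of the recursion above with fanout = 16: 20 input hashes become
    // chunks = div_ceil(20, 16) = 2 chunk hashes, and the recursive call then
    // reduces those 2 hashes into the single root.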

    fn calculate_three_level_chunks(
        total_hashes: usize,
        fanout: usize,
        max_levels_per_pass: Option<usize>,
        specific_level_count: Option<usize>,
    ) -> (usize, usize, bool) {
        const THREE_LEVEL_OPTIMIZATION: usize = 3; // this '3' is dependent on the code structure below where we manually unroll
        let target = fanout.pow(THREE_LEVEL_OPTIMIZATION as u32);

        // Only use the 3 level optimization if we have at least 4 levels of data.
        // Otherwise, we'll be serializing a parallel operation.
        let threshold = target * fanout;
        let mut three_level = max_levels_per_pass.unwrap_or(usize::MAX) >= THREE_LEVEL_OPTIMIZATION
            && total_hashes >= threshold;
        if three_level {
            if let Some(specific_level_count_value) = specific_level_count {
                three_level = specific_level_count_value >= THREE_LEVEL_OPTIMIZATION;
            }
        }
        let (num_hashes_per_chunk, levels_hashed) = if three_level {
            (target, THREE_LEVEL_OPTIMIZATION)
        } else {
            (fanout, 1)
        };
        (num_hashes_per_chunk, levels_hashed, three_level)
    }
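
    // With the production fanout of 16: target = 16^3 = 4096 hashes per chunk and
    // threshold = 16^4 = 65536, so the 3-level path only engages when the input
    // spans at least 4 tree levels.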

    // This function is designed to allow hashes to be located in multiple, perhaps multiply deep vecs.
    // The caller provides a function to return a slice from the source data.
    pub fn compute_merkle_root_from_slices<'b, F, T>(
        total_hashes: usize,
        fanout: usize,
        max_levels_per_pass: Option<usize>,
        get_hash_slice_starting_at_index: F,
        specific_level_count: Option<usize>,
    ) -> (Hash, Vec<Hash>)
    where
        // returns a slice of hashes starting at the given overall index
        F: Fn(usize) -> &'b [T] + std::marker::Sync,
        T: Borrow<Hash> + std::marker::Sync + 'b,
    {
        if total_hashes == 0 {
            return (Hasher::default().result(), vec![]);
        }

        let mut time = Measure::start("time");

        let (num_hashes_per_chunk, levels_hashed, three_level) = Self::calculate_three_level_chunks(
            total_hashes,
            fanout,
            max_levels_per_pass,
            specific_level_count,
        );

        let chunks = Self::div_ceil(total_hashes, num_hashes_per_chunk);

        // initial fetch - could return entire slice
        let data = get_hash_slice_starting_at_index(0);
        let data_len = data.len();

        let result: Vec<_> = (0..chunks)
            .into_par_iter()
            .map(|i| {
                // summary:
                // this closure computes 1 or 3 levels of merkle tree (all chunks will be 1 or all will be 3)
                // for a subset (our chunk) of the input data [start_index..end_index]

                // index into get_hash_slice_starting_at_index where this chunk's range begins
                let start_index = i * num_hashes_per_chunk;
                // index into get_hash_slice_starting_at_index where this chunk's range ends
                let end_index = std::cmp::min(start_index + num_hashes_per_chunk, total_hashes);

                // will compute the final result for this closure
                let mut hasher = Hasher::default();

                // index into 'data' where we are currently pulling data
                // if we exhaust our data, then we will request a new slice, and data_index resets to 0, the beginning of the new slice
                let mut data_index = start_index;
                // source data, which we may refresh when we exhaust
                let mut data = data;
                // len of the source data
                let mut data_len = data_len;

                if !three_level {
                    // 1 group of fanout
                    // The result of this loop is a single hash value from fanout input hashes.
                    for i in start_index..end_index {
                        if data_index >= data_len {
                            // we exhausted our data, fetch next slice starting at i
                            data = get_hash_slice_starting_at_index(i);
                            data_len = data.len();
                            data_index = 0;
                        }
                        hasher.hash(data[data_index].borrow().as_ref());
                        data_index += 1;
                    }
                } else {
                    // hash 3 levels of fanout simultaneously.
                    // This codepath produces 1 hash value for between 1..=fanout^3 input hashes.
                    // It is equivalent to running the normal merkle tree calculation 3 iterations on the input.
                    //
                    // big idea:
                    // merkle trees usually reduce the input vector by a factor of fanout with each iteration
                    // example with fanout 2:
                    // start:     [0,1,2,3,4,5,6,7]      in our case: [...16M...] or really, 1B
                    // iteration0 [.5, 2.5, 4.5, 6.5]                 [... 1M...]
                    // iteration1 [1.5, 5.5]                          [...65k...]
                    // iteration2 3.5                                 [...4k... ]
                    // So iteration 0 consumes N elements, hashes them in groups of 'fanout' and produces a vector of N/fanout elements
                    // and the process repeats until there is only 1 hash left.
                    //
                    // With the three_level code path, each chunk we iterate over has size fanout^3 (4096).
                    // So, the input could be 16M hashes and the output will be 4k hashes, or N/fanout^3
                    // The goal is to reduce the amount of data that has to be constructed and held in memory.
                    // When we know we have enough hashes, then, in 1 pass, we hash 3 levels simultaneously, storing far fewer intermediate hashes.
                    //
                    // Now, some details:
                    // The result of this loop is a single hash value from fanout^3 input hashes.
                    // concepts:
                    // what we're conceptually hashing: "raw_hashes"[start_index..end_index]
                    // example: [a,b,c,d,e,f]
                    // but... hashes[] may really be multiple vectors that are pieced together.
                    // example: [[a,b],[c],[d,e,f]]
                    // get_hash_slice_starting_at_index(any_index) abstracts that and returns a slice starting at raw_hashes[any_index..]
                    // such that the end of get_hash_slice_starting_at_index may be <, >, or = end_index
                    // example: get_hash_slice_starting_at_index(1) returns [b]
                    //          get_hash_slice_starting_at_index(3) returns [d,e,f]
                    // This code is basically 3 iterations of merkle tree hashing occurring simultaneously.
                    // The first fanout raw hashes are hashed in hasher_k. This is iteration0.
                    // Once hasher_k has hashed fanout hashes, hasher_k's result hash is hashed in hasher_j and then discarded.
                    // hasher_k then starts over fresh and hashes the next fanout raw hashes. This is iteration0 again for a new set of data.
                    // Once hasher_j has hashed fanout hashes (from k), hasher_j's result hash is hashed in hasher and then discarded.
                    // Once hasher has hashed fanout hashes (from j), then the result of hasher is the hash for fanout^3 raw hashes.
                    // If there are < fanout^3 hashes, then this code stops when it runs out of raw hashes and returns whatever it hashed.
                    // This is always how the very last elements work in a merkle tree.
                    let mut i = start_index;
                    while i < end_index {
                        let mut hasher_j = Hasher::default();
                        for _j in 0..fanout {
                            let mut hasher_k = Hasher::default();
                            let end = std::cmp::min(end_index - i, fanout);
                            for _k in 0..end {
                                if data_index >= data_len {
                                    // we exhausted our data, fetch next slice starting at i
                                    data = get_hash_slice_starting_at_index(i);
                                    data_len = data.len();
                                    data_index = 0;
                                }
                                hasher_k.hash(data[data_index].borrow().as_ref());
                                data_index += 1;
                                i += 1;
                            }
                            hasher_j.hash(hasher_k.result().as_ref());
                            if i >= end_index {
                                break;
                            }
                        }
                        hasher.hash(hasher_j.result().as_ref());
                    }
                }

                hasher.result()
            })
            .collect();
        time.stop();
        debug!("hashing {} {}", total_hashes, time);

        if let Some(mut specific_level_count_value) = specific_level_count {
            specific_level_count_value -= levels_hashed;
            if specific_level_count_value == 0 {
                (Hash::default(), result)
            } else {
                assert!(specific_level_count_value > 0);
                // We did not hash the number of levels required by 'specific_level_count', so repeat
                Self::compute_merkle_root_from_slices_recurse(
                    result,
                    fanout,
                    max_levels_per_pass,
                    Some(specific_level_count_value),
                )
            }
        } else {
            (
                if result.len() == 1 {
                    result[0]
                } else {
                    Self::compute_merkle_root_recurse(result, fanout)
                },
                vec![], // no intermediate results needed by caller
            )
        }
    }

    pub fn compute_merkle_root_from_slices_recurse(
        hashes: Vec<Hash>,
        fanout: usize,
        max_levels_per_pass: Option<usize>,
        specific_level_count: Option<usize>,
    ) -> (Hash, Vec<Hash>) {
        Self::compute_merkle_root_from_slices(
            hashes.len(),
            fanout,
            max_levels_per_pass,
            |start| &hashes[start..],
            specific_level_count,
        )
    }

    pub fn accumulate_account_hashes(mut hashes: Vec<(Pubkey, Hash)>) -> Hash {
        Self::sort_hashes_by_pubkey(&mut hashes);

        Self::compute_merkle_root_loop(hashes, MERKLE_FANOUT, |i| &i.1)
    }

    pub fn sort_hashes_by_pubkey(hashes: &mut Vec<(Pubkey, Hash)>) {
        hashes.par_sort_unstable_by(|a, b| a.0.cmp(&b.0));
    }

    pub fn compare_two_hash_entries(
        a: &CalculateHashIntermediate,
        b: &CalculateHashIntermediate,
    ) -> std::cmp::Ordering {
        // note partial_cmp only returns None with floating point comparisons
        a.pubkey.partial_cmp(&b.pubkey).unwrap()
    }

    pub fn checked_cast_for_capitalization(balance: u128) -> u64 {
        balance.try_into().unwrap_or_else(|_| {
            panic!("overflow is detected while summing capitalization: {balance}")
        })
    }

    /// returns:
    /// Vec, with one entry per bin
    ///  for each entry, Vec<Hash> in pubkey order
    /// If the returned Vec<AccountHashesFile> were flattened, it would contain all hashes, in pubkey order.
    fn de_dup_accounts(
        &self,
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        stats: &mut HashStats,
        max_bin: usize,
    ) -> (Vec<AccountHashesFile>, u64) {
        // 1. eliminate zero lamport accounts
        // 2. pick the highest slot or (slot = and highest version) of each pubkey
        // 3. produce this output:
        //   a. vec: PUBKEY_BINS_FOR_CALCULATING_HASHES in pubkey order
        //      vec: individual hashes in pubkey order, 1 hash per
        //   b. lamports
        let _guard = self.active_stats.activate(ActiveStatItem::HashDeDup);

        let mut zeros = Measure::start("eliminate zeros");
        let (hashes, hash_total, lamports_total) = (0..max_bin)
            .into_par_iter()
            .fold(
                || {
                    (
                        /*hashes files*/ Vec::with_capacity(max_bin),
                        /*hashes count*/ 0_usize,
                        /*lamports sum*/ 0_u64,
                    )
                },
                |mut accum, bin| {
                    let (hashes_file, lamports_bin) = self.de_dup_accounts_in_parallel(
                        sorted_data_by_pubkey,
                        bin,
                        max_bin,
                        stats,
                    );
                    accum.2 = accum
                        .2
                        .checked_add(lamports_bin)
                        .expect("summing capitalization cannot overflow");
                    accum.1 += hashes_file.count();
                    accum.0.push(hashes_file);
                    accum
                },
            )
            .reduce(
                || {
                    (
                        /*hashes files*/ Vec::with_capacity(max_bin),
                        /*hashes count*/ 0,
                        /*lamports sum*/ 0,
                    )
                },
                |mut a, mut b| {
                    a.2 =
                        a.2.checked_add(b.2)
                            .expect("summing capitalization cannot overflow");
                    a.1 += b.1;
                    a.0.append(&mut b.0);
                    a
                },
            );
        zeros.stop();
        stats.zeros_time_total_us += zeros.as_us();
        stats.hash_total += hash_total;
        (hashes, lamports_total)
    }
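
    // Note on the fold/reduce pair above: `fold` builds one (files, hash count,
    // lamports) triple per rayon worker, and `reduce` merges those triples; both
    // lamport additions are checked so capitalization overflow panics rather than wraps.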

    /// returns the item referenced by `min_index`
    /// updates `indexes` to skip over the pubkey and its duplicates
    /// updates `first_items` to point to the next pubkey
    /// or, if the referenced pubkey is the last entry in the same `bin`, removes the entire
    /// pubkey division's entries (for `min_index`) from `first_items`, `indexes`, and
    /// `first_item_to_pubkey_division`
    fn get_item<'b>(
        min_index: usize,
        bin: usize,
        first_items: &mut Vec<Pubkey>,
        sorted_data_by_pubkey: &[&'b [CalculateHashIntermediate]],
        indexes: &mut Vec<usize>,
        first_item_to_pubkey_division: &mut Vec<usize>,
        binner: &PubkeyBinCalculator24,
    ) -> &'b CalculateHashIntermediate {
        let first_item = first_items[min_index];
        let key = &first_item;
        let division_index = first_item_to_pubkey_division[min_index];
        let division_data = &sorted_data_by_pubkey[division_index];
        let mut index = indexes[min_index];
        index += 1;
        let mut end;
        loop {
            end = index >= division_data.len();
            if end {
                break;
            }
            // still more items where we found the previous key, so just increment the index for that slot group, skipping all pubkeys that are equal
            let next_key = &division_data[index].pubkey;
            if next_key == key {
                index += 1;
                continue; // duplicate entries of same pubkey, so keep skipping
            }

            if binner.bin_from_pubkey(next_key) > bin {
                // the next pubkey is not in our bin
                end = true;
                break;
            }

            // point to the next pubkey > key
            first_items[min_index] = *next_key;
            indexes[min_index] = index;
            break;
        }
        if end {
            // stop looking in this vector - we exhausted it
            first_items.remove(min_index);
            first_item_to_pubkey_division.remove(min_index);
            indexes.remove(min_index);
        }

        // this is the previous first item that was requested
        &division_data[index - 1]
    }

    /// `hash_data` must be sorted by `binner.bin_from_pubkey()`
    /// return index in `hash_data` of first pubkey that is in `bin`, based on `binner`
    fn binary_search_for_first_pubkey_in_bin(
        hash_data: &[CalculateHashIntermediate],
        bin: usize,
        binner: &PubkeyBinCalculator24,
    ) -> Option<usize> {
        let potential_index = if bin == 0 {
            // `bin` == 0 is special because there cannot be `bin`-1
            // so either element[0] is in bin 0 or there is nothing in bin 0.
            0
        } else {
            // search for the first pubkey that is in `bin`
            // There could be many keys in a row with the same `bin`.
            // So, for each pubkey, use calculated_bin * 2 + 1 as the bin of a given pubkey for binary search.
            // And compare the bin of each pubkey with `bin` * 2.
            // So all keys that are in `bin` will compare as `bin` * 2 + 1
            // all keys that are in `bin`-1 will compare as ((`bin` - 1) * 2 + 1), which is (`bin` * 2 - 1)
            // NO keys will compare as `bin` * 2 because we add 1.
            // So, the binary search will NEVER return Ok(found_index), but will always return Err(index of first key in `bin`).
            // Note that if NO key is in `bin`, then the key at the found index will be in a bin > `bin`, so return None.
            let just_prior_to_desired_bin = bin * 2;
            let search = hash_data.binary_search_by(|data| {
                (1 + 2 * binner.bin_from_pubkey(&data.pubkey)).cmp(&just_prior_to_desired_bin)
            });
            // returns Err(index where item should be) since the desired item will never exist
            search.expect_err("it is impossible to find a matching bin")
        };
        // note that `potential_index` could be == hash_data.len(). This indicates the first key in `bin` would be
        // after the data we have. Thus, no key is in `bin`.
        // This also handles the case where `hash_data` is empty, since len() will be 0 and `get` will return None.
        hash_data.get(potential_index).and_then(|potential_data| {
            (binner.bin_from_pubkey(&potential_data.pubkey) == bin).then_some(potential_index)
        })
    }
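
    // Worked example of the 2x+1 mapping above: if the pubkeys in `hash_data`
    // fall in bins [0, 0, 1, 1, 2] and we search for bin == 1, the mapped values
    // are [1, 1, 3, 3, 5] and the target is 2, so binary_search returns Err(2),
    // the index of the first key in bin 1.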

    /// `hash_data` must be sorted by `binner.bin_from_pubkey()`
    /// return index in `hash_data` of first pubkey that is in `bin`, based on `binner`
    fn find_first_pubkey_in_bin(
        hash_data: &[CalculateHashIntermediate],
        bin: usize,
        bins: usize,
        binner: &PubkeyBinCalculator24,
        stats: &HashStats,
    ) -> Option<usize> {
        if hash_data.is_empty() {
            return None;
        }
        let (result, us) = measure_us!({
            // assume uniform distribution of pubkeys and choose first guess based on bin we're looking for
            let i = hash_data.len() * bin / bins;
            let estimate = &hash_data[i];

            let pubkey_bin = binner.bin_from_pubkey(&estimate.pubkey);
            let range = if pubkey_bin >= bin {
                // i pubkey matches or is too large, so look <= i for the first pubkey in the right bin
                // i+1 could be the first pubkey in the right bin
                0..(i + 1)
            } else {
                // i pubkey is too small, so look after i
                (i + 1)..hash_data.len()
            };
            Some(
                range.start +
                // binary search the subset
                Self::binary_search_for_first_pubkey_in_bin(
                    &hash_data[range],
                    bin,
                    binner,
                )?,
            )
        });
        stats.pubkey_bin_search_us.fetch_add(us, Ordering::Relaxed);
        result
    }
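
    // Example of the interpolation guess above: with 100 entries, bins = 10, and
    // bin = 7, the first probe is i = 100 * 7 / 10 = 70; if the pubkey at index 70
    // already lands in bin >= 7, only [0..=70] is binary searched, otherwise [71..100).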

    // go through: [..][pubkey_bin][..] and return hashes and lamport sum
    //   slot groups^                ^accounts found in a slot group, sorted by pubkey, higher slot, write_version
    // 1. handle zero lamport accounts
    // 2. pick the highest slot or (slot = and highest version) of each pubkey
    // 3. produce this output:
    //   a. AccountHashesFile: individual account hashes in pubkey order
    //   b. lamport sum
    fn de_dup_accounts_in_parallel(
        &self,
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        pubkey_bin: usize,
        bins: usize,
        stats: &HashStats,
    ) -> (AccountHashesFile, u64) {
        let binner = PubkeyBinCalculator24::new(bins);

        let len = sorted_data_by_pubkey.len();
        let mut indexes = Vec::with_capacity(len);
        let mut first_items = Vec::with_capacity(len);
        // map from index of an item in first_items[] to index of the corresponding item in sorted_data_by_pubkey[]
        // this will change as items in sorted_data_by_pubkey[] are exhausted
        let mut first_item_to_pubkey_division = Vec::with_capacity(len);
        let mut hashes = AccountHashesFile {
            count_and_writer: None,
            dir_for_temp_cache_files: self.dir_for_temp_cache_files.clone(),
        };
        // initialize 'first_items', which holds the current lowest item in each slot group
        sorted_data_by_pubkey
            .iter()
            .enumerate()
            .for_each(|(i, hash_data)| {
                let first_pubkey_in_bin =
                    Self::find_first_pubkey_in_bin(hash_data, pubkey_bin, bins, &binner, stats);
                if let Some(first_pubkey_in_bin) = first_pubkey_in_bin {
                    let k = hash_data[first_pubkey_in_bin].pubkey;
                    first_items.push(k);
                    first_item_to_pubkey_division.push(i);
                    indexes.push(first_pubkey_in_bin);
                }
            });
        let mut overall_sum = 0;
        let mut duplicate_pubkey_indexes = Vec::with_capacity(len);
        let filler_accounts_enabled = self.filler_accounts_enabled();

        // this loop runs once per unique pubkey contained in any slot group
        while !first_items.is_empty() {
            let loop_stop = { first_items.len() - 1 }; // we increment at the beginning of the loop
            let mut min_index = 0;
            let mut min_pubkey = first_items[min_index];
            let mut first_item_index = 0; // we will start iterating at item 1. +=1 is first instruction in loop

            // this loop iterates over each slot group to find the minimum pubkey at the maximum slot
            // it also identifies duplicate pubkey entries at lower slots and remembers those to skip them after
            while first_item_index < loop_stop {
                first_item_index += 1;
                let key = &first_items[first_item_index];
                let cmp = min_pubkey.cmp(key);
                match cmp {
                    std::cmp::Ordering::Less => {
                        continue; // we still have the min item
                    }
                    std::cmp::Ordering::Equal => {
                        // we found the same pubkey in a later slot, so remember the lower slot as a duplicate
                        duplicate_pubkey_indexes.push(min_index);
                    }
                    std::cmp::Ordering::Greater => {
                        // this is the new min pubkey
                        min_pubkey = *key;
                    }
                }
                // this is the new index of the min entry
                min_index = first_item_index;
            }
            // get the min item, add lamports, get hash
            let item = Self::get_item(
                min_index,
                pubkey_bin,
                &mut first_items,
                sorted_data_by_pubkey,
                &mut indexes,
                &mut first_item_to_pubkey_division,
                &binner,
            );

            // add lamports and get hash
            if item.lamports != 0 {
                // do not include filler accounts in the hash
                if !(filler_accounts_enabled && self.is_filler_account(&item.pubkey)) {
                    overall_sum = Self::checked_cast_for_capitalization(
                        item.lamports as u128 + overall_sum as u128,
                    );
                    hashes.write(&item.hash);
                }
            } else {
                // if lamports == 0, check if they should be included
                if self.zero_lamport_accounts == ZeroLamportAccounts::Included {
                    // For incremental accounts hash, the hash of a zero lamport account is
                    // the hash of its pubkey
                    let hash = blake3::hash(bytemuck::bytes_of(&item.pubkey));
                    let hash = Hash::new_from_array(hash.into());
                    hashes.write(&hash);
                }
            }

            if !duplicate_pubkey_indexes.is_empty() {
                // skip past duplicate keys in earlier slots
                // reverse this list because get_item can remove first_items[*i] when *i is exhausted
                // and that would mess up subsequent *i values
                duplicate_pubkey_indexes.iter().rev().for_each(|i| {
                    Self::get_item(
                        *i,
                        pubkey_bin,
                        &mut first_items,
                        sorted_data_by_pubkey,
                        &mut indexes,
                        &mut first_item_to_pubkey_division,
                        &binner,
                    );
                });
                duplicate_pubkey_indexes.clear();
            }
        }

        (hashes, overall_sum)
    }

    fn is_filler_account(&self, pubkey: &Pubkey) -> bool {
        crate::accounts_db::AccountsDb::is_filler_account_helper(
            pubkey,
            self.filler_account_suffix.as_ref(),
        )
    }

    /// input:
    /// vec: groups of slot data, ordered by Slot (low to high)
    ///   vec: [..] - items found in that slot range, sorted by pubkey, then higher slot, then higher write version (when pubkeys are equal)
    pub fn rest_of_hash_calculation(
        &self,
        sorted_data_by_pubkey: &[&[CalculateHashIntermediate]],
        stats: &mut HashStats,
    ) -> (Hash, u64) {
        let (hashes, total_lamports) = self.de_dup_accounts(
            sorted_data_by_pubkey,
            stats,
            PUBKEY_BINS_FOR_CALCULATING_HASHES,
        );

        let cumulative = CumulativeHashesFromFiles::from_files(hashes);

        let _guard = self.active_stats.activate(ActiveStatItem::HashMerkleTree);
        let mut hash_time = Measure::start("hash");
        let (hash, _) = Self::compute_merkle_root_from_slices(
            cumulative.total_count(),
            MERKLE_FANOUT,
            None,
            |start| cumulative.get_slice(start),
            None,
        );
        hash_time.stop();
        stats.hash_time_total_us += hash_time.as_us();
        (hash, total_lamports)
    }
}

/// How should zero-lamport accounts be treated by the accounts hasher?
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum ZeroLamportAccounts {
    Excluded,
    Included,
}

/// Hash of accounts
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum AccountsHashKind {
    Full(AccountsHash),
    Incremental(IncrementalAccountsHash),
}
impl AccountsHashKind {
    pub fn as_hash(&self) -> &Hash {
        match self {
            AccountsHashKind::Full(AccountsHash(hash))
            | AccountsHashKind::Incremental(IncrementalAccountsHash(hash)) => hash,
        }
    }
}
impl From<AccountsHash> for AccountsHashKind {
    fn from(accounts_hash: AccountsHash) -> Self {
        AccountsHashKind::Full(accounts_hash)
    }
}
impl From<IncrementalAccountsHash> for AccountsHashKind {
    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
        AccountsHashKind::Incremental(incremental_accounts_hash)
    }
}

/// Hash of accounts
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct AccountsHash(pub Hash);
/// Hash of accounts that includes zero-lamport accounts
/// Used with incremental snapshots
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct IncrementalAccountsHash(pub Hash);

/// Hash of accounts written in a single slot
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct AccountsDeltaHash(pub Hash);

/// Snapshot serde-safe accounts delta hash
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq, AbiExample)]
pub struct SerdeAccountsDeltaHash(pub Hash);

impl From<SerdeAccountsDeltaHash> for AccountsDeltaHash {
    fn from(accounts_delta_hash: SerdeAccountsDeltaHash) -> Self {
        Self(accounts_delta_hash.0)
    }
}
impl From<AccountsDeltaHash> for SerdeAccountsDeltaHash {
    fn from(accounts_delta_hash: AccountsDeltaHash) -> Self {
        Self(accounts_delta_hash.0)
    }
}

/// Snapshot serde-safe accounts hash
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq, AbiExample)]
pub struct SerdeAccountsHash(pub Hash);

impl From<SerdeAccountsHash> for AccountsHash {
    fn from(accounts_hash: SerdeAccountsHash) -> Self {
        Self(accounts_hash.0)
    }
}
impl From<AccountsHash> for SerdeAccountsHash {
    fn from(accounts_hash: AccountsHash) -> Self {
        Self(accounts_hash.0)
    }
}

/// Snapshot serde-safe incremental accounts hash
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq, AbiExample)]
pub struct SerdeIncrementalAccountsHash(pub Hash);

impl From<SerdeIncrementalAccountsHash> for IncrementalAccountsHash {
    fn from(incremental_accounts_hash: SerdeIncrementalAccountsHash) -> Self {
        Self(incremental_accounts_hash.0)
    }
}
impl From<IncrementalAccountsHash> for SerdeIncrementalAccountsHash {
    fn from(incremental_accounts_hash: IncrementalAccountsHash) -> Self {
        Self(incremental_accounts_hash.0)
    }
}

#[cfg(test)]
pub mod tests {
    use {super::*, itertools::Itertools, std::str::FromStr, tempfile::tempdir};

    lazy_static! {
        static ref ACTIVE_STATS: ActiveStats = ActiveStats::default();
    }

    impl<'a> AccountsHasher<'a> {
        fn new(dir_for_temp_cache_files: PathBuf) -> Self {
            Self {
                filler_account_suffix: None,
                zero_lamport_accounts: ZeroLamportAccounts::Excluded,
                dir_for_temp_cache_files,
                active_stats: &ACTIVE_STATS,
            }
        }
    }

    impl AccountHashesFile {
        fn new(dir_for_temp_cache_files: PathBuf) -> Self {
            Self {
                count_and_writer: None,
                dir_for_temp_cache_files,
            }
        }
    }

    #[test]
    fn test_find_first_pubkey_in_bin() {
        let stats = HashStats::default();
        for (bins, expected_count) in [1, 2, 4].into_iter().zip([5, 20, 120]) {
            let bins: usize = bins;
            let binner = PubkeyBinCalculator24::new(bins);

            let mut count = 0usize;
            // # pubkeys in each bin are permutations of these
            // 0 means none in this bin
            // a large number (20) means the found key will be well before or
            // after the expected index based on an assumption of uniform
            // distribution
            for counts in [0, 1, 2, 20, 0].into_iter().permutations(bins) {
                count += 1;
                let hash_data = counts
                    .iter()
                    .enumerate()
                    .flat_map(|(bin, count)| {
                        (0..*count).map(move |_| {
                            let binner = PubkeyBinCalculator24::new(bins);
                            CalculateHashIntermediate::new(
                                Hash::default(),
                                0,
                                binner.lowest_pubkey_from_bin(bin, bins),
                            )
                        })
                    })
                    .collect::<Vec<_>>();
                // look for the first pubkey in each bin
                for (bin, count_in_bin) in counts.iter().enumerate().take(bins) {
                    let first = AccountsHasher::find_first_pubkey_in_bin(
                        &hash_data, bin, bins, &binner, &stats,
                    );
                    // test both functions
                    let first_again = AccountsHasher::binary_search_for_first_pubkey_in_bin(
                        &hash_data, bin, &binner,
                    );
                    assert_eq!(first, first_again);
                    assert_eq!(first.is_none(), count_in_bin == &0);
                    if let Some(first) = first {
                        assert_eq!(binner.bin_from_pubkey(&hash_data[first].pubkey), bin);
                        if first > 0 {
                            assert!(binner.bin_from_pubkey(&hash_data[first - 1].pubkey) < bin);
                        }
                    }
                }
            }
            assert_eq!(
                count, expected_count,
                "too few iterations in test. bins: {bins}"
            );
        }
    }
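
    // The counts above are chosen so a bin's first key can land well before
    // or after the index it would occupy under a uniform pubkey distribution
    // (the 20-key bins); both lookup strategies are exercised against the
    // same skewed data and must always agree.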

    #[test]
    fn test_account_hashes_file() {
        let dir_for_temp_cache_files = tempdir().unwrap();
        // 0 hashes
        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
        assert!(file.get_reader().is_none());
        let hashes = (0..2).map(|i| Hash::new(&[i; 32])).collect::<Vec<_>>();

        // 1 hash
        file.write(&hashes[0]);
        let reader = file.get_reader().unwrap();
        assert_eq!(&[hashes[0]][..], reader.1.read(0));
        assert!(reader.1.read(1).is_empty());

        // multiple hashes
        let mut file = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
        assert!(file.get_reader().is_none());
        hashes.iter().for_each(|hash| file.write(hash));
        let reader = file.get_reader().unwrap();
        (0..2).for_each(|i| assert_eq!(&hashes[i..], reader.1.read(i)));
        assert!(reader.1.read(2).is_empty());
    }
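
    // Reading back is plain offset arithmetic over the mmap: a `Hash` is 32
    // bytes, so `read(index)` returns the slice starting at byte `index * 32`
    // and running to the end of the file. That is why, for the two-hash file
    // above, `read(1)` yields exactly the second hash and `read(2)` is empty.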

    #[test]
    fn test_cumulative_hashes_from_files() {
        let dir_for_temp_cache_files = tempdir().unwrap();
        (0..4).for_each(|permutation| {
            let hashes = (0..2).map(|i| Hash::new(&[i + 1; 32])).collect::<Vec<_>>();

            let mut combined = Vec::default();

            // 0 hashes
            let file0 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());

            // 1 hash
            let mut file1 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
            file1.write(&hashes[0]);
            combined.push(hashes[0]);

            // multiple hashes
            let mut file2 = AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf());
            hashes.iter().for_each(|hash| {
                file2.write(hash);
                combined.push(*hash);
            });

            let hashes = if permutation == 0 {
                vec![file0, file1, file2]
            } else if permutation == 1 {
                // include more empty files
                vec![
                    file0,
                    file1,
                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                    file2,
                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                ]
            } else if permutation == 2 {
                vec![file1, file2]
            } else {
                // swap file2 and file1
                let one = combined.remove(0);
                combined.push(one);
                vec![
                    file2,
                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                    AccountHashesFile::new(dir_for_temp_cache_files.path().to_path_buf()),
                    file1,
                ]
            };

            let cumulative = CumulativeHashesFromFiles::from_files(hashes);
            let len = combined.len();
            assert_eq!(cumulative.total_count(), len);
            (0..combined.len()).for_each(|start| {
                let mut retrieved = Vec::default();
                let mut cumulative_start = start;
                // read all data
                while retrieved.len() < (len - start) {
                    let this_one = cumulative.get_slice(cumulative_start);
                    retrieved.extend(this_one.iter());
                    cumulative_start += this_one.len();
                    assert_ne!(0, this_one.len());
                }
                assert_eq!(
                    &combined[start..],
                    &retrieved[..],
                    "permutation: {permutation}"
                );
            });
        });
    }

    #[test]
    fn test_accountsdb_div_ceil() {
        assert_eq!(AccountsHasher::div_ceil(10, 3), 4);
        assert_eq!(AccountsHasher::div_ceil(0, 1), 0);
        assert_eq!(AccountsHasher::div_ceil(0, 5), 0);
        assert_eq!(AccountsHasher::div_ceil(9, 3), 3);
        assert_eq!(AccountsHasher::div_ceil(9, 9), 1);
    }

    #[test]
    #[should_panic(expected = "attempt to divide by zero")]
    fn test_accountsdb_div_ceil_fail() {
        assert_eq!(AccountsHasher::div_ceil(10, 0), 0);
    }
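
    // A minimal sketch (not part of the original suite) of the identity the
    // div_ceil cases above rely on, assuming a non-zero divisor:
    //
    //     div_ceil(a, b) == (a + b - 1) / b
    //
    // e.g. div_ceil(10, 3) == (10 + 2) / 3 == 4 and div_ceil(9, 3) == 11 / 3 == 3.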

    fn for_rest(original: &[CalculateHashIntermediate]) -> Vec<&[CalculateHashIntermediate]> {
        vec![original]
    }

    #[test]
    fn test_accountsdb_rest_of_hash_calculation() {
        solana_logger::setup();

        let mut account_maps = Vec::new();

        let key = Pubkey::from([11u8; 32]);
        let hash = Hash::new(&[1u8; 32]);
        let val = CalculateHashIntermediate::new(hash, 88, key);
        account_maps.push(val);

        // 2nd key - zero lamports, so will be removed
        let key = Pubkey::from([12u8; 32]);
        let hash = Hash::new(&[2u8; 32]);
        let val = CalculateHashIntermediate::new(hash, 0, key);
        account_maps.push(val);

        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        let result = accounts_hash
            .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
        let expected_hash = Hash::from_str("8j9ARGFv4W2GfML7d3sVJK2MePwrikqYnu6yqer28cCa").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 88));

        // 3rd key - with pubkey value before 1st key so it will be sorted first
        let key = Pubkey::from([10u8; 32]);
        let hash = Hash::new(&[2u8; 32]);
        let val = CalculateHashIntermediate::new(hash, 20, key);
        account_maps.insert(0, val);

        let result = accounts_hash
            .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
        let expected_hash = Hash::from_str("EHv9C5vX7xQjjMpsJMzudnDTzoTSRwYkqLzY8tVMihGj").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 108));

        // 3rd key again - but with a later slot, so this entry wins the de-dup
        let key = Pubkey::from([10u8; 32]);
        let hash = Hash::new(&[99u8; 32]);
        let val = CalculateHashIntermediate::new(hash, 30, key);
        account_maps.insert(1, val);

        let result = accounts_hash
            .rest_of_hash_calculation(&for_rest(&account_maps), &mut HashStats::default());
        let expected_hash = Hash::from_str("7NNPg5A8Xsg1uv4UFm6KZNwsipyyUnmgCrznP6MBWoBZ").unwrap();
        assert_eq!((result.0, result.1), (expected_hash, 118));
    }
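
    // Capitalization arithmetic for the steps above: 88 (1st key) alone; then
    // 88 + 20 = 108 once the 3rd key is added; then 88 + 30 = 118 once the
    // later-slot duplicate of the 3rd key replaces its 20-lamport version.
    // The zero-lamport 2nd key never contributes.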

    fn one_range() -> usize {
        1
    }

    fn zero_range() -> usize {
        0
    }

    #[test]
    fn test_accountsdb_de_dup_accounts_zero_chunks() {
        let vec = vec![vec![CalculateHashIntermediate {
            lamports: 1,
            ..CalculateHashIntermediate::default()
        }]];
        let temp_vec = vec.to_vec();
        let slice = convert_to_slice(&temp_vec);
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        let (mut hashes, lamports) =
            accounts_hasher.de_dup_accounts_in_parallel(&slice, 0, 1, &HashStats::default());
        assert_eq!(&[Hash::default()], hashes.get_reader().unwrap().1.read(0));
        assert_eq!(lamports, 1);
    }

    fn get_vec_vec(hashes: Vec<AccountHashesFile>) -> Vec<Vec<Hash>> {
        hashes.into_iter().map(get_vec).collect()
    }
    fn get_vec(mut hashes: AccountHashesFile) -> Vec<Hash> {
        hashes
            .get_reader()
            .map(|r| r.1.read(0).to_vec())
            .unwrap_or_default()
    }

    #[test]
    fn test_accountsdb_de_dup_accounts_empty() {
        solana_logger::setup();
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());

        let empty = [];
        let vec = &empty;
        let (hashes, lamports) =
            accounts_hash.de_dup_accounts(vec, &mut HashStats::default(), one_range());
        assert_eq!(
            vec![Hash::default(); 0],
            get_vec_vec(hashes)
                .into_iter()
                .flatten()
                .collect::<Vec<_>>(),
        );
        assert_eq!(lamports, 0);
        let vec = vec![];
        let (hashes, lamports) =
            accounts_hash.de_dup_accounts(&vec, &mut HashStats::default(), zero_range());
        let empty: Vec<Vec<Hash>> = Vec::default();
        assert_eq!(empty, get_vec_vec(hashes));
        assert_eq!(lamports, 0);

        let (hashes, lamports) =
            accounts_hash.de_dup_accounts_in_parallel(&[], 1, 1, &HashStats::default());
        assert_eq!(vec![Hash::default(); 0], get_vec(hashes));
        assert_eq!(lamports, 0);

        let (hashes, lamports) =
            accounts_hash.de_dup_accounts_in_parallel(&[], 2, 1, &HashStats::default());
        assert_eq!(vec![Hash::default(); 0], get_vec(hashes));
        assert_eq!(lamports, 0);
    }

    #[test]
    fn test_accountsdb_de_dup_accounts_from_stores() {
        solana_logger::setup();

        let key_a = Pubkey::from([1u8; 32]);
        let key_b = Pubkey::from([2u8; 32]);
        let key_c = Pubkey::from([3u8; 32]);
        const COUNT: usize = 6;
        let hashes = (0..COUNT).map(|i| Hash::new(&[i as u8; 32]));
        // create this vector
        // abbbcc
        let keys = [key_a, key_b, key_b, key_b, key_c, key_c];

        let accounts: Vec<_> = hashes
            .zip(keys.iter())
            .enumerate()
            .map(|(i, (hash, key))| CalculateHashIntermediate::new(hash, (i + 1) as u64, *key))
            .collect();

        type ExpectedType = (String, bool, u64, String);
        let expected: Vec<ExpectedType> = vec![
            // ("key/lamports key2/lamports ...",
            // is_last_slice
            // result lamports
            // result hashes)
            // "a5" = key_a, 5 lamports
            ("a1", false, 1, "[11111111111111111111111111111111]"),
            ("a1b2", false, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("a1b2b3", false, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("a1b2b3b4", false, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("a1b2b3b4c5", false, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b2", false, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("b2b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b2b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b2b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b3", false, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b3b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b3b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b4", false, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b4c5", false, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("c5", false, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("a1", true, 1, "[11111111111111111111111111111111]"),
            ("a1b2", true, 3, "[11111111111111111111111111111111, 4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("a1b2b3", true, 4, "[11111111111111111111111111111111, 8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("a1b2b3b4", true, 5, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("a1b2b3b4c5", true, 10, "[11111111111111111111111111111111, CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b2", true, 2, "[4vJ9JU1bJJE96FWSJKvHsmmFADCg4gpZQff4P3bkLKi]"),
            ("b2b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b2b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b2b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b3", true, 3, "[8qbHbw2BbbTHBW1sbeqakYXVKRQM8Ne7pLK7m6CVfeR]"),
            ("b3b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b3b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("b4", true, 4, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8]"),
            ("b4c5", true, 9, "[CktRuQ2mttgRGkXJtyksdKHjUdc2C4TgDzyB98oEzy8, GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
            ("c5", true, 5, "[GgBaCs3NCBuZN12kCJgAW63ydqohFkHEdfdEXBPzLHq]"),
        ].into_iter().map(|item| {
            let result: ExpectedType = (
                item.0.to_string(),
                item.1,
                item.2,
                item.3.to_string(),
            );
            result
        }).collect();

        let dir_for_temp_cache_files = tempdir().unwrap();
        let hash = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        let mut expected_index = 0;
        for last_slice in 0..2 {
            for start in 0..COUNT {
                for end in start + 1..COUNT {
                    let is_last_slice = last_slice == 1;
                    let accounts = accounts.clone();
                    let slice = &accounts[start..end];

                    let slice2 = vec![slice.to_vec()];
                    let slice = &slice2[..];
                    let slice_temp = convert_to_slice(&slice2);
                    let (hashes2, lamports2) =
                        hash.de_dup_accounts_in_parallel(&slice_temp, 0, 1, &HashStats::default());
                    let slice3 = convert_to_slice(&slice2);
                    let (hashes3, lamports3) =
                        hash.de_dup_accounts_in_parallel(&slice3, 0, 1, &HashStats::default());
                    let vec = slice.to_vec();
                    let slice4 = convert_to_slice(&vec);
                    let mut max_bin = end - start;
                    if !max_bin.is_power_of_two() {
                        max_bin = 1;
                    }

                    let (hashes4, lamports4) =
                        hash.de_dup_accounts(&slice4, &mut HashStats::default(), max_bin);
                    let vec = slice.to_vec();
                    let slice5 = convert_to_slice(&vec);
                    let (hashes5, lamports5) =
                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);
                    let vec = slice.to_vec();
                    let slice5 = convert_to_slice(&vec);
                    let (hashes6, lamports6) =
                        hash.de_dup_accounts(&slice5, &mut HashStats::default(), max_bin);

                    let hashes2 = get_vec(hashes2);
                    let hashes3 = get_vec(hashes3);
                    let hashes4 = get_vec_vec(hashes4);
                    let hashes5 = get_vec_vec(hashes5);
                    let hashes6 = get_vec_vec(hashes6);

                    assert_eq!(hashes2, hashes3);
                    let expected2 = hashes2.clone();
                    assert_eq!(
                        expected2,
                        hashes4.into_iter().flatten().collect::<Vec<_>>(),
                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
                    );
                    assert_eq!(
                        expected2.clone(),
                        hashes5.iter().flatten().copied().collect::<Vec<_>>(),
                        "last_slice: {last_slice}, start: {start}, end: {end}, slice: {slice:?}"
                    );
                    assert_eq!(
                        expected2.clone(),
                        hashes6.iter().flatten().copied().collect::<Vec<_>>()
                    );
                    assert_eq!(lamports2, lamports3);
                    assert_eq!(lamports2, lamports4);
                    assert_eq!(lamports2, lamports5);
                    assert_eq!(lamports2, lamports6);

                    let human_readable = slice[0]
                        .iter()
                        .map(|v| {
                            let mut s = (if v.pubkey == key_a {
                                "a"
                            } else if v.pubkey == key_b {
                                "b"
                            } else {
                                "c"
                            })
                            .to_string();

                            s.push_str(&v.lamports.to_string());
                            s
                        })
                        .collect::<String>();

                    let hash_result_as_string = format!("{hashes2:?}");

                    let packaged_result: ExpectedType = (
                        human_readable,
                        is_last_slice,
                        lamports2,
                        hash_result_as_string,
                    );
                    assert_eq!(expected[expected_index], packaged_result);

                    // for generating expected results
                    // error!("{:?},", packaged_result);
                    expected_index += 1;
                }
            }
        }
    }

    // compare_two_hash_entries orders only by pubkey now, so entries sharing a
    // key compare Equal regardless of hash, lamports, or slot.
    #[test]
    fn test_accountsdb_compare_two_hash_entries() {
        solana_logger::setup();
        let key = Pubkey::new_unique();
        let hash = Hash::new_unique();
        let val = CalculateHashIntermediate::new(hash, 1, key);

        // slot same, version <
        let hash2 = Hash::new_unique();
        let val2 = CalculateHashIntermediate::new(hash2, 4, key);
        assert_eq!(
            std::cmp::Ordering::Equal, // no longer comparing slots or versions
            AccountsHasher::compare_two_hash_entries(&val, &val2)
        );

        // slot same, vers =
        let hash3 = Hash::new_unique();
        let val3 = CalculateHashIntermediate::new(hash3, 2, key);
        assert_eq!(
            std::cmp::Ordering::Equal,
            AccountsHasher::compare_two_hash_entries(&val, &val3)
        );

        // slot same, vers >
        let hash4 = Hash::new_unique();
        let val4 = CalculateHashIntermediate::new(hash4, 6, key);
        assert_eq!(
            std::cmp::Ordering::Equal, // no longer comparing slots or versions
            AccountsHasher::compare_two_hash_entries(&val, &val4)
        );

        // slot >, version <
        let hash5 = Hash::new_unique();
        let val5 = CalculateHashIntermediate::new(hash5, 8, key);
        assert_eq!(
            std::cmp::Ordering::Equal, // no longer comparing slots or versions
            AccountsHasher::compare_two_hash_entries(&val, &val5)
        );
    }

    fn test_de_dup_accounts_in_parallel<'a>(
        account_maps: &'a [&'a [CalculateHashIntermediate]],
    ) -> (AccountHashesFile, u64) {
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        accounts_hasher.de_dup_accounts_in_parallel(account_maps, 0, 1, &HashStats::default())
    }

    #[test]
    fn test_accountsdb_remove_zero_balance_accounts() {
        solana_logger::setup();

        let key = Pubkey::new_unique();
        let hash = Hash::new_unique();
        let mut account_maps = Vec::new();
        let val = CalculateHashIntermediate::new(hash, 1, key);
        account_maps.push(val.clone());

        let vecs = vec![account_maps.to_vec()];
        let slice = convert_to_slice(&vecs);
        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
        assert_eq!(
            (get_vec(hashfile), lamports),
            (vec![val.hash], val.lamports)
        );

        // zero original lamports, higher version
        let val = CalculateHashIntermediate::new(hash, 0, key);
        account_maps.push(val); // has to be after previous entry since account_maps are in slot order

        let vecs = vec![account_maps.to_vec()];
        let slice = convert_to_slice(&vecs);
        let (hashfile, lamports) = test_de_dup_accounts_in_parallel(&slice);
        assert_eq!((get_vec(hashfile), lamports), (vec![], 0));
    }
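
    // Note on the final assert above: the later (slot-ordered) entry for the
    // key has zero lamports, so de-dup drops the account entirely and both the
    // hash file and the capitalization come back empty.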

    #[test]
    fn test_accountsdb_cumulative_offsets1_d() {
        let input = vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors

        const DIMENSION: usize = 0;
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 2);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        let input = vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors

        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION], 1);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION], 3);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        let input: Vec<Vec<u32>> = vec![vec![]];
        let cumulative = CumulativeOffsets::from_raw(&input);

        let len = input.into_iter().flatten().count();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 0); // no non-empty vectors
    }
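
    // Illustrative sketch (hypothetical code, mirroring what from_raw is
    // checked for above): CumulativeOffsets records, for each non-empty inner
    // vector, the flat offset at which it starts, so a flat index can be
    // mapped back to (vector index, offset within it):
    //
    //     let input = vec![vec![0, 1], vec![], vec![2, 3, 4]];
    //     let mut offsets = Vec::new();
    //     let mut total = 0;
    //     for (i, v) in input.iter().enumerate() {
    //         if !v.is_empty() {
    //             offsets.push((i, total)); // (index, start_offset)
    //             total += v.len();
    //         }
    //     }
    //     assert_eq!(offsets, vec![(0, 0), (2, 2)]);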

    #[should_panic(expected = "is_empty")]
    #[test]
    fn test_accountsdb_cumulative_find_empty() {
        let input = CumulativeOffsets {
            cumulative_offsets: vec![],
            total_count: 0,
        };
        input.find(0);
    }

    #[test]
    fn test_accountsdb_cumulative_find() {
        let input = CumulativeOffsets {
            cumulative_offsets: vec![CumulativeOffset {
                index: vec![0],
                start_offset: 0,
            }],
            total_count: 0,
        };
        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0]));

        let input = CumulativeOffsets {
            cumulative_offsets: vec![
                CumulativeOffset {
                    index: vec![0],
                    start_offset: 0,
                },
                CumulativeOffset {
                    index: vec![1],
                    start_offset: 2,
                },
            ],
            total_count: 0,
        };
        assert_eq!(input.find(0), (0, &input.cumulative_offsets[0])); // = first start_offset
        assert_eq!(input.find(1), (1, &input.cumulative_offsets[0])); // > first start_offset
        assert_eq!(input.find(2), (0, &input.cumulative_offsets[1])); // = last start_offset
        assert_eq!(input.find(3), (1, &input.cumulative_offsets[1])); // > last start_offset
    }

    #[test]
    fn test_accountsdb_cumulative_offsets2_d() {
        let input: Vec<Vec<Vec<u64>>> = vec![vec![vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors

        const DIMENSION_0: usize = 0;
        const DIMENSION_1: usize = 1;
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 2);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice: &[u64] = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        let input = vec![vec![vec![], vec![0, 1], vec![], vec![2, 3, 4], vec![]]];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors

        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 1);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 3);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice: &[u64] = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }

        let input: Vec<Vec<Vec<u32>>> = vec![vec![]];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let len = input.into_iter().flatten().count();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 0); // no non-empty vectors

        let input = vec![
            vec![vec![0, 1]],
            vec![vec![]],
            vec![vec![], vec![2, 3, 4], vec![]],
        ];
        let cumulative = CumulativeOffsets::from_raw_2d(&input);

        let src: Vec<_> = input.clone().into_iter().flatten().flatten().collect();
        let len = src.len();
        assert_eq!(cumulative.total_count, len);
        assert_eq!(cumulative.cumulative_offsets.len(), 2); // 2 non-empty vectors

        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_0], 0);
        assert_eq!(cumulative.cumulative_offsets[0].index[DIMENSION_1], 0);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_0], 2);
        assert_eq!(cumulative.cumulative_offsets[1].index[DIMENSION_1], 1);

        assert_eq!(cumulative.cumulative_offsets[0].start_offset, 0);
        assert_eq!(cumulative.cumulative_offsets[1].start_offset, 2);

        for start in 0..len {
            let slice: &[u64] = cumulative.get_slice(&input, start);
            let len = slice.len();
            assert!(len > 0);
            assert_eq!(&src[start..(start + len)], slice);
        }
    }

    fn test_hashing_larger(hashes: Vec<(Pubkey, Hash)>, fanout: usize) -> Hash {
        let result = AccountsHasher::compute_merkle_root(hashes.clone(), fanout);
        let reduced: Vec<_> = hashes.iter().map(|x| x.1).collect();
        let result2 = test_hashing(reduced, fanout);
        assert_eq!(result, result2, "len: {}", hashes.len());
        result
    }

    fn test_hashing(hashes: Vec<Hash>, fanout: usize) -> Hash {
        let temp: Vec<_> = hashes.iter().map(|h| (Pubkey::default(), *h)).collect();
        let result = AccountsHasher::compute_merkle_root(temp, fanout);
        let reduced: Vec<_> = hashes.clone();
        let result2 = AccountsHasher::compute_merkle_root_from_slices(
            hashes.len(),
            fanout,
            None,
            |start| &reduced[start..],
            None,
        );
        assert_eq!(result, result2.0, "len: {}", hashes.len());

        let result2 = AccountsHasher::compute_merkle_root_from_slices(
            hashes.len(),
            fanout,
            Some(1),
            |start| &reduced[start..],
            None,
        );
        assert_eq!(result, result2.0, "len: {}", hashes.len());

        let max = std::cmp::min(reduced.len(), fanout * 2);
        for left in 0..max {
            for right in left + 1..max {
                let src = vec![
                    vec![reduced[0..left].to_vec(), reduced[left..right].to_vec()],
                    vec![reduced[right..].to_vec()],
                ];
                let offsets = CumulativeOffsets::from_raw_2d(&src);

                let get_slice = |start: usize| -> &[Hash] { offsets.get_slice(&src, start) };
                let result2 = AccountsHasher::compute_merkle_root_from_slices(
                    offsets.total_count,
                    fanout,
                    None,
                    get_slice,
                    None,
                );
                assert_eq!(result, result2.0);
            }
        }
        result
    }
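
    // A sketch of the layer-by-layer reduction these helpers exercise, using
    // the div_ceil identity above: with fanout 3, 300 leaves reduce to
    // div_ceil(300, 3) = 100 parent hashes, then 34, 12, 4, 2, and finally a
    // single root.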

    #[test]
    fn test_accountsdb_compute_merkle_root_large() {
        solana_logger::setup();

        // handle fanout^x - 1, +0, +1 for a few 'x's
        const FANOUT: usize = 3;
        let mut hash_counts: Vec<_> = (1..6)
            .flat_map(|x| {
                let mark = FANOUT.pow(x);
                vec![mark - 1, mark, mark + 1]
            })
            .collect();

        // saturate the test space from just below threshold through
        // threshold + target; this hits right before we use the 3-deep
        // optimization and all the way through all possible partial last chunks
        let target = FANOUT.pow(3);
        let threshold = target * FANOUT;
        hash_counts.extend(threshold - 1..=threshold + target);

        for hash_count in hash_counts {
            let hashes: Vec<_> = (0..hash_count).map(|_| Hash::new_unique()).collect();

            test_hashing(hashes, FANOUT);
        }
    }

    #[test]
    fn test_accountsdb_compute_merkle_root() {
        solana_logger::setup();

        let expected_results = vec![
            (0, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
            (0, 1, "8unXKJYTxrR423HgQxbDmx29mFri1QNrzVKKDxEfc6bj", 0),
            (0, 2, "6QfkevXLLqbfAaR1kVjvMLFtEXvNUVrpmkwXqgsYtCFW", 1),
            (0, 3, "G3FrJd9JrXcMiqChTSfvEdBL2sCPny3ebiUy9Xxbn7a2", 3),
            (0, 4, "G3sZXHhwoCFuNyWy7Efffr47RBW33ibEp7b2hqNDmXdu", 6),
            (0, 5, "78atJJYpokAPKMJwHxUW8SBDvPkkSpTBV7GiB27HwosJ", 10),
            (0, 6, "7c9SM2BmCRVVXdrEdKcMK91MviPqXqQMd8QAb77tgLEy", 15),
            (0, 7, "3hsmnZPhf22UvBLiZ4dVa21Qsdh65CCrtYXsb8MxoVAa", 21),
            (0, 8, "5bwXUiC6RCRhb8fqvjvUXT6waU25str3UXA3a6Aq1jux", 28),
            (0, 9, "3NNtQKH6PaYpCnFBtyi2icK9eYX3YM5pqA3SKaXtUNzu", 36),
            (1, 0, "GKot5hBsd81kMupNCXHaqbhv3huEbxAFMLnpcX2hniwn", 0),
            (1, 1, "4GWVCsnEu1iRyxjAB3F7J7C4MMvcoxFWtP9ihvwvDgxY", 0),
            (1, 2, "8ML8Te6Uw2mipFr2v9sMZDcziXzhVqJo2qeMJohg1CJx", 1),
            (1, 3, "AMEuC3AgqAeRBGBhSfTmuMdfbAiXJnGmKv99kHmcAE1H", 3),
            (1, 4, "HEnDuJLHpsQfrApimGrovTqPEF6Vkrx2dKFr3BDtYzWx", 6),
            (1, 5, "6rH69iP2yM1o565noZN1EqjySW4PhYUskz3c5tXePUfV", 10),
            (1, 6, "7qEQMEXdfSPjbZ3q4cuuZwebDMvTvuaQ3dBiHoDUKo9a", 15),
            (1, 7, "GDJz7LSKYjqqz6ujCaaQRJRmQ7TLNCwYJhdT84qT4qwk", 21),
            (1, 8, "HT9krPLVTo3rr5WZQBQFrbqWs8SbYScXfnt8EVuobboM", 28),
            (1, 9, "8y2pMgqMdRsvqw6BQXm6wtz3qxGPss72i6H6gVpPyeda", 36),
        ];

        let mut expected_index = 0;
        let start = 0;
        let default_fanout = 2;
        // test 0..3 recursions (at fanout = 2) and 1 item remainder. The
        // internals have 1 special-case first loop; subsequent loops are all
        // of the same type.
        let iterations = default_fanout * default_fanout * default_fanout + 2;
        for pass in 0..2 {
            let fanout = if pass == 0 {
                default_fanout
            } else {
                MERKLE_FANOUT
            };
            for count in start..iterations {
                let mut input: Vec<_> = (0..count)
                    .map(|i| {
                        let key = Pubkey::from([(pass * iterations + count) as u8; 32]);
                        let hash = Hash::new(&[(pass * iterations + count + i + 1) as u8; 32]);
                        (key, hash)
                    })
                    .collect();

                let result = if pass == 0 {
                    test_hashing_larger(input.clone(), fanout)
                } else {
                    // this sorts inside
                    let early_result = AccountsHasher::accumulate_account_hashes(
                        input.iter().map(|i| (i.0, i.1)).collect::<Vec<_>>(),
                    );
                    AccountsHasher::sort_hashes_by_pubkey(&mut input);
                    let result = AccountsHasher::compute_merkle_root(input.clone(), fanout);
                    assert_eq!(early_result, result);
                    result
                };
                // compare against captured, expected results for hash (and lamports)
                assert_eq!(
                    (
                        pass,
                        count,
                        &*(result.to_string()),
                        expected_results[expected_index].3
                    ), // we no longer calculate lamports
                    expected_results[expected_index]
                );
                expected_index += 1;
            }
        }
    }

    #[test]
    #[should_panic(expected = "overflow is detected while summing capitalization")]
    fn test_accountsdb_lamport_overflow() {
        solana_logger::setup();

        // (u64::MAX - offset) + (offset + 1) == u64::MAX + 1, which overflows
        let offset = 2;
        let input = vec![
            CalculateHashIntermediate::new(
                Hash::new(&[1u8; 32]),
                u64::MAX - offset,
                Pubkey::new_unique(),
            ),
            CalculateHashIntermediate::new(Hash::new(&[2u8; 32]), offset + 1, Pubkey::new_unique()),
        ];
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        accounts_hasher.de_dup_accounts_in_parallel(
            &convert_to_slice(&[input]),
            0,
            1,
            &HashStats::default(),
        );
    }

    fn convert_to_slice(
        input: &[Vec<CalculateHashIntermediate>],
    ) -> Vec<&[CalculateHashIntermediate]> {
        input.iter().map(|v| &v[..]).collect::<Vec<_>>()
    }

    #[test]
    #[should_panic(expected = "overflow is detected while summing capitalization")]
    fn test_accountsdb_lamport_overflow2() {
        solana_logger::setup();

        let offset = 2;
        let input = vec![
            vec![CalculateHashIntermediate::new(
                Hash::new(&[1u8; 32]),
                u64::MAX - offset,
                Pubkey::new_unique(),
            )],
            vec![CalculateHashIntermediate::new(
                Hash::new(&[2u8; 32]),
                offset + 1,
                Pubkey::new_unique(),
            )],
        ];
        let dir_for_temp_cache_files = tempdir().unwrap();
        let accounts_hasher = AccountsHasher::new(dir_for_temp_cache_files.path().to_path_buf());
        accounts_hasher.de_dup_accounts(
            &convert_to_slice(&input),
            &mut HashStats::default(),
            2, // accounts above are in 2 groups
        );
    }
}