From 46bf7d4a4a709a1407b911c7e439a8769e944b0f Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" <75863576+jeffwashington@users.noreply.github.com> Date: Mon, 18 Oct 2021 14:05:16 -0500 Subject: [PATCH] AcctIdx: support 2^24 bins (#20739) --- runtime/src/accounts.rs | 2 +- runtime/src/accounts_db.rs | 8 ++-- runtime/src/accounts_index.rs | 10 ++-- runtime/src/cache_hash_data.rs | 12 ++--- runtime/src/pubkey_bins.rs | 83 +++++++++++++++++++++++++--------- 5 files changed, 78 insertions(+), 37 deletions(-) diff --git a/runtime/src/accounts.rs b/runtime/src/accounts.rs index 3c20bc0391..fd8256f548 100644 --- a/runtime/src/accounts.rs +++ b/runtime/src/accounts.rs @@ -1271,7 +1271,7 @@ mod tests { let bins = idx.account_maps.len(); // use bins * 2 to get the first half of the range within bin 0 let bins_2 = bins * 2; - let binner = crate::pubkey_bins::PubkeyBinCalculator16::new(bins_2); + let binner = crate::pubkey_bins::PubkeyBinCalculator24::new(bins_2); let range2 = binner.lowest_pubkey_from_bin(0, bins_2)..binner.lowest_pubkey_from_bin(1, bins_2); let range2_inclusive = range2.start..=range2.end; diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index 66a395d3c6..a15d61db5d 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -33,7 +33,7 @@ use crate::{ append_vec::{AppendVec, StoredAccountMeta, StoredMeta, StoredMetaWriteVersion}, cache_hash_data::CacheHashData, contains::Contains, - pubkey_bins::PubkeyBinCalculator16, + pubkey_bins::PubkeyBinCalculator24, read_only_accounts_cache::ReadOnlyAccountsCache, sorted_storages::SortedStorages, }; @@ -5155,7 +5155,7 @@ impl AccountsDb { scan_func: F, after_func: F2, bin_range: &Range, - bin_calculator: &PubkeyBinCalculator16, + bin_calculator: &PubkeyBinCalculator24, ) -> Vec where F: Fn(LoadedAccount, &mut BinnedHashData, Slot) + Send + Sync, @@ -5475,7 +5475,7 @@ impl AccountsDb { )>, filler_account_suffix: Option<&Pubkey>, ) -> Result, BankHashVerificationError> { - let bin_calculator = PubkeyBinCalculator16::new(bins); + let bin_calculator = PubkeyBinCalculator24::new(bins); assert!(bin_range.start < bins && bin_range.end <= bins && bin_range.start < bin_range.end); let mut time = Measure::start("scan all accounts"); stats.num_snapshot_storage = storage.slot_count(); @@ -7757,7 +7757,7 @@ pub mod tests { }, |a| a, &Range { start: 0, end: 1 }, - &PubkeyBinCalculator16::new(1), + &PubkeyBinCalculator24::new(1), ); assert_eq!(calls.load(Ordering::Relaxed), 1); assert_eq!( diff --git a/runtime/src/accounts_index.rs b/runtime/src/accounts_index.rs index e6ec29b249..a7804ea12f 100644 --- a/runtime/src/accounts_index.rs +++ b/runtime/src/accounts_index.rs @@ -5,7 +5,7 @@ use crate::{ contains::Contains, in_mem_accounts_index::InMemAccountsIndex, inline_spl_token_v2_0::{self, SPL_TOKEN_ACCOUNT_MINT_OFFSET, SPL_TOKEN_ACCOUNT_OWNER_OFFSET}, - pubkey_bins::PubkeyBinCalculator16, + pubkey_bins::PubkeyBinCalculator24, secondary_index::*, }; use bv::BitVec; @@ -585,7 +585,7 @@ pub struct AccountsIndexRootsStats { pub struct AccountsIndexIterator<'a, T: IndexValue> { account_maps: &'a LockMapTypeSlice, - bin_calculator: &'a PubkeyBinCalculator16, + bin_calculator: &'a PubkeyBinCalculator24, start_bound: Bound, end_bound: Bound, is_finished: bool, @@ -755,7 +755,7 @@ impl ScanSlotTracker { #[derive(Debug)] pub struct AccountsIndex { pub account_maps: LockMapType, - pub bin_calculator: PubkeyBinCalculator16, + pub bin_calculator: PubkeyBinCalculator24, program_id_index: SecondaryIndex, spl_token_mint_index: SecondaryIndex, spl_token_owner_index: SecondaryIndex, @@ -806,7 +806,7 @@ impl AccountsIndex { config: Option, ) -> ( LockMapType, - PubkeyBinCalculator16, + PubkeyBinCalculator24, AccountsIndexStorage, ) { let bins = config @@ -814,7 +814,7 @@ impl AccountsIndex { .and_then(|config| config.bins) .unwrap_or(BINS_DEFAULT); // create bin_calculator early to verify # bins is reasonable - let bin_calculator = PubkeyBinCalculator16::new(bins); + let bin_calculator = PubkeyBinCalculator24::new(bins); let storage = AccountsIndexStorage::new(bins, &config); let account_maps = (0..bins) .into_iter() diff --git a/runtime/src/cache_hash_data.rs b/runtime/src/cache_hash_data.rs index b4948dd639..758f095fd6 100644 --- a/runtime/src/cache_hash_data.rs +++ b/runtime/src/cache_hash_data.rs @@ -1,7 +1,7 @@ //! Cached data for hashing accounts use crate::accounts_hash::CalculateHashIntermediate; use crate::cache_hash_data_stats::CacheHashDataStats; -use crate::pubkey_bins::PubkeyBinCalculator16; +use crate::pubkey_bins::PubkeyBinCalculator24; use log::*; use memmap2::MmapMut; use solana_measure::measure::Measure; @@ -151,7 +151,7 @@ impl CacheHashData { file_name: &P, accumulator: &mut SavedType, start_bin_index: usize, - bin_calculator: &PubkeyBinCalculator16, + bin_calculator: &PubkeyBinCalculator24, ) -> Result<(), std::io::Error> { let mut stats = CacheHashDataStats::default(); let result = self.load_internal( @@ -170,7 +170,7 @@ impl CacheHashData { file_name: &P, accumulator: &mut SavedType, start_bin_index: usize, - bin_calculator: &PubkeyBinCalculator16, + bin_calculator: &PubkeyBinCalculator24, stats: &mut CacheHashDataStats, ) -> Result<(), std::io::Error> { let mut m = Measure::start("overall"); @@ -316,7 +316,7 @@ pub mod tests { std::fs::create_dir_all(&tmpdir).unwrap(); for bins in [1, 2, 4] { - let bin_calculator = PubkeyBinCalculator16::new(bins); + let bin_calculator = PubkeyBinCalculator24::new(bins); let num_points = 5; let (data, _total_points) = generate_test_data(num_points, bins, &bin_calculator); for passes in [1, 2] { @@ -379,7 +379,7 @@ pub mod tests { fn bin_data( data: &mut SavedType, - bin_calculator: &PubkeyBinCalculator16, + bin_calculator: &PubkeyBinCalculator24, bins: usize, start_bin: usize, ) { @@ -396,7 +396,7 @@ pub mod tests { fn generate_test_data( count: usize, bins: usize, - binner: &PubkeyBinCalculator16, + binner: &PubkeyBinCalculator24, ) -> (SavedType, usize) { let mut rng = rand::thread_rng(); let mut ct = 0; diff --git a/runtime/src/pubkey_bins.rs b/runtime/src/pubkey_bins.rs index ec54005a4c..9d1938758d 100644 --- a/runtime/src/pubkey_bins.rs +++ b/runtime/src/pubkey_bins.rs @@ -1,12 +1,12 @@ use solana_sdk::pubkey::Pubkey; #[derive(Debug)] -pub struct PubkeyBinCalculator16 { +pub struct PubkeyBinCalculator24 { // how many bits from the first 2 bytes to shift away to ignore when calculating bin shift_bits: u32, } -impl PubkeyBinCalculator16 { +impl PubkeyBinCalculator24 { const fn num_bits() -> usize { std::mem::size_of::() * 8 } @@ -17,7 +17,7 @@ impl PubkeyBinCalculator16 { } pub fn new(bins: usize) -> Self { - const MAX_BITS: u32 = 16; + const MAX_BITS: u32 = 24; assert!(bins > 0); let max_plus_1 = 1 << MAX_BITS; assert!(bins <= max_plus_1); @@ -30,15 +30,17 @@ impl PubkeyBinCalculator16 { pub fn bin_from_pubkey(&self, pubkey: &Pubkey) -> usize { let as_ref = pubkey.as_ref(); - ((as_ref[0] as usize * 256 + as_ref[1] as usize) as usize) >> self.shift_bits + (((as_ref[0] as usize * 256 + as_ref[1] as usize) * 256 + as_ref[2] as usize) as usize) + >> self.shift_bits } pub fn lowest_pubkey_from_bin(&self, mut bin: usize, bins: usize) -> Pubkey { assert!(bin < bins); bin <<= self.shift_bits; let mut pubkey = Pubkey::new(&[0; 32]); - pubkey.as_mut()[0] = (bin / 256) as u8; - pubkey.as_mut()[1] = (bin & 0xff) as u8; + pubkey.as_mut()[0] = ((bin / 256 / 256) & 0xff) as u8; + pubkey.as_mut()[1] = ((bin / 256) & 0xff) as u8; + pubkey.as_mut()[2] = (bin & 0xff) as u8; pubkey } } @@ -49,19 +51,19 @@ pub mod tests { #[test] fn test_pubkey_bins_log2() { - assert_eq!(PubkeyBinCalculator16::num_bits::(), 8); - assert_eq!(PubkeyBinCalculator16::num_bits::(), 32); + assert_eq!(PubkeyBinCalculator24::num_bits::(), 8); + assert_eq!(PubkeyBinCalculator24::num_bits::(), 32); for i in 0..32 { - assert_eq!(PubkeyBinCalculator16::log_2(2u32.pow(i)), i); + assert_eq!(PubkeyBinCalculator24::log_2(2u32.pow(i)), i); } } #[test] fn test_pubkey_bins() { - for i in 0..=16 { + for i in 0..=24 { let bins = 2u32.pow(i); - let calc = PubkeyBinCalculator16::new(bins as usize); - assert_eq!(calc.shift_bits, 16 - i, "i: {}", i); + let calc = PubkeyBinCalculator24::new(bins as usize); + assert_eq!(calc.shift_bits, 24 - i, "i: {}", i); for bin in 0..bins { assert_eq!( bin as usize, @@ -76,9 +78,9 @@ pub mod tests { let mut pk = Pubkey::new(&[0; 32]); for i in 0..=8 { let bins = 2usize.pow(i); - let calc = PubkeyBinCalculator16::new(bins); + let calc = PubkeyBinCalculator24::new(bins); - let shift_bits = calc.shift_bits - 8; // we are only dealing with first byte + let shift_bits = calc.shift_bits - 16; // we are only dealing with first byte pk.as_mut()[0] = 0; assert_eq!(0, calc.bin_from_pubkey(&pk)); @@ -90,7 +92,7 @@ pub mod tests { assert_eq!( bin, calc.bin_from_pubkey(&pk), - "bin: {}/{}, bits: {}, val: {}", + "bin: {}/{}, shift_bits: {}, val: {}", bin, bins, shift_bits, @@ -106,9 +108,9 @@ pub mod tests { for i in 9..=16 { let mut pk = Pubkey::new(&[0; 32]); let bins = 2usize.pow(i); - let calc = PubkeyBinCalculator16::new(bins); + let calc = PubkeyBinCalculator24::new(bins); - let shift_bits = calc.shift_bits; + let shift_bits = calc.shift_bits - 8; pk.as_mut()[1] = 0; assert_eq!(0, calc.bin_from_pubkey(&pk)); @@ -124,7 +126,7 @@ pub mod tests { assert_eq!( bin, calc.bin_from_pubkey(&pk), - "bin: {}/{}, bits: {}, val: {}", + "bin: {}/{}, shift_bits: {}, val: {}", bin, bins, shift_bits, @@ -138,22 +140,61 @@ pub mod tests { } } } + + for i in 17..=24 { + let mut pk = Pubkey::new(&[0; 32]); + let bins = 2usize.pow(i); + let calc = PubkeyBinCalculator24::new(bins); + + let shift_bits = calc.shift_bits; + + pk.as_mut()[1] = 0; + assert_eq!(0, calc.bin_from_pubkey(&pk)); + pk.as_mut()[0] = 0xff; + pk.as_mut()[1] = 0xff; + pk.as_mut()[2] = 0xff; + assert_eq!(bins - 1, calc.bin_from_pubkey(&pk)); + + let mut pk = Pubkey::new(&[0; 32]); + for bin in 0..bins { + let mut target = (bin << shift_bits) as u32; + pk.as_mut()[0] = (target / 256 / 256) as u8; + pk.as_mut()[1] = ((target / 256) % 256) as u8; + pk.as_mut()[2] = (target % 256) as u8; + assert_eq!( + bin, + calc.bin_from_pubkey(&pk), + "bin: {}/{}, shift_bits: {}, val: {:?}", + bin, + bins, + shift_bits, + &pk.as_ref()[0..3], + ); + if bin > 0 { + target -= 1; + pk.as_mut()[0] = (target / 256 / 256) as u8; + pk.as_mut()[1] = ((target / 256) % 256) as u8; + pk.as_mut()[2] = (target % 256) as u8; + assert_eq!(bin - 1, calc.bin_from_pubkey(&pk)); + } + } + } } #[test] #[should_panic(expected = "bins.is_power_of_two()")] fn test_pubkey_bins_illegal_bins3() { - PubkeyBinCalculator16::new(3); + PubkeyBinCalculator24::new(3); } #[test] #[should_panic(expected = "bins <= max_plus_1")] fn test_pubkey_bins_illegal_bins2() { - PubkeyBinCalculator16::new(65537); + PubkeyBinCalculator24::new(65536 * 256 + 1); } #[test] #[should_panic(expected = "bins > 0")] fn test_pubkey_bins_illegal_bins() { - PubkeyBinCalculator16::new(0); + PubkeyBinCalculator24::new(0); } }