AcctIdx: support 2^24 bins (#20739)

This commit is contained in:
Jeff Washington (jwash) 2021-10-18 14:05:16 -05:00 committed by GitHub
parent 33f4e79589
commit 46bf7d4a4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 78 additions and 37 deletions

View File

@ -1271,7 +1271,7 @@ mod tests {
let bins = idx.account_maps.len();
// use bins * 2 to get the first half of the range within bin 0
let bins_2 = bins * 2;
let binner = crate::pubkey_bins::PubkeyBinCalculator16::new(bins_2);
let binner = crate::pubkey_bins::PubkeyBinCalculator24::new(bins_2);
let range2 =
binner.lowest_pubkey_from_bin(0, bins_2)..binner.lowest_pubkey_from_bin(1, bins_2);
let range2_inclusive = range2.start..=range2.end;

View File

@ -33,7 +33,7 @@ use crate::{
append_vec::{AppendVec, StoredAccountMeta, StoredMeta, StoredMetaWriteVersion},
cache_hash_data::CacheHashData,
contains::Contains,
pubkey_bins::PubkeyBinCalculator16,
pubkey_bins::PubkeyBinCalculator24,
read_only_accounts_cache::ReadOnlyAccountsCache,
sorted_storages::SortedStorages,
};
@ -5155,7 +5155,7 @@ impl AccountsDb {
scan_func: F,
after_func: F2,
bin_range: &Range<usize>,
bin_calculator: &PubkeyBinCalculator16,
bin_calculator: &PubkeyBinCalculator24,
) -> Vec<BinnedHashData>
where
F: Fn(LoadedAccount, &mut BinnedHashData, Slot) + Send + Sync,
@ -5475,7 +5475,7 @@ impl AccountsDb {
)>,
filler_account_suffix: Option<&Pubkey>,
) -> Result<Vec<BinnedHashData>, BankHashVerificationError> {
let bin_calculator = PubkeyBinCalculator16::new(bins);
let bin_calculator = PubkeyBinCalculator24::new(bins);
assert!(bin_range.start < bins && bin_range.end <= bins && bin_range.start < bin_range.end);
let mut time = Measure::start("scan all accounts");
stats.num_snapshot_storage = storage.slot_count();
@ -7757,7 +7757,7 @@ pub mod tests {
},
|a| a,
&Range { start: 0, end: 1 },
&PubkeyBinCalculator16::new(1),
&PubkeyBinCalculator24::new(1),
);
assert_eq!(calls.load(Ordering::Relaxed), 1);
assert_eq!(

View File

@ -5,7 +5,7 @@ use crate::{
contains::Contains,
in_mem_accounts_index::InMemAccountsIndex,
inline_spl_token_v2_0::{self, SPL_TOKEN_ACCOUNT_MINT_OFFSET, SPL_TOKEN_ACCOUNT_OWNER_OFFSET},
pubkey_bins::PubkeyBinCalculator16,
pubkey_bins::PubkeyBinCalculator24,
secondary_index::*,
};
use bv::BitVec;
@ -585,7 +585,7 @@ pub struct AccountsIndexRootsStats {
pub struct AccountsIndexIterator<'a, T: IndexValue> {
account_maps: &'a LockMapTypeSlice<T>,
bin_calculator: &'a PubkeyBinCalculator16,
bin_calculator: &'a PubkeyBinCalculator24,
start_bound: Bound<Pubkey>,
end_bound: Bound<Pubkey>,
is_finished: bool,
@ -755,7 +755,7 @@ impl ScanSlotTracker {
#[derive(Debug)]
pub struct AccountsIndex<T: IndexValue> {
pub account_maps: LockMapType<T>,
pub bin_calculator: PubkeyBinCalculator16,
pub bin_calculator: PubkeyBinCalculator24,
program_id_index: SecondaryIndex<DashMapSecondaryIndexEntry>,
spl_token_mint_index: SecondaryIndex<DashMapSecondaryIndexEntry>,
spl_token_owner_index: SecondaryIndex<RwLockSecondaryIndexEntry>,
@ -806,7 +806,7 @@ impl<T: IndexValue> AccountsIndex<T> {
config: Option<AccountsIndexConfig>,
) -> (
LockMapType<T>,
PubkeyBinCalculator16,
PubkeyBinCalculator24,
AccountsIndexStorage<T>,
) {
let bins = config
@ -814,7 +814,7 @@ impl<T: IndexValue> AccountsIndex<T> {
.and_then(|config| config.bins)
.unwrap_or(BINS_DEFAULT);
// create bin_calculator early to verify # bins is reasonable
let bin_calculator = PubkeyBinCalculator16::new(bins);
let bin_calculator = PubkeyBinCalculator24::new(bins);
let storage = AccountsIndexStorage::new(bins, &config);
let account_maps = (0..bins)
.into_iter()

View File

@ -1,7 +1,7 @@
//! Cached data for hashing accounts
use crate::accounts_hash::CalculateHashIntermediate;
use crate::cache_hash_data_stats::CacheHashDataStats;
use crate::pubkey_bins::PubkeyBinCalculator16;
use crate::pubkey_bins::PubkeyBinCalculator24;
use log::*;
use memmap2::MmapMut;
use solana_measure::measure::Measure;
@ -151,7 +151,7 @@ impl CacheHashData {
file_name: &P,
accumulator: &mut SavedType,
start_bin_index: usize,
bin_calculator: &PubkeyBinCalculator16,
bin_calculator: &PubkeyBinCalculator24,
) -> Result<(), std::io::Error> {
let mut stats = CacheHashDataStats::default();
let result = self.load_internal(
@ -170,7 +170,7 @@ impl CacheHashData {
file_name: &P,
accumulator: &mut SavedType,
start_bin_index: usize,
bin_calculator: &PubkeyBinCalculator16,
bin_calculator: &PubkeyBinCalculator24,
stats: &mut CacheHashDataStats,
) -> Result<(), std::io::Error> {
let mut m = Measure::start("overall");
@ -316,7 +316,7 @@ pub mod tests {
std::fs::create_dir_all(&tmpdir).unwrap();
for bins in [1, 2, 4] {
let bin_calculator = PubkeyBinCalculator16::new(bins);
let bin_calculator = PubkeyBinCalculator24::new(bins);
let num_points = 5;
let (data, _total_points) = generate_test_data(num_points, bins, &bin_calculator);
for passes in [1, 2] {
@ -379,7 +379,7 @@ pub mod tests {
fn bin_data(
data: &mut SavedType,
bin_calculator: &PubkeyBinCalculator16,
bin_calculator: &PubkeyBinCalculator24,
bins: usize,
start_bin: usize,
) {
@ -396,7 +396,7 @@ pub mod tests {
fn generate_test_data(
count: usize,
bins: usize,
binner: &PubkeyBinCalculator16,
binner: &PubkeyBinCalculator24,
) -> (SavedType, usize) {
let mut rng = rand::thread_rng();
let mut ct = 0;

View File

@ -1,12 +1,12 @@
use solana_sdk::pubkey::Pubkey;
/// Maps a pubkey to a bin index by looking only at the pubkey's leading bytes.
/// The 16-bit variant considers the first 2 bytes; the 24-bit variant considers the first 3.
#[derive(Debug)]
pub struct PubkeyBinCalculator16 {
pub struct PubkeyBinCalculator24 {
// how many low-order bits of the leading bytes (first 2 bytes for the 16-bit calculator,
// first 3 bytes for the 24-bit calculator) to shift away to ignore when calculating bin
shift_bits: u32,
}
impl PubkeyBinCalculator16 {
impl PubkeyBinCalculator24 {
/// Width of type `T` in bits: its size in bytes times eight.
const fn num_bits<T>() -> usize {
    const BITS_PER_BYTE: usize = 8;
    let width_in_bytes = std::mem::size_of::<T>();
    width_in_bytes * BITS_PER_BYTE
}
@ -17,7 +17,7 @@ impl PubkeyBinCalculator16 {
}
pub fn new(bins: usize) -> Self {
const MAX_BITS: u32 = 16;
const MAX_BITS: u32 = 24;
assert!(bins > 0);
let max_plus_1 = 1 << MAX_BITS;
assert!(bins <= max_plus_1);
@ -30,15 +30,17 @@ impl PubkeyBinCalculator16 {
/// Returns the bin index for `pubkey`.
///
/// The leading bytes of the key are combined into one big-endian integer, which is then
/// shifted right by `shift_bits` so that only the bits selecting the bin remain.
pub fn bin_from_pubkey(&self, pubkey: &Pubkey) -> usize {
let as_ref = pubkey.as_ref();
// 2-byte (16-bit) form: byte 0 is the high byte, byte 1 the low byte
((as_ref[0] as usize * 256 + as_ref[1] as usize) as usize) >> self.shift_bits
// 3-byte (24-bit) form: byte 0 is the high byte, byte 2 the low byte
(((as_ref[0] as usize * 256 + as_ref[1] as usize) * 256 + as_ref[2] as usize) as usize)
>> self.shift_bits
}
/// Builds a pubkey whose leading bytes hold `bin << shift_bits` in big-endian order and
/// whose remaining bytes are all zero.
///
/// `bins` is used only to validate `bin`.
///
/// # Panics
/// Panics if `bin >= bins`.
pub fn lowest_pubkey_from_bin(&self, mut bin: usize, bins: usize) -> Pubkey {
assert!(bin < bins);
// reverse the right shift that `bin_from_pubkey` applies
bin <<= self.shift_bits;
// start from a key built from 32 zero bytes; only the leading bytes are overwritten below
let mut pubkey = Pubkey::new(&[0; 32]);
// 2-byte (16-bit) form: high byte into [0], low byte into [1]
pubkey.as_mut()[0] = (bin / 256) as u8;
pubkey.as_mut()[1] = (bin & 0xff) as u8;
// 3-byte (24-bit) form: high byte into [0], middle byte into [1], low byte into [2]
pubkey.as_mut()[0] = ((bin / 256 / 256) & 0xff) as u8;
pubkey.as_mut()[1] = ((bin / 256) & 0xff) as u8;
pubkey.as_mut()[2] = (bin & 0xff) as u8;
pubkey
}
}
@ -49,19 +51,19 @@ pub mod tests {
// Checks `num_bits` for `u8` and `u32`, and that `log_2` returns the exponent `i`
// for every power of two `2^i` with `i` in 0..32.
#[test]
fn test_pubkey_bins_log2() {
assert_eq!(PubkeyBinCalculator16::num_bits::<u8>(), 8);
assert_eq!(PubkeyBinCalculator16::num_bits::<u32>(), 32);
assert_eq!(PubkeyBinCalculator24::num_bits::<u8>(), 8);
assert_eq!(PubkeyBinCalculator24::num_bits::<u32>(), 32);
for i in 0..32 {
assert_eq!(PubkeyBinCalculator16::log_2(2u32.pow(i)), i);
assert_eq!(PubkeyBinCalculator24::log_2(2u32.pow(i)), i);
}
}
#[test]
fn test_pubkey_bins() {
for i in 0..=16 {
for i in 0..=24 {
let bins = 2u32.pow(i);
let calc = PubkeyBinCalculator16::new(bins as usize);
assert_eq!(calc.shift_bits, 16 - i, "i: {}", i);
let calc = PubkeyBinCalculator24::new(bins as usize);
assert_eq!(calc.shift_bits, 24 - i, "i: {}", i);
for bin in 0..bins {
assert_eq!(
bin as usize,
@ -76,9 +78,9 @@ pub mod tests {
let mut pk = Pubkey::new(&[0; 32]);
for i in 0..=8 {
let bins = 2usize.pow(i);
let calc = PubkeyBinCalculator16::new(bins);
let calc = PubkeyBinCalculator24::new(bins);
let shift_bits = calc.shift_bits - 8; // we are only dealing with first byte
let shift_bits = calc.shift_bits - 16; // we are only dealing with first byte
pk.as_mut()[0] = 0;
assert_eq!(0, calc.bin_from_pubkey(&pk));
@ -90,7 +92,7 @@ pub mod tests {
assert_eq!(
bin,
calc.bin_from_pubkey(&pk),
"bin: {}/{}, bits: {}, val: {}",
"bin: {}/{}, shift_bits: {}, val: {}",
bin,
bins,
shift_bits,
@ -106,9 +108,9 @@ pub mod tests {
for i in 9..=16 {
let mut pk = Pubkey::new(&[0; 32]);
let bins = 2usize.pow(i);
let calc = PubkeyBinCalculator16::new(bins);
let calc = PubkeyBinCalculator24::new(bins);
let shift_bits = calc.shift_bits;
let shift_bits = calc.shift_bits - 8;
pk.as_mut()[1] = 0;
assert_eq!(0, calc.bin_from_pubkey(&pk));
@ -124,7 +126,7 @@ pub mod tests {
assert_eq!(
bin,
calc.bin_from_pubkey(&pk),
"bin: {}/{}, bits: {}, val: {}",
"bin: {}/{}, shift_bits: {}, val: {}",
bin,
bins,
shift_bits,
@ -138,22 +140,61 @@ pub mod tests {
}
}
}
for i in 17..=24 {
let mut pk = Pubkey::new(&[0; 32]);
let bins = 2usize.pow(i);
let calc = PubkeyBinCalculator24::new(bins);
let shift_bits = calc.shift_bits;
pk.as_mut()[1] = 0;
assert_eq!(0, calc.bin_from_pubkey(&pk));
pk.as_mut()[0] = 0xff;
pk.as_mut()[1] = 0xff;
pk.as_mut()[2] = 0xff;
assert_eq!(bins - 1, calc.bin_from_pubkey(&pk));
let mut pk = Pubkey::new(&[0; 32]);
for bin in 0..bins {
let mut target = (bin << shift_bits) as u32;
pk.as_mut()[0] = (target / 256 / 256) as u8;
pk.as_mut()[1] = ((target / 256) % 256) as u8;
pk.as_mut()[2] = (target % 256) as u8;
assert_eq!(
bin,
calc.bin_from_pubkey(&pk),
"bin: {}/{}, shift_bits: {}, val: {:?}",
bin,
bins,
shift_bits,
&pk.as_ref()[0..3],
);
if bin > 0 {
target -= 1;
pk.as_mut()[0] = (target / 256 / 256) as u8;
pk.as_mut()[1] = ((target / 256) % 256) as u8;
pk.as_mut()[2] = (target % 256) as u8;
assert_eq!(bin - 1, calc.bin_from_pubkey(&pk));
}
}
}
}
// `new` must panic on a bin count that is not a power of two (3).
#[test]
#[should_panic(expected = "bins.is_power_of_two()")]
fn test_pubkey_bins_illegal_bins3() {
PubkeyBinCalculator16::new(3);
PubkeyBinCalculator24::new(3);
}
// `new` must panic when the bin count is one more than the maximum:
// 65537 = 2^16 + 1 for the 16-bit calculator, 65536 * 256 + 1 = 2^24 + 1 for the 24-bit one.
#[test]
#[should_panic(expected = "bins <= max_plus_1")]
fn test_pubkey_bins_illegal_bins2() {
PubkeyBinCalculator16::new(65537);
PubkeyBinCalculator24::new(65536 * 256 + 1);
}
// `new` must panic on a bin count of zero.
#[test]
#[should_panic(expected = "bins > 0")]
fn test_pubkey_bins_illegal_bins() {
PubkeyBinCalculator16::new(0);
PubkeyBinCalculator24::new(0);
}
}