From fd28fd1d341916d46b0e56666414565175a17981 Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" Date: Tue, 4 Apr 2023 15:08:13 -0500 Subject: [PATCH] disk index: move capacity to contents (#31040) --- bucket_map/src/bucket.rs | 12 ++++++------ bucket_map/src/bucket_storage.rs | 9 +++------ bucket_map/src/index_entry.rs | 30 +++++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 15 deletions(-) diff --git a/bucket_map/src/bucket.rs b/bucket_map/src/bucket.rs index ee244337b..5ae398be4 100644 --- a/bucket_map/src/bucket.rs +++ b/bucket_map/src/bucket.rs @@ -213,7 +213,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket { .fetch_add(m.as_us(), Ordering::Relaxed); match first_free { Some(ii) => Ok((None, ii)), - None => Err(BucketMapError::IndexNoSpace(index.capacity.capacity_pow2())), + None => Err(BucketMapError::IndexNoSpace(index.contents.capacity_pow2())), } } @@ -266,7 +266,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket { .stats .find_index_entry_mut_us .fetch_add(m.as_us(), Ordering::Relaxed); - Err(BucketMapError::IndexNoSpace(index.capacity.capacity_pow2())) + Err(BucketMapError::IndexNoSpace(index.contents.capacity_pow2())) } pub fn read_value(&self, key: &Pubkey) -> Option<(&[T], RefCount)> { @@ -359,7 +359,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket { // need to move the allocation to a best fit spot let best_bucket = &self.data[best_fit_bucket as usize]; - let cap_power = best_bucket.capacity.capacity_pow2(); + let cap_power = best_bucket.contents.capacity_pow2(); let cap = best_bucket.capacity(); let pos = thread_rng().gen_range(0, cap); let mut success = false; @@ -377,7 +377,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket { let mut multiple_slots = MultipleSlots::default(); multiple_slots.set_storage_offset(ix); multiple_slots - .set_storage_capacity_when_created_pow2(best_bucket.capacity.capacity_pow2()); + .set_storage_capacity_when_created_pow2(best_bucket.contents.capacity_pow2()); multiple_slots.set_num_slots(num_slots); elem.set_slot_count_enum_value( &mut self.index, @@ -434,8 +434,8 @@ impl<'b, T: Clone + Copy + 'static> Bucket { } pub fn grow_index(&self, current_capacity_pow2: u8) { - if self.index.capacity.capacity_pow2() == current_capacity_pow2 { - let mut starting_size_pow2 = self.index.capacity.capacity_pow2(); + if self.index.contents.capacity_pow2() == current_capacity_pow2 { + let mut starting_size_pow2 = self.index.contents.capacity_pow2(); if self.anticipated_size > 0 { // start the growth at the next pow2 larger than what would be required to hold `anticipated_size`. // This will prevent unnecessary repeated grows at startup. diff --git a/bucket_map/src/bucket_storage.rs b/bucket_map/src/bucket_storage.rs index d6a2ce8c5..efac32c7d 100644 --- a/bucket_map/src/bucket_storage.rs +++ b/bucket_map/src/bucket_storage.rs @@ -37,7 +37,7 @@ pub const DEFAULT_CAPACITY_POW2: u8 = 5; /// keep track of an individual element's occupied vs. free state /// every element must either be occupied or free and should never be double occupied or double freed /// For parameters below, `element` is used to view/modify header fields or fields within the element data. -pub trait BucketOccupied { +pub trait BucketOccupied: BucketCapacity { /// set entry at `ix` as occupied (as opposed to free) fn occupy(&mut self, element: &mut [u8], ix: usize); /// set entry at `ix` as free @@ -75,8 +75,6 @@ pub struct BucketStorage { path: PathBuf, mmap: MmapMut, pub cell_size: u64, - /// number of cells this bucket can hold - pub capacity: Capacity, pub count: Arc, pub stats: Arc, pub max_search: MaxSearch, @@ -155,7 +153,6 @@ impl BucketStorage { mmap, cell_size, count, - capacity, stats, max_search, contents: O::new(capacity), @@ -385,7 +382,7 @@ impl BucketStorage { let old_cap = old_bucket.capacity(); let old_map = &old_bucket.mmap; - let increment = self.capacity.capacity_pow2() - old_bucket.capacity.capacity_pow2(); + let increment = self.contents.capacity_pow2() - old_bucket.contents.capacity_pow2(); let index_grow = 1 << increment; (0..old_cap as usize).for_each(|i| { if !old_bucket.is_free(i as u64) { @@ -455,7 +452,7 @@ impl BucketStorage { /// Return the number of cells currently allocated pub fn capacity(&self) -> u64 { - self.capacity.capacity() + self.contents.capacity() } } diff --git a/bucket_map/src/index_entry.rs b/bucket_map/src/index_entry.rs index 06f0b52ec..a24ba2f78 100644 --- a/bucket_map/src/index_entry.rs +++ b/bucket_map/src/index_entry.rs @@ -14,7 +14,17 @@ use { /// allocated in `contents` in a BucketStorage pub struct BucketWithBitVec { - pub occupied: BitVec, + occupied: BitVec, + capacity_pow2: Capacity, +} + +impl BucketCapacity for BucketWithBitVec { + fn capacity(&self) -> u64 { + self.capacity_pow2.capacity() + } + fn capacity_pow2(&self) -> u8 { + self.capacity_pow2.capacity_pow2() + } } impl BucketOccupied for BucketWithBitVec { @@ -34,16 +44,20 @@ impl BucketOccupied for BucketWithBitVec { 0 } fn new(capacity: Capacity) -> Self { + assert!(matches!(capacity, Capacity::Pow2(_))); Self { occupied: BitVec::new_fill(false, capacity.capacity()), + capacity_pow2: capacity, } } } -#[derive(Debug, Default)] +/// allocated in `contents` in a BucketStorage +#[derive(Debug)] pub struct IndexBucketUsingBitVecBits { /// 2 bits per entry that represent a 4 state enum tag pub enum_tag: BitVec, + capacity_pow2: Capacity, _phantom: PhantomData<&'static T>, } @@ -90,6 +104,7 @@ impl BucketOccupied for IndexBucketUsingBitVecBits { Self { // note: twice as many bits allocated as `num_elements` because we store 2 bits per element enum_tag: BitVec::new_fill(false, capacity.capacity() * 2), + capacity_pow2: capacity, _phantom: PhantomData, } } @@ -107,6 +122,15 @@ impl BucketOccupied for IndexBucketUsingBitVecBits { } } +impl BucketCapacity for IndexBucketUsingBitVecBits { + fn capacity(&self) -> u64 { + self.capacity_pow2.capacity() + } + fn capacity_pow2(&self) -> u8 { + self.capacity_pow2.capacity_pow2() + } +} + pub type DataBucket = BucketWithBitVec; pub type IndexBucket = IndexBucketUsingBitVecBits; @@ -205,7 +229,7 @@ impl MultipleSlots { /// This is coupled with how we resize bucket storages. pub(crate) fn data_loc(&self, storage: &BucketStorage) -> u64 { self.storage_offset() - << (storage.capacity.capacity_pow2() - self.storage_capacity_when_created_pow2()) + << (storage.contents.capacity_pow2() - self.storage_capacity_when_created_pow2()) } }