From a2797ebfa9ab6c9acaaf310f9600aa30b67e4bdf Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" Date: Fri, 31 Mar 2023 12:50:55 -0500 Subject: [PATCH] disk index: use bits in ref count to store occupied (#31004) --- bucket_map/src/bucket_storage.rs | 12 ++++ bucket_map/src/index_entry.rs | 96 +++++++++++++++++++++++++++++--- 2 files changed, 99 insertions(+), 9 deletions(-) diff --git a/bucket_map/src/bucket_storage.rs b/bucket_map/src/bucket_storage.rs index effda2cc97..a8e9b953a3 100644 --- a/bucket_map/src/bucket_storage.rs +++ b/bucket_map/src/bucket_storage.rs @@ -190,6 +190,18 @@ impl BucketStorage { unsafe { slice.get_unchecked_mut(0) } } + pub(crate) fn get_mut_from_parts(item_slice: &mut [u8]) -> &mut T { + debug_assert!(std::mem::size_of::() <= item_slice.len()); + let item = item_slice.as_mut_ptr() as *mut T; + unsafe { &mut *item } + } + + pub(crate) fn get_from_parts(item_slice: &[u8]) -> &T { + debug_assert!(std::mem::size_of::() <= item_slice.len()); + let item = item_slice.as_ptr() as *const T; + unsafe { &*item } + } + pub fn get_cell_slice(&self, ix: u64, len: u64) -> &[T] { let start = self.get_start_offset_no_header(ix); let slice = { diff --git a/bucket_map/src/index_entry.rs b/bucket_map/src/index_entry.rs index 8d6bc753d0..a784bd2faa 100644 --- a/bucket_map/src/index_entry.rs +++ b/bucket_map/src/index_entry.rs @@ -12,12 +12,11 @@ use { }; /// allocated in `contents` in a BucketStorage -pub struct BucketWithBitVec { +pub struct BucketWithBitVec { pub occupied: BitVec, - _phantom: PhantomData<&'static T>, } -impl BucketOccupied for BucketWithBitVec { +impl BucketOccupied for BucketWithBitVec { fn occupy(&mut self, element: &mut [u8], ix: usize) { assert!(self.is_free(element, ix)); self.occupied.set(ix as u64, true); @@ -36,13 +35,45 @@ impl BucketOccupied for BucketWithBitVec { fn new(num_elements: usize) -> Self { Self { occupied: BitVec::new_fill(false, num_elements as u64), + } + } +} + +#[derive(Debug, Default)] +pub struct IndexBucketUsingRefCountBits { + _phantom: PhantomData<&'static T>, +} + +impl BucketOccupied for IndexBucketUsingRefCountBits { + fn occupy(&mut self, element: &mut [u8], ix: usize) { + assert!(self.is_free(element, ix)); + let entry: &mut IndexEntry = + BucketStorage::>::get_mut_from_parts(element); + entry.set_slot_count_enum_value(OccupiedEnum::Occupied); + } + fn free(&mut self, element: &mut [u8], ix: usize) { + assert!(!self.is_free(element, ix)); + let entry: &mut IndexEntry = + BucketStorage::>::get_mut_from_parts(element); + entry.set_slot_count_enum_value(OccupiedEnum::Free); + } + fn is_free(&self, element: &[u8], _ix: usize) -> bool { + let entry: &IndexEntry = + BucketStorage::>::get_from_parts(element); + matches!(entry.get_slot_count_enum(), OccupiedEnum::Free) + } + fn offset_to_first_data() -> usize { + 0 + } + fn new(_num_elements: usize) -> Self { + Self { _phantom: PhantomData, } } } -pub type DataBucket = BucketWithBitVec<()>; -pub type IndexBucket = BucketWithBitVec; +pub type DataBucket = BucketWithBitVec; +pub type IndexBucket = IndexBucketUsingRefCountBits; /// contains the index of an entry in the index bucket. /// This type allows us to call methods to interact with the index entry on this type. @@ -67,15 +98,15 @@ pub struct IndexEntry { #[repr(C)] #[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] struct PackedRefCount { - /// reserved for future use - unused: B2, + /// tag for Enum + slot_count_enum: B2, /// ref_count of this entry. We don't need any where near 62 bits for this value ref_count: B62, } /// required fields when an index element references the data file #[repr(C)] -#[derive(Debug, Default, Copy, Clone)] +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)] pub(crate) struct MultipleSlots { // if the bucket doubled, the index can be recomputed using storage_cap_and_offset.create_bucket_capacity_pow2 storage_cap_and_offset: PackedStorage, @@ -139,6 +170,36 @@ impl MultipleSlots { } } +#[repr(u8)] +#[derive(Debug, Eq, PartialEq)] +pub(crate) enum OccupiedEnum { + /// this spot is free (ie. not occupied) + Free = 0, + /// this spot is occupied + Occupied = 1, +} + +impl IndexEntry { + /// enum value stored in 2 spare bits taken from ref_count + fn get_slot_count_enum(&self) -> OccupiedEnum { + match self.packed_ref_count.slot_count_enum() { + 0 => OccupiedEnum::Free, + 1 => OccupiedEnum::Occupied, + _ => { + panic!("unexpected value"); + } + } + } + + /// enum value stored in 2 spare bits taken from ref_count + fn set_slot_count_enum_value(&mut self, value: OccupiedEnum) { + self.packed_ref_count.set_slot_count_enum(match value { + OccupiedEnum::Free => 0, + OccupiedEnum::Occupied => 1, + }); + } +} + /// Pack the storage offset and capacity-when-crated-pow2 fields into a single u64 #[bitfield(bits = 64)] #[repr(C)] @@ -148,7 +209,7 @@ struct PackedStorage { offset: B56, } -impl IndexEntryPlaceInBucket { +impl IndexEntryPlaceInBucket { pub fn init(&self, index_bucket: &mut BucketStorage>, pubkey: &Pubkey) { let index_entry = index_bucket.get_mut::>(self.ix); index_entry.key = *pubkey; @@ -172,6 +233,23 @@ impl IndexEntryPlaceInBucket { .multiple_slots } + pub(crate) fn get_slot_count_enum( + &self, + index_bucket: &BucketStorage>, + ) -> OccupiedEnum { + let index_entry = index_bucket.get::>(self.ix); + index_entry.get_slot_count_enum() + } + + pub(crate) fn set_slot_count_enum_value( + &self, + index_bucket: &mut BucketStorage>, + value: OccupiedEnum, + ) { + let index_entry = index_bucket.get_mut::>(self.ix); + index_entry.set_slot_count_enum_value(value); + } + pub fn ref_count(&self, index_bucket: &BucketStorage>) -> RefCount { let index_entry = index_bucket.get::>(self.ix); index_entry.packed_ref_count.ref_count()