disk index: use bits in ref count to store occupied (#31004)

This commit is contained in:
Jeff Washington (jwash) 2023-03-31 12:50:55 -05:00 committed by GitHub
parent a2f3a219d8
commit a2797ebfa9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 99 additions and 9 deletions

View File

@ -190,6 +190,18 @@ impl<O: BucketOccupied> BucketStorage<O> {
unsafe { slice.get_unchecked_mut(0) }
}
pub(crate) fn get_mut_from_parts<T: Sized>(item_slice: &mut [u8]) -> &mut T {
debug_assert!(std::mem::size_of::<T>() <= item_slice.len());
let item = item_slice.as_mut_ptr() as *mut T;
unsafe { &mut *item }
}
pub(crate) fn get_from_parts<T: Sized>(item_slice: &[u8]) -> &T {
debug_assert!(std::mem::size_of::<T>() <= item_slice.len());
let item = item_slice.as_ptr() as *const T;
unsafe { &*item }
}
pub fn get_cell_slice<T>(&self, ix: u64, len: u64) -> &[T] {
let start = self.get_start_offset_no_header(ix);
let slice = {

View File

@ -12,12 +12,11 @@ use {
};
/// allocated in `contents` in a BucketStorage
pub struct BucketWithBitVec<T: 'static> {
pub struct BucketWithBitVec {
pub occupied: BitVec,
_phantom: PhantomData<&'static T>,
}
impl<T> BucketOccupied for BucketWithBitVec<T> {
impl BucketOccupied for BucketWithBitVec {
fn occupy(&mut self, element: &mut [u8], ix: usize) {
assert!(self.is_free(element, ix));
self.occupied.set(ix as u64, true);
@ -36,13 +35,45 @@ impl<T> BucketOccupied for BucketWithBitVec<T> {
fn new(num_elements: usize) -> Self {
Self {
occupied: BitVec::new_fill(false, num_elements as u64),
}
}
}
#[derive(Debug, Default)]
pub struct IndexBucketUsingRefCountBits<T: 'static> {
_phantom: PhantomData<&'static T>,
}
impl<T: 'static> BucketOccupied for IndexBucketUsingRefCountBits<T> {
fn occupy(&mut self, element: &mut [u8], ix: usize) {
assert!(self.is_free(element, ix));
let entry: &mut IndexEntry<T> =
BucketStorage::<IndexBucketUsingRefCountBits<T>>::get_mut_from_parts(element);
entry.set_slot_count_enum_value(OccupiedEnum::Occupied);
}
fn free(&mut self, element: &mut [u8], ix: usize) {
assert!(!self.is_free(element, ix));
let entry: &mut IndexEntry<T> =
BucketStorage::<IndexBucketUsingRefCountBits<T>>::get_mut_from_parts(element);
entry.set_slot_count_enum_value(OccupiedEnum::Free);
}
fn is_free(&self, element: &[u8], _ix: usize) -> bool {
let entry: &IndexEntry<T> =
BucketStorage::<IndexBucketUsingRefCountBits<T>>::get_from_parts(element);
matches!(entry.get_slot_count_enum(), OccupiedEnum::Free)
}
fn offset_to_first_data() -> usize {
0
}
fn new(_num_elements: usize) -> Self {
Self {
_phantom: PhantomData,
}
}
}
pub type DataBucket = BucketWithBitVec<()>;
pub type IndexBucket<T> = BucketWithBitVec<T>;
pub type DataBucket = BucketWithBitVec;
pub type IndexBucket<T> = IndexBucketUsingRefCountBits<T>;
/// contains the index of an entry in the index bucket.
/// This type allows us to call methods to interact with the index entry on this type.
@ -67,15 +98,15 @@ pub struct IndexEntry<T: 'static> {
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
struct PackedRefCount {
/// reserved for future use
unused: B2,
/// tag for Enum
slot_count_enum: B2,
/// ref_count of this entry. We don't need any where near 62 bits for this value
ref_count: B62,
}
/// required fields when an index element references the data file
#[repr(C)]
#[derive(Debug, Default, Copy, Clone)]
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
pub(crate) struct MultipleSlots {
// if the bucket doubled, the index can be recomputed using storage_cap_and_offset.create_bucket_capacity_pow2
storage_cap_and_offset: PackedStorage,
@ -139,6 +170,36 @@ impl MultipleSlots {
}
}
#[repr(u8)]
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum OccupiedEnum {
/// this spot is free (ie. not occupied)
Free = 0,
/// this spot is occupied
Occupied = 1,
}
impl<T: 'static> IndexEntry<T> {
/// enum value stored in 2 spare bits taken from ref_count
fn get_slot_count_enum(&self) -> OccupiedEnum {
match self.packed_ref_count.slot_count_enum() {
0 => OccupiedEnum::Free,
1 => OccupiedEnum::Occupied,
_ => {
panic!("unexpected value");
}
}
}
/// enum value stored in 2 spare bits taken from ref_count
fn set_slot_count_enum_value(&mut self, value: OccupiedEnum) {
self.packed_ref_count.set_slot_count_enum(match value {
OccupiedEnum::Free => 0,
OccupiedEnum::Occupied => 1,
});
}
}
/// Pack the storage offset and capacity-when-crated-pow2 fields into a single u64
#[bitfield(bits = 64)]
#[repr(C)]
@ -148,7 +209,7 @@ struct PackedStorage {
offset: B56,
}
impl<T> IndexEntryPlaceInBucket<T> {
impl<T: 'static> IndexEntryPlaceInBucket<T> {
pub fn init(&self, index_bucket: &mut BucketStorage<IndexBucket<T>>, pubkey: &Pubkey) {
let index_entry = index_bucket.get_mut::<IndexEntry<T>>(self.ix);
index_entry.key = *pubkey;
@ -172,6 +233,23 @@ impl<T> IndexEntryPlaceInBucket<T> {
.multiple_slots
}
pub(crate) fn get_slot_count_enum(
&self,
index_bucket: &BucketStorage<IndexBucket<T>>,
) -> OccupiedEnum {
let index_entry = index_bucket.get::<IndexEntry<T>>(self.ix);
index_entry.get_slot_count_enum()
}
pub(crate) fn set_slot_count_enum_value(
&self,
index_bucket: &mut BucketStorage<IndexBucket<T>>,
value: OccupiedEnum,
) {
let index_entry = index_bucket.get_mut::<IndexEntry<T>>(self.ix);
index_entry.set_slot_count_enum_value(value);
}
pub fn ref_count(&self, index_bucket: &BucketStorage<IndexBucket<T>>) -> RefCount {
let index_entry = index_bucket.get::<IndexEntry<T>>(self.ix);
index_entry.packed_ref_count.ref_count()