disk index: index files grow by bytes instead of doubling (#31013)
This commit is contained in:
parent
7d17d7094a
commit
06461fb348
|
@ -213,7 +213,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
|||
.fetch_add(m.as_us(), Ordering::Relaxed);
|
||||
match first_free {
|
||||
Some(ii) => Ok((None, ii)),
|
||||
None => Err(BucketMapError::IndexNoSpace(index.contents.capacity_pow2())),
|
||||
None => Err(BucketMapError::IndexNoSpace(index.contents.capacity())),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -266,7 +266,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
|||
.stats
|
||||
.find_index_entry_mut_us
|
||||
.fetch_add(m.as_us(), Ordering::Relaxed);
|
||||
Err(BucketMapError::IndexNoSpace(index.contents.capacity_pow2()))
|
||||
Err(BucketMapError::IndexNoSpace(index.contents.capacity()))
|
||||
}
|
||||
|
||||
pub fn read_value(&self, key: &Pubkey) -> Option<(&[T], RefCount)> {
|
||||
|
@ -436,29 +436,30 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
|||
self.anticipated_size = count;
|
||||
}
|
||||
|
||||
pub fn grow_index(&self, current_capacity_pow2: u8) {
|
||||
if self.index.contents.capacity_pow2() == current_capacity_pow2 {
|
||||
let mut starting_size_pow2 = self.index.contents.capacity_pow2();
|
||||
if self.anticipated_size > 0 {
|
||||
// start the growth at the next pow2 larger than what would be required to hold `anticipated_size`.
|
||||
// This will prevent unnecessary repeated grows at startup.
|
||||
starting_size_pow2 = starting_size_pow2.max(self.anticipated_size.ilog2() as u8);
|
||||
}
|
||||
pub fn grow_index(&self, mut current_capacity: u64) {
|
||||
if self.index.contents.capacity() == current_capacity {
|
||||
// make sure to grow to at least 40% more than the anticipated size
|
||||
// The indexing algorithm expects to require some over-allocation.
|
||||
let anticipated_size = self.anticipated_size * 140 / 100;
|
||||
let mut m = Measure::start("grow_index");
|
||||
//debug!("GROW_INDEX: {}", current_capacity_pow2);
|
||||
let mut count = 0;
|
||||
loop {
|
||||
count += 1;
|
||||
// grow relative to the current capacity
|
||||
let new_capacity = (current_capacity * 110 / 100).max(anticipated_size);
|
||||
let mut index = BucketStorage::new_with_capacity(
|
||||
Arc::clone(&self.drives),
|
||||
1,
|
||||
std::mem::size_of::<IndexEntry<T>>() as u64,
|
||||
// the subtle `+ count` here causes us to grow from the starting size by a power of 2 on each iteration of the loop
|
||||
Capacity::Pow2(starting_size_pow2 + count),
|
||||
Capacity::Actual(new_capacity),
|
||||
self.index.max_search,
|
||||
Arc::clone(&self.stats.index),
|
||||
Arc::clone(&self.index.count),
|
||||
);
|
||||
// index may have allocated something larger than we asked for,
|
||||
// so, in case we fail to reindex into this larger size, grow from this size next iteration.
|
||||
current_capacity = index.capacity();
|
||||
let random = thread_rng().gen();
|
||||
let mut valid = true;
|
||||
for ix in 0..self.index.capacity() {
|
||||
|
@ -495,7 +496,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
|||
self.stats
|
||||
.index
|
||||
.failed_resizes
|
||||
.fetch_add(count as u64 - 1, Ordering::Relaxed);
|
||||
.fetch_add(count - 1, Ordering::Relaxed);
|
||||
}
|
||||
self.stats.index.resizes.fetch_add(1, Ordering::Relaxed);
|
||||
self.stats
|
||||
|
@ -587,9 +588,9 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
|||
//debug!("GROWING SPACE {:?}", (data_index, current_capacity_pow2));
|
||||
self.grow_data(data_index, current_capacity_pow2);
|
||||
}
|
||||
BucketMapError::IndexNoSpace(current_capacity_pow2) => {
|
||||
BucketMapError::IndexNoSpace(current_capacity) => {
|
||||
//debug!("GROWING INDEX {}", sz);
|
||||
self.grow_index(current_capacity_pow2);
|
||||
self.grow_index(current_capacity);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,12 +51,12 @@ impl<T: Clone + Copy + Debug> std::fmt::Debug for BucketMap<T> {
|
|||
#[derive(Debug)]
|
||||
pub enum BucketMapError {
|
||||
/// (bucket_index, current_capacity_pow2)
|
||||
/// Note that this is specific to data buckets
|
||||
/// Note that this is specific to data buckets, which grow in powers of 2
|
||||
DataNoSpace((u64, u8)),
|
||||
|
||||
/// current_capacity_pow2
|
||||
/// Note that this is specific to index buckets
|
||||
IndexNoSpace(u8),
|
||||
/// current_capacity_entries
|
||||
/// Note that this is specific to index buckets, which can be 'Actual' sizes
|
||||
IndexNoSpace(u64),
|
||||
}
|
||||
|
||||
impl<T: Clone + Copy + Debug> BucketMap<T> {
|
||||
|
|
|
@ -77,7 +77,8 @@ impl BucketOccupied for BucketWithHeader {
|
|||
pub struct IndexBucketUsingBitVecBits<T: 'static> {
|
||||
/// 2 bits per entry that represent a 4 state enum tag
|
||||
pub enum_tag: BitVec,
|
||||
capacity_pow2: Capacity,
|
||||
/// number of elements allocated
|
||||
capacity: u64,
|
||||
_phantom: PhantomData<&'static T>,
|
||||
}
|
||||
|
||||
|
@ -124,7 +125,7 @@ impl<T: Copy + 'static> BucketOccupied for IndexBucketUsingBitVecBits<T> {
|
|||
Self {
|
||||
// note: twice as many bits allocated as `num_elements` because we store 2 bits per element
|
||||
enum_tag: BitVec::new_fill(false, capacity.capacity() * 2),
|
||||
capacity_pow2: capacity,
|
||||
capacity: capacity.capacity(),
|
||||
_phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
@ -144,10 +145,7 @@ impl<T: Copy + 'static> BucketOccupied for IndexBucketUsingBitVecBits<T> {
|
|||
|
||||
impl<T> BucketCapacity for IndexBucketUsingBitVecBits<T> {
|
||||
fn capacity(&self) -> u64 {
|
||||
self.capacity_pow2.capacity()
|
||||
}
|
||||
fn capacity_pow2(&self) -> u8 {
|
||||
self.capacity_pow2.capacity_pow2()
|
||||
self.capacity
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue