disk index: index files grow by bytes instead of doubling (#31013)
This commit is contained in:
parent
7d17d7094a
commit
06461fb348
|
@ -213,7 +213,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
||||||
.fetch_add(m.as_us(), Ordering::Relaxed);
|
.fetch_add(m.as_us(), Ordering::Relaxed);
|
||||||
match first_free {
|
match first_free {
|
||||||
Some(ii) => Ok((None, ii)),
|
Some(ii) => Ok((None, ii)),
|
||||||
None => Err(BucketMapError::IndexNoSpace(index.contents.capacity_pow2())),
|
None => Err(BucketMapError::IndexNoSpace(index.contents.capacity())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -266,7 +266,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
||||||
.stats
|
.stats
|
||||||
.find_index_entry_mut_us
|
.find_index_entry_mut_us
|
||||||
.fetch_add(m.as_us(), Ordering::Relaxed);
|
.fetch_add(m.as_us(), Ordering::Relaxed);
|
||||||
Err(BucketMapError::IndexNoSpace(index.contents.capacity_pow2()))
|
Err(BucketMapError::IndexNoSpace(index.contents.capacity()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read_value(&self, key: &Pubkey) -> Option<(&[T], RefCount)> {
|
pub fn read_value(&self, key: &Pubkey) -> Option<(&[T], RefCount)> {
|
||||||
|
@ -436,29 +436,30 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
||||||
self.anticipated_size = count;
|
self.anticipated_size = count;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn grow_index(&self, current_capacity_pow2: u8) {
|
pub fn grow_index(&self, mut current_capacity: u64) {
|
||||||
if self.index.contents.capacity_pow2() == current_capacity_pow2 {
|
if self.index.contents.capacity() == current_capacity {
|
||||||
let mut starting_size_pow2 = self.index.contents.capacity_pow2();
|
// make sure to grow to at least 40% more than the anticipated size
|
||||||
if self.anticipated_size > 0 {
|
// The indexing algorithm expects to require some over-allocation.
|
||||||
// start the growth at the next pow2 larger than what would be required to hold `anticipated_size`.
|
let anticipated_size = self.anticipated_size * 140 / 100;
|
||||||
// This will prevent unnecessary repeated grows at startup.
|
|
||||||
starting_size_pow2 = starting_size_pow2.max(self.anticipated_size.ilog2() as u8);
|
|
||||||
}
|
|
||||||
let mut m = Measure::start("grow_index");
|
let mut m = Measure::start("grow_index");
|
||||||
//debug!("GROW_INDEX: {}", current_capacity_pow2);
|
//debug!("GROW_INDEX: {}", current_capacity);
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
loop {
|
loop {
|
||||||
count += 1;
|
count += 1;
|
||||||
|
// grow relative to the current capacity
|
||||||
|
let new_capacity = (current_capacity * 110 / 100).max(anticipated_size);
|
||||||
let mut index = BucketStorage::new_with_capacity(
|
let mut index = BucketStorage::new_with_capacity(
|
||||||
Arc::clone(&self.drives),
|
Arc::clone(&self.drives),
|
||||||
1,
|
1,
|
||||||
std::mem::size_of::<IndexEntry<T>>() as u64,
|
std::mem::size_of::<IndexEntry<T>>() as u64,
|
||||||
// the subtle `+ i` here causes us to grow from the starting size by a power of 2 on each iteration of the for loop
|
Capacity::Actual(new_capacity),
|
||||||
Capacity::Pow2(starting_size_pow2 + count),
|
|
||||||
self.index.max_search,
|
self.index.max_search,
|
||||||
Arc::clone(&self.stats.index),
|
Arc::clone(&self.stats.index),
|
||||||
Arc::clone(&self.index.count),
|
Arc::clone(&self.index.count),
|
||||||
);
|
);
|
||||||
|
// index may have allocated something larger than we asked for,
|
||||||
|
// so, in case we fail to reindex into this larger size, grow from this size next iteration.
|
||||||
|
current_capacity = index.capacity();
|
||||||
let random = thread_rng().gen();
|
let random = thread_rng().gen();
|
||||||
let mut valid = true;
|
let mut valid = true;
|
||||||
for ix in 0..self.index.capacity() {
|
for ix in 0..self.index.capacity() {
|
||||||
|
@ -495,7 +496,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
||||||
self.stats
|
self.stats
|
||||||
.index
|
.index
|
||||||
.failed_resizes
|
.failed_resizes
|
||||||
.fetch_add(count as u64 - 1, Ordering::Relaxed);
|
.fetch_add(count - 1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
self.stats.index.resizes.fetch_add(1, Ordering::Relaxed);
|
self.stats.index.resizes.fetch_add(1, Ordering::Relaxed);
|
||||||
self.stats
|
self.stats
|
||||||
|
@ -587,9 +588,9 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
|
||||||
//debug!("GROWING SPACE {:?}", (data_index, current_capacity_pow2));
|
//debug!("GROWING SPACE {:?}", (data_index, current_capacity_pow2));
|
||||||
self.grow_data(data_index, current_capacity_pow2);
|
self.grow_data(data_index, current_capacity_pow2);
|
||||||
}
|
}
|
||||||
BucketMapError::IndexNoSpace(current_capacity_pow2) => {
|
BucketMapError::IndexNoSpace(current_capacity) => {
|
||||||
//debug!("GROWING INDEX {}", sz);
|
//debug!("GROWING INDEX {}", current_capacity);
|
||||||
self.grow_index(current_capacity_pow2);
|
self.grow_index(current_capacity);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,12 +51,12 @@ impl<T: Clone + Copy + Debug> std::fmt::Debug for BucketMap<T> {
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum BucketMapError {
|
pub enum BucketMapError {
|
||||||
/// (bucket_index, current_capacity_pow2)
|
/// (bucket_index, current_capacity_pow2)
|
||||||
/// Note that this is specific to data buckets
|
/// Note that this is specific to data buckets, which grow in powers of 2
|
||||||
DataNoSpace((u64, u8)),
|
DataNoSpace((u64, u8)),
|
||||||
|
|
||||||
/// current_capacity_pow2
|
/// current_capacity_entries
|
||||||
/// Note that this is specific to index buckets
|
/// Note that this is specific to index buckets, which can be 'Actual' sizes
|
||||||
IndexNoSpace(u8),
|
IndexNoSpace(u64),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Clone + Copy + Debug> BucketMap<T> {
|
impl<T: Clone + Copy + Debug> BucketMap<T> {
|
||||||
|
|
|
@ -77,7 +77,8 @@ impl BucketOccupied for BucketWithHeader {
|
||||||
pub struct IndexBucketUsingBitVecBits<T: 'static> {
|
pub struct IndexBucketUsingBitVecBits<T: 'static> {
|
||||||
/// 2 bits per entry that represent a 4 state enum tag
|
/// 2 bits per entry that represent a 4 state enum tag
|
||||||
pub enum_tag: BitVec,
|
pub enum_tag: BitVec,
|
||||||
capacity_pow2: Capacity,
|
/// number of elements allocated
|
||||||
|
capacity: u64,
|
||||||
_phantom: PhantomData<&'static T>,
|
_phantom: PhantomData<&'static T>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -124,7 +125,7 @@ impl<T: Copy + 'static> BucketOccupied for IndexBucketUsingBitVecBits<T> {
|
||||||
Self {
|
Self {
|
||||||
// note: twice as many bits allocated as `num_elements` because we store 2 bits per element
|
// note: twice as many bits allocated as `num_elements` because we store 2 bits per element
|
||||||
enum_tag: BitVec::new_fill(false, capacity.capacity() * 2),
|
enum_tag: BitVec::new_fill(false, capacity.capacity() * 2),
|
||||||
capacity_pow2: capacity,
|
capacity: capacity.capacity(),
|
||||||
_phantom: PhantomData,
|
_phantom: PhantomData,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -144,10 +145,7 @@ impl<T: Copy + 'static> BucketOccupied for IndexBucketUsingBitVecBits<T> {
|
||||||
|
|
||||||
impl<T> BucketCapacity for IndexBucketUsingBitVecBits<T> {
|
impl<T> BucketCapacity for IndexBucketUsingBitVecBits<T> {
|
||||||
fn capacity(&self) -> u64 {
|
fn capacity(&self) -> u64 {
|
||||||
self.capacity_pow2.capacity()
|
self.capacity
|
||||||
}
|
|
||||||
fn capacity_pow2(&self) -> u8 {
|
|
||||||
self.capacity_pow2.capacity_pow2()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue