add `ancient collect_sort_filter_info` (#30169)

This commit is contained in:
Jeff Washington (jwash) 2023-02-08 09:08:20 -06:00 committed by GitHub
parent 10dde654e7
commit 70d635127e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 210 additions and 109 deletions

View File

@ -25,6 +25,8 @@ struct PackedAncientStorageTuning {
max_ancient_slots: usize,
/// # of bytes in an ideal ancient storage size
ideal_storage_size: NonZeroU64,
/// true if storages can be randomly shrunk even if they aren't eligible
can_randomly_shrink: bool,
}
/// info about a storage eligible to be combined into an ancient append vec.
@ -203,6 +205,20 @@ impl AncientSlotInfos {
}
impl AccountsDb {
/// Build the `AncientSlotInfos` for every storage in `slots`, then prune
/// entries that the `tuning` parameters say should be ignored.
/// Returns the filtered set, ready for combining into ancient append vecs.
#[allow(dead_code)]
fn collect_sort_filter_info(
    &self,
    slots: Vec<Slot>,
    tuning: PackedAncientStorageTuning,
) -> AncientSlotInfos {
    // gather the per-slot info first, then apply the tuning-based filter in place
    let mut infos = self.calc_ancient_slot_info(slots, tuning.can_randomly_shrink);
    infos.filter_ancient_slots(&tuning);
    infos
}
/// go through all slots and populate 'SlotInfo', per slot
/// This provides the list of possible ancient slots to sort, filter, and then combine.
#[allow(dead_code)]
@ -556,30 +572,57 @@ pub mod tests {
assert_eq!(should_shrink, info.should_shrink);
}
/// Selects which API entry point a parameterized test exercises; tests iterate
/// over all variants via `EnumIter` to cover each code path with the same inputs.
#[derive(EnumIter, Debug, PartialEq, Eq)]
enum TestCollectInfo {
// exercise `collect_sort_filter_info` (calc + filter combined)
CollectSortFilterInfo,
// exercise `calc_ancient_slot_info` directly, without filtering
CalcAncientSlotInfo,
// exercise the lower-level `AncientSlotInfos::add` directly
Add,
}
#[test]
fn test_calc_ancient_slot_info_one_alive() {
fn test_calc_ancient_slot_info_one_alive_only() {
let can_randomly_shrink = false;
let alive = true;
let slots = 1;
for call_add in [false, true] {
for method in TestCollectInfo::iter() {
// 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
for data_size in [None, Some(1_040_000)] {
let (db, slot1) = create_db_with_storages_and_index(alive, slots, data_size);
let mut infos = AncientSlotInfos::default();
let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
let alive_bytes_expected = storage.alive_bytes();
if call_add {
// test lower level 'add'
infos.add(slot1, Arc::clone(&storage), can_randomly_shrink);
} else {
infos = db.calc_ancient_slot_info(vec![slot1], can_randomly_shrink);
match method {
TestCollectInfo::Add => {
// test lower level 'add'
infos.add(slot1, Arc::clone(&storage), can_randomly_shrink);
}
TestCollectInfo::CalcAncientSlotInfo => {
infos = db.calc_ancient_slot_info(vec![slot1], can_randomly_shrink);
}
TestCollectInfo::CollectSortFilterInfo => {
let tuning = PackedAncientStorageTuning {
percent_of_alive_shrunk_data: 100,
max_ancient_slots: 0,
// irrelevant
ideal_storage_size: NonZeroU64::new(1).unwrap(),
can_randomly_shrink,
};
infos = db.collect_sort_filter_info(vec![slot1], tuning);
}
}
assert_eq!(infos.all_infos.len(), 1);
let should_shrink = data_size.is_none();
assert_storage_info(infos.all_infos.first().unwrap(), &storage, should_shrink);
if should_shrink {
// data size is so small compared to min aligned file size that the storage is marked as should_shrink
assert_eq!(infos.shrink_indexes, vec![0]);
assert_eq!(
infos.shrink_indexes,
if !matches!(method, TestCollectInfo::CollectSortFilterInfo) {
vec![0]
} else {
Vec::default()
}
);
assert_eq!(infos.total_alive_bytes, alive_bytes_expected as u64);
assert_eq!(infos.total_alive_bytes_shrink, alive_bytes_expected as u64);
} else {
@ -670,54 +713,84 @@ pub mod tests {
#[test]
fn test_calc_ancient_slot_info_one_alive_one_dead() {
let can_randomly_shrink = false;
for slot1_is_alive in [false, true] {
let alives = vec![false /*dummy*/, slot1_is_alive, !slot1_is_alive];
let slots = 2;
// 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
for data_size in [None, Some(1_040_000)] {
let (db, slot1) =
create_db_with_storages_and_index(true /*alive*/, slots, data_size);
assert_eq!(slot1, 1); // make sure index into alives will be correct
assert_eq!(alives[slot1 as usize], slot1_is_alive);
let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
let storages = slot_vec
.iter()
.map(|slot| db.storage.get_slot_storage_entry(*slot).unwrap())
.collect::<Vec<_>>();
storages.iter().for_each(|storage| {
let slot = storage.slot();
let alive = alives[slot as usize];
if !alive {
// make this storage not alive
remove_account_for_tests(storage, storage.written_bytes() as usize, false);
}
});
let alive_storages = storages
.iter()
.filter_map(|storage| alives[storage.slot() as usize].then_some(storage))
.collect::<Vec<_>>();
let alive_bytes_expected = alive_storages
.iter()
.map(|storage| storage.alive_bytes() as u64)
.sum::<u64>();
let infos = db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink);
assert_eq!(infos.all_infos.len(), 1);
let should_shrink = data_size.is_none();
alive_storages
.iter()
.zip(infos.all_infos.iter())
.for_each(|(storage, info)| {
assert_storage_info(info, storage, should_shrink);
for method in TestCollectInfo::iter() {
for slot1_is_alive in [false, true] {
let alives = vec![false /*dummy*/, slot1_is_alive, !slot1_is_alive];
let slots = 2;
// 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
for data_size in [None, Some(1_040_000)] {
let (db, slot1) =
create_db_with_storages_and_index(true /*alive*/, slots, data_size);
assert_eq!(slot1, 1); // make sure index into alives will be correct
assert_eq!(alives[slot1 as usize], slot1_is_alive);
let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
let storages = slot_vec
.iter()
.map(|slot| db.storage.get_slot_storage_entry(*slot).unwrap())
.collect::<Vec<_>>();
storages.iter().for_each(|storage| {
let slot = storage.slot();
let alive = alives[slot as usize];
if !alive {
// make this storage not alive
remove_account_for_tests(
storage,
storage.written_bytes() as usize,
false,
);
}
});
if should_shrink {
// data size is so small compared to min aligned file size that the storage is marked as should_shrink
assert_eq!(infos.shrink_indexes, vec![0]);
assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
assert_eq!(infos.total_alive_bytes_shrink, alive_bytes_expected);
} else {
assert!(infos.shrink_indexes.is_empty());
assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
assert_eq!(infos.total_alive_bytes_shrink, 0);
let alive_storages = storages
.iter()
.filter_map(|storage| alives[storage.slot() as usize].then_some(storage))
.collect::<Vec<_>>();
let alive_bytes_expected = alive_storages
.iter()
.map(|storage| storage.alive_bytes() as u64)
.sum::<u64>();
let infos = match method {
TestCollectInfo::CalcAncientSlotInfo => {
db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink)
}
TestCollectInfo::Add => {
continue; // unsupportable
}
TestCollectInfo::CollectSortFilterInfo => {
let tuning = PackedAncientStorageTuning {
percent_of_alive_shrunk_data: 100,
max_ancient_slots: 0,
// irrelevant
ideal_storage_size: NonZeroU64::new(1).unwrap(),
can_randomly_shrink,
};
db.collect_sort_filter_info(slot_vec.clone(), tuning)
}
};
assert_eq!(infos.all_infos.len(), 1, "method: {method:?}");
let should_shrink = data_size.is_none();
alive_storages.iter().zip(infos.all_infos.iter()).for_each(
|(storage, info)| {
assert_storage_info(info, storage, should_shrink);
},
);
if should_shrink {
// data size is so small compared to min aligned file size that the storage is marked as should_shrink
assert_eq!(
infos.shrink_indexes,
if !matches!(method, TestCollectInfo::CollectSortFilterInfo) {
vec![0]
} else {
Vec::default()
}
);
assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
assert_eq!(infos.total_alive_bytes_shrink, alive_bytes_expected);
} else {
assert!(infos.shrink_indexes.is_empty());
assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
assert_eq!(infos.total_alive_bytes_shrink, 0);
}
}
}
}
@ -761,6 +834,7 @@ pub mod tests {
ideal_storage_size: NonZeroU64::new(ideal_storage_size_large).unwrap(),
// irrelevant since we clear 'shrink_indexes'
percent_of_alive_shrunk_data: 0,
can_randomly_shrink: false,
};
infos.shrink_indexes.clear();
infos.filter_ancient_slots(&tuning);
@ -811,6 +885,7 @@ pub mod tests {
ideal_storage_size: NonZeroU64::new(ideal_storage_size_large).unwrap(),
// irrelevant since we clear 'shrink_indexes'
percent_of_alive_shrunk_data: 0,
can_randomly_shrink: false,
};
infos.shrink_indexes.clear();
infos.filter_ancient_slots(&tuning);
@ -929,61 +1004,86 @@ pub mod tests {
#[test]
fn test_calc_ancient_slot_info_one_shrink_one_not() {
let can_randomly_shrink = false;
for slot1_shrink in [false, true] {
let shrinks = vec![false /*dummy*/, slot1_shrink, !slot1_shrink];
let slots = 2;
// 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
let data_sizes = shrinks
.iter()
.map(|shrink| (!shrink).then_some(1_040_000))
.collect::<Vec<_>>();
let (db, slot1) =
create_db_with_storages_and_index(true /*alive*/, 1, data_sizes[1]);
let dead_bytes = 184; // constant based on None data size
create_storages_and_update_index(
&db,
None,
slot1 + 1,
1,
true,
data_sizes[(slot1 + 1) as usize],
);
assert_eq!(slot1, 1); // make sure index into shrinks will be correct
assert_eq!(shrinks[slot1 as usize], slot1_shrink);
let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
let storages = slot_vec
.iter()
.map(|slot| {
let storage = db.storage.get_slot_storage_entry(*slot).unwrap();
assert_eq!(*slot, storage.slot());
storage
})
.collect::<Vec<_>>();
let alive_bytes_expected = storages
.iter()
.map(|storage| storage.alive_bytes() as u64)
.sum::<u64>();
let infos = db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink);
assert_eq!(infos.all_infos.len(), 2);
storages
.iter()
.zip(infos.all_infos.iter())
.for_each(|(storage, info)| {
assert_storage_info(info, storage, shrinks[storage.slot() as usize]);
});
// data size is so small compared to min aligned file size that the storage is marked as should_shrink
assert_eq!(
infos.shrink_indexes,
shrinks
for method in TestCollectInfo::iter() {
for slot1_shrink in [false, true] {
let shrinks = vec![false /*dummy*/, slot1_shrink, !slot1_shrink];
let slots = 2;
// 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
let data_sizes = shrinks
.iter()
.skip(1)
.enumerate()
.filter_map(|(i, shrink)| shrink.then_some(i))
.collect::<Vec<_>>()
);
assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
assert_eq!(infos.total_alive_bytes_shrink, dead_bytes);
.map(|shrink| (!shrink).then_some(1_040_000))
.collect::<Vec<_>>();
let (db, slot1) =
create_db_with_storages_and_index(true /*alive*/, 1, data_sizes[1]);
let dead_bytes = 184; // constant based on None data size
create_storages_and_update_index(
&db,
None,
slot1 + 1,
1,
true,
data_sizes[(slot1 + 1) as usize],
);
assert_eq!(slot1, 1); // make sure index into shrinks will be correct
assert_eq!(shrinks[slot1 as usize], slot1_shrink);
let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
let storages = slot_vec
.iter()
.map(|slot| {
let storage = db.storage.get_slot_storage_entry(*slot).unwrap();
assert_eq!(*slot, storage.slot());
storage
})
.collect::<Vec<_>>();
let alive_bytes_expected = storages
.iter()
.map(|storage| storage.alive_bytes() as u64)
.sum::<u64>();
let infos = match method {
TestCollectInfo::CalcAncientSlotInfo => {
db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink)
}
TestCollectInfo::Add => {
continue; // unsupportable
}
TestCollectInfo::CollectSortFilterInfo => {
let tuning = PackedAncientStorageTuning {
percent_of_alive_shrunk_data: 100,
max_ancient_slots: 0,
// irrelevant
ideal_storage_size: NonZeroU64::new(1).unwrap(),
can_randomly_shrink,
};
// note this can sort infos.all_infos
db.collect_sort_filter_info(slot_vec.clone(), tuning)
}
};
assert_eq!(infos.all_infos.len(), 2);
storages.iter().for_each(|storage| {
assert!(infos
.all_infos
.iter()
.any(|info| info.slot == storage.slot()));
});
// data size is so small compared to min aligned file size that the storage is marked as should_shrink
assert_eq!(
infos.shrink_indexes,
if matches!(method, TestCollectInfo::CollectSortFilterInfo) {
Vec::default()
} else {
shrinks
.iter()
.skip(1)
.enumerate()
.filter_map(|(i, shrink)| shrink.then_some(i))
.collect::<Vec<_>>()
}
);
assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
assert_eq!(infos.total_alive_bytes_shrink, dead_bytes);
}
}
}
@ -1047,6 +1147,7 @@ pub mod tests {
max_ancient_slots: 0,
// this is irrelevant since the limit is artificially 0
ideal_storage_size: NonZeroU64::new(1).unwrap(),
can_randomly_shrink: false,
};
infos.filter_ancient_slots(&tuning);
}