Cleanup backup_and_clear_blockstore() (#32461)
Some of the cleanup tasks include:
- Make subfunctions return a Result and allow error handling above
- Add some clarifying comments
- Give the backup directory a more meaningful name
- Add some additional logs (with timing info) for long-running parts
parent e3f253d559
commit e337631f32
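As context for the first cleanup item, here is a minimal, self-contained sketch of the pattern the diff below adopts: a subfunction returns a typed Result, and the caller maps the error into its own error type (a String here) before bubbling it up with `?`. The names below (DbError, clear_bad_entries, start) are hypothetical stand-ins for illustration, not the actual validator code.

    // Sketch of the "return a Result and handle errors above" pattern; all names are hypothetical.
    use std::fmt;

    #[derive(Debug)]
    struct DbError(String);

    impl fmt::Display for DbError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            write!(f, "{}", self.0)
        }
    }

    // The subfunction surfaces its failure as a typed error instead of unwrapping internally.
    fn clear_bad_entries() -> Result<(), DbError> {
        Err(DbError("corrupt column family".to_string()))
    }

    // The caller converts the typed error into its own error type (a String here)
    // and bubbles it up with `?`, mirroring the .map_err(...)? call site in the diff.
    fn start() -> Result<(), String> {
        clear_bad_entries().map_err(|err| format!("Failed to clear bad entries: {err}"))?;
        Ok(())
    }

    fn main() {
        if let Err(msg) = start() {
            eprintln!("{msg}");
        }
    }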
@@ -31,7 +31,6 @@ use {
     crossbeam_channel::{bounded, unbounded, Receiver},
     lazy_static::lazy_static,
     quinn::Endpoint,
-    rand::{thread_rng, Rng},
     solana_client::connection_cache::{ConnectionCache, Protocol},
     solana_entry::poh::compute_hash_time_ns,
     solana_geyser_plugin_manager::{
@@ -552,15 +551,21 @@ impl Validator {
             ));
         }
 
-        if let Some(shred_version) = config.expected_shred_version {
+        if let Some(expected_shred_version) = config.expected_shred_version {
             if let Some(wait_for_supermajority_slot) = config.wait_for_supermajority {
                 *start_progress.write().unwrap() = ValidatorStartProgress::CleaningBlockStore;
                 backup_and_clear_blockstore(
                     ledger_path,
                     config,
                     wait_for_supermajority_slot + 1,
-                    shred_version,
-                );
+                    expected_shred_version,
+                )
+                .map_err(|err| {
+                    format!(
+                        "Failed to backup and clear shreds with incorrect \
+                         shred version from blockstore: {err}"
+                    )
+                })?;
             }
         }
 
@@ -1951,85 +1956,104 @@ fn maybe_warp_slot(
     Ok(())
 }
 
+/// Searches the blockstore for data shreds with the incorrect shred version.
 fn blockstore_contains_bad_shred_version(
     blockstore: &Blockstore,
     start_slot: Slot,
-    shred_version: u16,
-) -> bool {
-    let now = Instant::now();
+    expected_shred_version: u16,
+) -> Result<bool, BlockstoreError> {
+    const TIMEOUT: Duration = Duration::from_secs(60);
+    let timer = Instant::now();
     // Search for shreds with incompatible version in blockstore
-    if let Ok(slot_meta_iterator) = blockstore.slot_meta_iterator(start_slot) {
-        info!("Searching for incorrect shreds..");
-        for (slot, _meta) in slot_meta_iterator {
-            if let Ok(shreds) = blockstore.get_data_shreds_for_slot(slot, 0) {
-                for shred in &shreds {
-                    if shred.version() != shred_version {
-                        return true;
-                    }
-                }
-            }
-            if now.elapsed().as_secs() > 60 {
-                return false;
-            }
-        }
-    }
-    false
+    let slot_meta_iterator = blockstore.slot_meta_iterator(start_slot)?;
+
+    info!("Searching blockstore for shred with incorrect version..");
+    for (slot, _meta) in slot_meta_iterator {
+        let shreds = blockstore.get_data_shreds_for_slot(slot, 0)?;
+        for shred in &shreds {
+            if shred.version() != expected_shred_version {
+                return Ok(true);
+            }
+        }
+        if timer.elapsed() > TIMEOUT {
+            info!("Didn't find incorrect shreds after 60 seconds, aborting");
+            break;
+        }
+    }
+    Ok(false)
 }
 
+/// If the blockstore contains any shreds with the incorrect shred version,
+/// copy them to a backup blockstore and purge them from the actual blockstore.
 fn backup_and_clear_blockstore(
     ledger_path: &Path,
     config: &ValidatorConfig,
     start_slot: Slot,
-    shred_version: u16,
-) {
+    expected_shred_version: u16,
+) -> Result<(), BlockstoreError> {
     let blockstore =
-        Blockstore::open_with_options(ledger_path, blockstore_options_from_config(config)).unwrap();
+        Blockstore::open_with_options(ledger_path, blockstore_options_from_config(config))?;
     let do_copy_and_clear =
-        blockstore_contains_bad_shred_version(&blockstore, start_slot, shred_version);
+        blockstore_contains_bad_shred_version(&blockstore, start_slot, expected_shred_version)?;
 
     // If found, then copy shreds to another db and clear from start_slot
     if do_copy_and_clear {
-        let folder_name = format!(
-            "backup_{}_{}",
+        // .unwrap() safe because getting to this point implies blockstore has slots/shreds
+        let end_slot = blockstore.highest_slot()?.unwrap();
+
+        // Backing up the shreds that will be deleted from primary blockstore is
+        // not critical, so swallow errors from backup blockstore operations.
+        let backup_folder = format!(
+            "{}_backup_{}_{}_{}",
             config
                 .ledger_column_options
                 .shred_storage_type
                 .blockstore_directory(),
-            thread_rng().gen_range(0, 99999)
+            expected_shred_version,
+            start_slot,
+            end_slot
         );
-        let backup_blockstore = Blockstore::open_with_options(
-            &ledger_path.join(folder_name),
+        match Blockstore::open_with_options(
+            &ledger_path.join(backup_folder),
             blockstore_options_from_config(config),
-        );
-        let mut last_print = Instant::now();
-        let mut copied = 0;
-        let mut last_slot = None;
-        let slot_meta_iterator = blockstore.slot_meta_iterator(start_slot).unwrap();
-        for (slot, _meta) in slot_meta_iterator {
-            if let Ok(shreds) = blockstore.get_data_shreds_for_slot(slot, 0) {
-                if let Ok(ref backup_blockstore) = backup_blockstore {
-                    copied += shreds.len();
-                    let _ = backup_blockstore.insert_shreds(shreds, None, true);
-                }
-            }
-            if last_print.elapsed().as_millis() > 3000 {
-                info!(
-                    "Copying shreds from slot {} copied {} so far.",
-                    start_slot, copied
-                );
-                last_print = Instant::now();
-            }
-            last_slot = Some(slot);
-        }
-
-        let end_slot = last_slot.unwrap();
-        info!("Purging slots {} to {}", start_slot, end_slot);
+        ) {
+            Ok(backup_blockstore) => {
+                info!("Backing up slots from {start_slot} to {end_slot}");
+                let mut timer = Measure::start("blockstore backup");
+
+                const PRINT_INTERVAL: Duration = Duration::from_secs(5);
+                let mut print_timer = Instant::now();
+                let mut num_slots_copied = 0;
+                let slot_meta_iterator = blockstore.slot_meta_iterator(start_slot)?;
+                for (slot, _meta) in slot_meta_iterator {
+                    let shreds = blockstore.get_data_shreds_for_slot(slot, 0)?;
+                    let _ = backup_blockstore.insert_shreds(shreds, None, true);
+                    num_slots_copied += 1;
+
+                    if print_timer.elapsed() > PRINT_INTERVAL {
+                        info!("Backed up {num_slots_copied} slots thus far");
+                        print_timer = Instant::now();
+                    }
+                }
+
+                timer.stop();
+                info!("Backing up slots done. {timer}");
+            }
+            Err(err) => {
+                warn!("Unable to backup shreds with incorrect shred version: {err}");
+            }
+        }
+
+        info!("Purging slots {start_slot} to {end_slot} from blockstore");
+        let mut timer = Measure::start("blockstore purge");
         blockstore.purge_from_next_slots(start_slot, end_slot);
         blockstore.purge_slots(start_slot, end_slot, PurgeType::Exact);
-        info!("done");
+        timer.stop();
+        info!("Purging slots done. {timer}");
+    } else {
+        info!("Only shreds with the correct version were found in the blockstore");
     }
-    drop(blockstore);
+
+    Ok(())
 }
 
 fn initialize_rpc_transaction_history_services(
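To make the "more meaningful backup directory name" item concrete, the sketch below mirrors the shape of the new folder name built in backup_and_clear_blockstore() above. The literal "rocksdb" is an assumption standing in for whatever shred_storage_type.blockstore_directory() returns for the configured storage, and the version/slot values are made up.

    // Illustrative sketch only: mirrors the format! used for the backup folder name.
    fn main() {
        // Assumption: "rocksdb" stands in for shred_storage_type.blockstore_directory().
        let blockstore_directory = "rocksdb";
        let expected_shred_version: u16 = 2;
        let (start_slot, end_slot): (u64, u64) = (5, 9);

        let backup_folder = format!(
            "{blockstore_directory}_backup_{expected_shred_version}_{start_slot}_{end_slot}"
        );

        // Old scheme was "backup_<directory>_<random>"; the new name records what was
        // backed up and over which slot range.
        assert_eq!(backup_folder, "rocksdb_backup_2_5_9");
    }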
@@ -2316,8 +2340,12 @@ mod tests {
     use {
         super::*,
         crossbeam_channel::{bounded, RecvTimeoutError},
+        solana_entry::entry,
         solana_gossip::contact_info::{ContactInfo, LegacyContactInfo},
-        solana_ledger::{create_new_tmp_ledger, genesis_utils::create_genesis_config_with_leader},
+        solana_ledger::{
+            blockstore, create_new_tmp_ledger, genesis_utils::create_genesis_config_with_leader,
+            get_tmp_ledger_path_auto_delete,
+        },
         solana_sdk::{genesis_config::create_genesis_config, poh_config::PohConfig},
         solana_tpu_client::tpu_client::{
             DEFAULT_TPU_CONNECTION_POOL_SIZE, DEFAULT_TPU_ENABLE_UDP, DEFAULT_TPU_USE_QUIC,
@@ -2375,22 +2403,13 @@ mod tests {
 
     #[test]
     fn test_backup_and_clear_blockstore() {
-        use std::time::Instant;
         solana_logger::setup();
-        use {
-            solana_entry::entry,
-            solana_ledger::{blockstore, get_tmp_ledger_path},
-        };
 
         let validator_config = ValidatorConfig::default_for_test();
-        let blockstore_path = get_tmp_ledger_path!();
-        {
-            let blockstore = Blockstore::open(&blockstore_path).unwrap();
+        let ledger_path = get_tmp_ledger_path_auto_delete!();
+        let blockstore = Blockstore::open(ledger_path.path()).unwrap();
 
         let entries = entry::create_ticks(1, 0, Hash::default());
 
-        info!("creating shreds");
-        let mut last_print = Instant::now();
         for i in 1..10 {
             let shreds = blockstore::entries_to_test_shreds(
                 &entries,
@@ -2401,17 +2420,13 @@ mod tests {
                 true, // merkle_variant
             );
             blockstore.insert_shreds(shreds, None, true).unwrap();
-            if last_print.elapsed().as_millis() > 5000 {
-                info!("inserted {}", i);
-                last_print = Instant::now();
-            }
         }
         drop(blockstore);
 
         // this purges and compacts all slots greater than or equal to 5
-        backup_and_clear_blockstore(&blockstore_path, &validator_config, 5, 2);
+        backup_and_clear_blockstore(ledger_path.path(), &validator_config, 5, 2).unwrap();
 
-        let blockstore = Blockstore::open(&blockstore_path).unwrap();
+        let blockstore = Blockstore::open(ledger_path.path()).unwrap();
         // assert that slots less than 5 aren't affected
         assert!(blockstore.meta(4).unwrap().unwrap().next_slots.is_empty());
         for i in 5..10 {
@@ -2421,7 +2436,6 @@ mod tests {
                 .is_empty());
         }
-        }
     }
 
     #[test]
     fn validator_parallel_exit() {