From adcc36da8e1961e2730c926b77faede12c9c5407 Mon Sep 17 00:00:00 2001 From: steviez Date: Sun, 14 Aug 2022 05:20:13 -0500 Subject: [PATCH] Add fallback for ledger-tool commands to create new column families (#26565) RocksDB settings include an option to create_if_missing, which will create missing columns or the entire rocksdb directory if starting from scratch. However, create_if_missing functionality only works if the session has Primary (read+write) access. Many ledger-tool commands only need Secondary (read-only) access to the database, so these commands are unable to open the Blockstore when a column must be added. This change detects when Secondary access open fails due to missing column(s) or files, opens the database temporarily with Primary access, and then reattempts to open the database Secondary access. --- ledger-tool/src/bigtable.rs | 2 + ledger-tool/src/main.rs | 119 ++++++++++++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 6 deletions(-) diff --git a/ledger-tool/src/bigtable.rs b/ledger-tool/src/bigtable.rs index 8dcce9253..1b93f15b6 100644 --- a/ledger-tool/src/bigtable.rs +++ b/ledger-tool/src/bigtable.rs @@ -624,6 +624,7 @@ pub fn bigtable_process_command( let runtime = tokio::runtime::Runtime::new().unwrap(); let verbose = matches.is_present("verbose"); + let force_update_to_open = matches.is_present("force_update_to_open"); let output_format = OutputFormat::from_matches(matches, "output_format", verbose); let (subcommand, sub_matches) = matches.subcommand(); @@ -650,6 +651,7 @@ pub fn bigtable_process_command( AccessType::Secondary, None, shred_storage_type, + force_update_to_open, ); let config = solana_storage_bigtable::LedgerStorageConfig { read_only: false, diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index e885d5eda..e2b6fdc2e 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -25,7 +25,7 @@ use { solana_ledger::{ ancestor_iterator::AncestorIterator, bank_forks_utils, - blockstore::{create_new_ledger, Blockstore, PurgeType}, + blockstore::{create_new_ledger, Blockstore, BlockstoreError, PurgeType}, blockstore_db::{self, Database}, blockstore_options::{ AccessType, BlockstoreOptions, BlockstoreRecoveryMode, LedgerColumnOptions, @@ -805,17 +805,56 @@ fn analyze_storage(database: &Database) { analyze_column::(database, "OptimisticSlots"); } +/// Open blockstore with temporary primary access to allow necessary, +/// persistent changes to be made to the blockstore (such as creation of new +/// column family(s)). Then, continue opening with `original_access_type` +fn open_blockstore_with_temporary_primary_access( + ledger_path: &Path, + original_access_type: AccessType, + wal_recovery_mode: Option, +) -> Result { + // Open with Primary will allow any configuration that automatically + // updates to take effect + info!("Attempting to temporarily open blockstore with Primary access in order to update"); + { + let _ = Blockstore::open_with_options( + ledger_path, + BlockstoreOptions { + access_type: AccessType::PrimaryForMaintenance, + recovery_mode: wal_recovery_mode.clone(), + enforce_ulimit_nofile: true, + ..BlockstoreOptions::default() + }, + )?; + } + // Now, attempt to open the blockstore with original AccessType + info!( + "Blockstore forced open succeeded, retrying with original access: {:?}", + original_access_type + ); + Blockstore::open_with_options( + ledger_path, + BlockstoreOptions { + access_type: original_access_type, + recovery_mode: wal_recovery_mode, + enforce_ulimit_nofile: true, + ..BlockstoreOptions::default() + }, + ) +} + fn open_blockstore( ledger_path: &Path, access_type: AccessType, wal_recovery_mode: Option, shred_storage_type: &ShredStorageType, + force_update_to_open: bool, ) -> Blockstore { match Blockstore::open_with_options( ledger_path, BlockstoreOptions { - access_type, - recovery_mode: wal_recovery_mode, + access_type: access_type.clone(), + recovery_mode: wal_recovery_mode.clone(), enforce_ulimit_nofile: true, column_options: LedgerColumnOptions { shred_storage_type: shred_storage_type.clone(), @@ -824,8 +863,37 @@ fn open_blockstore( }, ) { Ok(blockstore) => blockstore, + Err(BlockstoreError::RocksDb(err)) + if (err + .to_string() + // Missing column family + .starts_with("Invalid argument: Column family not found:") + || err + .to_string() + // Missing essential file, indicative of blockstore not existing + .starts_with("IO error: No such file or directory:")) + && access_type == AccessType::Secondary => + { + error!("Blockstore is incompatible with current software and requires updates"); + if !force_update_to_open { + error!("Use --force-update-to-open to allow blockstore to update"); + exit(1); + } + open_blockstore_with_temporary_primary_access( + ledger_path, + access_type, + wal_recovery_mode, + ) + .unwrap_or_else(|err| { + error!( + "Failed to open blockstore (with --force-update-to-open) at {:?}: {:?}", + ledger_path, err + ); + exit(1); + }) + } Err(err) => { - eprintln!("Failed to open ledger at {:?}: {:?}", ledger_path, err); + eprintln!("Failed to open blockstore at {:?}: {:?}", ledger_path, err); exit(1); } } @@ -1345,6 +1413,14 @@ fn main() { "Mode to recovery the ledger db write ahead log" ), ) + .arg( + Arg::with_name("force_update_to_open") + .long("force-update-to-open") + .takes_value(false) + .global(true) + .help("Allow commands that would otherwise not alter the \ + blockstore to make necessary updates in order to open it"), + ) .arg( Arg::with_name("snapshot_archive_path") .long("snapshot-archive-path") @@ -2039,6 +2115,7 @@ fn main() { let wal_recovery_mode = matches .value_of("wal_recovery_mode") .map(BlockstoreRecoveryMode::from); + let force_update_to_open = matches.is_present("force_update_to_open"); let verbose_level = matches.occurrences_of("verbose"); // TODO: the following shred_storage_type inference must be updated once the @@ -2074,6 +2151,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ), starting_slot, ending_slot, @@ -2093,9 +2171,16 @@ fn main() { AccessType::Secondary, None, &shred_storage_type, + force_update_to_open, ); - let target = - open_blockstore(&target_db, AccessType::Primary, None, &shred_storage_type); + let target = open_blockstore( + &target_db, + AccessType::Primary, + None, + &shred_storage_type, + force_update_to_open, + ); + for (slot, _meta) in source.slot_meta_iterator(starting_slot).unwrap() { if slot > ending_slot { break; @@ -2173,6 +2258,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); match load_bank_forks( arg_matches, @@ -2225,6 +2311,7 @@ fn main() { AccessType::Secondary, None, &shred_storage_type, + force_update_to_open, ); for (slot, _meta) in ledger .slot_meta_iterator(starting_slot) @@ -2264,6 +2351,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); match load_bank_forks( arg_matches, @@ -2290,6 +2378,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); for slot in slots { println!("Slot {}", slot); @@ -2314,6 +2403,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ), starting_slot, Slot::MAX, @@ -2330,6 +2420,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let starting_slot = value_t_or_exit!(arg_matches, "starting_slot", Slot); for slot in blockstore.dead_slots_iterator(starting_slot).unwrap() { @@ -2342,6 +2433,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let starting_slot = value_t_or_exit!(arg_matches, "starting_slot", Slot); for slot in blockstore.duplicate_slots_iterator(starting_slot).unwrap() { @@ -2355,6 +2447,7 @@ fn main() { AccessType::Primary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); for slot in slots { match blockstore.set_dead_slot(slot) { @@ -2370,6 +2463,7 @@ fn main() { AccessType::Primary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); for slot in slots { match blockstore.remove_dead_slot(slot) { @@ -2388,6 +2482,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let mut ancestors = BTreeSet::new(); assert!( @@ -2549,6 +2644,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let (bank_forks, ..) = load_bank_forks( arg_matches, @@ -2593,6 +2689,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); match load_bank_forks( arg_matches, @@ -2707,6 +2804,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let snapshot_slot = if Some("ROOT") == arg_matches.value_of("snapshot_slot") { @@ -3095,6 +3193,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let (bank_forks, ..) = load_bank_forks( arg_matches, @@ -3159,6 +3258,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); match load_bank_forks( arg_matches, @@ -3694,6 +3794,7 @@ fn main() { access_type, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let end_slot = match end_slot { @@ -3770,6 +3871,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let max_height = if let Some(height) = arg_matches.value_of("max_height") { usize::from_str(height).expect("Maximum height must be a number") @@ -3837,6 +3939,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let num_slots = value_t_or_exit!(arg_matches, "num_slots", usize); let slots = blockstore @@ -3861,6 +3964,7 @@ fn main() { AccessType::Primary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let start_root = if let Some(root) = arg_matches.value_of("start_root") { Slot::from_str(root).expect("Before root must be a number") @@ -3914,6 +4018,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); match blockstore.slot_meta_iterator(0) { Ok(metas) => { @@ -3980,6 +4085,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ) .db(), ); @@ -3991,6 +4097,7 @@ fn main() { AccessType::Secondary, wal_recovery_mode, &shred_storage_type, + force_update_to_open, ); let mut slots: Vec = vec![];