Refactor bank_forks_utils::load() and some snapshot logic (#17492)

Refactor a few functions on the load-from-snapshot path to make it easier to
add incremental snapshots.

Additionally, add some tests and doc comments.
This commit is contained in:
Brooks Prumo 2021-05-27 12:00:27 -05:00 committed by GitHub
parent 983828a2a9
commit 1953543274
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 187 additions and 98 deletions

View File

@ -9,7 +9,7 @@ use crate::{
};
use log::*;
use solana_runtime::{
bank_forks::{BankForks, SnapshotConfig},
bank_forks::{ArchiveFormat, BankForks, SnapshotConfig},
snapshot_utils,
};
use solana_sdk::{clock::Slot, genesis_config::GenesisConfig, hash::Hash};
@ -21,14 +21,18 @@ pub type LoadResult = result::Result<
>;
fn to_loadresult(
brp: BlockstoreProcessorResult,
snapshot_hash: Option<(Slot, Hash)>,
bpr: BlockstoreProcessorResult,
snapshot_slot_and_hash: Option<(Slot, Hash)>,
) -> LoadResult {
brp.map(|(bank_forks, leader_schedule_cache)| {
(bank_forks, leader_schedule_cache, snapshot_hash)
bpr.map(|(bank_forks, leader_schedule_cache)| {
(bank_forks, leader_schedule_cache, snapshot_slot_and_hash)
})
}
/// Load the banks and accounts
///
/// If a snapshot config is given, and a snapshot is found, it will be loaded. Otherwise, load
/// from genesis.
pub fn load(
genesis_config: &GenesisConfig,
blockstore: &Blockstore,
@ -48,71 +52,48 @@ pub fn load(
fs::create_dir_all(&snapshot_config.snapshot_path)
.expect("Couldn't create snapshot directory");
match snapshot_utils::get_highest_snapshot_archive_path(
&snapshot_config.snapshot_package_output_path,
) {
Some((archive_filename, (archive_slot, archive_snapshot_hash, compression))) => {
info!("Loading snapshot package: {:?}", archive_filename);
// Fail hard here if snapshot fails to load, don't silently continue
if account_paths.is_empty() {
error!("Account paths not present when booting from snapshot");
process::exit(1);
}
let deserialized_bank = snapshot_utils::bank_from_archive(
&account_paths,
&process_options.frozen_accounts,
&snapshot_config.snapshot_path,
&archive_filename,
compression,
genesis_config,
process_options.debug_keys.clone(),
Some(&crate::builtins::get(process_options.bpf_jit)),
process_options.account_indexes.clone(),
process_options.accounts_db_caching_enabled,
process_options.limit_load_slot_count_from_snapshot,
)
.expect("Load from snapshot failed");
if let Some(shrink_paths) = shrink_paths {
deserialized_bank.set_shrink_paths(shrink_paths);
}
let deserialized_snapshot_hash = (
deserialized_bank.slot(),
deserialized_bank.get_accounts_hash(),
);
if process_options.accounts_db_test_hash_calculation {
deserialized_bank.update_accounts_hash_with_index_option(false, true);
}
if deserialized_snapshot_hash != (archive_slot, archive_snapshot_hash) {
error!(
"Snapshot has mismatch:\narchive: {:?}\ndeserialized: {:?}",
archive_snapshot_hash, deserialized_snapshot_hash
);
process::exit(1);
}
return to_loadresult(
blockstore_processor::process_blockstore_from_root(
blockstore,
deserialized_bank,
&process_options,
&VerifyRecyclers::default(),
transaction_status_sender,
cache_block_meta_sender,
),
Some(deserialized_snapshot_hash),
);
}
None => info!("No snapshot package available"),
if let Some((archive_filename, (archive_slot, archive_hash, archive_format))) =
snapshot_utils::get_highest_snapshot_archive_path(
&snapshot_config.snapshot_package_output_path,
)
{
return load_from_snapshot(
&genesis_config,
&blockstore,
account_paths,
shrink_paths,
snapshot_config,
process_options,
transaction_status_sender,
cache_block_meta_sender,
archive_filename,
archive_slot,
archive_hash,
archive_format,
);
} else {
info!("No snapshot package available; will load from genesis");
}
} else {
info!("Snapshots disabled");
info!("Snapshots disabled; will load from genesis");
}
load_from_genesis(
&genesis_config,
&blockstore,
account_paths,
process_options,
cache_block_meta_sender,
)
}
fn load_from_genesis(
genesis_config: &GenesisConfig,
blockstore: &Blockstore,
account_paths: Vec<PathBuf>,
process_options: ProcessOptions,
cache_block_meta_sender: Option<&CacheBlockMetaSender>,
) -> LoadResult {
info!("Processing ledger from genesis");
to_loadresult(
blockstore_processor::process_blockstore(
@ -125,3 +106,74 @@ pub fn load(
None,
)
}
/// Load the banks and accounts from a snapshot archive, then process the blockstore starting
/// from the bank deserialized out of that archive.
///
/// `archive_slot` and `archive_hash` are the slot and hash the caller associates with
/// `archive_filename`; the deserialized bank must report the same slot and accounts hash.
///
/// The process exits with status 1 if `account_paths` is empty or if the deserialized bank's
/// slot/hash does not match the archive's. Panics if the archive itself cannot be loaded.
///
/// On success the returned `LoadResult` carries the deserialized bank's `(slot, hash)`.
// The many arguments mirror `load()`'s parameters plus the already-parsed archive info.
#[allow(clippy::too_many_arguments)]
fn load_from_snapshot(
    genesis_config: &GenesisConfig,
    blockstore: &Blockstore,
    account_paths: Vec<PathBuf>,
    shrink_paths: Option<Vec<PathBuf>>,
    snapshot_config: &SnapshotConfig,
    process_options: ProcessOptions,
    transaction_status_sender: Option<&TransactionStatusSender>,
    cache_block_meta_sender: Option<&CacheBlockMetaSender>,
    archive_filename: PathBuf,
    archive_slot: Slot,
    archive_hash: Hash,
    archive_format: ArchiveFormat,
) -> LoadResult {
    info!("Loading snapshot package: {:?}", archive_filename);

    // Fail hard here if snapshot fails to load, don't silently continue
    if account_paths.is_empty() {
        error!("Account paths not present when booting from snapshot");
        process::exit(1);
    }

    let deserialized_bank = snapshot_utils::bank_from_archive(
        &account_paths,
        &process_options.frozen_accounts,
        &snapshot_config.snapshot_path,
        &archive_filename,
        archive_format,
        genesis_config,
        process_options.debug_keys.clone(),
        Some(&crate::builtins::get(process_options.bpf_jit)),
        process_options.account_indexes.clone(),
        process_options.accounts_db_caching_enabled,
        process_options.limit_load_slot_count_from_snapshot,
    )
    .expect("Load from snapshot failed");

    if let Some(shrink_paths) = shrink_paths {
        deserialized_bank.set_shrink_paths(shrink_paths);
    }

    // NOTE(review): the pre-refactor code captured the bank's slot/hash *before* running this
    // test hash calculation; confirm that running it first cannot change the value returned by
    // `get_accounts_hash()` below.
    if process_options.accounts_db_test_hash_calculation {
        deserialized_bank.update_accounts_hash_with_index_option(false, true);
    }

    let archive_slot_and_hash = (archive_slot, archive_hash);
    let deserialized_bank_slot_and_hash = (
        deserialized_bank.slot(),
        deserialized_bank.get_accounts_hash(),
    );

    if deserialized_bank_slot_and_hash != archive_slot_and_hash {
        // Log both sides as (slot, hash) so a slot-only mismatch is visible in the message.
        error!(
            "Snapshot has mismatch:\narchive: {:?}\ndeserialized: {:?}",
            archive_slot_and_hash, deserialized_bank_slot_and_hash
        );
        process::exit(1);
    }

    to_loadresult(
        blockstore_processor::process_blockstore_from_root(
            blockstore,
            deserialized_bank,
            &process_options,
            &VerifyRecyclers::default(),
            transaction_status_sender,
            cache_block_meta_sender,
        ),
        Some(deserialized_bank_slot_and_hash),
    )
}

View File

@ -190,7 +190,7 @@ where
}
}
// Map from AppendVec file name to unpacked file system location
/// Map from AppendVec file name to unpacked file system location
pub type UnpackedAppendVecMap = HashMap<String, PathBuf>;
pub fn unpack_snapshot<A: Read>(

View File

@ -197,10 +197,10 @@ pub fn package_snapshot<P: AsRef<Path>, Q: AsRef<Path>>(
fn get_archive_ext(archive_format: ArchiveFormat) -> &'static str {
match archive_format {
ArchiveFormat::TarBzip2 => ".tar.bz2",
ArchiveFormat::TarGzip => ".tar.gz",
ArchiveFormat::TarZstd => ".tar.zst",
ArchiveFormat::Tar => ".tar",
ArchiveFormat::TarBzip2 => "tar.bz2",
ArchiveFormat::TarGzip => "tar.gz",
ArchiveFormat::TarZstd => "tar.zst",
ArchiveFormat::Tar => "tar",
}
}
@ -304,7 +304,7 @@ pub fn archive_snapshot_package(
//
// system `tar` program is used for -S (sparse file support)
let archive_path = tar_dir.join(format!(
"{}{}{}",
"{}{}.{}",
TMP_SNAPSHOT_PREFIX, snapshot_package.slot, file_ext
));
@ -652,7 +652,7 @@ pub fn get_snapshot_archive_path(
archive_format: ArchiveFormat,
) -> PathBuf {
snapshot_output_dir.join(format!(
"snapshot-{}-{}{}",
"snapshot-{}-{}.{}",
snapshot_hash.0,
snapshot_hash.1,
get_archive_ext(archive_format),
@ -669,27 +669,27 @@ fn archive_format_from_str(archive_format: &str) -> Option<ArchiveFormat> {
}
}
fn snapshot_hash_of(archive_filename: &str) -> Option<(Slot, Hash, ArchiveFormat)> {
let snapshot_filename_regex =
Regex::new(r"^snapshot-(\d+)-([[:alnum:]]+)\.(tar|tar\.bz2|tar\.zst|tar\.gz)$").unwrap();
/// Parse a snapshot archive filename into its Slot, Hash, and Archive Format
fn parse_snapshot_archive_filename(archive_filename: &str) -> Option<(Slot, Hash, ArchiveFormat)> {
let snapshot_archive_filename_regex =
Regex::new(r"^snapshot-(\d+)-([[:alnum:]]+)\.(tar|tar\.bz2|tar\.zst|tar\.gz)$");
if let Some(captures) = snapshot_filename_regex.captures(archive_filename) {
let slot_str = captures.get(1).unwrap().as_str();
let hash_str = captures.get(2).unwrap().as_str();
let ext = captures.get(3).unwrap().as_str();
snapshot_archive_filename_regex
.ok()?
.captures(archive_filename)
.and_then(|captures| {
let slot = captures.get(1).map(|x| x.as_str().parse::<Slot>())?.ok()?;
let hash = captures.get(2).map(|x| x.as_str().parse::<Hash>())?.ok()?;
let archive_format = captures
.get(3)
.map(|x| archive_format_from_str(x.as_str()))??;
if let (Ok(slot), Ok(hash), Some(archive_format)) = (
slot_str.parse::<Slot>(),
hash_str.parse::<Hash>(),
archive_format_from_str(ext),
) {
return Some((slot, hash, archive_format));
}
}
None
Some((slot, hash, archive_format))
})
}
pub fn get_snapshot_archives<P: AsRef<Path>>(
/// Get a list of the snapshot archives in a directory, sorted by Slot in descending order
fn get_snapshot_archives<P: AsRef<Path>>(
snapshot_output_dir: P,
) -> Vec<(PathBuf, (Slot, Hash, ArchiveFormat))> {
match fs::read_dir(&snapshot_output_dir) {
@ -703,9 +703,9 @@ pub fn get_snapshot_archives<P: AsRef<Path>>(
if let Ok(entry) = entry {
let path = entry.path();
if path.is_file() {
if let Some(snapshot_hash) =
snapshot_hash_of(path.file_name().unwrap().to_str().unwrap())
{
if let Some(snapshot_hash) = parse_snapshot_archive_filename(
path.file_name().unwrap().to_str().unwrap(),
) {
return Some((path, snapshot_hash));
}
}
@ -720,6 +720,7 @@ pub fn get_snapshot_archives<P: AsRef<Path>>(
}
}
/// Get the snapshot archive with the highest Slot in a directory
pub fn get_highest_snapshot_archive_path<P: AsRef<Path>>(
snapshot_output_dir: P,
) -> Option<(PathBuf, (Slot, Hash, ArchiveFormat))> {
@ -1146,21 +1147,57 @@ mod tests {
}
#[test]
fn test_snapshot_hash_of() {
fn test_parse_snapshot_archive_filename() {
assert_eq!(
snapshot_hash_of(&format!("snapshot-42-{}.tar.bz2", Hash::default())),
parse_snapshot_archive_filename(&format!("snapshot-42-{}.tar.bz2", Hash::default())),
Some((42, Hash::default(), ArchiveFormat::TarBzip2))
);
assert_eq!(
snapshot_hash_of(&format!("snapshot-43-{}.tar.zst", Hash::default())),
parse_snapshot_archive_filename(&format!("snapshot-43-{}.tar.zst", Hash::default())),
Some((43, Hash::default(), ArchiveFormat::TarZstd))
);
assert_eq!(
snapshot_hash_of(&format!("snapshot-42-{}.tar", Hash::default())),
parse_snapshot_archive_filename(&format!("snapshot-42-{}.tar", Hash::default())),
Some((42, Hash::default(), ArchiveFormat::Tar))
);
assert!(snapshot_hash_of("invalid").is_none());
assert!(parse_snapshot_archive_filename("invalid").is_none());
assert!(parse_snapshot_archive_filename("snapshot-bad!slot-bad!hash.bad!ext").is_none());
assert!(parse_snapshot_archive_filename("snapshot-12345678-bad!hash.bad!ext").is_none());
assert!(parse_snapshot_archive_filename("snapshot-12345678-HASH1234.bad!ext").is_none());
assert!(parse_snapshot_archive_filename("snapshot-12345678-bad!hash.tar").is_none());
assert!(parse_snapshot_archive_filename("snapshot-bad!slot-HASH1234.bad!ext").is_none());
assert!(parse_snapshot_archive_filename("snapshot-12345678-HASH1234.bad!ext").is_none());
assert!(parse_snapshot_archive_filename("snapshot-bad!slot-HASH1234.tar").is_none());
assert!(parse_snapshot_archive_filename("snapshot-bad!slot-bad!hash.tar").is_none());
assert!(parse_snapshot_archive_filename("snapshot-12345678-bad!hash.tar").is_none());
assert!(parse_snapshot_archive_filename("snapshot-bad!slot-HASH1234.tar").is_none());
}
#[test]
fn test_get_snapshot_archives() {
    let temp_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
    let min_slot = 123;
    let max_slot = 456;

    // Create one empty, well-named archive file for every slot in [min_slot, max_slot).
    (min_slot..max_slot).for_each(|slot| {
        let archive_path = temp_snapshot_archives_dir
            .path()
            .join(format!("snapshot-{}-{}.tar", slot, Hash::default()));
        File::create(archive_path).unwrap();
    });

    // Also create a file with an unrecognized extension and a slot above every valid one, so
    // the test checks that bad filenames are filtered out and that results are sorted correctly.
    let invalid_archive_path = temp_snapshot_archives_dir.path().join(format!(
        "snapshot-{}-{}.bad!ext",
        max_slot + 1,
        Hash::default()
    ));
    File::create(invalid_archive_path).unwrap();

    let results = get_snapshot_archives(temp_snapshot_archives_dir);

    // Only the well-named archives are returned, and the first one has the highest valid slot.
    let expected_count = max_slot - min_slot;
    let expected_highest_slot = max_slot - 1;
    assert_eq!(results.len(), expected_count);
    assert_eq!(results[0].1 .0 as usize, expected_highest_slot);
}
fn common_test_purge_old_snapshot_archives(