From 16f3dcd5d2dda9fd510dc71875b704750fa72969 Mon Sep 17 00:00:00 2001 From: Xiang Zhu Date: Mon, 24 Apr 2023 18:52:50 -0700 Subject: [PATCH] Update fn create_and_verify_snapshot (#31245) * only 1 snapshot per archive in create_and_verify_snapshot * Update create_and_verify_snapshot with the newer funtion calls * Fix test_package_snapshots * Remove account path access change * Rename slot to num_snapshots * Remove unncessary purge_old_bank_snapshots in test * Update non-deterministic format comment * Cleanup unnecessary hash calls * Use get_accounts_hash * Remove extra declaration * Remove rehash * Remove clean_accounts * Revert "Cleanup unnecessary hash calls" This reverts commit 06b1457462cf6d7acf62e0e5531633caf5d9fc58. Removing unncessary hash calls should be done for create_and_verify_snapshot, not bank_to_full_snapshot_archive * Fix typo appenedvecs * Remove bank_snapshots_dir after rebasing --- core/src/snapshot_packager_service.rs | 118 ++++------------ core/tests/snapshots.rs | 4 +- runtime/src/snapshot_utils.rs | 196 ++++++++++++++------------ 3 files changed, 132 insertions(+), 186 deletions(-) diff --git a/core/src/snapshot_packager_service.rs b/core/src/snapshot_packager_service.rs index c8b5f6c55..eb7129d1f 100644 --- a/core/src/snapshot_packager_service.rs +++ b/core/src/snapshot_packager_service.rs @@ -182,23 +182,16 @@ impl SnapshotPackagerService { mod tests { use { super::*, - bincode::serialize_into, rand::seq::SliceRandom, solana_runtime::{ - accounts_db::AccountStorageEntry, - bank::BankSlotDelta, snapshot_archive_info::SnapshotArchiveInfo, snapshot_hash::SnapshotHash, snapshot_package::{SnapshotPackage, SnapshotType}, - snapshot_utils::{ - self, create_accounts_run_and_snapshot_dirs, ArchiveFormat, SnapshotVersion, - SNAPSHOT_STATUS_CACHE_FILENAME, - }, + snapshot_utils::{self, ArchiveFormat, SnapshotVersion}, }, - solana_sdk::{clock::Slot, hash::Hash}, + solana_sdk::{clock::Slot, genesis_config::GenesisConfig, hash::Hash}, std::{ - fs::{self, remove_dir_all, OpenOptions}, - io::Write, + fs::{self, remove_dir_all}, path::{Path, PathBuf}, time::Instant, }, @@ -231,107 +224,48 @@ mod tests { } fn create_and_verify_snapshot(temp_dir: &Path) { - let accounts_dir = temp_dir.join("accounts"); - let accounts_dir = create_accounts_run_and_snapshot_dirs(accounts_dir) - .unwrap() - .0; - - let snapshots_dir = temp_dir.join("snapshots"); + let bank_snapshots_dir = temp_dir.join("snapshots"); + fs::create_dir_all(&bank_snapshots_dir).unwrap(); let full_snapshot_archives_dir = temp_dir.join("full_snapshot_archives"); let incremental_snapshot_archives_dir = temp_dir.join("incremental_snapshot_archives"); fs::create_dir_all(&full_snapshot_archives_dir).unwrap(); fs::create_dir_all(&incremental_snapshot_archives_dir).unwrap(); - fs::create_dir_all(&accounts_dir).unwrap(); - // Create some storage entries - let storage_entries: Vec<_> = (0..5) - .map(|i| Arc::new(AccountStorageEntry::new(&accounts_dir, 0, i, 10))) - .collect(); + let num_snapshots = 1; - // Create some fake snapshot - let snapshots_paths: Vec<_> = (0..5) - .map(|i| { - let snapshot_file_name = format!("{i}"); - let snapshots_dir = snapshots_dir.join(&snapshot_file_name); - fs::create_dir_all(&snapshots_dir).unwrap(); - let fake_snapshot_path = snapshots_dir.join(&snapshot_file_name); - let mut fake_snapshot_file = OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&fake_snapshot_path) - .unwrap(); - - fake_snapshot_file.write_all(b"Hello, world!").unwrap(); - fake_snapshot_path - }) - .collect(); - - // Create directory of hard links for snapshots - let link_snapshots_dir = tempfile::tempdir_in(temp_dir).unwrap(); - for snapshots_path in snapshots_paths { - let snapshot_file_name = snapshots_path.file_name().unwrap(); - let link_snapshots_dir = link_snapshots_dir.path().join(snapshot_file_name); - fs::create_dir_all(&link_snapshots_dir).unwrap(); - let link_path = link_snapshots_dir.join(snapshot_file_name); - fs::hard_link(&snapshots_path, link_path).unwrap(); - } - - // Create a packageable snapshot - let slot = 42; - let hash = SnapshotHash(Hash::default()); - let archive_format = ArchiveFormat::TarBzip2; - let output_tar_path = snapshot_utils::build_full_snapshot_archive_path( - &full_snapshot_archives_dir, - slot, - &hash, - archive_format, + let genesis_config = GenesisConfig::default(); + let bank = snapshot_utils::create_snapshot_dirs_for_tests( + &genesis_config, + &bank_snapshots_dir, + num_snapshots, + num_snapshots, ); - let snapshot_package = SnapshotPackage { - snapshot_archive_info: SnapshotArchiveInfo { - path: output_tar_path.clone(), - slot, - hash, - archive_format, - }, - block_height: slot, - snapshot_links: link_snapshots_dir, - snapshot_storages: storage_entries, - snapshot_version: SnapshotVersion::default(), - snapshot_type: SnapshotType::FullSnapshot, - enqueued: Instant::now(), - }; - // Make tarball from packageable snapshot - snapshot_utils::archive_snapshot_package( - &snapshot_package, + let bank_snapshot_info = + snapshot_utils::get_highest_bank_snapshot(&bank_snapshots_dir).unwrap(); + let snapshot_storages = bank.get_snapshot_storages(None); + let archive_format = ArchiveFormat::TarBzip2; + + let full_archive = snapshot_utils::package_and_archive_full_snapshot( + &bank, + &bank_snapshot_info, full_snapshot_archives_dir, incremental_snapshot_archives_dir, + snapshot_storages, + archive_format, + SnapshotVersion::default(), snapshot_utils::DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN, snapshot_utils::DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, ) .unwrap(); - // before we compare, stick an empty status_cache in this dir so that the package comparison works - // This is needed since the status_cache is added by the packager and is not collected from - // the source dir for snapshots - let dummy_slot_deltas: Vec = vec![]; - snapshot_utils::serialize_snapshot_data_file( - &snapshots_dir.join(SNAPSHOT_STATUS_CACHE_FILENAME), - |stream| { - serialize_into(stream, &dummy_slot_deltas)?; - Ok(()) - }, - ) - .unwrap(); - // Check archive is correct snapshot_utils::verify_snapshot_archive( - output_tar_path, - snapshots_dir, - accounts_dir, + full_archive.path(), + bank_snapshots_dir, archive_format, snapshot_utils::VerifyBank::Deterministic, + bank_snapshot_info.slot, ); } diff --git a/core/tests/snapshots.rs b/core/tests/snapshots.rs index 3d3607b76..c9bfc54cd 100644 --- a/core/tests/snapshots.rs +++ b/core/tests/snapshots.rs @@ -562,9 +562,9 @@ fn test_concurrent_snapshot_packaging( snapshot_utils::verify_snapshot_archive( saved_archive_path.unwrap(), saved_snapshots_dir.path(), - saved_accounts_dir.path(), ArchiveFormat::TarBzip2, - snapshot_utils::VerifyBank::NonDeterministic(saved_slot), + snapshot_utils::VerifyBank::NonDeterministic, + saved_slot, ); } diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 03f837cfd..f44dfacfb 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -2846,29 +2846,28 @@ fn get_io_error(error: &str) -> SnapshotError { pub enum VerifyBank { /// the bank's serialized format is expected to be identical to what we are comparing against Deterministic, - /// the serialized bank was 'reserialized' into a non-deterministic format at the specified slot + /// the serialized bank was 'reserialized' into a non-deterministic format /// so, deserialize both files and compare deserialized results - NonDeterministic(Slot), + NonDeterministic, } -pub fn verify_snapshot_archive( +pub fn verify_snapshot_archive( snapshot_archive: P, snapshots_to_verify: Q, - storages_to_verify: R, archive_format: ArchiveFormat, verify_bank: VerifyBank, + slot: Slot, ) where P: AsRef, Q: AsRef, - R: AsRef, { let temp_dir = tempfile::TempDir::new().unwrap(); let unpack_dir = temp_dir.path(); - let account_dir = create_accounts_run_and_snapshot_dirs(unpack_dir).unwrap().0; + let unpack_account_dir = create_accounts_run_and_snapshot_dirs(unpack_dir).unwrap().0; untar_snapshot_in( snapshot_archive, unpack_dir, - &[account_dir.clone()], + &[unpack_account_dir.clone()], archive_format, 1, ) @@ -2876,52 +2875,64 @@ pub fn verify_snapshot_archive( // Check snapshots are the same let unpacked_snapshots = unpack_dir.join("snapshots"); - if let VerifyBank::NonDeterministic(slot) = verify_bank { + + // Since the unpack code collects all the appendvecs into one directory unpack_account_dir, we need to + // collect all the appendvecs in account_paths//snapshot/ into one directory for later comparison. + let storages_to_verify = unpack_dir.join("storages_to_verify"); + // Create the directory if it doesn't exist + std::fs::create_dir_all(&storages_to_verify).unwrap(); + + let slot = slot.to_string(); + let snapshot_slot_dir = snapshots_to_verify.as_ref().join(&slot); + + if let VerifyBank::NonDeterministic = verify_bank { // file contents may be different, but deserialized structs should be equal - let slot = slot.to_string(); - let snapshot_slot_dir = snapshots_to_verify.as_ref().join(&slot); let p1 = snapshots_to_verify.as_ref().join(&slot).join(&slot); let p2 = unpacked_snapshots.join(&slot).join(&slot); assert!(crate::serde_snapshot::compare_two_serialized_banks(&p1, &p2).unwrap()); std::fs::remove_file(p1).unwrap(); std::fs::remove_file(p2).unwrap(); + } - // The new the status_cache file is inside the slot directory together with the snapshot file. - // When unpacking an archive, the status_cache file from the archive is one-level up outside of - // the slot direcotry. - // The unpacked status_cache file need to be put back into the slot directory for the directory - // comparison to pass. - let existing_unpacked_status_cache_file = - unpacked_snapshots.join(SNAPSHOT_STATUS_CACHE_FILENAME); - let new_unpacked_status_cache_file = unpacked_snapshots - .join(&slot) - .join(SNAPSHOT_STATUS_CACHE_FILENAME); - fs::rename( - existing_unpacked_status_cache_file, - new_unpacked_status_cache_file, - ) - .unwrap(); + // The new the status_cache file is inside the slot directory together with the snapshot file. + // When unpacking an archive, the status_cache file from the archive is one-level up outside of + // the slot direcotry. + // The unpacked status_cache file need to be put back into the slot directory for the directory + // comparison to pass. + let existing_unpacked_status_cache_file = + unpacked_snapshots.join(SNAPSHOT_STATUS_CACHE_FILENAME); + let new_unpacked_status_cache_file = unpacked_snapshots + .join(&slot) + .join(SNAPSHOT_STATUS_CACHE_FILENAME); + fs::rename( + existing_unpacked_status_cache_file, + new_unpacked_status_cache_file, + ) + .unwrap(); - let accounts_hardlinks_dir = snapshot_slot_dir.join("accounts_hardlinks"); - if accounts_hardlinks_dir.is_dir() { - // This directory contain symlinks to all /snapshot/ directories. - // They should all be removed. - for entry in fs::read_dir(&accounts_hardlinks_dir).unwrap() { - let dst_path = fs::read_link(entry.unwrap().path()).unwrap(); - fs::remove_dir_all(dst_path).unwrap(); + let accounts_hardlinks_dir = snapshot_slot_dir.join("accounts_hardlinks"); + if accounts_hardlinks_dir.is_dir() { + // This directory contain symlinks to all /snapshot/ directories. + for entry in fs::read_dir(&accounts_hardlinks_dir).unwrap() { + let link_dst_path = fs::read_link(entry.unwrap().path()).unwrap(); + // Copy all the files in dst_path into the storages_to_verify directory. + for entry in fs::read_dir(&link_dst_path).unwrap() { + let src_path = entry.unwrap().path(); + let dst_path = storages_to_verify.join(src_path.file_name().unwrap()); + fs::copy(src_path, dst_path).unwrap(); } - std::fs::remove_dir_all(accounts_hardlinks_dir).unwrap(); } + std::fs::remove_dir_all(accounts_hardlinks_dir).unwrap(); + } - let version_path = snapshot_slot_dir.join(SNAPSHOT_VERSION_FILENAME); - if version_path.is_file() { - std::fs::remove_file(version_path).unwrap(); - } + let version_path = snapshot_slot_dir.join(SNAPSHOT_VERSION_FILENAME); + if version_path.is_file() { + std::fs::remove_file(version_path).unwrap(); + } - let state_complete_path = snapshot_slot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME); - if state_complete_path.is_file() { - std::fs::remove_file(state_complete_path).unwrap(); - } + let state_complete_path = snapshot_slot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME); + if state_complete_path.is_file() { + std::fs::remove_file(state_complete_path).unwrap(); } assert!(!dir_diff::is_different(&snapshots_to_verify, unpacked_snapshots).unwrap()); @@ -2931,9 +2942,9 @@ pub fn verify_snapshot_archive( // Remove the empty "accounts" directory for the directory comparison below. // In some test cases the directory to compare do not come from unarchiving. // Ignore the error when this directory does not exist. - _ = std::fs::remove_dir(account_dir.join("accounts")); + _ = std::fs::remove_dir(unpack_account_dir.join("accounts")); // Check the account entries are the same - assert!(!dir_diff::is_different(&storages_to_verify, account_dir).unwrap()); + assert!(!dir_diff::is_different(&storages_to_verify, unpack_account_dir).unwrap()); } /// Remove outdated bank snapshots @@ -3250,6 +3261,56 @@ pub fn create_tmp_accounts_dir_for_tests() -> (TempDir, PathBuf) { (tmp_dir, account_dir) } +pub fn create_snapshot_dirs_for_tests( + genesis_config: &GenesisConfig, + bank_snapshots_dir: impl AsRef, + num_total: usize, + num_posts: usize, +) -> Bank { + let mut bank = Arc::new(Bank::new_for_tests(genesis_config)); + + let collecter_id = Pubkey::new_unique(); + let snapshot_version = SnapshotVersion::default(); + + // loop to create the banks at slot 1 to num_total + for _ in 0..num_total { + // prepare the bank + bank = Arc::new(Bank::new_from_parent(&bank, &collecter_id, bank.slot() + 1)); + bank.fill_bank_with_ticks_for_tests(); + bank.squash(); + bank.force_flush_accounts_cache(); + bank.update_accounts_hash(CalcAccountsHashDataSource::Storages, false, false); + + let snapshot_storages = bank.get_snapshot_storages(None); + let slot_deltas = bank.status_cache.read().unwrap().root_slot_deltas(); + add_bank_snapshot( + &bank_snapshots_dir, + &bank, + &snapshot_storages, + snapshot_version, + slot_deltas, + ) + .unwrap(); + + if bank.slot() as usize > num_posts { + continue; // leave the snapshot dir at PRE stage + } + + // Reserialize the snapshot dir to convert it from PRE to POST, because only the POST type can be used + // to construct a bank. + assert!( + crate::serde_snapshot::reserialize_bank_with_new_accounts_hash( + &bank_snapshots_dir, + bank.slot(), + &bank.get_accounts_hash().unwrap(), + None + ) + ); + } + + Arc::try_unwrap(bank).unwrap() +} + #[cfg(test)] mod tests { use { @@ -5120,55 +5181,6 @@ mod tests { assert!(matches!(ret, Err(SnapshotError::InvalidAppendVecPath(_)))); } - fn create_snapshot_dirs_for_tests( - genesis_config: &GenesisConfig, - bank_snapshots_dir: impl AsRef, - num_total: usize, - num_posts: usize, - ) -> Bank { - let mut bank = Arc::new(Bank::new_for_tests(genesis_config)); - - let collecter_id = Pubkey::new_unique(); - let snapshot_version = SnapshotVersion::default(); - - // loop to create the banks at slot 1 to num_total - for _ in 0..num_total { - // prepare the bank - bank = Arc::new(Bank::new_from_parent(&bank, &collecter_id, bank.slot() + 1)); - bank.fill_bank_with_ticks_for_tests(); - bank.squash(); - bank.force_flush_accounts_cache(); - - let snapshot_storages = bank.get_snapshot_storages(None); - let slot_deltas = bank.status_cache.read().unwrap().root_slot_deltas(); - add_bank_snapshot( - &bank_snapshots_dir, - &bank, - &snapshot_storages, - snapshot_version, - slot_deltas, - ) - .unwrap(); - - if bank.slot() as usize > num_posts { - continue; // leave the snapshot dir at PRE stage - } - - // Reserialize the snapshot dir to convert it from PRE to POST, because only the POST type can be used - // to construct a bank. - assert!( - crate::serde_snapshot::reserialize_bank_with_new_accounts_hash( - &bank_snapshots_dir, - bank.slot(), - &AccountsHash(Hash::new_unique()), - None - ) - ); - } - - Arc::try_unwrap(bank).unwrap() - } - #[test] fn test_get_highest_bank_snapshot() { solana_logger::setup();