Issue #17008 -- make snapshot archives to hold on to configurable. (#17158)

* purge_old_snapshot_archives is changed to take an extra argument 'maximum_snapshots_to_retain' to control the max number of latest snapshot archives to retain. Note the oldest snapshot is always retained as before and is not subjected to this new options.
* The validator and ledger-tool executables are modified with a CLI argument --maximum-snapshots-to-retain. And the options are propagated down the call chains. Their corresponding shell scripts were changed accordingly.
* SnapshotConfig is modified to have an extra field for the maximum_snapshots_to_retain
* Unit tests are developed to cover purge_old_snapshot_archives
This commit is contained in:
Lijun Wang 2021-05-12 10:32:27 -07:00 committed by GitHub
parent e3d722bb42
commit 9c42a89a43
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 149 additions and 21 deletions

View File

@ -482,7 +482,10 @@ mod tests {
genesis_utils::{create_genesis_config, GenesisConfigInfo},
get_tmp_ledger_path,
};
use solana_runtime::{bank::Bank, bank_forks::ArchiveFormat, snapshot_utils::SnapshotVersion};
use solana_runtime::{
bank::Bank, bank_forks::ArchiveFormat, snapshot_utils::SnapshotVersion,
snapshot_utils::DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
};
use solana_sdk::{genesis_config::ClusterType, signature::Signer};
use std::io::Write;
use std::net::{IpAddr, Ipv4Addr};
@ -581,6 +584,7 @@ mod tests {
snapshot_path: PathBuf::from("/"),
archive_format: ArchiveFormat::TarBzip2,
snapshot_version: SnapshotVersion::default(),
maximum_snapshots_to_retain: DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
}),
bank_forks,
RpcHealth::stub(),

View File

@ -22,6 +22,7 @@ impl SnapshotPackagerService {
starting_snapshot_hash: Option<(Slot, Hash)>,
exit: &Arc<AtomicBool>,
cluster_info: &Arc<ClusterInfo>,
maximum_snapshots_to_retain: usize,
) -> Self {
let exit = exit.clone();
let cluster_info = cluster_info.clone();
@ -41,9 +42,10 @@ impl SnapshotPackagerService {
let snapshot_package = pending_snapshot_package.lock().unwrap().take();
if let Some(snapshot_package) = snapshot_package {
if let Err(err) =
snapshot_utils::archive_snapshot_package(&snapshot_package)
{
if let Err(err) = snapshot_utils::archive_snapshot_package(
&snapshot_package,
maximum_snapshots_to_retain,
) {
warn!("Failed to create snapshot archive: {}", err);
} else {
hashes.push((snapshot_package.slot, snapshot_package.hash));
@ -173,7 +175,11 @@ mod tests {
);
// Make tarball from packageable snapshot
snapshot_utils::archive_snapshot_package(&snapshot_package).unwrap();
snapshot_utils::archive_snapshot_package(
&snapshot_package,
snapshot_utils::DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
)
.unwrap();
// before we compare, stick an empty status_cache in this dir so that the package comparison works
// This is needed since the status_cache is added by the packager and is not collected from

View File

@ -12,6 +12,7 @@ use {
bank_forks::{ArchiveFormat, SnapshotConfig, SnapshotVersion},
genesis_utils::create_genesis_config_with_leader_ex,
hardened_unpack::MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
snapshot_utils::DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
},
solana_sdk::{
account::{Account, AccountSharedData},
@ -492,6 +493,7 @@ impl TestValidator {
snapshot_package_output_path: ledger_path.to_path_buf(),
archive_format: ArchiveFormat::Tar,
snapshot_version: SnapshotVersion::default(),
maximum_snapshots_to_retain: DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
}),
enforce_ulimit_nofile: false,
warp_slot: config.warp_slot,

View File

@ -626,6 +626,7 @@ impl Validator {
snapshot_hash,
&exit,
&cluster_info,
snapshot_config.maximum_snapshots_to_retain,
);
(
Some(snapshot_packager_service),
@ -1166,6 +1167,7 @@ fn new_banks_from_ledger(
&snapshot_config.snapshot_package_output_path,
snapshot_config.archive_format,
Some(&bank_forks.root_bank().get_thread_pool()),
snapshot_config.maximum_snapshots_to_retain,
)
.unwrap_or_else(|err| {
error!("Unable to create snapshot: {}", err);

View File

@ -52,7 +52,7 @@ mod tests {
bank_forks::{ArchiveFormat, BankForks, SnapshotConfig},
genesis_utils::{create_genesis_config, GenesisConfigInfo},
snapshot_utils,
snapshot_utils::SnapshotVersion,
snapshot_utils::{SnapshotVersion, DEFAULT_MAX_SNAPSHOTS_TO_RETAIN},
status_cache::MAX_CACHE_ENTRIES,
};
use solana_sdk::{
@ -120,6 +120,7 @@ mod tests {
snapshot_path: PathBuf::from(snapshot_dir.path()),
archive_format: ArchiveFormat::TarBzip2,
snapshot_version,
maximum_snapshots_to_retain: DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
};
bank_forks.set_snapshot_config(Some(snapshot_config.clone()));
SnapshotTestConfig {
@ -248,7 +249,11 @@ mod tests {
snapshot_package,
Some(&last_bank.get_thread_pool()),
);
snapshot_utils::archive_snapshot_package(&snapshot_package).unwrap();
snapshot_utils::archive_snapshot_package(
&snapshot_package,
DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
)
.unwrap();
// Restore bank from snapshot
let account_paths = &[snapshot_test_config.accounts_dir.path().to_path_buf()];
@ -442,6 +447,7 @@ mod tests {
None,
&exit,
&cluster_info,
DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
);
let thread_pool = accounts_db::make_min_priority_thread_pool();

View File

@ -178,8 +178,9 @@ pub fn download_snapshot(
snapshot_output_dir: &Path,
desired_snapshot_hash: (Slot, Hash),
use_progress_bar: bool,
maximum_snapshots_to_retain: usize,
) -> Result<(), String> {
snapshot_utils::purge_old_snapshot_archives(snapshot_output_dir);
snapshot_utils::purge_old_snapshot_archives(snapshot_output_dir, maximum_snapshots_to_retain);
for compression in &[
ArchiveFormat::TarZstd,

View File

@ -28,7 +28,7 @@ use solana_runtime::{
bank_forks::{ArchiveFormat, BankForks, SnapshotConfig},
hardened_unpack::{open_genesis_config, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE},
snapshot_utils,
snapshot_utils::SnapshotVersion,
snapshot_utils::{SnapshotVersion, DEFAULT_MAX_SNAPSHOTS_TO_RETAIN},
};
use solana_sdk::{
account::{AccountSharedData, ReadableAccount, WritableAccount},
@ -674,6 +674,7 @@ fn load_bank_forks(
snapshot_path,
archive_format: ArchiveFormat::TarBzip2,
snapshot_version: SnapshotVersion::default(),
maximum_snapshots_to_retain: DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
})
};
let account_paths = if let Some(account_paths) = arg_matches.value_of("account_paths") {
@ -802,6 +803,14 @@ fn main() {
.default_value(SnapshotVersion::default().into())
.help("Output snapshot version");
let default_max_snapshot_to_retain = &DEFAULT_MAX_SNAPSHOTS_TO_RETAIN.to_string();
let maximum_snapshots_to_retain_arg = Arg::with_name("maximum_snapshots_to_retain")
.long("maximum-snapshots-to-retain")
.value_name("NUMBER")
.takes_value(true)
.default_value(&default_max_snapshot_to_retain)
.help("Maximum number of snapshots to hold on to during snapshot purge");
let rent = Rent::default();
let default_bootstrap_validator_lamports = &sol_to_lamports(500.0)
.max(VoteState::get_rent_exempt_reserve(&rent))
@ -1073,6 +1082,7 @@ fn main() {
.arg(&hard_forks_arg)
.arg(&max_genesis_archive_unpacked_size_arg)
.arg(&snapshot_version_arg)
.arg(&maximum_snapshots_to_retain_arg)
.arg(
Arg::with_name("snapshot_slot")
.index(1)
@ -1848,6 +1858,8 @@ fn main() {
})
});
let maximum_snapshots_to_retain =
value_t_or_exit!(arg_matches, "maximum_snapshots_to_retain", usize);
let genesis_config = open_genesis_config_by(&ledger_path, arg_matches);
let blockstore = open_blockstore(
&ledger_path,
@ -2062,6 +2074,7 @@ fn main() {
output_directory,
ArchiveFormat::TarZstd,
None,
maximum_snapshots_to_retain,
)
.unwrap_or_else(|err| {
eprintln!("Unable to create snapshot: {}", err);

View File

@ -1684,6 +1684,7 @@ fn test_snapshot_download() {
&validator_archive_path,
archive_snapshot_hash,
false,
snapshot_utils::DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
)
.unwrap();
@ -3134,6 +3135,7 @@ fn setup_snapshot_validator_config(
snapshot_path: PathBuf::from(snapshot_dir.path()),
archive_format: ArchiveFormat::TarBzip2,
snapshot_version: snapshot_utils::SnapshotVersion::default(),
maximum_snapshots_to_retain: snapshot_utils::DEFAULT_MAX_SNAPSHOTS_TO_RETAIN,
};
// Create the account paths

View File

@ -72,6 +72,9 @@ while [[ -n $1 ]]; do
elif [[ $1 == --accounts ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 == --maximum-snapshots-to-retain ]]; then
args+=("$1" "$2")
shift 2
else
echo "Unknown argument: $1"
$program --help

View File

@ -119,6 +119,9 @@ while [[ -n $1 ]]; do
elif [[ $1 = --snapshot-interval-slots ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --maximum-snapshots-to-retain ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --limit-ledger-size ]]; then
args+=("$1" "$2")
shift 2

View File

@ -40,6 +40,9 @@ pub struct SnapshotConfig {
// Snapshot version to generate
pub snapshot_version: SnapshotVersion,
// Maximum number of snapshots to retain
pub maximum_snapshots_to_retain: usize,
}
pub struct BankForks {

View File

@ -21,6 +21,7 @@ use {
solana_measure::measure::Measure,
solana_sdk::{clock::Slot, genesis_config::GenesisConfig, hash::Hash, pubkey::Pubkey},
std::{
cmp::max,
cmp::Ordering,
collections::HashSet,
fmt,
@ -44,6 +45,7 @@ const MAX_SNAPSHOT_DATA_FILE_SIZE: u64 = 32 * 1024 * 1024 * 1024; // 32 GiB
const VERSION_STRING_V1_2_0: &str = "1.2.0";
const DEFAULT_SNAPSHOT_VERSION: SnapshotVersion = SnapshotVersion::V1_2_0;
const TMP_SNAPSHOT_PREFIX: &str = "tmp-snapshot-";
pub const DEFAULT_MAX_SNAPSHOTS_TO_RETAIN: usize = 2;
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum SnapshotVersion {
@ -226,7 +228,10 @@ pub fn remove_tmp_snapshot_archives(snapshot_path: &Path) {
}
}
pub fn archive_snapshot_package(snapshot_package: &AccountsPackage) -> Result<()> {
pub fn archive_snapshot_package(
snapshot_package: &AccountsPackage,
maximum_snapshots_to_retain: usize,
) -> Result<()> {
info!(
"Generating snapshot archive for slot {}",
snapshot_package.slot
@ -362,7 +367,10 @@ pub fn archive_snapshot_package(snapshot_package: &AccountsPackage) -> Result<()
let metadata = fs::metadata(&archive_path)?;
fs::rename(&archive_path, &snapshot_package.tar_output_file)?;
purge_old_snapshot_archives(snapshot_package.tar_output_file.parent().unwrap());
purge_old_snapshot_archives(
snapshot_package.tar_output_file.parent().unwrap(),
maximum_snapshots_to_retain,
);
timer.stop();
info!(
@ -717,11 +725,20 @@ pub fn get_highest_snapshot_archive_path<P: AsRef<Path>>(
archives.into_iter().next()
}
pub fn purge_old_snapshot_archives<P: AsRef<Path>>(snapshot_output_dir: P) {
pub fn purge_old_snapshot_archives<P: AsRef<Path>>(
snapshot_output_dir: P,
maximum_snapshots_to_retain: usize,
) {
info!(
"Purging old snapshots in {:?}, retaining {}",
snapshot_output_dir.as_ref(),
maximum_snapshots_to_retain
);
let mut archives = get_snapshot_archives(snapshot_output_dir);
// Keep the oldest snapshot so we can always play the ledger from it.
archives.pop();
for old_archive in archives.into_iter().skip(2) {
let max_snaps = max(1, maximum_snapshots_to_retain);
for old_archive in archives.into_iter().skip(max_snaps) {
fs::remove_file(old_archive.0)
.unwrap_or_else(|err| info!("Failed to remove old snapshot: {:}", err));
}
@ -936,6 +953,7 @@ pub fn bank_to_snapshot_archive<P: AsRef<Path>, Q: AsRef<Path>>(
snapshot_package_output_path: Q,
archive_format: ArchiveFormat,
thread_pool: Option<&ThreadPool>,
maximum_snapshots_to_retain: usize,
) -> Result<PathBuf> {
let snapshot_version = snapshot_version.unwrap_or_default();
@ -964,7 +982,7 @@ pub fn bank_to_snapshot_archive<P: AsRef<Path>, Q: AsRef<Path>>(
let package = process_accounts_package_pre(package, thread_pool);
archive_snapshot_package(&package)?;
archive_snapshot_package(&package, maximum_snapshots_to_retain)?;
Ok(package.tar_output_file)
}
@ -1137,13 +1155,58 @@ mod tests {
snapshot_hash_of(&format!("snapshot-42-{}.tar", Hash::default())),
Some((42, Hash::default(), ArchiveFormat::Tar))
);
assert!(snapshot_hash_of(&format!(
"{}snapshot-42-{}.tar",
TMP_SNAPSHOT_PREFIX,
Hash::default()
))
.is_none());
assert!(snapshot_hash_of("invalid").is_none());
}
fn common_test_purge_old_snapshot_archives(
snapshot_names: &[&String],
maximum_snapshots_to_retain: usize,
expected_snapshots: &[&String],
) {
let temp_snap_dir = tempfile::TempDir::new().unwrap();
for snap_name in snapshot_names {
let snap_path = temp_snap_dir.path().join(&snap_name);
let mut _snap_file = File::create(snap_path);
}
purge_old_snapshot_archives(temp_snap_dir.path(), maximum_snapshots_to_retain);
let mut retained_snaps = HashSet::new();
for entry in fs::read_dir(temp_snap_dir.path()).unwrap() {
let entry_path_buf = entry.unwrap().path();
let entry_path = entry_path_buf.as_path();
let snapshot_name = entry_path
.file_name()
.unwrap()
.to_str()
.unwrap()
.to_string();
retained_snaps.insert(snapshot_name);
}
for snap_name in expected_snapshots {
assert!(retained_snaps.contains(snap_name.as_str()));
}
assert!(retained_snaps.len() == expected_snapshots.len());
}
#[test]
fn test_purge_old_snapshot_archives() {
// Create 3 snapshots, retaining 1,
// expecting the oldest 1 and the newest 1 are retained
let snap1_name = format!("snapshot-1-{}.tar.zst", Hash::default());
let snap2_name = format!("snapshot-3-{}.tar.zst", Hash::default());
let snap3_name = format!("snapshot-50-{}.tar.zst", Hash::default());
let snapshot_names = vec![&snap1_name, &snap2_name, &snap3_name];
let expected_snapshots = vec![&snap1_name, &snap3_name];
common_test_purge_old_snapshot_archives(&snapshot_names, 1, &expected_snapshots);
// retaining 0, the expectation is the same as for 1, as at least 1 newest is expected to be retained
common_test_purge_old_snapshot_archives(&snapshot_names, 0, &expected_snapshots);
// retaining 2, all three should be retained
let expected_snapshots = vec![&snap1_name, &snap2_name, &snap3_name];
common_test_purge_old_snapshot_archives(&snapshot_names, 2, &expected_snapshots);
}
}

View File

@ -44,7 +44,7 @@ use {
},
bank_forks::{ArchiveFormat, SnapshotConfig, SnapshotVersion},
hardened_unpack::{unpack_genesis_archive, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE},
snapshot_utils::get_highest_snapshot_archive_path,
snapshot_utils::{get_highest_snapshot_archive_path, DEFAULT_MAX_SNAPSHOTS_TO_RETAIN},
},
solana_sdk::{
clock::{Slot, DEFAULT_S_PER_SLOT},
@ -945,11 +945,19 @@ fn rpc_bootstrap(
gossip.take().unwrap();
cluster_info.save_contact_info();
gossip_exit_flag.store(true, Ordering::Relaxed);
let maximum_snapshots_to_retain = if let Some(snapshot_config) =
validator_config.snapshot_config.as_ref()
{
snapshot_config.maximum_snapshots_to_retain
} else {
DEFAULT_MAX_SNAPSHOTS_TO_RETAIN
};
let ret = download_snapshot(
&rpc_contact_info.rpc,
&snapshot_output_dir,
snapshot_hash,
use_progress_bar,
maximum_snapshots_to_retain,
);
gossip_service.join().unwrap();
ret
@ -1028,6 +1036,7 @@ pub fn main() {
.send_transaction_leader_forward_count
.to_string();
let default_rpc_threads = num_cpus::get().to_string();
let default_max_snapshot_to_retain = &DEFAULT_MAX_SNAPSHOTS_TO_RETAIN.to_string();
let matches = App::new(crate_name!()).about(crate_description!())
.version(solana_version::version!())
@ -1323,6 +1332,14 @@ pub fn main() {
.help("Number of slots between generating snapshots, \
0 to disable snapshots"),
)
.arg(
Arg::with_name("maximum_snapshots_to_retain")
.long("maximum-snapshots-to-retain")
.value_name("MAXIMUM_SNAPSHOTS_TO_RETAIN")
.takes_value(true)
.default_value(default_max_snapshot_to_retain)
.help("The maximum number of snapshots to hold on to when purging older snapshots.")
)
.arg(
Arg::with_name("contact_debug_interval")
.long("contact-debug-interval")
@ -2225,6 +2242,8 @@ pub fn main() {
let snapshot_interval_slots = value_t_or_exit!(matches, "snapshot_interval_slots", u64);
let maximum_local_snapshot_age = value_t_or_exit!(matches, "maximum_local_snapshot_age", u64);
let maximum_snapshots_to_retain =
value_t_or_exit!(matches, "maximum_snapshots_to_retain", usize);
let snapshot_output_dir = if matches.is_present("snapshots") {
PathBuf::from(matches.value_of("snapshots").unwrap())
} else {
@ -2269,6 +2288,7 @@ pub fn main() {
snapshot_package_output_path: snapshot_output_dir.clone(),
archive_format,
snapshot_version,
maximum_snapshots_to_retain,
});
validator_config.accounts_hash_interval_slots =