Do not check for full snapshot interval during bootstrap (#21300)
This commit is contained in:
parent
3aa49e2c69
commit
3865d9e80d
|
@ -994,16 +994,14 @@ mod with_incremental_snapshots {
|
||||||
/// Get an RPC peer node to download from.
|
/// Get an RPC peer node to download from.
|
||||||
///
|
///
|
||||||
/// This function finds the highest compatible snapshots from the cluster, then picks one peer
|
/// This function finds the highest compatible snapshots from the cluster, then picks one peer
|
||||||
/// at random to use (return). The chosen snapshots are:
|
/// at random to use (return).
|
||||||
/// 1. have a full snapshot slot that is a multiple of our full snapshot interval
|
|
||||||
/// 2. have the highest full snapshot slot
|
|
||||||
/// 3. have the highest incremental snapshot slot
|
|
||||||
///
|
///
|
||||||
/// NOTE: If the node has configured a full snapshot interval that is non-standard, it is
|
/// NOTE: If the node has configured a full snapshot interval that is not common and/or
|
||||||
/// possible that there are no compatible snapshot hashes available. At that time, a node may
|
/// different from its known validators, then it is possible that there are no compatible
|
||||||
/// (1) try again, (2) change its full snapshot interval back to a standard/default value, or
|
/// snapshot hashes available. At that time, a node may (1) try again, (2) change its full
|
||||||
/// (3) disable downloading an incremental snapshot and instead download the highest full
|
/// snapshot interval back to a standard/default value, or (3) disable downloading an
|
||||||
/// snapshot hash, regardless of compatibility.
|
/// incremental snapshot and instead download the highest full snapshot hash, regardless of
|
||||||
|
/// compatibility.
|
||||||
fn get_rpc_node(
|
fn get_rpc_node(
|
||||||
cluster_info: &ClusterInfo,
|
cluster_info: &ClusterInfo,
|
||||||
cluster_entrypoints: &[ContactInfo],
|
cluster_entrypoints: &[ContactInfo],
|
||||||
|
@ -1057,14 +1055,7 @@ mod with_incremental_snapshots {
|
||||||
None => newer_cluster_snapshot_timeout = Some(Instant::now()),
|
None => newer_cluster_snapshot_timeout = Some(Instant::now()),
|
||||||
Some(newer_cluster_snapshot_timeout) => {
|
Some(newer_cluster_snapshot_timeout) => {
|
||||||
if newer_cluster_snapshot_timeout.elapsed().as_secs() > 180 {
|
if newer_cluster_snapshot_timeout.elapsed().as_secs() > 180 {
|
||||||
warn!(
|
warn!("Giving up, did not get newer snapshots from the cluster.");
|
||||||
"Giving up, did not get newer snapshots from the cluster. \
|
|
||||||
If this persists, it may be that there are no nodes with \
|
|
||||||
the same full snapshot interval. Try using the default \
|
|
||||||
value (i.e. do not set --full-snapshot-interval-slots). \
|
|
||||||
Alternatively, disable fetching incremental snapshots, just \
|
|
||||||
fetch full snapshots, by setting --no-incremental-snapshot-fetch."
|
|
||||||
);
|
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1102,75 +1093,33 @@ mod with_incremental_snapshots {
|
||||||
/// Get peer snapshot hashes
|
/// Get peer snapshot hashes
|
||||||
///
|
///
|
||||||
/// The result is a vector of peers with snapshot hashes that:
|
/// The result is a vector of peers with snapshot hashes that:
|
||||||
/// 1. have a full snapshot slot that is a multiple of our full snapshot interval
|
/// 1. match a snapshot hash from the known validators
|
||||||
/// 2. have the highest full snapshot slot
|
/// 2. have the highest full snapshot slot
|
||||||
/// 3. have the highest incremental snapshot slot
|
/// 3. have the highest incremental snapshot slot
|
||||||
|
///
|
||||||
|
/// NOTE: If the node's full snapshot interval is different from its known validators, it may
|
||||||
|
/// be more likely to download full snapshots more often than necessary.
|
||||||
fn get_peer_snapshot_hashes(
|
fn get_peer_snapshot_hashes(
|
||||||
cluster_info: &ClusterInfo,
|
cluster_info: &ClusterInfo,
|
||||||
validator_config: &ValidatorConfig,
|
validator_config: &ValidatorConfig,
|
||||||
bootstrap_config: &RpcBootstrapConfig,
|
bootstrap_config: &RpcBootstrapConfig,
|
||||||
rpc_peers: &[ContactInfo],
|
rpc_peers: &[ContactInfo],
|
||||||
) -> Vec<PeerSnapshotHash> {
|
) -> Vec<PeerSnapshotHash> {
|
||||||
// Which strategy to use for getting the peer snapshot hashes? The standard way is what's
|
|
||||||
// described in the function's documentation. However, if there are no known peers that
|
|
||||||
// have enabled incremental snapshots, it may be possible that there are no snapshot hashes
|
|
||||||
// with a slot that is a multiple of our full snapshot archive interval. If that happens,
|
|
||||||
// we retry with the "fallback" strategy that *does not* filter based on full snapshot
|
|
||||||
// archive interval.
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
|
||||||
enum Strategy {
|
|
||||||
Standard,
|
|
||||||
WithoutFullSnapshotArchiveIntervalFiltering,
|
|
||||||
}
|
|
||||||
|
|
||||||
for strategy in [
|
|
||||||
Strategy::Standard,
|
|
||||||
Strategy::WithoutFullSnapshotArchiveIntervalFiltering,
|
|
||||||
] {
|
|
||||||
let known_snapshot_hashes = get_known_snapshot_hashes(cluster_info, validator_config);
|
let known_snapshot_hashes = get_known_snapshot_hashes(cluster_info, validator_config);
|
||||||
|
|
||||||
let mut peer_snapshot_hashes = get_known_peer_snapshot_hashes(
|
let mut peer_snapshot_hashes = get_eligible_peer_snapshot_hashes(
|
||||||
cluster_info,
|
cluster_info,
|
||||||
validator_config,
|
validator_config,
|
||||||
bootstrap_config,
|
bootstrap_config,
|
||||||
rpc_peers,
|
rpc_peers,
|
||||||
);
|
);
|
||||||
retain_known_peer_snapshot_hashes(&known_snapshot_hashes, &mut peer_snapshot_hashes);
|
retain_known_peer_snapshot_hashes(&known_snapshot_hashes, &mut peer_snapshot_hashes);
|
||||||
|
|
||||||
if strategy == Strategy::Standard {
|
|
||||||
// The standard strategy is to retain only the peer snapshot hashes with a multiple
|
|
||||||
// of our full snapshot archive interval
|
|
||||||
retain_peer_snapshot_hashes_with_a_multiple_of_full_snapshot_archive_interval(
|
|
||||||
validator_config
|
|
||||||
.snapshot_config
|
|
||||||
.as_ref()
|
|
||||||
.unwrap()
|
|
||||||
.full_snapshot_archive_interval_slots,
|
|
||||||
&mut peer_snapshot_hashes,
|
|
||||||
);
|
|
||||||
|
|
||||||
// However, if at this point peer_snapshot_hashes is empty, then retry from the
|
|
||||||
// beginning with the "fallback" strategy and *do not* filter based on full
|
|
||||||
// snapshot archive interval.
|
|
||||||
if peer_snapshot_hashes.is_empty() {
|
|
||||||
info!(
|
|
||||||
"No peer snapshot hashes found with a slot that is a multiple of our \
|
|
||||||
full snapshot archive interval. Retrying, but without filtering based \
|
|
||||||
on full snapshot archive interval."
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
retain_peer_snapshot_hashes_with_highest_full_snapshot_slot(&mut peer_snapshot_hashes);
|
retain_peer_snapshot_hashes_with_highest_full_snapshot_slot(&mut peer_snapshot_hashes);
|
||||||
retain_peer_snapshot_hashes_with_highest_incremental_snapshot_slot(
|
retain_peer_snapshot_hashes_with_highest_incremental_snapshot_slot(
|
||||||
&mut peer_snapshot_hashes,
|
&mut peer_snapshot_hashes,
|
||||||
);
|
);
|
||||||
|
|
||||||
return peer_snapshot_hashes;
|
peer_snapshot_hashes
|
||||||
}
|
|
||||||
|
|
||||||
unreachable!("for-loop above is guaranteed to return");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get the snapshot hashes from known peers.
|
/// Get the snapshot hashes from known peers.
|
||||||
|
@ -1250,10 +1199,10 @@ mod with_incremental_snapshots {
|
||||||
known_snapshot_hashes
|
known_snapshot_hashes
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get known snapshot hashes from all the eligible peers. This fn will get only one
|
/// Get snapshot hashes from all the eligible peers. This fn will get only one
|
||||||
/// snapshot hash per peer (the one with the highest slot). This may be just a full snapshot
|
/// snapshot hash per peer (the one with the highest slot). This may be just a full snapshot
|
||||||
/// hash, or a combo full (i.e. base) snapshot hash and incremental snapshot hash.
|
/// hash, or a combo full (i.e. base) snapshot hash and incremental snapshot hash.
|
||||||
fn get_known_peer_snapshot_hashes(
|
fn get_eligible_peer_snapshot_hashes(
|
||||||
cluster_info: &ClusterInfo,
|
cluster_info: &ClusterInfo,
|
||||||
validator_config: &ValidatorConfig,
|
validator_config: &ValidatorConfig,
|
||||||
bootstrap_config: &RpcBootstrapConfig,
|
bootstrap_config: &RpcBootstrapConfig,
|
||||||
|
@ -1319,22 +1268,6 @@ mod with_incremental_snapshots {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retain the peer snapshot hashes with a full snapshot slot that is a multiple of the full
|
|
||||||
/// snapshot archive interval
|
|
||||||
fn retain_peer_snapshot_hashes_with_a_multiple_of_full_snapshot_archive_interval(
|
|
||||||
full_snapshot_archive_interval: Slot,
|
|
||||||
peer_snapshot_hashes: &mut Vec<PeerSnapshotHash>,
|
|
||||||
) {
|
|
||||||
peer_snapshot_hashes.retain(|peer_snapshot_hash| {
|
|
||||||
peer_snapshot_hash.snapshot_hash.full.0 % full_snapshot_archive_interval == 0
|
|
||||||
});
|
|
||||||
|
|
||||||
trace!(
|
|
||||||
"retain peer snapshot hashes with a multiple of full snapshot archive interval: {:?}",
|
|
||||||
&peer_snapshot_hashes
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retain the peer snapshot hashes with the highest full snapshot slot
|
/// Retain the peer snapshot hashes with the highest full snapshot slot
|
||||||
fn retain_peer_snapshot_hashes_with_highest_full_snapshot_slot(
|
fn retain_peer_snapshot_hashes_with_highest_full_snapshot_slot(
|
||||||
peer_snapshot_hashes: &mut Vec<PeerSnapshotHash>,
|
peer_snapshot_hashes: &mut Vec<PeerSnapshotHash>,
|
||||||
|
@ -1777,35 +1710,6 @@ mod with_incremental_snapshots {
|
||||||
assert_eq!(expected, actual);
|
assert_eq!(expected, actual);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_retain_peer_snapshot_hashes_with_a_multiple_of_full_snapshot_interval() {
|
|
||||||
let full_snapshot_archive_interval = 100_000;
|
|
||||||
let contact_info = default_contact_info_for_tests();
|
|
||||||
let per_snapshot_hashes = vec![
|
|
||||||
// good full snapshot hash slots
|
|
||||||
PeerSnapshotHash::new(contact_info.clone(), (100_000, Hash::default()), None),
|
|
||||||
PeerSnapshotHash::new(contact_info.clone(), (200_000, Hash::default()), None),
|
|
||||||
PeerSnapshotHash::new(contact_info.clone(), (300_000, Hash::default()), None),
|
|
||||||
// bad full snapshot hash slots
|
|
||||||
PeerSnapshotHash::new(contact_info.clone(), (999, Hash::default()), None),
|
|
||||||
PeerSnapshotHash::new(contact_info.clone(), (100_777, Hash::default()), None),
|
|
||||||
PeerSnapshotHash::new(contact_info.clone(), (101_000, Hash::default()), None),
|
|
||||||
PeerSnapshotHash::new(contact_info.clone(), (999_000, Hash::default()), None),
|
|
||||||
];
|
|
||||||
|
|
||||||
let expected = vec![
|
|
||||||
PeerSnapshotHash::new(contact_info.clone(), (100_000, Hash::default()), None),
|
|
||||||
PeerSnapshotHash::new(contact_info.clone(), (200_000, Hash::default()), None),
|
|
||||||
PeerSnapshotHash::new(contact_info, (300_000, Hash::default()), None),
|
|
||||||
];
|
|
||||||
let mut actual = per_snapshot_hashes;
|
|
||||||
retain_peer_snapshot_hashes_with_a_multiple_of_full_snapshot_archive_interval(
|
|
||||||
full_snapshot_archive_interval,
|
|
||||||
&mut actual,
|
|
||||||
);
|
|
||||||
assert_eq!(expected, actual);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_retain_peer_snapshot_hashes_with_highest_full_snapshot_slot() {
|
fn test_retain_peer_snapshot_hashes_with_highest_full_snapshot_slot() {
|
||||||
let contact_info = default_contact_info_for_tests();
|
let contact_info = default_contact_info_for_tests();
|
||||||
|
|
Loading…
Reference in New Issue