From 81489ccb76cb776972fa5b32e1d3e56cdff82f19 Mon Sep 17 00:00:00 2001 From: Ryo Onodera Date: Fri, 9 Oct 2020 15:05:41 +0900 Subject: [PATCH] Only fetch snapshot if it's newer than local (#12663) * Only fetch snapshot if it's newer than local * Prefer as_ref over clone * More nits * Don't wait forever for newer snapshot --- validator/src/main.rs | 60 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/validator/src/main.rs b/validator/src/main.rs index eb5f229d3..ad34fedcd 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -29,6 +29,7 @@ use solana_perf::recycler::enable_recycler_warming; use solana_runtime::{ bank_forks::{CompressionType, SnapshotConfig, SnapshotVersion}, hardened_unpack::{unpack_genesis_archive, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE}, + snapshot_utils::get_highest_snapshot_archive_path, }; use solana_sdk::{ clock::Slot, @@ -146,8 +147,11 @@ fn get_rpc_node( blacklisted_rpc_nodes: &mut HashSet, snapshot_not_required: bool, no_untrusted_rpc: bool, -) -> (ContactInfo, Option<(Slot, Hash)>) { + ledger_path: &std::path::Path, +) -> Option<(ContactInfo, Option<(Slot, Hash)>)> { let mut blacklist_timeout = Instant::now(); + let mut newer_cluster_snapshot_timeout = None; + let mut retry_reason = None; loop { sleep(Duration::from_secs(1)); info!("\n{}", cluster_info.contact_info_trace()); @@ -174,8 +178,12 @@ fn get_rpc_node( } info!( - "Searching for an RPC service with shred version {}...", - shred_version + "Searching for an RPC service with shred version {}{}...", + shred_version, + retry_reason + .as_ref() + .map(|s| format!(" (Retrying: {})", s)) + .unwrap_or_default() ); let rpc_peers = cluster_info @@ -204,17 +212,21 @@ fn get_rpc_node( ); if rpc_peers_blacklisted == rpc_peers_total { - // If all nodes are blacklisted and no additional nodes are discovered after 60 seconds, - // remove the blacklist and try them all again - if blacklist_timeout.elapsed().as_secs() > 60 { - 
info!("Blacklist timeout expired"); + retry_reason = if blacklist_timeout.elapsed().as_secs() > 60 { + // If all nodes are blacklisted and no additional nodes are discovered after 60 seconds, + // remove the blacklist and try them all again blacklisted_rpc_nodes.clear(); - } + Some("Blacklist timeout expired".to_owned()) + } else { + Some("Wait for trusted rpc peers".to_owned()) + }; continue; } blacklist_timeout = Instant::now(); - let mut highest_snapshot_hash: Option<(Slot, Hash)> = None; + let mut highest_snapshot_hash: Option<(Slot, Hash)> = + get_highest_snapshot_archive_path(ledger_path) + .map(|(_path, (slot, hash, _compression))| (slot, hash)); let eligible_rpc_peers = if snapshot_not_required { rpc_peers } else { @@ -256,9 +268,25 @@ fn get_rpc_node( match highest_snapshot_hash { None => { assert!(eligible_rpc_peers.is_empty()); - info!("No snapshots available"); } Some(highest_snapshot_hash) => { + if eligible_rpc_peers.is_empty() { + match newer_cluster_snapshot_timeout { + None => newer_cluster_snapshot_timeout = Some(Instant::now()), + Some(newer_cluster_snapshot_timeout) => { + if newer_cluster_snapshot_timeout.elapsed().as_secs() > 180 { + warn!("giving up newer snapshot from the cluster"); + return None; + } + } + } + retry_reason = Some(format!( + "Wait for newer snapshot than local: {:?}", + highest_snapshot_hash + )); + continue; + } + info!( "Highest available snapshot slot is {}, available from {} node{}: {:?}", highest_snapshot_hash.0, @@ -281,7 +309,9 @@ fn get_rpc_node( if !eligible_rpc_peers.is_empty() { let contact_info = &eligible_rpc_peers[thread_rng().gen_range(0, eligible_rpc_peers.len())]; - return (contact_info.clone(), highest_snapshot_hash); + return Some((contact_info.clone(), highest_snapshot_hash)); + } else { + retry_reason = Some("No snapshots available".to_owned()); } } } @@ -629,14 +659,20 @@ fn rpc_bootstrap( )); } - let (rpc_contact_info, snapshot_hash) = get_rpc_node( + let rpc_node_details = get_rpc_node( 
&gossip.as_ref().unwrap().0, &cluster_entrypoint.gossip, &validator_config, &mut blacklisted_rpc_nodes, bootstrap_config.no_snapshot_fetch, bootstrap_config.no_untrusted_rpc, + ledger_path, ); + if rpc_node_details.is_none() { + return; + } + let (rpc_contact_info, snapshot_hash) = rpc_node_details.unwrap(); + info!( "Using RPC service from node {}: {:?}", rpc_contact_info.id, rpc_contact_info.rpc