Add validator startup process reporting before RPC is available

This commit is contained in:
Michael Vines 2021-03-04 13:01:11 -08:00
parent 2cc695bb5d
commit bd13262b42
8 changed files with 247 additions and 126 deletions

View File

@ -3,7 +3,7 @@ use {
cluster_info::Node, cluster_info::Node,
gossip_service::discover_cluster, gossip_service::discover_cluster,
rpc::JsonRpcConfig, rpc::JsonRpcConfig,
validator::{Validator, ValidatorConfig, ValidatorExit}, validator::{Validator, ValidatorConfig, ValidatorExit, ValidatorStartProgress},
}, },
solana_client::rpc_client::RpcClient, solana_client::rpc_client::RpcClient,
solana_ledger::{blockstore::create_new_ledger, create_new_tmp_ledger}, solana_ledger::{blockstore::create_new_ledger, create_new_tmp_ledger},
@ -53,6 +53,7 @@ pub struct TestValidatorGenesis {
accounts: HashMap<Pubkey, Account>, accounts: HashMap<Pubkey, Account>,
programs: Vec<ProgramInfo>, programs: Vec<ProgramInfo>,
pub validator_exit: Arc<RwLock<ValidatorExit>>, pub validator_exit: Arc<RwLock<ValidatorExit>>,
pub start_progress: Arc<RwLock<ValidatorStartProgress>>,
} }
impl TestValidatorGenesis { impl TestValidatorGenesis {
@ -415,6 +416,7 @@ impl TestValidator {
vec![], vec![],
&validator_config, &validator_config,
true, // should_check_duplicate_instance true, // should_check_duplicate_instance
config.start_progress.clone(),
)); ));
// Needed to avoid panics in `solana-responder-gossip` in tests that create a number of // Needed to avoid panics in `solana-responder-gossip` in tests that create a number of

View File

@ -186,6 +186,32 @@ impl Default for ValidatorConfig {
} }
} }
/// `ValidatorStartProgress` contains status information that is surfaced to the node operator over
/// the admin RPC channel to help them to follow the general progress of node startup without
/// having to watch log messages.
///
/// The value is shared as an `Arc<RwLock<ValidatorStartProgress>>` and overwritten by the startup
/// code as it moves from one phase to the next.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
pub enum ValidatorStartProgress {
    /// Catch all, default state.
    Initializing,
    /// Reported by `rpc_bootstrap` when it starts a gossip node.
    SearchingForRpcService,
    /// Reported by `rpc_bootstrap`; `slot` is taken from the snapshot hash and `rpc_addr` from
    /// the contact info of the RPC node in use.
    DownloadingSnapshot { slot: Slot, rpc_addr: SocketAddr },
    /// Reported by `Validator::new` right before `backup_and_clear_blockstore` is called.
    CleaningBlockStore,
    /// Reported by `Validator::new` right before the account paths are cleaned up.
    CleaningAccounts,
    /// Reported on entry to `new_banks_from_ledger`.
    LoadingLedger,
    /// Reported by `Validator::new` once `new_banks_from_ledger` has returned.
    StartingServices,
    /// Validator halted due to `--dev-halt-at-slot` argument.
    Halted,
    /// Reported by `wait_for_supermajority` before it enters its wait loop.
    WaitingForSupermajority,
    /// `Running` is the terminal state once the validator fully starts and all services are
    /// operational.
    Running,
}
impl Default for ValidatorStartProgress {
fn default() -> Self {
Self::Initializing
}
}
#[derive(Default)] #[derive(Default)]
pub struct ValidatorExit { pub struct ValidatorExit {
exited: bool, exited: bool,
@ -270,6 +296,7 @@ impl Validator {
cluster_entrypoints: Vec<ContactInfo>, cluster_entrypoints: Vec<ContactInfo>,
config: &ValidatorConfig, config: &ValidatorConfig,
should_check_duplicate_instance: bool, should_check_duplicate_instance: bool,
start_progress: Arc<RwLock<ValidatorStartProgress>>,
) -> Self { ) -> Self {
let id = identity_keypair.pubkey(); let id = identity_keypair.pubkey();
assert_eq!(id, node.info.id); assert_eq!(id, node.info.id);
@ -309,6 +336,7 @@ impl Validator {
if let Some(shred_version) = config.expected_shred_version { if let Some(shred_version) = config.expected_shred_version {
if let Some(wait_for_supermajority_slot) = config.wait_for_supermajority { if let Some(wait_for_supermajority_slot) = config.wait_for_supermajority {
*start_progress.write().unwrap() = ValidatorStartProgress::CleaningBlockStore;
backup_and_clear_blockstore( backup_and_clear_blockstore(
ledger_path, ledger_path,
wait_for_supermajority_slot + 1, wait_for_supermajority_slot + 1,
@ -318,6 +346,7 @@ impl Validator {
} }
info!("Cleaning accounts paths.."); info!("Cleaning accounts paths..");
*start_progress.write().unwrap() = ValidatorStartProgress::CleaningAccounts;
let mut start = Measure::start("clean_accounts_paths"); let mut start = Measure::start("clean_accounts_paths");
for accounts_path in &config.account_paths { for accounts_path in &config.account_paths {
cleanup_accounts_path(accounts_path); cleanup_accounts_path(accounts_path);
@ -366,8 +395,11 @@ impl Validator {
config.poh_verify, config.poh_verify,
&exit, &exit,
config.enforce_ulimit_nofile, config.enforce_ulimit_nofile,
&start_progress,
); );
*start_progress.write().unwrap() = ValidatorStartProgress::StartingServices;
let leader_schedule_cache = Arc::new(leader_schedule_cache); let leader_schedule_cache = Arc::new(leader_schedule_cache);
let bank = bank_forks.working_bank(); let bank = bank_forks.working_bank();
if let Some(ref shrink_paths) = config.account_shrink_paths { if let Some(ref shrink_paths) = config.account_shrink_paths {
@ -542,6 +574,7 @@ impl Validator {
// Park with the RPC service running, ready for inspection! // Park with the RPC service running, ready for inspection!
warn!("Validator halted"); warn!("Validator halted");
*start_progress.write().unwrap() = ValidatorStartProgress::Halted;
std::thread::park(); std::thread::park();
} }
@ -593,7 +626,13 @@ impl Validator {
check_poh_speed(&genesis_config, None); check_poh_speed(&genesis_config, None);
} }
if wait_for_supermajority(config, &bank, &cluster_info, rpc_override_health_check) { if wait_for_supermajority(
config,
&bank,
&cluster_info,
rpc_override_health_check,
&start_progress,
) {
abort(); abort();
} }
@ -707,6 +746,7 @@ impl Validator {
); );
datapoint_info!("validator-new", ("id", id.to_string(), String)); datapoint_info!("validator-new", ("id", id.to_string(), String));
*start_progress.write().unwrap() = ValidatorStartProgress::Running;
Self { Self {
id, id,
gossip_service, gossip_service,
@ -963,6 +1003,7 @@ fn new_banks_from_ledger(
poh_verify: bool, poh_verify: bool,
exit: &Arc<AtomicBool>, exit: &Arc<AtomicBool>,
enforce_ulimit_nofile: bool, enforce_ulimit_nofile: bool,
start_progress: &Arc<RwLock<ValidatorStartProgress>>,
) -> ( ) -> (
GenesisConfig, GenesisConfig,
BankForks, BankForks,
@ -975,6 +1016,7 @@ fn new_banks_from_ledger(
Tower, Tower,
) { ) {
info!("loading ledger from {:?}...", ledger_path); info!("loading ledger from {:?}...", ledger_path);
*start_progress.write().unwrap() = ValidatorStartProgress::LoadingLedger;
let genesis_config = open_genesis_config(ledger_path, config.max_genesis_archive_unpacked_size); let genesis_config = open_genesis_config(ledger_path, config.max_genesis_archive_unpacked_size);
// This needs to be limited otherwise the state in the VoteAccount data // This needs to be limited otherwise the state in the VoteAccount data
@ -1249,12 +1291,18 @@ fn wait_for_supermajority(
bank: &Bank, bank: &Bank,
cluster_info: &ClusterInfo, cluster_info: &ClusterInfo,
rpc_override_health_check: Arc<AtomicBool>, rpc_override_health_check: Arc<AtomicBool>,
start_progress: &Arc<RwLock<ValidatorStartProgress>>,
) -> bool { ) -> bool {
if let Some(wait_for_supermajority) = config.wait_for_supermajority { if let Some(wait_for_supermajority) = config.wait_for_supermajority {
match wait_for_supermajority.cmp(&bank.slot()) { match wait_for_supermajority.cmp(&bank.slot()) {
std::cmp::Ordering::Less => return false, std::cmp::Ordering::Less => return false,
std::cmp::Ordering::Greater => { std::cmp::Ordering::Greater => {
error!("Ledger does not have enough data to wait for supermajority, please enable snapshot fetch. Has {} needs {}", bank.slot(), wait_for_supermajority); error!(
"Ledger does not have enough data to wait for supermajority, \
please enable snapshot fetch. Has {} needs {}",
bank.slot(),
wait_for_supermajority
);
return true; return true;
} }
_ => {} _ => {}
@ -1274,6 +1322,7 @@ fn wait_for_supermajority(
} }
} }
*start_progress.write().unwrap() = ValidatorStartProgress::WaitingForSupermajority;
for i in 1.. { for i in 1.. {
if i % 10 == 1 { if i % 10 == 1 {
info!( info!(
@ -1459,6 +1508,7 @@ mod tests {
rpc_addrs: Some((validator_node.info.rpc, validator_node.info.rpc_pubsub)), rpc_addrs: Some((validator_node.info.rpc, validator_node.info.rpc_pubsub)),
..ValidatorConfig::default() ..ValidatorConfig::default()
}; };
let start_progress = Arc::new(RwLock::new(ValidatorStartProgress::default()));
let validator = Validator::new( let validator = Validator::new(
validator_node, validator_node,
&Arc::new(validator_keypair), &Arc::new(validator_keypair),
@ -1468,6 +1518,11 @@ mod tests {
vec![leader_node.info], vec![leader_node.info],
&config, &config,
true, // should_check_duplicate_instance true, // should_check_duplicate_instance
start_progress.clone(),
);
assert_eq!(
*start_progress.read().unwrap(),
ValidatorStartProgress::Running
); );
validator.close(); validator.close();
remove_dir_all(validator_ledger_path).unwrap(); remove_dir_all(validator_ledger_path).unwrap();
@ -1539,6 +1594,7 @@ mod tests {
vec![leader_node.info.clone()], vec![leader_node.info.clone()],
&config, &config,
true, // should_check_duplicate_instance true, // should_check_duplicate_instance
Arc::new(RwLock::new(ValidatorStartProgress::default())),
) )
}) })
.collect(); .collect();
@ -1570,11 +1626,14 @@ mod tests {
let bank = Arc::new(Bank::new(&genesis_config)); let bank = Arc::new(Bank::new(&genesis_config));
let mut config = ValidatorConfig::default(); let mut config = ValidatorConfig::default();
let rpc_override_health_check = Arc::new(AtomicBool::new(false)); let rpc_override_health_check = Arc::new(AtomicBool::new(false));
let start_progress = Arc::new(RwLock::new(ValidatorStartProgress::default()));
assert!(!wait_for_supermajority( assert!(!wait_for_supermajority(
&config, &config,
&bank, &bank,
&cluster_info, &cluster_info,
rpc_override_health_check.clone() rpc_override_health_check.clone(),
&start_progress,
)); ));
// bank=0, wait=1, should fail // bank=0, wait=1, should fail
@ -1583,7 +1642,8 @@ mod tests {
&config, &config,
&bank, &bank,
&cluster_info, &cluster_info,
rpc_override_health_check.clone() rpc_override_health_check.clone(),
&start_progress,
)); ));
// bank=1, wait=0, should pass, bank is past the wait slot // bank=1, wait=0, should pass, bank is past the wait slot
@ -1593,7 +1653,8 @@ mod tests {
&config, &config,
&bank, &bank,
&cluster_info, &cluster_info,
rpc_override_health_check.clone() rpc_override_health_check.clone(),
&start_progress,
)); ));
// bank=1, wait=1, equal, but bad hash provided // bank=1, wait=1, equal, but bad hash provided
@ -1603,7 +1664,8 @@ mod tests {
&config, &config,
&bank, &bank,
&cluster_info, &cluster_info,
rpc_override_health_check rpc_override_health_check,
&start_progress,
)); ));
} }

View File

@ -10,7 +10,7 @@ use solana_core::{
cluster_info::{Node, VALIDATOR_PORT_RANGE}, cluster_info::{Node, VALIDATOR_PORT_RANGE},
contact_info::ContactInfo, contact_info::ContactInfo,
gossip_service::discover_cluster, gossip_service::discover_cluster,
validator::{Validator, ValidatorConfig}, validator::{Validator, ValidatorConfig, ValidatorStartProgress},
}; };
use solana_ledger::create_new_tmp_ledger; use solana_ledger::create_new_tmp_ledger;
use solana_runtime::genesis_utils::{ use solana_runtime::genesis_utils::{
@ -43,7 +43,7 @@ use std::{
collections::HashMap, collections::HashMap,
io::{Error, ErrorKind, Result}, io::{Error, ErrorKind, Result},
iter, iter,
sync::Arc, sync::{Arc, RwLock},
}; };
#[derive(Debug)] #[derive(Debug)]
@ -203,6 +203,7 @@ impl LocalCluster {
let leader_keypair = Arc::new(Keypair::from_bytes(&leader_keypair.to_bytes()).unwrap()); let leader_keypair = Arc::new(Keypair::from_bytes(&leader_keypair.to_bytes()).unwrap());
let leader_vote_keypair = let leader_vote_keypair =
Arc::new(Keypair::from_bytes(&leader_vote_keypair.to_bytes()).unwrap()); Arc::new(Keypair::from_bytes(&leader_vote_keypair.to_bytes()).unwrap());
let leader_server = Validator::new( let leader_server = Validator::new(
leader_node, leader_node,
&leader_keypair, &leader_keypair,
@ -212,6 +213,7 @@ impl LocalCluster {
vec![], vec![],
&leader_config, &leader_config,
true, // should_check_duplicate_instance true, // should_check_duplicate_instance
Arc::new(RwLock::new(ValidatorStartProgress::default())),
); );
let mut validators = HashMap::new(); let mut validators = HashMap::new();
@ -353,6 +355,7 @@ impl LocalCluster {
vec![self.entry_point_info.clone()], vec![self.entry_point_info.clone()],
&config, &config,
true, // should_check_duplicate_instance true, // should_check_duplicate_instance
Arc::new(RwLock::new(ValidatorStartProgress::default())),
); );
let validator_pubkey = validator_keypair.pubkey(); let validator_pubkey = validator_keypair.pubkey();
@ -669,6 +672,7 @@ impl Cluster for LocalCluster {
.unwrap_or_default(), .unwrap_or_default(),
&safe_clone_config(&cluster_validator_info.config), &safe_clone_config(&cluster_validator_info.config),
true, // should_check_duplicate_instance true, // should_check_duplicate_instance
Arc::new(RwLock::new(ValidatorStartProgress::default())),
); );
cluster_validator_info.validator = Some(restarted_node); cluster_validator_info.validator = Some(restarted_node);
cluster_validator_info cluster_validator_info

View File

@ -5,7 +5,7 @@ use {
jsonrpc_ipc_server::{RequestContext, ServerBuilder}, jsonrpc_ipc_server::{RequestContext, ServerBuilder},
jsonrpc_server_utils::tokio, jsonrpc_server_utils::tokio,
log::*, log::*,
solana_core::validator::ValidatorExit, solana_core::validator::{ValidatorExit, ValidatorStartProgress},
std::{ std::{
net::SocketAddr, net::SocketAddr,
path::Path, path::Path,
@ -19,6 +19,7 @@ use {
pub struct AdminRpcRequestMetadata { pub struct AdminRpcRequestMetadata {
pub rpc_addr: Option<SocketAddr>, pub rpc_addr: Option<SocketAddr>,
pub start_time: SystemTime, pub start_time: SystemTime,
pub start_progress: Arc<RwLock<ValidatorStartProgress>>,
pub validator_exit: Arc<RwLock<ValidatorExit>>, pub validator_exit: Arc<RwLock<ValidatorExit>>,
} }
impl Metadata for AdminRpcRequestMetadata {} impl Metadata for AdminRpcRequestMetadata {}
@ -38,6 +39,9 @@ pub trait AdminRpc {
#[rpc(meta, name = "startTime")] #[rpc(meta, name = "startTime")]
fn start_time(&self, meta: Self::Metadata) -> Result<SystemTime>; fn start_time(&self, meta: Self::Metadata) -> Result<SystemTime>;
#[rpc(meta, name = "startProgress")]
fn start_progress(&self, meta: Self::Metadata) -> Result<ValidatorStartProgress>;
} }
pub struct AdminRpcImpl; pub struct AdminRpcImpl;
@ -45,9 +49,9 @@ impl AdminRpc for AdminRpcImpl {
type Metadata = AdminRpcRequestMetadata; type Metadata = AdminRpcRequestMetadata;
fn exit(&self, meta: Self::Metadata) -> Result<()> { fn exit(&self, meta: Self::Metadata) -> Result<()> {
info!("exit admin rpc request received"); debug!("exit admin rpc request received");
// Delay exit signal until this RPC request completes, otherwise the caller of `exit` might // Delay exit signal until this RPC request completes, otherwise the caller of `exit` might
// receive a confusing error as the validator shuts down before a response is send back. // receive a confusing error as the validator shuts down before a response is sent back.
tokio::spawn(async move { tokio::spawn(async move {
meta.validator_exit.write().unwrap().exit(); meta.validator_exit.write().unwrap().exit();
}); });
@ -55,20 +59,25 @@ impl AdminRpc for AdminRpcImpl {
} }
fn rpc_addr(&self, meta: Self::Metadata) -> Result<Option<SocketAddr>> { fn rpc_addr(&self, meta: Self::Metadata) -> Result<Option<SocketAddr>> {
info!("rpc_addr admin rpc request received"); debug!("rpc_addr admin rpc request received");
Ok(meta.rpc_addr) Ok(meta.rpc_addr)
} }
fn set_log_filter(&self, filter: String) -> Result<()> { fn set_log_filter(&self, filter: String) -> Result<()> {
info!("set_log_filter admin rpc request received"); debug!("set_log_filter admin rpc request received");
solana_logger::setup_with(&filter); solana_logger::setup_with(&filter);
Ok(()) Ok(())
} }
fn start_time(&self, meta: Self::Metadata) -> Result<SystemTime> { fn start_time(&self, meta: Self::Metadata) -> Result<SystemTime> {
info!("start_time admin rpc request received"); debug!("start_time admin rpc request received");
Ok(meta.start_time) Ok(meta.start_time)
} }
/// Returns a copy of the validator's current start progress.
fn start_progress(&self, meta: Self::Metadata) -> Result<ValidatorStartProgress> {
    debug!("start_progress admin rpc request received");
    // Copy the value out so the read lock is released before the response is built.
    let current_progress = *meta.start_progress.read().unwrap();
    Ok(current_progress)
}
} }
// Start the Admin RPC interface // Start the Admin RPC interface

View File

@ -363,6 +363,7 @@ fn main() {
IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
rpc_port, rpc_port,
)), )),
start_progress: genesis.start_progress.clone(),
start_time: std::time::SystemTime::now(), start_time: std::time::SystemTime::now(),
validator_exit: genesis.validator_exit.clone(), validator_exit: genesis.validator_exit.clone(),
}, },

View File

@ -5,6 +5,7 @@ use {
solana_client::{ solana_client::{
client_error, rpc_client::RpcClient, rpc_request, rpc_response::RpcContactInfo, client_error, rpc_client::RpcClient, rpc_request, rpc_response::RpcContactInfo,
}, },
solana_core::validator::ValidatorStartProgress,
solana_sdk::{ solana_sdk::{
clock::{Slot, DEFAULT_TICKS_PER_SLOT, MS_PER_TICK}, clock::{Slot, DEFAULT_TICKS_PER_SLOT, MS_PER_TICK},
commitment_config::CommitmentConfig, commitment_config::CommitmentConfig,
@ -13,13 +14,14 @@ use {
}, },
std::{ std::{
io, io,
net::SocketAddr,
path::{Path, PathBuf}, path::{Path, PathBuf},
sync::{ sync::{
atomic::{AtomicBool, Ordering}, atomic::{AtomicBool, Ordering},
Arc, Arc,
}, },
thread, thread,
time::Duration, time::{Duration, SystemTime},
}, },
}; };
@ -69,52 +71,26 @@ impl Dashboard {
while !exit.load(Ordering::Relaxed) { while !exit.load(Ordering::Relaxed) {
let progress_bar = new_spinner_progress_bar(); let progress_bar = new_spinner_progress_bar();
progress_bar.set_message("Connecting..."); progress_bar.set_message("Connecting...");
let (start_time, rpc_client, identity) = loop {
if exit.load(Ordering::Relaxed) {
return;
}
let admin_client = admin_rpc_service::connect(&ledger_path); let (rpc_addr, start_time) = match runtime.block_on(wait_for_validator_startup(
let (rpc_addr, start_time) = match runtime.block_on(async move { &ledger_path,
let admin_client = admin_client.await.map_err(|err| { &exit,
format!("Unable to connect to validator process: {}", err) progress_bar,
})?; )) {
None => continue,
Some(results) => results,
};
let rpc_addr = admin_client let rpc_client = RpcClient::new_socket(rpc_addr);
.rpc_addr() let identity = match rpc_client.get_identity() {
.await Ok(identity) => identity,
.map_err(|err| format!("Unable to get validator RPC address: {}", err))? Err(err) => {
.ok_or_else(|| "RPC not available".to_string())?; println!("Failed to get validator identity over RPC: {}", err);
continue;
let start_time = admin_client
.start_time()
.await
.map_err(|err| format!("Unable to get validator start time: {}", err))?;
Ok::<_, String>((rpc_addr, start_time))
}) {
Ok((rpc_addr, start_time)) => (rpc_addr, start_time),
Err(err) => {
progress_bar.set_message(&format!("Connecting... ({})", err));
thread::sleep(Duration::from_millis(500));
continue;
}
};
let rpc_client = RpcClient::new_socket(rpc_addr);
// Wait until RPC starts responding...
match rpc_client.get_identity() {
Ok(identity) => break (start_time, rpc_client, identity),
Err(err) => {
progress_bar.set_message(&format!("Waiting for RPC... ({})", err));
}
} }
}; };
drop(progress_bar);
println_name_value("Identity:", &identity.to_string()); println_name_value("Identity:", &identity.to_string());
if let Some(contact_info) = get_contact_info(&rpc_client, &identity) { if let Some(contact_info) = get_contact_info(&rpc_client, &identity) {
println_name_value( println_name_value(
"Version:", "Version:",
@ -197,6 +173,61 @@ impl Dashboard {
} }
} }
async fn wait_for_validator_startup(
ledger_path: &Path,
exit: &Arc<AtomicBool>,
progress_bar: ProgressBar,
) -> Option<(SocketAddr, SystemTime)> {
let mut admin_client = None;
loop {
if exit.load(Ordering::Relaxed) {
return None;
}
thread::sleep(Duration::from_secs(1));
if admin_client.is_none() {
match admin_rpc_service::connect(&ledger_path).await {
Ok(new_admin_client) => admin_client = Some(new_admin_client),
Err(err) => {
progress_bar.set_message(&format!("Unable to connect to validator: {}", err));
continue;
}
}
}
match admin_client.as_ref().unwrap().start_progress().await {
Ok(start_progress) => {
if start_progress == ValidatorStartProgress::Running {
let admin_client = admin_client.take().unwrap();
match async move {
let rpc_addr = admin_client.rpc_addr().await?;
let start_time = admin_client.start_time().await?;
Ok::<_, jsonrpc_core_client::RpcError>((rpc_addr, start_time))
}
.await
{
Ok((None, _)) => progress_bar.set_message(&"RPC service not available"),
Ok((Some(rpc_addr), start_time)) => return Some((rpc_addr, start_time)),
Err(err) => {
progress_bar
.set_message(&format!("Failed to get validator info: {}", err));
}
}
} else {
progress_bar
.set_message(&format!("Validator startup: {:?}...", start_progress));
}
}
Err(err) => {
admin_client = None;
progress_bar
.set_message(&format!("Failed to get validator start progress: {}", err));
}
}
}
}
fn get_contact_info(rpc_client: &RpcClient, identity: &Pubkey) -> Option<RpcContactInfo> { fn get_contact_info(rpc_client: &RpcClient, identity: &Pubkey) -> Option<RpcContactInfo> {
rpc_client rpc_client
.get_cluster_nodes() .get_cluster_nodes()

View File

@ -65,14 +65,7 @@ pub fn redirect_stderr_to_file(logfile: Option<String>) -> Option<JoinHandle<()>
} }
}; };
solana_logger::setup_with_default( solana_logger::setup_with_default("solana=info");
&[
"solana=info,solana_runtime::message_processor=error", /* info logging for all solana modules */
"rpc=trace", /* json_rpc request/response logging */
]
.join(","),
);
logger_thread logger_thread
} }

View File

@ -1,68 +1,74 @@
#![allow(clippy::integer_arithmetic)] #![allow(clippy::integer_arithmetic)]
use clap::{ use {
crate_description, crate_name, value_t, value_t_or_exit, values_t, values_t_or_exit, App, clap::{
AppSettings, Arg, ArgMatches, SubCommand, crate_description, crate_name, value_t, value_t_or_exit, values_t, values_t_or_exit, App,
}; AppSettings, Arg, ArgMatches, SubCommand,
use console::style;
use fd_lock::FdLock;
use log::*;
use rand::{seq::SliceRandom, thread_rng, Rng};
use solana_clap_utils::{
input_parsers::{keypair_of, keypairs_of, pubkey_of, value_of},
input_validators::{
is_keypair_or_ask_keyword, is_parsable, is_pubkey, is_pubkey_or_keypair, is_slot,
}, },
keypair::SKIP_SEED_PHRASE_VALIDATION_ARG, console::style,
}; fd_lock::FdLock,
use solana_client::{rpc_client::RpcClient, rpc_request::MAX_MULTIPLE_ACCOUNTS}; log::*,
use solana_core::ledger_cleanup_service::{ rand::{seq::SliceRandom, thread_rng, Rng},
DEFAULT_MAX_LEDGER_SHREDS, DEFAULT_MIN_MAX_LEDGER_SHREDS, solana_clap_utils::{
}; input_parsers::{keypair_of, keypairs_of, pubkey_of, value_of},
use solana_core::{ input_validators::{
cluster_info::{ClusterInfo, Node, MINIMUM_VALIDATOR_PORT_RANGE_WIDTH, VALIDATOR_PORT_RANGE}, is_keypair_or_ask_keyword, is_parsable, is_pubkey, is_pubkey_or_keypair, is_slot,
contact_info::ContactInfo, },
gossip_service::GossipService, keypair::SKIP_SEED_PHRASE_VALIDATION_ARG,
poh_service, },
rpc::JsonRpcConfig, solana_client::{rpc_client::RpcClient, rpc_request::MAX_MULTIPLE_ACCOUNTS},
rpc_pubsub_service::PubSubConfig, solana_core::ledger_cleanup_service::{
tpu::DEFAULT_TPU_COALESCE_MS, DEFAULT_MAX_LEDGER_SHREDS, DEFAULT_MIN_MAX_LEDGER_SHREDS,
validator::{is_snapshot_config_invalid, Validator, ValidatorConfig}, },
}; solana_core::{
use solana_download_utils::{download_genesis_if_missing, download_snapshot}; cluster_info::{
use solana_ledger::blockstore_db::BlockstoreRecoveryMode; ClusterInfo, Node, MINIMUM_VALIDATOR_PORT_RANGE_WIDTH, VALIDATOR_PORT_RANGE,
use solana_perf::recycler::enable_recycler_warming; },
use solana_runtime::{ contact_info::ContactInfo,
accounts_index::AccountIndex, gossip_service::GossipService,
bank_forks::{ArchiveFormat, SnapshotConfig, SnapshotVersion}, poh_service,
hardened_unpack::{unpack_genesis_archive, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE}, rpc::JsonRpcConfig,
snapshot_utils::get_highest_snapshot_archive_path, rpc_pubsub_service::PubSubConfig,
}; tpu::DEFAULT_TPU_COALESCE_MS,
use solana_sdk::{ validator::{
clock::{Slot, DEFAULT_S_PER_SLOT}, is_snapshot_config_invalid, Validator, ValidatorConfig, ValidatorStartProgress,
commitment_config::CommitmentConfig, },
genesis_config::GenesisConfig, },
hash::Hash, solana_download_utils::{download_genesis_if_missing, download_snapshot},
pubkey::Pubkey, solana_ledger::blockstore_db::BlockstoreRecoveryMode,
signature::{Keypair, Signer}, solana_perf::recycler::enable_recycler_warming,
}; solana_runtime::{
use solana_validator::{ accounts_index::AccountIndex,
admin_rpc_service, dashboard::Dashboard, new_spinner_progress_bar, println_name_value, bank_forks::{ArchiveFormat, SnapshotConfig, SnapshotVersion},
redirect_stderr_to_file, hardened_unpack::{unpack_genesis_archive, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE},
}; snapshot_utils::get_highest_snapshot_archive_path,
use std::{ },
collections::{HashSet, VecDeque}, solana_sdk::{
env, clock::{Slot, DEFAULT_S_PER_SLOT},
fs::{self, File}, commitment_config::CommitmentConfig,
net::{IpAddr, SocketAddr, TcpListener, UdpSocket}, genesis_config::GenesisConfig,
path::{Path, PathBuf}, hash::Hash,
process::exit, pubkey::Pubkey,
str::FromStr, signature::{Keypair, Signer},
sync::{ },
atomic::{AtomicBool, Ordering}, solana_validator::{
Arc, admin_rpc_service, dashboard::Dashboard, new_spinner_progress_bar, println_name_value,
redirect_stderr_to_file,
},
std::{
collections::{HashSet, VecDeque},
env,
fs::{self, File},
net::{IpAddr, SocketAddr, TcpListener, UdpSocket},
path::{Path, PathBuf},
process::exit,
str::FromStr,
sync::{
atomic::{AtomicBool, Ordering},
Arc, RwLock,
},
thread::sleep,
time::{Duration, Instant, SystemTime},
}, },
thread::sleep,
time::{Duration, Instant, SystemTime},
}; };
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -753,6 +759,7 @@ fn rpc_bootstrap(
use_progress_bar: bool, use_progress_bar: bool,
maximum_local_snapshot_age: Slot, maximum_local_snapshot_age: Slot,
should_check_duplicate_instance: bool, should_check_duplicate_instance: bool,
start_progress: &Arc<RwLock<ValidatorStartProgress>>,
) { ) {
if !no_port_check { if !no_port_check {
let mut order: Vec<_> = (0..cluster_entrypoints.len()).collect(); let mut order: Vec<_> = (0..cluster_entrypoints.len()).collect();
@ -773,6 +780,8 @@ fn rpc_bootstrap(
let mut gossip = None; let mut gossip = None;
loop { loop {
if gossip.is_none() { if gossip.is_none() {
*start_progress.write().unwrap() = ValidatorStartProgress::SearchingForRpcService;
gossip = Some(start_gossip_node( gossip = Some(start_gossip_node(
&identity_keypair, &identity_keypair,
&cluster_entrypoints, &cluster_entrypoints,
@ -876,6 +885,11 @@ fn rpc_bootstrap(
.get_slot_with_commitment(CommitmentConfig::finalized()) .get_slot_with_commitment(CommitmentConfig::finalized())
.map_err(|err| format!("Failed to get RPC node slot: {}", err)) .map_err(|err| format!("Failed to get RPC node slot: {}", err))
.and_then(|slot| { .and_then(|slot| {
*start_progress.write().unwrap() =
ValidatorStartProgress::DownloadingSnapshot {
slot: snapshot_hash.0,
rpc_addr: rpc_contact_info.rpc,
};
info!("RPC node root slot: {}", slot); info!("RPC node root slot: {}", slot);
let (cluster_info, gossip_exit_flag, gossip_service) = let (cluster_info, gossip_exit_flag, gossip_service) =
gossip.take().unwrap(); gossip.take().unwrap();
@ -2118,12 +2132,14 @@ pub fn main() {
info!("{} {}", crate_name!(), solana_version::version!()); info!("{} {}", crate_name!(), solana_version::version!());
info!("Starting validator with: {:#?}", std::env::args_os()); info!("Starting validator with: {:#?}", std::env::args_os());
let start_progress = Arc::new(RwLock::new(ValidatorStartProgress::default()));
admin_rpc_service::run( admin_rpc_service::run(
&ledger_path, &ledger_path,
admin_rpc_service::AdminRpcRequestMetadata { admin_rpc_service::AdminRpcRequestMetadata {
rpc_addr: validator_config.rpc_addrs.map(|(rpc_addr, _)| rpc_addr), rpc_addr: validator_config.rpc_addrs.map(|(rpc_addr, _)| rpc_addr),
start_time: std::time::SystemTime::now(), start_time: std::time::SystemTime::now(),
validator_exit: validator_config.validator_exit.clone(), validator_exit: validator_config.validator_exit.clone(),
start_progress: start_progress.clone(),
}, },
); );
@ -2241,7 +2257,9 @@ pub fn main() {
use_progress_bar, use_progress_bar,
maximum_local_snapshot_age, maximum_local_snapshot_age,
should_check_duplicate_instance, should_check_duplicate_instance,
&start_progress,
); );
*start_progress.write().unwrap() = ValidatorStartProgress::Initializing;
} }
if operation == Operation::Initialize { if operation == Operation::Initialize {
@ -2257,6 +2275,7 @@ pub fn main() {
cluster_entrypoints, cluster_entrypoints,
&validator_config, &validator_config,
should_check_duplicate_instance, should_check_duplicate_instance,
start_progress,
); );
if let Some(filename) = init_complete_file { if let Some(filename) = init_complete_file {