Add validator startup progress reporting before RPC is available
This commit is contained in:
parent
2cc695bb5d
commit
bd13262b42
|
@ -3,7 +3,7 @@ use {
|
||||||
cluster_info::Node,
|
cluster_info::Node,
|
||||||
gossip_service::discover_cluster,
|
gossip_service::discover_cluster,
|
||||||
rpc::JsonRpcConfig,
|
rpc::JsonRpcConfig,
|
||||||
validator::{Validator, ValidatorConfig, ValidatorExit},
|
validator::{Validator, ValidatorConfig, ValidatorExit, ValidatorStartProgress},
|
||||||
},
|
},
|
||||||
solana_client::rpc_client::RpcClient,
|
solana_client::rpc_client::RpcClient,
|
||||||
solana_ledger::{blockstore::create_new_ledger, create_new_tmp_ledger},
|
solana_ledger::{blockstore::create_new_ledger, create_new_tmp_ledger},
|
||||||
|
@ -53,6 +53,7 @@ pub struct TestValidatorGenesis {
|
||||||
accounts: HashMap<Pubkey, Account>,
|
accounts: HashMap<Pubkey, Account>,
|
||||||
programs: Vec<ProgramInfo>,
|
programs: Vec<ProgramInfo>,
|
||||||
pub validator_exit: Arc<RwLock<ValidatorExit>>,
|
pub validator_exit: Arc<RwLock<ValidatorExit>>,
|
||||||
|
pub start_progress: Arc<RwLock<ValidatorStartProgress>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TestValidatorGenesis {
|
impl TestValidatorGenesis {
|
||||||
|
@ -415,6 +416,7 @@ impl TestValidator {
|
||||||
vec![],
|
vec![],
|
||||||
&validator_config,
|
&validator_config,
|
||||||
true, // should_check_duplicate_instance
|
true, // should_check_duplicate_instance
|
||||||
|
config.start_progress.clone(),
|
||||||
));
|
));
|
||||||
|
|
||||||
// Needed to avoid panics in `solana-responder-gossip` in tests that create a number of
|
// Needed to avoid panics in `solana-responder-gossip` in tests that create a number of
|
||||||
|
|
|
@ -186,6 +186,32 @@ impl Default for ValidatorConfig {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// `ValidatorStartProgress` contains status information that is surfaced to the node operator over
|
||||||
|
// the admin RPC channel to help them to follow the general progress of node startup without
|
||||||
|
// having to watch log messages.
|
||||||
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
|
||||||
|
pub enum ValidatorStartProgress {
|
||||||
|
Initializing, // Catch all, default state
|
||||||
|
SearchingForRpcService,
|
||||||
|
DownloadingSnapshot { slot: Slot, rpc_addr: SocketAddr },
|
||||||
|
CleaningBlockStore,
|
||||||
|
CleaningAccounts,
|
||||||
|
LoadingLedger,
|
||||||
|
StartingServices,
|
||||||
|
Halted, // Validator halted due to `--dev-halt-at-slot` argument
|
||||||
|
WaitingForSupermajority,
|
||||||
|
|
||||||
|
// `Running` is the terminal state once the validator fully starts and all services are
|
||||||
|
// operational
|
||||||
|
Running,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for ValidatorStartProgress {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Initializing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct ValidatorExit {
|
pub struct ValidatorExit {
|
||||||
exited: bool,
|
exited: bool,
|
||||||
|
@ -270,6 +296,7 @@ impl Validator {
|
||||||
cluster_entrypoints: Vec<ContactInfo>,
|
cluster_entrypoints: Vec<ContactInfo>,
|
||||||
config: &ValidatorConfig,
|
config: &ValidatorConfig,
|
||||||
should_check_duplicate_instance: bool,
|
should_check_duplicate_instance: bool,
|
||||||
|
start_progress: Arc<RwLock<ValidatorStartProgress>>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let id = identity_keypair.pubkey();
|
let id = identity_keypair.pubkey();
|
||||||
assert_eq!(id, node.info.id);
|
assert_eq!(id, node.info.id);
|
||||||
|
@ -309,6 +336,7 @@ impl Validator {
|
||||||
|
|
||||||
if let Some(shred_version) = config.expected_shred_version {
|
if let Some(shred_version) = config.expected_shred_version {
|
||||||
if let Some(wait_for_supermajority_slot) = config.wait_for_supermajority {
|
if let Some(wait_for_supermajority_slot) = config.wait_for_supermajority {
|
||||||
|
*start_progress.write().unwrap() = ValidatorStartProgress::CleaningBlockStore;
|
||||||
backup_and_clear_blockstore(
|
backup_and_clear_blockstore(
|
||||||
ledger_path,
|
ledger_path,
|
||||||
wait_for_supermajority_slot + 1,
|
wait_for_supermajority_slot + 1,
|
||||||
|
@ -318,6 +346,7 @@ impl Validator {
|
||||||
}
|
}
|
||||||
|
|
||||||
info!("Cleaning accounts paths..");
|
info!("Cleaning accounts paths..");
|
||||||
|
*start_progress.write().unwrap() = ValidatorStartProgress::CleaningAccounts;
|
||||||
let mut start = Measure::start("clean_accounts_paths");
|
let mut start = Measure::start("clean_accounts_paths");
|
||||||
for accounts_path in &config.account_paths {
|
for accounts_path in &config.account_paths {
|
||||||
cleanup_accounts_path(accounts_path);
|
cleanup_accounts_path(accounts_path);
|
||||||
|
@ -366,8 +395,11 @@ impl Validator {
|
||||||
config.poh_verify,
|
config.poh_verify,
|
||||||
&exit,
|
&exit,
|
||||||
config.enforce_ulimit_nofile,
|
config.enforce_ulimit_nofile,
|
||||||
|
&start_progress,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
*start_progress.write().unwrap() = ValidatorStartProgress::StartingServices;
|
||||||
|
|
||||||
let leader_schedule_cache = Arc::new(leader_schedule_cache);
|
let leader_schedule_cache = Arc::new(leader_schedule_cache);
|
||||||
let bank = bank_forks.working_bank();
|
let bank = bank_forks.working_bank();
|
||||||
if let Some(ref shrink_paths) = config.account_shrink_paths {
|
if let Some(ref shrink_paths) = config.account_shrink_paths {
|
||||||
|
@ -542,6 +574,7 @@ impl Validator {
|
||||||
|
|
||||||
// Park with the RPC service running, ready for inspection!
|
// Park with the RPC service running, ready for inspection!
|
||||||
warn!("Validator halted");
|
warn!("Validator halted");
|
||||||
|
*start_progress.write().unwrap() = ValidatorStartProgress::Halted;
|
||||||
std::thread::park();
|
std::thread::park();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -593,7 +626,13 @@ impl Validator {
|
||||||
check_poh_speed(&genesis_config, None);
|
check_poh_speed(&genesis_config, None);
|
||||||
}
|
}
|
||||||
|
|
||||||
if wait_for_supermajority(config, &bank, &cluster_info, rpc_override_health_check) {
|
if wait_for_supermajority(
|
||||||
|
config,
|
||||||
|
&bank,
|
||||||
|
&cluster_info,
|
||||||
|
rpc_override_health_check,
|
||||||
|
&start_progress,
|
||||||
|
) {
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -707,6 +746,7 @@ impl Validator {
|
||||||
);
|
);
|
||||||
|
|
||||||
datapoint_info!("validator-new", ("id", id.to_string(), String));
|
datapoint_info!("validator-new", ("id", id.to_string(), String));
|
||||||
|
*start_progress.write().unwrap() = ValidatorStartProgress::Running;
|
||||||
Self {
|
Self {
|
||||||
id,
|
id,
|
||||||
gossip_service,
|
gossip_service,
|
||||||
|
@ -963,6 +1003,7 @@ fn new_banks_from_ledger(
|
||||||
poh_verify: bool,
|
poh_verify: bool,
|
||||||
exit: &Arc<AtomicBool>,
|
exit: &Arc<AtomicBool>,
|
||||||
enforce_ulimit_nofile: bool,
|
enforce_ulimit_nofile: bool,
|
||||||
|
start_progress: &Arc<RwLock<ValidatorStartProgress>>,
|
||||||
) -> (
|
) -> (
|
||||||
GenesisConfig,
|
GenesisConfig,
|
||||||
BankForks,
|
BankForks,
|
||||||
|
@ -975,6 +1016,7 @@ fn new_banks_from_ledger(
|
||||||
Tower,
|
Tower,
|
||||||
) {
|
) {
|
||||||
info!("loading ledger from {:?}...", ledger_path);
|
info!("loading ledger from {:?}...", ledger_path);
|
||||||
|
*start_progress.write().unwrap() = ValidatorStartProgress::LoadingLedger;
|
||||||
let genesis_config = open_genesis_config(ledger_path, config.max_genesis_archive_unpacked_size);
|
let genesis_config = open_genesis_config(ledger_path, config.max_genesis_archive_unpacked_size);
|
||||||
|
|
||||||
// This needs to be limited otherwise the state in the VoteAccount data
|
// This needs to be limited otherwise the state in the VoteAccount data
|
||||||
|
@ -1249,12 +1291,18 @@ fn wait_for_supermajority(
|
||||||
bank: &Bank,
|
bank: &Bank,
|
||||||
cluster_info: &ClusterInfo,
|
cluster_info: &ClusterInfo,
|
||||||
rpc_override_health_check: Arc<AtomicBool>,
|
rpc_override_health_check: Arc<AtomicBool>,
|
||||||
|
start_progress: &Arc<RwLock<ValidatorStartProgress>>,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
if let Some(wait_for_supermajority) = config.wait_for_supermajority {
|
if let Some(wait_for_supermajority) = config.wait_for_supermajority {
|
||||||
match wait_for_supermajority.cmp(&bank.slot()) {
|
match wait_for_supermajority.cmp(&bank.slot()) {
|
||||||
std::cmp::Ordering::Less => return false,
|
std::cmp::Ordering::Less => return false,
|
||||||
std::cmp::Ordering::Greater => {
|
std::cmp::Ordering::Greater => {
|
||||||
error!("Ledger does not have enough data to wait for supermajority, please enable snapshot fetch. Has {} needs {}", bank.slot(), wait_for_supermajority);
|
error!(
|
||||||
|
"Ledger does not have enough data to wait for supermajority, \
|
||||||
|
please enable snapshot fetch. Has {} needs {}",
|
||||||
|
bank.slot(),
|
||||||
|
wait_for_supermajority
|
||||||
|
);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
|
@ -1274,6 +1322,7 @@ fn wait_for_supermajority(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
*start_progress.write().unwrap() = ValidatorStartProgress::WaitingForSupermajority;
|
||||||
for i in 1.. {
|
for i in 1.. {
|
||||||
if i % 10 == 1 {
|
if i % 10 == 1 {
|
||||||
info!(
|
info!(
|
||||||
|
@ -1459,6 +1508,7 @@ mod tests {
|
||||||
rpc_addrs: Some((validator_node.info.rpc, validator_node.info.rpc_pubsub)),
|
rpc_addrs: Some((validator_node.info.rpc, validator_node.info.rpc_pubsub)),
|
||||||
..ValidatorConfig::default()
|
..ValidatorConfig::default()
|
||||||
};
|
};
|
||||||
|
let start_progress = Arc::new(RwLock::new(ValidatorStartProgress::default()));
|
||||||
let validator = Validator::new(
|
let validator = Validator::new(
|
||||||
validator_node,
|
validator_node,
|
||||||
&Arc::new(validator_keypair),
|
&Arc::new(validator_keypair),
|
||||||
|
@ -1468,6 +1518,11 @@ mod tests {
|
||||||
vec![leader_node.info],
|
vec![leader_node.info],
|
||||||
&config,
|
&config,
|
||||||
true, // should_check_duplicate_instance
|
true, // should_check_duplicate_instance
|
||||||
|
start_progress.clone(),
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
*start_progress.read().unwrap(),
|
||||||
|
ValidatorStartProgress::Running
|
||||||
);
|
);
|
||||||
validator.close();
|
validator.close();
|
||||||
remove_dir_all(validator_ledger_path).unwrap();
|
remove_dir_all(validator_ledger_path).unwrap();
|
||||||
|
@ -1539,6 +1594,7 @@ mod tests {
|
||||||
vec![leader_node.info.clone()],
|
vec![leader_node.info.clone()],
|
||||||
&config,
|
&config,
|
||||||
true, // should_check_duplicate_instance
|
true, // should_check_duplicate_instance
|
||||||
|
Arc::new(RwLock::new(ValidatorStartProgress::default())),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -1570,11 +1626,14 @@ mod tests {
|
||||||
let bank = Arc::new(Bank::new(&genesis_config));
|
let bank = Arc::new(Bank::new(&genesis_config));
|
||||||
let mut config = ValidatorConfig::default();
|
let mut config = ValidatorConfig::default();
|
||||||
let rpc_override_health_check = Arc::new(AtomicBool::new(false));
|
let rpc_override_health_check = Arc::new(AtomicBool::new(false));
|
||||||
|
let start_progress = Arc::new(RwLock::new(ValidatorStartProgress::default()));
|
||||||
|
|
||||||
assert!(!wait_for_supermajority(
|
assert!(!wait_for_supermajority(
|
||||||
&config,
|
&config,
|
||||||
&bank,
|
&bank,
|
||||||
&cluster_info,
|
&cluster_info,
|
||||||
rpc_override_health_check.clone()
|
rpc_override_health_check.clone(),
|
||||||
|
&start_progress,
|
||||||
));
|
));
|
||||||
|
|
||||||
// bank=0, wait=1, should fail
|
// bank=0, wait=1, should fail
|
||||||
|
@ -1583,7 +1642,8 @@ mod tests {
|
||||||
&config,
|
&config,
|
||||||
&bank,
|
&bank,
|
||||||
&cluster_info,
|
&cluster_info,
|
||||||
rpc_override_health_check.clone()
|
rpc_override_health_check.clone(),
|
||||||
|
&start_progress,
|
||||||
));
|
));
|
||||||
|
|
||||||
// bank=1, wait=0, should pass, bank is past the wait slot
|
// bank=1, wait=0, should pass, bank is past the wait slot
|
||||||
|
@ -1593,7 +1653,8 @@ mod tests {
|
||||||
&config,
|
&config,
|
||||||
&bank,
|
&bank,
|
||||||
&cluster_info,
|
&cluster_info,
|
||||||
rpc_override_health_check.clone()
|
rpc_override_health_check.clone(),
|
||||||
|
&start_progress,
|
||||||
));
|
));
|
||||||
|
|
||||||
// bank=1, wait=1, equal, but bad hash provided
|
// bank=1, wait=1, equal, but bad hash provided
|
||||||
|
@ -1603,7 +1664,8 @@ mod tests {
|
||||||
&config,
|
&config,
|
||||||
&bank,
|
&bank,
|
||||||
&cluster_info,
|
&cluster_info,
|
||||||
rpc_override_health_check
|
rpc_override_health_check,
|
||||||
|
&start_progress,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,7 +10,7 @@ use solana_core::{
|
||||||
cluster_info::{Node, VALIDATOR_PORT_RANGE},
|
cluster_info::{Node, VALIDATOR_PORT_RANGE},
|
||||||
contact_info::ContactInfo,
|
contact_info::ContactInfo,
|
||||||
gossip_service::discover_cluster,
|
gossip_service::discover_cluster,
|
||||||
validator::{Validator, ValidatorConfig},
|
validator::{Validator, ValidatorConfig, ValidatorStartProgress},
|
||||||
};
|
};
|
||||||
use solana_ledger::create_new_tmp_ledger;
|
use solana_ledger::create_new_tmp_ledger;
|
||||||
use solana_runtime::genesis_utils::{
|
use solana_runtime::genesis_utils::{
|
||||||
|
@ -43,7 +43,7 @@ use std::{
|
||||||
collections::HashMap,
|
collections::HashMap,
|
||||||
io::{Error, ErrorKind, Result},
|
io::{Error, ErrorKind, Result},
|
||||||
iter,
|
iter,
|
||||||
sync::Arc,
|
sync::{Arc, RwLock},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -203,6 +203,7 @@ impl LocalCluster {
|
||||||
let leader_keypair = Arc::new(Keypair::from_bytes(&leader_keypair.to_bytes()).unwrap());
|
let leader_keypair = Arc::new(Keypair::from_bytes(&leader_keypair.to_bytes()).unwrap());
|
||||||
let leader_vote_keypair =
|
let leader_vote_keypair =
|
||||||
Arc::new(Keypair::from_bytes(&leader_vote_keypair.to_bytes()).unwrap());
|
Arc::new(Keypair::from_bytes(&leader_vote_keypair.to_bytes()).unwrap());
|
||||||
|
|
||||||
let leader_server = Validator::new(
|
let leader_server = Validator::new(
|
||||||
leader_node,
|
leader_node,
|
||||||
&leader_keypair,
|
&leader_keypair,
|
||||||
|
@ -212,6 +213,7 @@ impl LocalCluster {
|
||||||
vec![],
|
vec![],
|
||||||
&leader_config,
|
&leader_config,
|
||||||
true, // should_check_duplicate_instance
|
true, // should_check_duplicate_instance
|
||||||
|
Arc::new(RwLock::new(ValidatorStartProgress::default())),
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut validators = HashMap::new();
|
let mut validators = HashMap::new();
|
||||||
|
@ -353,6 +355,7 @@ impl LocalCluster {
|
||||||
vec![self.entry_point_info.clone()],
|
vec![self.entry_point_info.clone()],
|
||||||
&config,
|
&config,
|
||||||
true, // should_check_duplicate_instance
|
true, // should_check_duplicate_instance
|
||||||
|
Arc::new(RwLock::new(ValidatorStartProgress::default())),
|
||||||
);
|
);
|
||||||
|
|
||||||
let validator_pubkey = validator_keypair.pubkey();
|
let validator_pubkey = validator_keypair.pubkey();
|
||||||
|
@ -669,6 +672,7 @@ impl Cluster for LocalCluster {
|
||||||
.unwrap_or_default(),
|
.unwrap_or_default(),
|
||||||
&safe_clone_config(&cluster_validator_info.config),
|
&safe_clone_config(&cluster_validator_info.config),
|
||||||
true, // should_check_duplicate_instance
|
true, // should_check_duplicate_instance
|
||||||
|
Arc::new(RwLock::new(ValidatorStartProgress::default())),
|
||||||
);
|
);
|
||||||
cluster_validator_info.validator = Some(restarted_node);
|
cluster_validator_info.validator = Some(restarted_node);
|
||||||
cluster_validator_info
|
cluster_validator_info
|
||||||
|
|
|
@ -5,7 +5,7 @@ use {
|
||||||
jsonrpc_ipc_server::{RequestContext, ServerBuilder},
|
jsonrpc_ipc_server::{RequestContext, ServerBuilder},
|
||||||
jsonrpc_server_utils::tokio,
|
jsonrpc_server_utils::tokio,
|
||||||
log::*,
|
log::*,
|
||||||
solana_core::validator::ValidatorExit,
|
solana_core::validator::{ValidatorExit, ValidatorStartProgress},
|
||||||
std::{
|
std::{
|
||||||
net::SocketAddr,
|
net::SocketAddr,
|
||||||
path::Path,
|
path::Path,
|
||||||
|
@ -19,6 +19,7 @@ use {
|
||||||
pub struct AdminRpcRequestMetadata {
|
pub struct AdminRpcRequestMetadata {
|
||||||
pub rpc_addr: Option<SocketAddr>,
|
pub rpc_addr: Option<SocketAddr>,
|
||||||
pub start_time: SystemTime,
|
pub start_time: SystemTime,
|
||||||
|
pub start_progress: Arc<RwLock<ValidatorStartProgress>>,
|
||||||
pub validator_exit: Arc<RwLock<ValidatorExit>>,
|
pub validator_exit: Arc<RwLock<ValidatorExit>>,
|
||||||
}
|
}
|
||||||
impl Metadata for AdminRpcRequestMetadata {}
|
impl Metadata for AdminRpcRequestMetadata {}
|
||||||
|
@ -38,6 +39,9 @@ pub trait AdminRpc {
|
||||||
|
|
||||||
#[rpc(meta, name = "startTime")]
|
#[rpc(meta, name = "startTime")]
|
||||||
fn start_time(&self, meta: Self::Metadata) -> Result<SystemTime>;
|
fn start_time(&self, meta: Self::Metadata) -> Result<SystemTime>;
|
||||||
|
|
||||||
|
#[rpc(meta, name = "startProgress")]
|
||||||
|
fn start_progress(&self, meta: Self::Metadata) -> Result<ValidatorStartProgress>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct AdminRpcImpl;
|
pub struct AdminRpcImpl;
|
||||||
|
@ -45,9 +49,9 @@ impl AdminRpc for AdminRpcImpl {
|
||||||
type Metadata = AdminRpcRequestMetadata;
|
type Metadata = AdminRpcRequestMetadata;
|
||||||
|
|
||||||
fn exit(&self, meta: Self::Metadata) -> Result<()> {
|
fn exit(&self, meta: Self::Metadata) -> Result<()> {
|
||||||
info!("exit admin rpc request received");
|
debug!("exit admin rpc request received");
|
||||||
// Delay exit signal until this RPC request completes, otherwise the caller of `exit` might
|
// Delay exit signal until this RPC request completes, otherwise the caller of `exit` might
|
||||||
// receive a confusing error as the validator shuts down before a response is send back.
|
// receive a confusing error as the validator shuts down before a response is sent back.
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
meta.validator_exit.write().unwrap().exit();
|
meta.validator_exit.write().unwrap().exit();
|
||||||
});
|
});
|
||||||
|
@ -55,20 +59,25 @@ impl AdminRpc for AdminRpcImpl {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn rpc_addr(&self, meta: Self::Metadata) -> Result<Option<SocketAddr>> {
|
fn rpc_addr(&self, meta: Self::Metadata) -> Result<Option<SocketAddr>> {
|
||||||
info!("rpc_addr admin rpc request received");
|
debug!("rpc_addr admin rpc request received");
|
||||||
Ok(meta.rpc_addr)
|
Ok(meta.rpc_addr)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn set_log_filter(&self, filter: String) -> Result<()> {
|
fn set_log_filter(&self, filter: String) -> Result<()> {
|
||||||
info!("set_log_filter admin rpc request received");
|
debug!("set_log_filter admin rpc request received");
|
||||||
solana_logger::setup_with(&filter);
|
solana_logger::setup_with(&filter);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn start_time(&self, meta: Self::Metadata) -> Result<SystemTime> {
|
fn start_time(&self, meta: Self::Metadata) -> Result<SystemTime> {
|
||||||
info!("start_time admin rpc request received");
|
debug!("start_time admin rpc request received");
|
||||||
Ok(meta.start_time)
|
Ok(meta.start_time)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Admin RPC handler for `startProgress`: returns a copy of the startup state currently stored
/// in the request metadata.
fn start_progress(&self, meta: Self::Metadata) -> Result<ValidatorStartProgress> {
    debug!("start_progress admin rpc request received");
    // The read guard is a temporary of this statement, so the lock is released before returning
    let current_progress = *meta.start_progress.read().unwrap();
    Ok(current_progress)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start the Admin RPC interface
|
// Start the Admin RPC interface
|
||||||
|
|
|
@ -363,6 +363,7 @@ fn main() {
|
||||||
IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
|
IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
|
||||||
rpc_port,
|
rpc_port,
|
||||||
)),
|
)),
|
||||||
|
start_progress: genesis.start_progress.clone(),
|
||||||
start_time: std::time::SystemTime::now(),
|
start_time: std::time::SystemTime::now(),
|
||||||
validator_exit: genesis.validator_exit.clone(),
|
validator_exit: genesis.validator_exit.clone(),
|
||||||
},
|
},
|
||||||
|
|
|
@ -5,6 +5,7 @@ use {
|
||||||
solana_client::{
|
solana_client::{
|
||||||
client_error, rpc_client::RpcClient, rpc_request, rpc_response::RpcContactInfo,
|
client_error, rpc_client::RpcClient, rpc_request, rpc_response::RpcContactInfo,
|
||||||
},
|
},
|
||||||
|
solana_core::validator::ValidatorStartProgress,
|
||||||
solana_sdk::{
|
solana_sdk::{
|
||||||
clock::{Slot, DEFAULT_TICKS_PER_SLOT, MS_PER_TICK},
|
clock::{Slot, DEFAULT_TICKS_PER_SLOT, MS_PER_TICK},
|
||||||
commitment_config::CommitmentConfig,
|
commitment_config::CommitmentConfig,
|
||||||
|
@ -13,13 +14,14 @@ use {
|
||||||
},
|
},
|
||||||
std::{
|
std::{
|
||||||
io,
|
io,
|
||||||
|
net::SocketAddr,
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
sync::{
|
sync::{
|
||||||
atomic::{AtomicBool, Ordering},
|
atomic::{AtomicBool, Ordering},
|
||||||
Arc,
|
Arc,
|
||||||
},
|
},
|
||||||
thread,
|
thread,
|
||||||
time::Duration,
|
time::{Duration, SystemTime},
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -69,52 +71,26 @@ impl Dashboard {
|
||||||
while !exit.load(Ordering::Relaxed) {
|
while !exit.load(Ordering::Relaxed) {
|
||||||
let progress_bar = new_spinner_progress_bar();
|
let progress_bar = new_spinner_progress_bar();
|
||||||
progress_bar.set_message("Connecting...");
|
progress_bar.set_message("Connecting...");
|
||||||
let (start_time, rpc_client, identity) = loop {
|
|
||||||
if exit.load(Ordering::Relaxed) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
let admin_client = admin_rpc_service::connect(&ledger_path);
|
let (rpc_addr, start_time) = match runtime.block_on(wait_for_validator_startup(
|
||||||
let (rpc_addr, start_time) = match runtime.block_on(async move {
|
&ledger_path,
|
||||||
let admin_client = admin_client.await.map_err(|err| {
|
&exit,
|
||||||
format!("Unable to connect to validator process: {}", err)
|
progress_bar,
|
||||||
})?;
|
)) {
|
||||||
|
None => continue,
|
||||||
|
Some(results) => results,
|
||||||
|
};
|
||||||
|
|
||||||
let rpc_addr = admin_client
|
let rpc_client = RpcClient::new_socket(rpc_addr);
|
||||||
.rpc_addr()
|
let identity = match rpc_client.get_identity() {
|
||||||
.await
|
Ok(identity) => identity,
|
||||||
.map_err(|err| format!("Unable to get validator RPC address: {}", err))?
|
Err(err) => {
|
||||||
.ok_or_else(|| "RPC not available".to_string())?;
|
println!("Failed to get validator identity over RPC: {}", err);
|
||||||
|
continue;
|
||||||
let start_time = admin_client
|
|
||||||
.start_time()
|
|
||||||
.await
|
|
||||||
.map_err(|err| format!("Unable to get validator start time: {}", err))?;
|
|
||||||
|
|
||||||
Ok::<_, String>((rpc_addr, start_time))
|
|
||||||
}) {
|
|
||||||
Ok((rpc_addr, start_time)) => (rpc_addr, start_time),
|
|
||||||
Err(err) => {
|
|
||||||
progress_bar.set_message(&format!("Connecting... ({})", err));
|
|
||||||
thread::sleep(Duration::from_millis(500));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let rpc_client = RpcClient::new_socket(rpc_addr);
|
|
||||||
|
|
||||||
// Wait until RPC starts responding...
|
|
||||||
match rpc_client.get_identity() {
|
|
||||||
Ok(identity) => break (start_time, rpc_client, identity),
|
|
||||||
Err(err) => {
|
|
||||||
progress_bar.set_message(&format!("Waiting for RPC... ({})", err));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
drop(progress_bar);
|
|
||||||
println_name_value("Identity:", &identity.to_string());
|
println_name_value("Identity:", &identity.to_string());
|
||||||
|
|
||||||
if let Some(contact_info) = get_contact_info(&rpc_client, &identity) {
|
if let Some(contact_info) = get_contact_info(&rpc_client, &identity) {
|
||||||
println_name_value(
|
println_name_value(
|
||||||
"Version:",
|
"Version:",
|
||||||
|
@ -197,6 +173,61 @@ impl Dashboard {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn wait_for_validator_startup(
|
||||||
|
ledger_path: &Path,
|
||||||
|
exit: &Arc<AtomicBool>,
|
||||||
|
progress_bar: ProgressBar,
|
||||||
|
) -> Option<(SocketAddr, SystemTime)> {
|
||||||
|
let mut admin_client = None;
|
||||||
|
loop {
|
||||||
|
if exit.load(Ordering::Relaxed) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
thread::sleep(Duration::from_secs(1));
|
||||||
|
|
||||||
|
if admin_client.is_none() {
|
||||||
|
match admin_rpc_service::connect(&ledger_path).await {
|
||||||
|
Ok(new_admin_client) => admin_client = Some(new_admin_client),
|
||||||
|
Err(err) => {
|
||||||
|
progress_bar.set_message(&format!("Unable to connect to validator: {}", err));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
match admin_client.as_ref().unwrap().start_progress().await {
|
||||||
|
Ok(start_progress) => {
|
||||||
|
if start_progress == ValidatorStartProgress::Running {
|
||||||
|
let admin_client = admin_client.take().unwrap();
|
||||||
|
|
||||||
|
match async move {
|
||||||
|
let rpc_addr = admin_client.rpc_addr().await?;
|
||||||
|
let start_time = admin_client.start_time().await?;
|
||||||
|
Ok::<_, jsonrpc_core_client::RpcError>((rpc_addr, start_time))
|
||||||
|
}
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok((None, _)) => progress_bar.set_message(&"RPC service not available"),
|
||||||
|
Ok((Some(rpc_addr), start_time)) => return Some((rpc_addr, start_time)),
|
||||||
|
Err(err) => {
|
||||||
|
progress_bar
|
||||||
|
.set_message(&format!("Failed to get validator info: {}", err));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
progress_bar
|
||||||
|
.set_message(&format!("Validator startup: {:?}...", start_progress));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
admin_client = None;
|
||||||
|
progress_bar
|
||||||
|
.set_message(&format!("Failed to get validator start progress: {}", err));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn get_contact_info(rpc_client: &RpcClient, identity: &Pubkey) -> Option<RpcContactInfo> {
|
fn get_contact_info(rpc_client: &RpcClient, identity: &Pubkey) -> Option<RpcContactInfo> {
|
||||||
rpc_client
|
rpc_client
|
||||||
.get_cluster_nodes()
|
.get_cluster_nodes()
|
||||||
|
|
|
@ -65,14 +65,7 @@ pub fn redirect_stderr_to_file(logfile: Option<String>) -> Option<JoinHandle<()>
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
solana_logger::setup_with_default(
|
solana_logger::setup_with_default("solana=info");
|
||||||
&[
|
|
||||||
"solana=info,solana_runtime::message_processor=error", /* info logging for all solana modules */
|
|
||||||
"rpc=trace", /* json_rpc request/response logging */
|
|
||||||
]
|
|
||||||
.join(","),
|
|
||||||
);
|
|
||||||
|
|
||||||
logger_thread
|
logger_thread
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,68 +1,74 @@
|
||||||
#![allow(clippy::integer_arithmetic)]
|
#![allow(clippy::integer_arithmetic)]
|
||||||
use clap::{
|
use {
|
||||||
crate_description, crate_name, value_t, value_t_or_exit, values_t, values_t_or_exit, App,
|
clap::{
|
||||||
AppSettings, Arg, ArgMatches, SubCommand,
|
crate_description, crate_name, value_t, value_t_or_exit, values_t, values_t_or_exit, App,
|
||||||
};
|
AppSettings, Arg, ArgMatches, SubCommand,
|
||||||
use console::style;
|
|
||||||
use fd_lock::FdLock;
|
|
||||||
use log::*;
|
|
||||||
use rand::{seq::SliceRandom, thread_rng, Rng};
|
|
||||||
use solana_clap_utils::{
|
|
||||||
input_parsers::{keypair_of, keypairs_of, pubkey_of, value_of},
|
|
||||||
input_validators::{
|
|
||||||
is_keypair_or_ask_keyword, is_parsable, is_pubkey, is_pubkey_or_keypair, is_slot,
|
|
||||||
},
|
},
|
||||||
keypair::SKIP_SEED_PHRASE_VALIDATION_ARG,
|
console::style,
|
||||||
};
|
fd_lock::FdLock,
|
||||||
use solana_client::{rpc_client::RpcClient, rpc_request::MAX_MULTIPLE_ACCOUNTS};
|
log::*,
|
||||||
use solana_core::ledger_cleanup_service::{
|
rand::{seq::SliceRandom, thread_rng, Rng},
|
||||||
DEFAULT_MAX_LEDGER_SHREDS, DEFAULT_MIN_MAX_LEDGER_SHREDS,
|
solana_clap_utils::{
|
||||||
};
|
input_parsers::{keypair_of, keypairs_of, pubkey_of, value_of},
|
||||||
use solana_core::{
|
input_validators::{
|
||||||
cluster_info::{ClusterInfo, Node, MINIMUM_VALIDATOR_PORT_RANGE_WIDTH, VALIDATOR_PORT_RANGE},
|
is_keypair_or_ask_keyword, is_parsable, is_pubkey, is_pubkey_or_keypair, is_slot,
|
||||||
contact_info::ContactInfo,
|
},
|
||||||
gossip_service::GossipService,
|
keypair::SKIP_SEED_PHRASE_VALIDATION_ARG,
|
||||||
poh_service,
|
},
|
||||||
rpc::JsonRpcConfig,
|
solana_client::{rpc_client::RpcClient, rpc_request::MAX_MULTIPLE_ACCOUNTS},
|
||||||
rpc_pubsub_service::PubSubConfig,
|
solana_core::ledger_cleanup_service::{
|
||||||
tpu::DEFAULT_TPU_COALESCE_MS,
|
DEFAULT_MAX_LEDGER_SHREDS, DEFAULT_MIN_MAX_LEDGER_SHREDS,
|
||||||
validator::{is_snapshot_config_invalid, Validator, ValidatorConfig},
|
},
|
||||||
};
|
solana_core::{
|
||||||
use solana_download_utils::{download_genesis_if_missing, download_snapshot};
|
cluster_info::{
|
||||||
use solana_ledger::blockstore_db::BlockstoreRecoveryMode;
|
ClusterInfo, Node, MINIMUM_VALIDATOR_PORT_RANGE_WIDTH, VALIDATOR_PORT_RANGE,
|
||||||
use solana_perf::recycler::enable_recycler_warming;
|
},
|
||||||
use solana_runtime::{
|
contact_info::ContactInfo,
|
||||||
accounts_index::AccountIndex,
|
gossip_service::GossipService,
|
||||||
bank_forks::{ArchiveFormat, SnapshotConfig, SnapshotVersion},
|
poh_service,
|
||||||
hardened_unpack::{unpack_genesis_archive, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE},
|
rpc::JsonRpcConfig,
|
||||||
snapshot_utils::get_highest_snapshot_archive_path,
|
rpc_pubsub_service::PubSubConfig,
|
||||||
};
|
tpu::DEFAULT_TPU_COALESCE_MS,
|
||||||
use solana_sdk::{
|
validator::{
|
||||||
clock::{Slot, DEFAULT_S_PER_SLOT},
|
is_snapshot_config_invalid, Validator, ValidatorConfig, ValidatorStartProgress,
|
||||||
commitment_config::CommitmentConfig,
|
},
|
||||||
genesis_config::GenesisConfig,
|
},
|
||||||
hash::Hash,
|
solana_download_utils::{download_genesis_if_missing, download_snapshot},
|
||||||
pubkey::Pubkey,
|
solana_ledger::blockstore_db::BlockstoreRecoveryMode,
|
||||||
signature::{Keypair, Signer},
|
solana_perf::recycler::enable_recycler_warming,
|
||||||
};
|
solana_runtime::{
|
||||||
use solana_validator::{
|
accounts_index::AccountIndex,
|
||||||
admin_rpc_service, dashboard::Dashboard, new_spinner_progress_bar, println_name_value,
|
bank_forks::{ArchiveFormat, SnapshotConfig, SnapshotVersion},
|
||||||
redirect_stderr_to_file,
|
hardened_unpack::{unpack_genesis_archive, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE},
|
||||||
};
|
snapshot_utils::get_highest_snapshot_archive_path,
|
||||||
use std::{
|
},
|
||||||
collections::{HashSet, VecDeque},
|
solana_sdk::{
|
||||||
env,
|
clock::{Slot, DEFAULT_S_PER_SLOT},
|
||||||
fs::{self, File},
|
commitment_config::CommitmentConfig,
|
||||||
net::{IpAddr, SocketAddr, TcpListener, UdpSocket},
|
genesis_config::GenesisConfig,
|
||||||
path::{Path, PathBuf},
|
hash::Hash,
|
||||||
process::exit,
|
pubkey::Pubkey,
|
||||||
str::FromStr,
|
signature::{Keypair, Signer},
|
||||||
sync::{
|
},
|
||||||
atomic::{AtomicBool, Ordering},
|
solana_validator::{
|
||||||
Arc,
|
admin_rpc_service, dashboard::Dashboard, new_spinner_progress_bar, println_name_value,
|
||||||
|
redirect_stderr_to_file,
|
||||||
|
},
|
||||||
|
std::{
|
||||||
|
collections::{HashSet, VecDeque},
|
||||||
|
env,
|
||||||
|
fs::{self, File},
|
||||||
|
net::{IpAddr, SocketAddr, TcpListener, UdpSocket},
|
||||||
|
path::{Path, PathBuf},
|
||||||
|
process::exit,
|
||||||
|
str::FromStr,
|
||||||
|
sync::{
|
||||||
|
atomic::{AtomicBool, Ordering},
|
||||||
|
Arc, RwLock,
|
||||||
|
},
|
||||||
|
thread::sleep,
|
||||||
|
time::{Duration, Instant, SystemTime},
|
||||||
},
|
},
|
||||||
thread::sleep,
|
|
||||||
time::{Duration, Instant, SystemTime},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
|
@ -753,6 +759,7 @@ fn rpc_bootstrap(
|
||||||
use_progress_bar: bool,
|
use_progress_bar: bool,
|
||||||
maximum_local_snapshot_age: Slot,
|
maximum_local_snapshot_age: Slot,
|
||||||
should_check_duplicate_instance: bool,
|
should_check_duplicate_instance: bool,
|
||||||
|
start_progress: &Arc<RwLock<ValidatorStartProgress>>,
|
||||||
) {
|
) {
|
||||||
if !no_port_check {
|
if !no_port_check {
|
||||||
let mut order: Vec<_> = (0..cluster_entrypoints.len()).collect();
|
let mut order: Vec<_> = (0..cluster_entrypoints.len()).collect();
|
||||||
|
@ -773,6 +780,8 @@ fn rpc_bootstrap(
|
||||||
let mut gossip = None;
|
let mut gossip = None;
|
||||||
loop {
|
loop {
|
||||||
if gossip.is_none() {
|
if gossip.is_none() {
|
||||||
|
*start_progress.write().unwrap() = ValidatorStartProgress::SearchingForRpcService;
|
||||||
|
|
||||||
gossip = Some(start_gossip_node(
|
gossip = Some(start_gossip_node(
|
||||||
&identity_keypair,
|
&identity_keypair,
|
||||||
&cluster_entrypoints,
|
&cluster_entrypoints,
|
||||||
|
@ -876,6 +885,11 @@ fn rpc_bootstrap(
|
||||||
.get_slot_with_commitment(CommitmentConfig::finalized())
|
.get_slot_with_commitment(CommitmentConfig::finalized())
|
||||||
.map_err(|err| format!("Failed to get RPC node slot: {}", err))
|
.map_err(|err| format!("Failed to get RPC node slot: {}", err))
|
||||||
.and_then(|slot| {
|
.and_then(|slot| {
|
||||||
|
*start_progress.write().unwrap() =
|
||||||
|
ValidatorStartProgress::DownloadingSnapshot {
|
||||||
|
slot: snapshot_hash.0,
|
||||||
|
rpc_addr: rpc_contact_info.rpc,
|
||||||
|
};
|
||||||
info!("RPC node root slot: {}", slot);
|
info!("RPC node root slot: {}", slot);
|
||||||
let (cluster_info, gossip_exit_flag, gossip_service) =
|
let (cluster_info, gossip_exit_flag, gossip_service) =
|
||||||
gossip.take().unwrap();
|
gossip.take().unwrap();
|
||||||
|
@ -2118,12 +2132,14 @@ pub fn main() {
|
||||||
info!("{} {}", crate_name!(), solana_version::version!());
|
info!("{} {}", crate_name!(), solana_version::version!());
|
||||||
info!("Starting validator with: {:#?}", std::env::args_os());
|
info!("Starting validator with: {:#?}", std::env::args_os());
|
||||||
|
|
||||||
|
let start_progress = Arc::new(RwLock::new(ValidatorStartProgress::default()));
|
||||||
admin_rpc_service::run(
|
admin_rpc_service::run(
|
||||||
&ledger_path,
|
&ledger_path,
|
||||||
admin_rpc_service::AdminRpcRequestMetadata {
|
admin_rpc_service::AdminRpcRequestMetadata {
|
||||||
rpc_addr: validator_config.rpc_addrs.map(|(rpc_addr, _)| rpc_addr),
|
rpc_addr: validator_config.rpc_addrs.map(|(rpc_addr, _)| rpc_addr),
|
||||||
start_time: std::time::SystemTime::now(),
|
start_time: std::time::SystemTime::now(),
|
||||||
validator_exit: validator_config.validator_exit.clone(),
|
validator_exit: validator_config.validator_exit.clone(),
|
||||||
|
start_progress: start_progress.clone(),
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -2241,7 +2257,9 @@ pub fn main() {
|
||||||
use_progress_bar,
|
use_progress_bar,
|
||||||
maximum_local_snapshot_age,
|
maximum_local_snapshot_age,
|
||||||
should_check_duplicate_instance,
|
should_check_duplicate_instance,
|
||||||
|
&start_progress,
|
||||||
);
|
);
|
||||||
|
*start_progress.write().unwrap() = ValidatorStartProgress::Initializing;
|
||||||
}
|
}
|
||||||
|
|
||||||
if operation == Operation::Initialize {
|
if operation == Operation::Initialize {
|
||||||
|
@ -2257,6 +2275,7 @@ pub fn main() {
|
||||||
cluster_entrypoints,
|
cluster_entrypoints,
|
||||||
&validator_config,
|
&validator_config,
|
||||||
should_check_duplicate_instance,
|
should_check_duplicate_instance,
|
||||||
|
start_progress,
|
||||||
);
|
);
|
||||||
|
|
||||||
if let Some(filename) = init_complete_file {
|
if let Some(filename) = init_complete_file {
|
||||||
|
|
Loading…
Reference in New Issue