Add validator catchup command (#6922)

This commit is contained in:
Michael Vines 2019-11-13 15:58:14 -07:00 committed by GitHub
parent 5f38fa379c
commit f116cdeed9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 141 additions and 27 deletions

1
Cargo.lock generated
View File

@ -3199,6 +3199,7 @@ dependencies = [
"criterion-stats 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"ctrlc 3.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"indicatif 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@ -14,46 +14,40 @@ From another console, confirm the IP address and **identity pubkey** of your val
solana-gossip --entrypoint testnet.solana.com:8001 spy
```
## Check Vote Activity
## Monitoring Catch Up
The vote pubkey for the validator can be found by running:
It may take some time to catch up with the cluster after your validator boots.
Use the `catchup` command to monitor your validator through this process:
```bash
solana-keygen pubkey ~/validator-vote-keypair.json
solana catchup ~/validator-keypair.json
```
Provide the **vote pubkey** to the `solana show-vote-account` command to view the recent voting activity from your validator:
Until your validator has caught up, it will not be able to vote successfully and
stake cannot be delegated to it.
```bash
solana show-vote-account 2ozWvfaXQd1X6uKh8jERoRGApDqSqcEy6fF1oN13LL2G
```
Also if you find the cluster's slot advancing faster than yours, you will likely
never catch up. This typically implies some kind of networking issue between
your validator and the rest of the cluster.
## Check Your Balance
Your account balance should decrease by the transaction fee amount as your validator submits votes, and increase after serving as the leader. Pass the `--lamports` argument to observe in finer detail:
Your account balance should decrease by the transaction fee amount as your
validator submits votes, and increase after serving as the leader. Pass the
`--lamports` argument to observe in finer detail:
```bash
solana balance --lamports
```
## Check Slot Number
## Check Vote Activity
After your validator boots, it may take some time to catch up with the cluster. Use the `get-slot` command to view the current slot that the cluster is processing:
The `solana show-vote-account` command displays the recent voting activity from your validator:
```bash
solana get-slot
solana show-vote-account ~/validator-vote-keypair.json
```
The current slot that your validator is processing can then be seen with:
```bash
solana --url http://127.0.0.1:8899 get-slot
```
Until your validator has caught up, it will not be able to vote successfully and stake cannot be delegated to it.
Also if you find the cluster's slot advancing faster than yours, you will likely never catch up. This typically implies some kind of networking issue between your validator and the rest of the cluster.
## Get Cluster Info
There are several useful JSON-RPC endpoints for monitoring your validator on the cluster, as well as the health of the cluster:
@ -69,6 +63,7 @@ curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "m
curl -X POST -H "Content-Type: application/json" -d '{"jsonrpc":"2.0","id":1, "method":"getEpochInfo"}' http://testnet.solana.com:8899
```
## Validator Metrics
Metrics are available for local monitoring of your validator.

View File

@ -19,6 +19,7 @@ console = "0.9.1"
dirs = "2.0.2"
lazy_static = "1.4.0"
log = "0.4.8"
indicatif = "0.13.0"
num-traits = "0.2"
pretty-hex = "0.1.1"
reqwest = { version = "0.9.22", default-features = false, features = ["rustls-tls"] }

View File

@ -6,7 +6,6 @@ use clap::{App, AppSettings, Arg, ArgMatches, SubCommand};
use log::*;
use num_traits::FromPrimitive;
use serde_json::{self, json, Value};
use solana_budget_api::budget_instruction::{self, BudgetError};
use solana_clap_utils::{input_parsers::*, input_validators::*};
use solana_client::{client_error::ClientError, rpc_client::RpcClient};
@ -33,7 +32,6 @@ use solana_sdk::{
use solana_stake_api::stake_state::{Lockup, StakeAuthorize};
use solana_storage_api::storage_instruction::StorageAccountType;
use solana_vote_api::vote_state::VoteAuthorize;
use std::{
fs::File,
io::{Read, Write},
@ -71,6 +69,9 @@ impl std::ops::Deref for KeypairEq {
#[allow(clippy::large_enum_variant)]
pub enum CliCommand {
// Cluster Query Commands
Catchup {
node_pubkey: Pubkey,
},
ClusterVersion,
Fees,
GetEpochInfo,
@ -237,6 +238,7 @@ impl Default for CliConfig {
pub fn parse_command(matches: &ArgMatches<'_>) -> Result<CliCommandInfo, Box<dyn error::Error>> {
let response = match matches.subcommand() {
// Cluster Query Commands
("catchup", Some(matches)) => parse_catchup(matches),
("cluster-version", Some(_matches)) => Ok(CliCommandInfo {
command: CliCommand::ClusterVersion,
require_keypair: false,
@ -849,7 +851,8 @@ pub fn process_command(config: &CliConfig) -> ProcessResult {
// Cluster Query Commands
// Return software version of solana-cli and cluster entrypoint node
CliCommand::ClusterVersion => process_cluster_version(&rpc_client, config),
CliCommand::Catchup { node_pubkey } => process_catchup(&rpc_client, node_pubkey),
CliCommand::ClusterVersion => process_cluster_version(&rpc_client),
CliCommand::Fees => process_fees(&rpc_client),
CliCommand::GetGenesisHash => process_get_genesis_hash(&rpc_client),
CliCommand::GetSlot => process_get_slot(&rpc_client),

View File

@ -7,16 +7,20 @@ use crate::{
};
use clap::{value_t_or_exit, App, Arg, ArgMatches, SubCommand};
use console::{style, Emoji};
use indicatif::{ProgressBar, ProgressStyle};
use solana_clap_utils::{input_parsers::*, input_validators::*};
use solana_client::{rpc_client::RpcClient, rpc_request::RpcVoteAccountInfo};
use solana_sdk::{
clock,
commitment_config::CommitmentConfig,
hash::Hash,
pubkey::Pubkey,
signature::{Keypair, KeypairUtil},
system_transaction,
};
use std::{
collections::VecDeque,
thread::sleep,
time::{Duration, Instant},
};
@ -31,6 +35,19 @@ pub trait ClusterQuerySubCommands {
impl ClusterQuerySubCommands for App<'_, '_> {
fn cluster_query_subcommands(self) -> Self {
self.subcommand(
SubCommand::with_name("catchup")
.about("Wait for a validator to catch up to the cluster")
.arg(
Arg::with_name("node_pubkey")
.index(1)
.takes_value(true)
.value_name("PUBKEY")
.validator(is_pubkey_or_keypair)
.required(true)
.help("Identity pubkey of the validator"),
),
)
.subcommand(
SubCommand::with_name("cluster-version")
.about("Get the version of the cluster entrypoint"),
)
@ -97,6 +114,14 @@ impl ClusterQuerySubCommands for App<'_, '_> {
}
}
/// Builds the `CliCommand::Catchup` command from the parsed `catchup`
/// subcommand arguments.
///
/// The command never requires a keypair (`require_keypair` is `false`).
///
/// # Panics
///
/// Panics if `pubkey_of` yields `None` for the `node_pubkey` argument. The
/// subcommand definition marks that argument as required and attaches the
/// `is_pubkey_or_keypair` validator to it.
pub fn parse_catchup(matches: &ArgMatches<'_>) -> Result<CliCommandInfo, CliError> {
    let command = CliCommand::Catchup {
        node_pubkey: pubkey_of(matches, "node_pubkey").unwrap(),
    };
    let require_keypair = false;
    Ok(CliCommandInfo {
        command,
        require_keypair,
    })
}
pub fn parse_cluster_ping(matches: &ArgMatches<'_>) -> Result<CliCommandInfo, CliError> {
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
let count = if matches.is_present("count") {
@ -130,7 +155,74 @@ pub fn parse_show_validators(matches: &ArgMatches<'_>) -> Result<CliCommandInfo,
})
}
pub fn process_cluster_version(rpc_client: &RpcClient, _config: &CliConfig) -> ProcessResult {
/// Creates a new progress bar, rendered as a spinner, for work that will take
/// an unknown amount of time.
///
/// The bar shows a green spinner followed by the current message and is set
/// to tick steadily on its own.
fn new_spinner_progress_bar() -> ProgressBar {
    // The template renders only the spinner and the message, so the length
    // passed here is never displayed.
    let spinner = ProgressBar::new(42);
    let spinner_style = ProgressStyle::default_spinner().template("{spinner:.green} {wide_msg}");
    spinner.set_style(spinner_style);
    // NOTE(review): interval is presumably in milliseconds -- confirm against
    // the pinned indicatif version.
    spinner.enable_steady_tick(100);
    spinner
}
/// Waits until the validator identified by `node_pubkey` has caught up to the
/// cluster, reporting progress on a spinner while it waits.
///
/// The validator's RPC address is looked up in the cluster node list returned
/// by `rpc_client`. Every 5 seconds the slot reported by `rpc_client`
/// ("them") and the slot reported by the validator's own RPC service ("us")
/// are fetched with `CommitmentConfig::recent()` and compared.
///
/// Returns a summary message once the validator's slot exceeds the smaller of
/// the current and the previously observed cluster slot.
///
/// # Errors
///
/// Fails if the cluster node list cannot be fetched, if it has no entry for
/// `node_pubkey`, if that entry has no RPC address, or if any slot query
/// fails.
pub fn process_catchup(rpc_client: &RpcClient, node_pubkey: &Pubkey) -> ProcessResult {
    let cluster_nodes = rpc_client.get_cluster_nodes()?;

    // Contact info carries the pubkey as a string, so compare in string form.
    let node_id = node_pubkey.to_string();
    let contact_info = cluster_nodes
        .iter()
        .find(|contact_info| contact_info.pubkey == node_id)
        .ok_or_else(|| format!("Contact information not found for {}", node_pubkey))?;
    let rpc_addr = contact_info
        .rpc
        .ok_or_else(|| format!("RPC service not found for {}", node_pubkey))?;

    let spinner = new_spinner_progress_bar();
    spinner.set_message("Connecting...");

    let node_client = RpcClient::new_socket(rpc_addr);
    // Seconds between polls; also the divisor for the slots/second estimate.
    let sleep_interval = 5;
    // `u64::MAX` marks "no previous sample yet" for the first iteration.
    let mut previous_rpc_slot = std::u64::MAX;
    let mut previous_slot_distance = 0;

    loop {
        let rpc_slot = rpc_client.get_slot_with_commitment(CommitmentConfig::recent())?;
        let node_slot = node_client.get_slot_with_commitment(CommitmentConfig::recent())?;

        let catchup_target = std::cmp::min(previous_rpc_slot, rpc_slot);
        if node_slot > catchup_target {
            spinner.finish_and_clear();
            return Ok(format!(
                "{} has caught up (us:{} them:{})",
                node_pubkey, node_slot, rpc_slot,
            ));
        }

        let slot_distance = rpc_slot as i64 - node_slot as i64;

        // The trend suffix needs a previous sample, so it is empty on the
        // first pass through the loop.
        let trend = if previous_rpc_slot == std::u64::MAX {
            "".to_string()
        } else {
            let slots_per_second =
                (previous_slot_distance - slot_distance) as f64 / f64::from(sleep_interval);
            let direction = if slots_per_second < 0.0 {
                "falling behind"
            } else {
                "gaining"
            };
            format!(
                " and {} at {:.1} slots/second",
                direction, slots_per_second,
            )
        };

        spinner.set_message(&format!(
            "Validator is {} slots away (us:{} them:{}){}",
            slot_distance, node_slot, rpc_slot, trend
        ));

        sleep(Duration::from_secs(sleep_interval as u64));
        previous_rpc_slot = rpc_slot;
        previous_slot_distance = slot_distance;
    }
}
/// Fetches version information through `rpc_client` and returns its
/// `solana_core` field as the command output.
///
/// # Errors
///
/// Propagates any failure from `RpcClient::get_version`.
pub fn process_cluster_version(rpc_client: &RpcClient) -> ProcessResult {
    Ok(rpc_client.get_version()?.solana_core)
}

View File

@ -4,7 +4,7 @@ use crate::{
generic_rpc_client_request::GenericRpcClientRequest,
mock_rpc_client_request::MockRpcClientRequest,
rpc_client_request::RpcClientRequest,
rpc_request::{RpcEpochInfo, RpcRequest, RpcVersionInfo, RpcVoteAccountStatus},
rpc_request::{RpcContactInfo, RpcEpochInfo, RpcRequest, RpcVersionInfo, RpcVoteAccountStatus},
};
use bincode::serialize;
use log::*;
@ -177,6 +177,25 @@ impl RpcClient {
})
}
/// Sends a `GetClusterNodes` RPC request and deserializes the response into
/// a list of `RpcContactInfo` entries.
///
/// # Errors
///
/// Returns an `io::Error` of kind `Other` if the request fails or if the
/// response cannot be deserialized; the message says which step failed.
pub fn get_cluster_nodes(&self) -> io::Result<Vec<RpcContactInfo>> {
    // Both failure paths produce the same error kind and differ only in text.
    let other_error = |message: String| io::Error::new(io::ErrorKind::Other, message);

    let response = self
        .client
        .send(&RpcRequest::GetClusterNodes, None, 0, None)
        .map_err(|err| other_error(format!("GetClusterNodes request failure: {:?}", err)))?;

    serde_json::from_value(response)
        .map_err(|err| other_error(format!("GetClusterNodes parse failure: {}", err)))
}
pub fn get_epoch_info(&self) -> io::Result<RpcEpochInfo> {
let response = self
.client

View File

@ -378,9 +378,12 @@ EOF
waitForNodeToInit
if [[ $skipSetup != true && $nodeType != blockstreamer ]]; then
# Wait for the validator to catch up to the bootstrap leader before
# delegating stake to it
solana --url http://"$entrypointIp":8899 catchup config/validator-identity.json
args=(
--url http://"$entrypointIp":8899
--force
"$stake"
)
if [[ $airdropsEnabled != true ]]; then