2019-12-11 16:05:10 -08:00
|
|
|
//! A command-line executable for monitoring the health of a cluster
|
|
|
|
|
2019-12-12 23:49:16 -08:00
|
|
|
mod notifier;
|
|
|
|
|
|
|
|
use crate::notifier::Notifier;
|
2019-12-11 16:05:10 -08:00
|
|
|
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
|
|
|
|
use log::*;
|
2019-12-16 09:06:08 -08:00
|
|
|
use solana_clap_utils::{
|
|
|
|
input_parsers::pubkey_of,
|
|
|
|
input_validators::{is_pubkey_or_keypair, is_url},
|
|
|
|
};
|
2019-12-11 16:05:10 -08:00
|
|
|
use solana_client::rpc_client::RpcClient;
|
|
|
|
use solana_metrics::{datapoint_error, datapoint_info};
|
|
|
|
use std::{error, io, thread::sleep, time::Duration};
|
|
|
|
|
|
|
|
fn main() -> Result<(), Box<dyn error::Error>> {
|
|
|
|
let matches = App::new(crate_name!())
|
|
|
|
.about(crate_description!())
|
|
|
|
.version(solana_clap_utils::version!())
|
|
|
|
.arg(
|
|
|
|
Arg::with_name("json_rpc_url")
|
|
|
|
.long("url")
|
|
|
|
.value_name("URL")
|
|
|
|
.takes_value(true)
|
|
|
|
.required(true)
|
|
|
|
.validator(is_url)
|
|
|
|
.help("JSON RPC URL for the cluster"),
|
|
|
|
)
|
|
|
|
.arg(
|
|
|
|
Arg::with_name("interval")
|
|
|
|
.long("interval")
|
|
|
|
.value_name("SECONDS")
|
|
|
|
.takes_value(true)
|
|
|
|
.default_value("60")
|
|
|
|
.help("Wait interval seconds between checking the cluster"),
|
|
|
|
)
|
2019-12-16 09:06:08 -08:00
|
|
|
.arg(
|
|
|
|
Arg::with_name("validator_identity")
|
|
|
|
.long("validator-identity")
|
|
|
|
.value_name("VALIDATOR IDENTITY PUBKEY")
|
|
|
|
.takes_value(true)
|
|
|
|
.validator(is_pubkey_or_keypair)
|
|
|
|
.help("Monitor a specific validator only instead of the entire cluster"),
|
|
|
|
)
|
2019-12-11 16:05:10 -08:00
|
|
|
.get_matches();
|
|
|
|
|
|
|
|
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
|
|
|
|
let json_rpc_url = value_t_or_exit!(matches, "json_rpc_url", String);
|
2019-12-16 09:06:08 -08:00
|
|
|
let validator_identity = pubkey_of(&matches, "validator_identity").map(|i| i.to_string());
|
2019-12-11 16:05:10 -08:00
|
|
|
|
|
|
|
solana_logger::setup_with_filter("solana=info");
|
|
|
|
solana_metrics::set_panic_hook("watchtower");
|
|
|
|
|
|
|
|
let rpc_client = RpcClient::new(json_rpc_url.to_string());
|
|
|
|
|
2019-12-12 23:49:16 -08:00
|
|
|
let notifier = Notifier::new();
|
2019-12-11 16:05:10 -08:00
|
|
|
let mut last_transaction_count = 0;
|
|
|
|
loop {
|
|
|
|
let ok = rpc_client
|
|
|
|
.get_transaction_count()
|
|
|
|
.and_then(|transaction_count| {
|
|
|
|
info!("Current transaction count: {}", transaction_count);
|
|
|
|
|
|
|
|
if transaction_count > last_transaction_count {
|
|
|
|
last_transaction_count = transaction_count;
|
|
|
|
Ok(true)
|
|
|
|
} else {
|
|
|
|
Err(io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
format!(
|
|
|
|
"Transaction count is not advancing: {} <= {}",
|
|
|
|
transaction_count, last_transaction_count
|
|
|
|
),
|
|
|
|
))
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.unwrap_or_else(|err| {
|
|
|
|
datapoint_error!(
|
|
|
|
"watchtower-sanity-failure",
|
|
|
|
("test", "transaction-count", String),
|
|
|
|
("err", err.to_string(), String)
|
|
|
|
);
|
|
|
|
false
|
|
|
|
})
|
|
|
|
&& rpc_client
|
|
|
|
.get_recent_blockhash()
|
|
|
|
.and_then(|(blockhash, _fee_calculator)| {
|
|
|
|
info!("Current blockhash: {}", blockhash);
|
|
|
|
rpc_client.get_new_blockhash(&blockhash)
|
|
|
|
})
|
|
|
|
.and_then(|(blockhash, _fee_calculator)| {
|
|
|
|
info!("New blockhash: {}", blockhash);
|
|
|
|
Ok(true)
|
|
|
|
})
|
|
|
|
.unwrap_or_else(|err| {
|
|
|
|
datapoint_error!(
|
|
|
|
"watchtower-sanity-failure",
|
|
|
|
("test", "blockhash", String),
|
|
|
|
("err", err.to_string(), String)
|
|
|
|
);
|
|
|
|
false
|
|
|
|
})
|
|
|
|
&& rpc_client
|
|
|
|
.get_vote_accounts()
|
|
|
|
.and_then(|vote_accounts| {
|
|
|
|
info!("Current validator count: {}", vote_accounts.current.len());
|
|
|
|
info!(
|
|
|
|
"Delinquent validator count: {}",
|
|
|
|
vote_accounts.delinquent.len()
|
|
|
|
);
|
2019-12-16 09:06:08 -08:00
|
|
|
|
|
|
|
match validator_identity.as_ref() {
|
|
|
|
Some(validator_identity) => {
|
|
|
|
if vote_accounts
|
|
|
|
.current
|
|
|
|
.iter()
|
|
|
|
.any(|vai| vai.node_pubkey == *validator_identity)
|
|
|
|
{
|
|
|
|
Ok(true)
|
|
|
|
} else if vote_accounts
|
|
|
|
.delinquent
|
|
|
|
.iter()
|
|
|
|
.any(|vai| vai.node_pubkey == *validator_identity)
|
|
|
|
{
|
|
|
|
Err(io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
format!("Validator {} is delinquent", validator_identity),
|
|
|
|
))
|
|
|
|
} else {
|
|
|
|
Err(io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
format!("Validator {} is missing", validator_identity),
|
|
|
|
))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
None => {
|
|
|
|
if vote_accounts.delinquent.is_empty() {
|
|
|
|
Ok(true)
|
|
|
|
} else {
|
|
|
|
Err(io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
format!(
|
|
|
|
"{} delinquent validators",
|
|
|
|
vote_accounts.delinquent.len()
|
|
|
|
),
|
|
|
|
))
|
|
|
|
}
|
|
|
|
}
|
2019-12-11 16:05:10 -08:00
|
|
|
}
|
|
|
|
})
|
|
|
|
.unwrap_or_else(|err| {
|
|
|
|
datapoint_error!(
|
|
|
|
"watchtower-sanity-failure",
|
|
|
|
("test", "delinquent-validators", String),
|
|
|
|
("err", err.to_string(), String)
|
|
|
|
);
|
|
|
|
false
|
|
|
|
});
|
|
|
|
|
|
|
|
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
2019-12-12 23:49:16 -08:00
|
|
|
if !ok {
|
|
|
|
notifier.send("solana-watchtower sanity failure");
|
|
|
|
}
|
2019-12-11 16:05:10 -08:00
|
|
|
sleep(interval);
|
|
|
|
}
|
|
|
|
}
|