solana/watchtower/src/main.rs

221 lines
8.6 KiB
Rust
Raw Normal View History

2019-12-11 16:05:10 -08:00
//! A command-line executable for monitoring the health of a cluster
mod notifier;
use crate::notifier::Notifier;
2020-03-09 12:35:32 -07:00
use clap::{crate_description, crate_name, value_t, value_t_or_exit, App, Arg};
2019-12-11 16:05:10 -08:00
use log::*;
use solana_clap_utils::{
input_parsers::pubkey_of,
input_validators::{is_pubkey_or_keypair, is_url},
};
2020-03-09 12:35:32 -07:00
use solana_cli_config::{Config, CONFIG_FILE};
2019-12-11 16:05:10 -08:00
use solana_client::rpc_client::RpcClient;
use solana_metrics::{datapoint_error, datapoint_info};
use std::{error, io, thread::sleep, time::Duration};
fn main() -> Result<(), Box<dyn error::Error>> {
let matches = App::new(crate_name!())
.about(crate_description!())
.version(solana_clap_utils::version!())
2020-03-09 12:35:32 -07:00
.arg({
let arg = Arg::with_name("config_file")
.short("C")
.long("config")
.value_name("PATH")
.takes_value(true)
.global(true)
.help("Configuration file to use");
if let Some(ref config_file) = *CONFIG_FILE {
arg.default_value(&config_file)
} else {
arg
}
})
2019-12-11 16:05:10 -08:00
.arg(
Arg::with_name("json_rpc_url")
.long("url")
.value_name("URL")
.takes_value(true)
.validator(is_url)
.help("JSON RPC URL for the cluster"),
)
.arg(
Arg::with_name("interval")
.long("interval")
.value_name("SECONDS")
.takes_value(true)
.default_value("60")
.help("Wait interval seconds between checking the cluster"),
)
.arg(
Arg::with_name("validator_identity")
.long("validator-identity")
.value_name("VALIDATOR IDENTITY PUBKEY")
.takes_value(true)
.validator(is_pubkey_or_keypair)
.help("Monitor a specific validator only instead of the entire cluster"),
)
.arg(
Arg::with_name("no_duplicate_notifications")
.long("no-duplicate-notifications")
.takes_value(false)
.help("Subsequent identical notifications will be suppressed"),
)
2019-12-11 16:05:10 -08:00
.get_matches();
2020-03-09 12:35:32 -07:00
let config = if let Some(config_file) = matches.value_of("config_file") {
Config::load(config_file).unwrap_or_default()
} else {
Config::default()
};
2019-12-11 16:05:10 -08:00
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
2020-03-09 12:35:32 -07:00
let json_rpc_url =
value_t!(matches, "json_rpc_url", String).unwrap_or_else(|_| config.json_rpc_url);
let validator_identity = pubkey_of(&matches, "validator_identity").map(|i| i.to_string());
let no_duplicate_notifications = matches.is_present("no_duplicate_notifications");
2019-12-11 16:05:10 -08:00
2020-01-08 09:19:12 -08:00
solana_logger::setup_with_default("solana=info");
2019-12-11 16:05:10 -08:00
solana_metrics::set_panic_hook("watchtower");
2020-03-09 12:35:32 -07:00
info!("RPC URL: {}", json_rpc_url);
let rpc_client = RpcClient::new(json_rpc_url);
2019-12-11 16:05:10 -08:00
let notifier = Notifier::new();
2019-12-11 16:05:10 -08:00
let mut last_transaction_count = 0;
let mut last_check_notification_sent = false;
let mut last_notification_msg = String::from("");
2019-12-11 16:05:10 -08:00
loop {
let mut notify_msg = String::from("solana-watchtower: undefined error");
2019-12-11 16:05:10 -08:00
let ok = rpc_client
.get_transaction_count()
.and_then(|transaction_count| {
info!("Current transaction count: {}", transaction_count);
if transaction_count > last_transaction_count {
last_transaction_count = transaction_count;
Ok(true)
} else {
Err(io::Error::new(
io::ErrorKind::Other,
format!(
"Transaction count is not advancing: {} <= {}",
transaction_count, last_transaction_count
),
))
}
})
.unwrap_or_else(|err| {
notify_msg = format!("solana-watchtower: {}", err.to_string());
2019-12-11 16:05:10 -08:00
datapoint_error!(
"watchtower-sanity-failure",
("test", "transaction-count", String),
("err", err.to_string(), String)
);
false
})
&& rpc_client
.get_recent_blockhash()
.and_then(|(blockhash, _fee_calculator)| {
info!("Current blockhash: {}", blockhash);
rpc_client.get_new_blockhash(&blockhash)
})
.and_then(|(blockhash, _fee_calculator)| {
info!("New blockhash: {}", blockhash);
Ok(true)
})
.unwrap_or_else(|err| {
notify_msg = format!("solana-watchtower: {}", err.to_string());
2019-12-11 16:05:10 -08:00
datapoint_error!(
"watchtower-sanity-failure",
("test", "blockhash", String),
("err", err.to_string(), String)
);
false
})
&& rpc_client
.get_vote_accounts()
.and_then(|vote_accounts| {
info!("Current validator count: {}", vote_accounts.current.len());
info!(
"Delinquent validator count: {}",
vote_accounts.delinquent.len()
);
match validator_identity.as_ref() {
Some(validator_identity) => {
if vote_accounts
.current
.iter()
.any(|vai| vai.node_pubkey == *validator_identity)
{
Ok(true)
} else if vote_accounts
.delinquent
.iter()
.any(|vai| vai.node_pubkey == *validator_identity)
{
Err(io::Error::new(
io::ErrorKind::Other,
format!("Validator {} is delinquent", validator_identity),
))
} else {
Err(io::Error::new(
io::ErrorKind::Other,
format!("Validator {} is missing", validator_identity),
))
}
}
None => {
if vote_accounts.delinquent.is_empty() {
Ok(true)
} else {
Err(io::Error::new(
io::ErrorKind::Other,
format!(
"{} delinquent validators",
vote_accounts.delinquent.len()
),
))
}
}
2019-12-11 16:05:10 -08:00
}
})
.unwrap_or_else(|err| {
notify_msg = format!("solana-watchtower: {}", err.to_string());
2019-12-11 16:05:10 -08:00
datapoint_error!(
"watchtower-sanity-failure",
("test", "delinquent-validators", String),
("err", err.to_string(), String)
);
false
});
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
if !ok {
last_check_notification_sent = true;
if no_duplicate_notifications {
if last_notification_msg != notify_msg {
notifier.send(&notify_msg);
last_notification_msg = notify_msg;
} else {
datapoint_info!(
"watchtower-sanity",
("Suppressing duplicate notification", ok, bool)
);
}
} else {
notifier.send(&notify_msg);
}
} else {
if last_check_notification_sent {
notifier.send("solana-watchtower: All Clear");
}
last_check_notification_sent = false;
last_notification_msg = String::from("");
}
2019-12-11 16:05:10 -08:00
sleep(interval);
}
}