Add validator-identity argument to support monitoring a specific validator only
This commit is contained in:
parent
862e7a410d
commit
844dddfee0
|
@ -4,6 +4,10 @@ count is advancing, new blockhashes are available, and no validators are
|
||||||
delinquent. Results are reported as InfluxDB metrics, with an optional
|
delinquent. Results are reported as InfluxDB metrics, with an optional
|
||||||
Slack/Discord push notification on sanity failure.
|
Slack/Discord push notification on sanity failure.
|
||||||
|
|
||||||
|
If you only care about the health of one specific validator, the
|
||||||
|
`--validator-identity` command-line argument can be used to restrict failure
|
||||||
|
notifications to issues affecting only that validator.
|
||||||
|
|
||||||
### Metrics
|
### Metrics
|
||||||
#### `watchtower-sanity`
|
#### `watchtower-sanity`
|
||||||
On every iteration this data point will be emitted indicating the overall result
|
On every iteration this data point will be emitted indicating the overall result
|
||||||
|
|
|
@ -5,7 +5,10 @@ mod notifier;
|
||||||
use crate::notifier::Notifier;
|
use crate::notifier::Notifier;
|
||||||
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
|
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
|
||||||
use log::*;
|
use log::*;
|
||||||
use solana_clap_utils::input_validators::is_url;
|
use solana_clap_utils::{
|
||||||
|
input_parsers::pubkey_of,
|
||||||
|
input_validators::{is_pubkey_or_keypair, is_url},
|
||||||
|
};
|
||||||
use solana_client::rpc_client::RpcClient;
|
use solana_client::rpc_client::RpcClient;
|
||||||
use solana_metrics::{datapoint_error, datapoint_info};
|
use solana_metrics::{datapoint_error, datapoint_info};
|
||||||
use std::{error, io, thread::sleep, time::Duration};
|
use std::{error, io, thread::sleep, time::Duration};
|
||||||
|
@ -31,10 +34,19 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
.default_value("60")
|
.default_value("60")
|
||||||
.help("Wait interval seconds between checking the cluster"),
|
.help("Wait interval seconds between checking the cluster"),
|
||||||
)
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("validator_identity")
|
||||||
|
.long("validator-identity")
|
||||||
|
.value_name("VALIDATOR IDENTITY PUBKEY")
|
||||||
|
.takes_value(true)
|
||||||
|
.validator(is_pubkey_or_keypair)
|
||||||
|
.help("Monitor a specific validator only instead of the entire cluster"),
|
||||||
|
)
|
||||||
.get_matches();
|
.get_matches();
|
||||||
|
|
||||||
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
|
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
|
||||||
let json_rpc_url = value_t_or_exit!(matches, "json_rpc_url", String);
|
let json_rpc_url = value_t_or_exit!(matches, "json_rpc_url", String);
|
||||||
|
let validator_identity = pubkey_of(&matches, "validator_identity").map(|i| i.to_string());
|
||||||
|
|
||||||
solana_logger::setup_with_filter("solana=info");
|
solana_logger::setup_with_filter("solana=info");
|
||||||
solana_metrics::set_panic_hook("watchtower");
|
solana_metrics::set_panic_hook("watchtower");
|
||||||
|
@ -96,13 +108,44 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
"Delinquent validator count: {}",
|
"Delinquent validator count: {}",
|
||||||
vote_accounts.delinquent.len()
|
vote_accounts.delinquent.len()
|
||||||
);
|
);
|
||||||
if vote_accounts.delinquent.is_empty() {
|
|
||||||
Ok(true)
|
match validator_identity.as_ref() {
|
||||||
} else {
|
Some(validator_identity) => {
|
||||||
Err(io::Error::new(
|
if vote_accounts
|
||||||
io::ErrorKind::Other,
|
.current
|
||||||
format!("{} delinquent validators", vote_accounts.delinquent.len()),
|
.iter()
|
||||||
))
|
.any(|vai| vai.node_pubkey == *validator_identity)
|
||||||
|
{
|
||||||
|
Ok(true)
|
||||||
|
} else if vote_accounts
|
||||||
|
.delinquent
|
||||||
|
.iter()
|
||||||
|
.any(|vai| vai.node_pubkey == *validator_identity)
|
||||||
|
{
|
||||||
|
Err(io::Error::new(
|
||||||
|
io::ErrorKind::Other,
|
||||||
|
format!("Validator {} is delinquent", validator_identity),
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Err(io::Error::new(
|
||||||
|
io::ErrorKind::Other,
|
||||||
|
format!("Validator {} is missing", validator_identity),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
if vote_accounts.delinquent.is_empty() {
|
||||||
|
Ok(true)
|
||||||
|
} else {
|
||||||
|
Err(io::Error::new(
|
||||||
|
io::ErrorKind::Other,
|
||||||
|
format!(
|
||||||
|
"{} delinquent validators",
|
||||||
|
vote_accounts.delinquent.len()
|
||||||
|
),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.unwrap_or_else(|err| {
|
.unwrap_or_else(|err| {
|
||||||
|
|
Loading…
Reference in New Issue