watchtower: flag to suppress duplicate notifications (#8549)
* watchtower: send error message as notification * watchtower: send all clear notification when ok again * watchtower: add twilio sms notifications * watchtower: flag to suppress duplicate notifications * remove trailing space character * changes as per suggestion on PR * all changes together * cargo fmt
This commit is contained in:
parent
d86103383a
commit
b6553357f9
|
@ -8,6 +8,11 @@ If you only care about the health of one specific validator, the
|
||||||
`--validator-identity` command-line argument can be used to restrict failure
|
`--validator-identity` command-line argument can be used to restrict failure
|
||||||
notifications to issues only affecting that validator.
|
notifications to issues only affecting that validator.
|
||||||
|
|
||||||
|
If you do not want duplicate notifications, for example if you have elected to
|
||||||
|
receive notifications by SMS, the
|
||||||
|
`--no-duplicate-notifications` command-line argument will suppress identical
|
||||||
|
failure notifications.
|
||||||
|
|
||||||
### Metrics
|
### Metrics
|
||||||
#### `watchtower-sanity`
|
#### `watchtower-sanity`
|
||||||
On every iteration this data point will be emitted indicating the overall result
|
On every iteration this data point will be emitted indicating the overall result
|
||||||
|
@ -33,3 +38,10 @@ Telegram requires the following two variables:
|
||||||
export TELEGRAM_BOT_TOKEN=...
|
export TELEGRAM_BOT_TOKEN=...
|
||||||
export TELEGRAM_CHAT_ID=...
|
export TELEGRAM_CHAT_ID=...
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To receive a Twilio SMS notification on failure, having a Twilio account,
|
||||||
|
and a sending number owned by that account,
|
||||||
|
define the following environment variable before running `solana-watchtower`:
|
||||||
|
```
|
||||||
|
export TWILIO_CONFIG='ACCOUNT=<account>,TOKEN=<securityToken>,TO=<receivingNumber>,FROM=<sendingNumber>'
|
||||||
|
```
|
||||||
|
|
|
@ -42,11 +42,18 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
.validator(is_pubkey_or_keypair)
|
.validator(is_pubkey_or_keypair)
|
||||||
.help("Monitor a specific validator only instead of the entire cluster"),
|
.help("Monitor a specific validator only instead of the entire cluster"),
|
||||||
)
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("no_duplicate_notifications")
|
||||||
|
.long("no-duplicate-notifications")
|
||||||
|
.takes_value(false)
|
||||||
|
.help("Subsequent identical notifications will be suppressed"),
|
||||||
|
)
|
||||||
.get_matches();
|
.get_matches();
|
||||||
|
|
||||||
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
|
let interval = Duration::from_secs(value_t_or_exit!(matches, "interval", u64));
|
||||||
let json_rpc_url = value_t_or_exit!(matches, "json_rpc_url", String);
|
let json_rpc_url = value_t_or_exit!(matches, "json_rpc_url", String);
|
||||||
let validator_identity = pubkey_of(&matches, "validator_identity").map(|i| i.to_string());
|
let validator_identity = pubkey_of(&matches, "validator_identity").map(|i| i.to_string());
|
||||||
|
let no_duplicate_notifications = matches.is_present("no_duplicate_notifications");
|
||||||
|
|
||||||
solana_logger::setup_with_default("solana=info");
|
solana_logger::setup_with_default("solana=info");
|
||||||
solana_metrics::set_panic_hook("watchtower");
|
solana_metrics::set_panic_hook("watchtower");
|
||||||
|
@ -55,7 +62,10 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
|
|
||||||
let notifier = Notifier::new();
|
let notifier = Notifier::new();
|
||||||
let mut last_transaction_count = 0;
|
let mut last_transaction_count = 0;
|
||||||
|
let mut last_check_notification_sent = false;
|
||||||
|
let mut last_notification_msg = String::from("");
|
||||||
loop {
|
loop {
|
||||||
|
let mut notify_msg = String::from("solana-watchtower: undefined error");
|
||||||
let ok = rpc_client
|
let ok = rpc_client
|
||||||
.get_transaction_count()
|
.get_transaction_count()
|
||||||
.and_then(|transaction_count| {
|
.and_then(|transaction_count| {
|
||||||
|
@ -75,6 +85,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.unwrap_or_else(|err| {
|
.unwrap_or_else(|err| {
|
||||||
|
notify_msg = format!("solana-watchtower: {}", err.to_string());
|
||||||
datapoint_error!(
|
datapoint_error!(
|
||||||
"watchtower-sanity-failure",
|
"watchtower-sanity-failure",
|
||||||
("test", "transaction-count", String),
|
("test", "transaction-count", String),
|
||||||
|
@ -93,6 +104,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
Ok(true)
|
Ok(true)
|
||||||
})
|
})
|
||||||
.unwrap_or_else(|err| {
|
.unwrap_or_else(|err| {
|
||||||
|
notify_msg = format!("solana-watchtower: {}", err.to_string());
|
||||||
datapoint_error!(
|
datapoint_error!(
|
||||||
"watchtower-sanity-failure",
|
"watchtower-sanity-failure",
|
||||||
("test", "blockhash", String),
|
("test", "blockhash", String),
|
||||||
|
@ -149,6 +161,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.unwrap_or_else(|err| {
|
.unwrap_or_else(|err| {
|
||||||
|
notify_msg = format!("solana-watchtower: {}", err.to_string());
|
||||||
datapoint_error!(
|
datapoint_error!(
|
||||||
"watchtower-sanity-failure",
|
"watchtower-sanity-failure",
|
||||||
("test", "delinquent-validators", String),
|
("test", "delinquent-validators", String),
|
||||||
|
@ -159,7 +172,26 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
||||||
|
|
||||||
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
||||||
if !ok {
|
if !ok {
|
||||||
notifier.send("solana-watchtower sanity failure");
|
last_check_notification_sent = true;
|
||||||
|
if no_duplicate_notifications {
|
||||||
|
if last_notification_msg != notify_msg {
|
||||||
|
notifier.send(&notify_msg);
|
||||||
|
last_notification_msg = notify_msg;
|
||||||
|
} else {
|
||||||
|
datapoint_info!(
|
||||||
|
"watchtower-sanity",
|
||||||
|
("Suppressing duplicate notification", ok, bool)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
notifier.send(&notify_msg);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if last_check_notification_sent {
|
||||||
|
notifier.send("solana-watchtower: All Clear");
|
||||||
|
}
|
||||||
|
last_check_notification_sent = false;
|
||||||
|
last_notification_msg = String::from("");
|
||||||
}
|
}
|
||||||
sleep(interval);
|
sleep(interval);
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,11 +8,60 @@ struct TelegramWebHook {
|
||||||
chat_id: String,
|
chat_id: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Settings needed to send an SMS through Twilio: the account identifier and
/// token used to authenticate the request, plus the receiving (`to`) and
/// sending (`from`) numbers.
#[derive(Debug, Default)]
struct TwilioWebHook {
    account: String,
    token: String,
    to: String,
    from: String,
}

impl TwilioWebHook {
    /// Returns `true` only when every field holds a non-empty value.
    fn complete(&self) -> bool {
        [&self.account, &self.token, &self.to, &self.from]
            .iter()
            .all(|field| !field.is_empty())
    }
}
|
||||||
|
|
||||||
|
fn get_twilio_config() -> Result<Option<TwilioWebHook>, String> {
|
||||||
|
let config_var = env::var("TWILIO_CONFIG");
|
||||||
|
|
||||||
|
if config_var.is_err() {
|
||||||
|
info!("Twilio notifications disabled");
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut config = TwilioWebHook::default();
|
||||||
|
|
||||||
|
for pair in config_var.unwrap().split(',') {
|
||||||
|
let nv: Vec<_> = pair.split('=').collect();
|
||||||
|
if nv.len() != 2 {
|
||||||
|
return Err(format!("TWILIO_CONFIG is invalid: '{}'", pair));
|
||||||
|
}
|
||||||
|
let v = nv[1].to_string();
|
||||||
|
match nv[0] {
|
||||||
|
"ACCOUNT" => config.account = v,
|
||||||
|
"TOKEN" => config.token = v,
|
||||||
|
"TO" => config.to = v,
|
||||||
|
"FROM" => config.from = v,
|
||||||
|
_ => return Err(format!("TWILIO_CONFIG is invalid: '{}'", pair)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !config.complete() {
|
||||||
|
return Err("TWILIO_CONFIG is incomplete".to_string());
|
||||||
|
}
|
||||||
|
Ok(Some(config))
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Notifier {
|
pub struct Notifier {
|
||||||
client: Client,
|
client: Client,
|
||||||
discord_webhook: Option<String>,
|
discord_webhook: Option<String>,
|
||||||
slack_webhook: Option<String>,
|
slack_webhook: Option<String>,
|
||||||
telegram_webhook: Option<TelegramWebHook>,
|
telegram_webhook: Option<TelegramWebHook>,
|
||||||
|
twilio_webhook: Option<TwilioWebHook>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Notifier {
|
impl Notifier {
|
||||||
|
@ -35,12 +84,16 @@ impl Notifier {
|
||||||
info!("Telegram notifications disabled");
|
info!("Telegram notifications disabled");
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
let twilio_webhook = get_twilio_config()
|
||||||
|
.map_err(|err| panic!("Twilio config error: {}", err))
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
Notifier {
|
Notifier {
|
||||||
client: Client::new(),
|
client: Client::new(),
|
||||||
discord_webhook,
|
discord_webhook,
|
||||||
slack_webhook,
|
slack_webhook,
|
||||||
telegram_webhook,
|
telegram_webhook,
|
||||||
|
twilio_webhook,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,5 +120,22 @@ impl Notifier {
|
||||||
warn!("Failed to send Telegram message: {:?}", err);
|
warn!("Failed to send Telegram message: {:?}", err);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(TwilioWebHook {
|
||||||
|
account,
|
||||||
|
token,
|
||||||
|
to,
|
||||||
|
from,
|
||||||
|
}) = &self.twilio_webhook
|
||||||
|
{
|
||||||
|
let url = format!(
|
||||||
|
"https://{}:{}@api.twilio.com/2010-04-01/Accounts/{}/Messages.json",
|
||||||
|
account, token, account
|
||||||
|
);
|
||||||
|
let params = [("To", to), ("From", from), ("Body", &msg.to_string())];
|
||||||
|
if let Err(err) = self.client.post(&url).form(&params).send() {
|
||||||
|
warn!("Failed to send Twilio message: {:?}", err);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue