watchtower: Add Slack/Discord sanity failure notification (#7467)

automerge
This commit is contained in:
Michael Vines 2019-12-13 00:49:16 -07:00 committed by Grimes
parent b7d6ff6770
commit 48f9b2fdcc
6 changed files with 71 additions and 4 deletions

2
Cargo.lock generated
View File

@ -4103,6 +4103,8 @@ version = "0.22.0"
dependencies = [
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.44 (registry+https://github.com/rust-lang/crates.io-index)",
"solana-clap-utils 0.22.0",
"solana-client 0.22.0",
"solana-logger 0.22.0",

View File

@ -103,11 +103,12 @@ pub fn authorize(
}
pub fn update_node(
vote_pubkey: &Pubkey, // vote account
authorized_pubkey: &Pubkey, // currently authorized
vote_pubkey: &Pubkey,
authorized_voter_pubkey: &Pubkey,
node_pubkey: &Pubkey,
) -> Instruction {
let account_metas = vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_pubkey);
let account_metas =
vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_voter_pubkey);
Instruction::new(
id(),

View File

@ -11,6 +11,8 @@ homepage = "https://solana.com/"
[dependencies]
clap = "2.33.0"
log = "0.4.8"
reqwest = { version = "0.9.24", default-features = false, features = ["rustls-tls"] }
serde_json = "1.0"
solana-clap-utils = { path = "../clap-utils", version = "0.22.0" }
solana-client = { path = "../client", version = "0.22.0" }
solana-logger = { path = "../logger", version = "0.22.0" }

View File

@ -1,7 +1,8 @@
The `solana-watchtower` program is used to monitor the health of a cluster. It
periodically polls the cluster over an RPC API to confirm that the transaction
count is advancing, new blockhashes are available, and no validators are
delinquent. Results are reported as InfluxDB metrics.
delinquent. Results are reported as InfluxDB metrics, with an optional
Slack/Discord push notification on sanity failure.
### Metrics
#### `watchtower-sanity`
@ -14,3 +15,11 @@ the following fields:
* `test`: name of the sanity test that failed
* `err`: exact sanity failure message
### Sanity failure push notification
To receive a Slack and/or Discord notification on sanity failure, define one or
both of these environment variables before running `solana-watchtower`:
```
export SLACK_WEBHOOK=...
export DISCORD_WEBHOOK=...
```

View File

@ -1,5 +1,8 @@
//! A command-line executable for monitoring the health of a cluster
mod notifier;
use crate::notifier::Notifier;
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
use log::*;
use solana_clap_utils::input_validators::is_url;
@ -38,6 +41,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
let rpc_client = RpcClient::new(json_rpc_url.to_string());
let notifier = Notifier::new();
let mut last_transaction_count = 0;
loop {
let ok = rpc_client
@ -111,6 +115,9 @@ fn main() -> Result<(), Box<dyn error::Error>> {
});
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
if !ok {
notifier.send("solana-watchtower sanity failure");
}
sleep(interval);
}
}

View File

@ -0,0 +1,46 @@
use log::*;
use reqwest::Client;
use serde_json::json;
use std::env;
/// Sends push notifications to whichever chat webhooks are configured.
///
/// Each webhook is optional; `send` skips any service whose webhook is `None`.
pub struct Notifier {
/// HTTP client used for every webhook request.
client: Client,
/// Discord webhook URL, read from the `DISCORD_WEBHOOK` environment variable.
discord_webhook: Option<String>,
/// Slack webhook URL, read from the `SLACK_WEBHOOK` environment variable.
slack_webhook: Option<String>,
}
impl Notifier {
pub fn new() -> Self {
let discord_webhook = env::var("DISCORD_WEBHOOK")
.map_err(|_| {
warn!("Discord notifications disabled");
})
.ok();
let slack_webhook = env::var("SLACK_WEBHOOK")
.map_err(|_| {
warn!("Slack notifications disabled");
})
.ok();
Notifier {
client: Client::new(),
discord_webhook,
slack_webhook,
}
}
pub fn send(&self, msg: &str) {
if let Some(webhook) = &self.discord_webhook {
let data = json!({ "content": msg });
if let Err(err) = self.client.post(webhook).json(&data).send() {
warn!("Failed to send Discord message: {:?}", err);
}
}
if let Some(webhook) = &self.slack_webhook {
let data = json!({ "text": msg });
if let Err(err) = self.client.post(webhook).json(&data).send() {
warn!("Failed to send Slack message: {:?}", err);
}
}
}
}