watchtower: Add Slack/Discord sanity failure notification (#7467)
automerge
This commit is contained in:
parent
b7d6ff6770
commit
48f9b2fdcc
|
@ -4103,6 +4103,8 @@ version = "0.22.0"
|
|||
dependencies = [
|
||||
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"reqwest 0.9.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.44 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"solana-clap-utils 0.22.0",
|
||||
"solana-client 0.22.0",
|
||||
"solana-logger 0.22.0",
|
||||
|
|
|
@ -103,11 +103,12 @@ pub fn authorize(
|
|||
}
|
||||
|
||||
pub fn update_node(
|
||||
vote_pubkey: &Pubkey, // vote account
|
||||
authorized_pubkey: &Pubkey, // currently authorized
|
||||
vote_pubkey: &Pubkey,
|
||||
authorized_voter_pubkey: &Pubkey,
|
||||
node_pubkey: &Pubkey,
|
||||
) -> Instruction {
|
||||
let account_metas = vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_pubkey);
|
||||
let account_metas =
|
||||
vec![AccountMeta::new(*vote_pubkey, false)].with_signer(authorized_voter_pubkey);
|
||||
|
||||
Instruction::new(
|
||||
id(),
|
||||
|
|
|
@ -11,6 +11,8 @@ homepage = "https://solana.com/"
|
|||
[dependencies]
|
||||
clap = "2.33.0"
|
||||
log = "0.4.8"
|
||||
reqwest = { version = "0.9.24", default-features = false, features = ["rustls-tls"] }
|
||||
serde_json = "1.0"
|
||||
solana-clap-utils = { path = "../clap-utils", version = "0.22.0" }
|
||||
solana-client = { path = "../client", version = "0.22.0" }
|
||||
solana-logger = { path = "../logger", version = "0.22.0" }
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
The `solana-watchtower` program is used to monitor the health of a cluster. It
|
||||
periodically polls the cluster over an RPC API to confirm that the transaction
|
||||
count is advancing, new blockhashes are available, and no validators are
|
||||
delinquent. Results are reported as InfluxDB metrics.
|
||||
delinquent. Results are reported as InfluxDB metrics, with an optional
|
||||
Slack/Discord push notification on sanity failure.
|
||||
|
||||
### Metrics
|
||||
#### `watchtower-sanity`
|
||||
|
@ -14,3 +15,11 @@ the following fields:
|
|||
* `test`: name of the sanity test that failed
|
||||
* `err`: exact sanity failure message
|
||||
|
||||
|
||||
### Sanity failure push notification
|
||||
To receive a Slack and/or Discord notification on sanity failure, define one or
|
||||
both of these environment variables before running `solana-watchtower`:
|
||||
```
|
||||
export SLACK_WEBHOOK=...
|
||||
export DISCORD_WEBHOOK=...
|
||||
```
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
//! A command-line executable for monitoring the health of a cluster
|
||||
|
||||
mod notifier;
|
||||
|
||||
use crate::notifier::Notifier;
|
||||
use clap::{crate_description, crate_name, value_t_or_exit, App, Arg};
|
||||
use log::*;
|
||||
use solana_clap_utils::input_validators::is_url;
|
||||
|
@ -38,6 +41,7 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
|||
|
||||
let rpc_client = RpcClient::new(json_rpc_url.to_string());
|
||||
|
||||
let notifier = Notifier::new();
|
||||
let mut last_transaction_count = 0;
|
||||
loop {
|
||||
let ok = rpc_client
|
||||
|
@ -111,6 +115,9 @@ fn main() -> Result<(), Box<dyn error::Error>> {
|
|||
});
|
||||
|
||||
datapoint_info!("watchtower-sanity", ("ok", ok, bool));
|
||||
if !ok {
|
||||
notifier.send("solana-watchtower sanity failure");
|
||||
}
|
||||
sleep(interval);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
use log::*;
|
||||
use reqwest::Client;
|
||||
use serde_json::json;
|
||||
use std::env;
|
||||
|
||||
pub struct Notifier {
|
||||
client: Client,
|
||||
discord_webhook: Option<String>,
|
||||
slack_webhook: Option<String>,
|
||||
}
|
||||
|
||||
impl Notifier {
|
||||
pub fn new() -> Self {
|
||||
let discord_webhook = env::var("DISCORD_WEBHOOK")
|
||||
.map_err(|_| {
|
||||
warn!("Discord notifications disabled");
|
||||
})
|
||||
.ok();
|
||||
let slack_webhook = env::var("SLACK_WEBHOOK")
|
||||
.map_err(|_| {
|
||||
warn!("Slack notifications disabled");
|
||||
})
|
||||
.ok();
|
||||
Notifier {
|
||||
client: Client::new(),
|
||||
discord_webhook,
|
||||
slack_webhook,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn send(&self, msg: &str) {
|
||||
if let Some(webhook) = &self.discord_webhook {
|
||||
let data = json!({ "content": msg });
|
||||
if let Err(err) = self.client.post(webhook).json(&data).send() {
|
||||
warn!("Failed to send Discord message: {:?}", err);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(webhook) = &self.slack_webhook {
|
||||
let data = json!({ "text": msg });
|
||||
if let Err(err) = self.client.post(webhook).json(&data).send() {
|
||||
warn!("Failed to send Slack message: {:?}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue