Add `getHealth` RPC method

This commit is contained in:
Michael Vines 2021-01-14 21:45:11 -08:00
parent 4d12cf61cc
commit dacb95083d
9 changed files with 148 additions and 23 deletions

View File

@ -85,6 +85,14 @@ impl RpcSender for HttpSender {
} }
} }
}, },
rpc_custom_error::JSON_RPC_SERVER_ERROR_NODE_UNHEALTHLY => {
match serde_json::from_value::<rpc_custom_error::RpcNodeUnhealthyErrorData>(json["error"]["data"].clone()) {
Ok(rpc_custom_error::RpcNodeUnhealthyErrorData { num_slots_behind}) => RpcResponseErrorData::NodeUnhealthy {num_slots_behind},
Err(_err) => {
RpcResponseErrorData::Empty
}
}
},
_ => RpcResponseErrorData::Empty _ => RpcResponseErrorData::Empty
}; };

View File

@ -928,6 +928,11 @@ impl RpcClient {
Ok(hash) Ok(hash)
} }
/// Queries the node's `getHealth` RPC method.
///
/// The string payload of a successful response is discarded, so `Ok(())` only signals that
/// the request succeeded. Any error produced by the request is returned unchanged.
pub fn get_health(&self) -> ClientResult<()> {
    let _status: String = self.send(RpcRequest::GetHealth, Value::Null)?;
    Ok(())
}
pub fn get_token_account(&self, pubkey: &Pubkey) -> ClientResult<Option<UiTokenAccount>> { pub fn get_token_account(&self, pubkey: &Pubkey) -> ClientResult<Option<UiTokenAccount>> {
Ok(self Ok(self
.get_token_account_with_commitment(pubkey, self.commitment_config)? .get_token_account_with_commitment(pubkey, self.commitment_config)?

View File

@ -25,13 +25,21 @@ pub enum RpcCustomError {
BlockNotAvailable { BlockNotAvailable {
slot: Slot, slot: Slot,
}, },
RpcNodeUnhealthy, RpcNodeUnhealthy {
num_slots_behind: Slot,
},
TransactionPrecompileVerificationFailure(solana_sdk::transaction::TransactionError), TransactionPrecompileVerificationFailure(solana_sdk::transaction::TransactionError),
SlotSkipped { SlotSkipped {
slot: Slot, slot: Slot,
}, },
} }
/// Structured `data` payload attached to the JSON RPC error reported for an unhealthy node.
///
/// Fields are (de)serialized in camelCase, so `num_slots_behind` travels on the wire as
/// `numSlotsBehind`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RpcNodeUnhealthyErrorData {
/// Number of slots the node is behind.
pub num_slots_behind: Slot,
}
impl From<RpcCustomError> for Error { impl From<RpcCustomError> for Error {
fn from(e: RpcCustomError) -> Self { fn from(e: RpcCustomError) -> Self {
match e { match e {
@ -65,10 +73,12 @@ impl From<RpcCustomError> for Error {
message: format!("Block not available for slot {}", slot), message: format!("Block not available for slot {}", slot),
data: None, data: None,
}, },
RpcCustomError::RpcNodeUnhealthy => Self { RpcCustomError::RpcNodeUnhealthy { num_slots_behind } => Self {
code: ErrorCode::ServerError(JSON_RPC_SERVER_ERROR_NODE_UNHEALTHLY), code: ErrorCode::ServerError(JSON_RPC_SERVER_ERROR_NODE_UNHEALTHLY),
message: "RPC node is unhealthy".to_string(), message: format!("RPC node is behind by {} slots", num_slots_behind),
data: None, data: Some(serde_json::json!(RpcNodeUnhealthyErrorData {
num_slots_behind
})),
}, },
RpcCustomError::TransactionPrecompileVerificationFailure(e) => Self { RpcCustomError::TransactionPrecompileVerificationFailure(e) => Self {
code: ErrorCode::ServerError( code: ErrorCode::ServerError(

View File

@ -1,6 +1,6 @@
use crate::rpc_response::RpcSimulateTransactionResult; use crate::rpc_response::RpcSimulateTransactionResult;
use serde_json::{json, Value}; use serde_json::{json, Value};
use solana_sdk::pubkey::Pubkey; use solana_sdk::{clock::Slot, pubkey::Pubkey};
use std::fmt; use std::fmt;
use thiserror::Error; use thiserror::Error;
@ -25,6 +25,7 @@ pub enum RpcRequest {
GetFees, GetFees,
GetFirstAvailableBlock, GetFirstAvailableBlock,
GetGenesisHash, GetGenesisHash,
GetHealth,
GetIdentity, GetIdentity,
GetInflationGovernor, GetInflationGovernor,
GetInflationRate, GetInflationRate,
@ -80,6 +81,7 @@ impl fmt::Display for RpcRequest {
RpcRequest::GetFees => "getFees", RpcRequest::GetFees => "getFees",
RpcRequest::GetFirstAvailableBlock => "getFirstAvailableBlock", RpcRequest::GetFirstAvailableBlock => "getFirstAvailableBlock",
RpcRequest::GetGenesisHash => "getGenesisHash", RpcRequest::GetGenesisHash => "getGenesisHash",
RpcRequest::GetHealth => "getHealth",
RpcRequest::GetIdentity => "getIdentity", RpcRequest::GetIdentity => "getIdentity",
RpcRequest::GetInflationGovernor => "getInflationGovernor", RpcRequest::GetInflationGovernor => "getInflationGovernor",
RpcRequest::GetInflationRate => "getInflationRate", RpcRequest::GetInflationRate => "getInflationRate",
@ -143,6 +145,7 @@ impl RpcRequest {
pub enum RpcResponseErrorData { pub enum RpcResponseErrorData {
Empty, Empty,
SendTransactionPreflightFailure(RpcSimulateTransactionResult), SendTransactionPreflightFailure(RpcSimulateTransactionResult),
NodeUnhealthy { num_slots_behind: Slot },
} }
impl fmt::Display for RpcResponseErrorData { impl fmt::Display for RpcResponseErrorData {

View File

@ -1760,6 +1760,9 @@ pub trait RpcSol {
#[rpc(meta, name = "getGenesisHash")] #[rpc(meta, name = "getGenesisHash")]
fn get_genesis_hash(&self, meta: Self::Metadata) -> Result<String>; fn get_genesis_hash(&self, meta: Self::Metadata) -> Result<String>;
// `getHealth`: takes no parameters besides the request metadata and yields a status
// string on success; failures are reported through the JSON RPC error in `Result`.
#[rpc(meta, name = "getHealth")]
fn get_health(&self, meta: Self::Metadata) -> Result<String>;
#[rpc(meta, name = "getLeaderSchedule")] #[rpc(meta, name = "getLeaderSchedule")]
fn get_leader_schedule( fn get_leader_schedule(
&self, &self,
@ -2247,6 +2250,15 @@ impl RpcSol for RpcSolImpl {
Ok(meta.genesis_hash.to_string()) Ok(meta.genesis_hash.to_string())
} }
// Handler for `getHealth`: answers "ok" when the health check passes, otherwise
// converts the reported slot lag into an `RpcNodeUnhealthy` custom error.
fn get_health(&self, meta: Self::Metadata) -> Result<String> {
    // Exhaustive on purpose: a new `RpcHealthStatus` variant must be handled here explicitly.
    let num_slots_behind = match meta.health.check() {
        RpcHealthStatus::Ok => return Ok("ok".to_string()),
        RpcHealthStatus::Behind { num_slots } => num_slots,
    };
    Err(RpcCustomError::RpcNodeUnhealthy { num_slots_behind }.into())
}
fn get_leader_schedule( fn get_leader_schedule(
&self, &self,
meta: Self::Metadata, meta: Self::Metadata,
@ -2486,9 +2498,15 @@ impl RpcSol for RpcSolImpl {
return Err(e); return Err(e);
} }
if meta.health.check() != RpcHealthStatus::Ok { match meta.health.check() {
return Err(RpcCustomError::RpcNodeUnhealthy.into()); RpcHealthStatus::Ok => (),
RpcHealthStatus::Behind {
num_slots: num_slots_behind,
} => {
return Err(RpcCustomError::RpcNodeUnhealthy { num_slots_behind }.into());
}
} }
if let (Err(err), logs) = preflight_bank.simulate_transaction(transaction.clone()) { if let (Err(err), logs) = preflight_bank.simulate_transaction(transaction.clone()) {
return Err(RpcCustomError::SendTransactionPreflightFailure { return Err(RpcCustomError::SendTransactionPreflightFailure {
message: format!("Transaction simulation failed: {}", err), message: format!("Transaction simulation failed: {}", err),
@ -4518,7 +4536,7 @@ pub mod tests {
); );
// sendTransaction will fail due to poor node health // sendTransaction will fail due to poor node health
health.stub_set_health_status(Some(RpcHealthStatus::Behind)); health.stub_set_health_status(Some(RpcHealthStatus::Behind { num_slots: 42 }));
let req = format!( let req = format!(
r#"{{"jsonrpc":"2.0","id":1,"method":"sendTransaction","params":["{}"]}}"#, r#"{{"jsonrpc":"2.0","id":1,"method":"sendTransaction","params":["{}"]}}"#,
bs58::encode(serialize(&bad_transaction).unwrap()).into_string() bs58::encode(serialize(&bad_transaction).unwrap()).into_string()
@ -4527,7 +4545,7 @@ pub mod tests {
assert_eq!( assert_eq!(
res, res,
Some( Some(
r#"{"jsonrpc":"2.0","error":{"code":-32005,"message":"RPC node is unhealthy"},"id":1}"#.to_string(), r#"{"jsonrpc":"2.0","error":{"code":-32005,"message":"RPC node is behind by 42 slots","data":{"numSlotsBehind":42}},"id":1}"#.to_string(),
) )
); );
health.stub_set_health_status(None); health.stub_set_health_status(None);

View File

@ -1,15 +1,17 @@
use crate::cluster_info::ClusterInfo; use {
use solana_sdk::pubkey::Pubkey; crate::cluster_info::ClusterInfo,
use std::{ solana_sdk::{clock::Slot, pubkey::Pubkey},
collections::HashSet, std::{
sync::atomic::{AtomicBool, Ordering}, collections::HashSet,
sync::Arc, sync::atomic::{AtomicBool, Ordering},
sync::Arc,
},
}; };
#[derive(PartialEq, Clone, Copy)] #[derive(PartialEq, Clone, Copy)]
pub enum RpcHealthStatus { pub enum RpcHealthStatus {
Ok, Ok,
Behind, // Validator is behind its trusted validators Behind { num_slots: Slot }, // Validator is behind its trusted validators
} }
pub struct RpcHealth { pub struct RpcHealth {
@ -88,11 +90,13 @@ impl RpcHealth {
{ {
RpcHealthStatus::Ok RpcHealthStatus::Ok
} else { } else {
let num_slots = latest_trusted_validator_account_hash_slot
.saturating_sub(latest_account_hash_slot);
warn!( warn!(
"health check: me={}, latest trusted_validator={}", "health check: behind by {} slots: me={}, latest trusted_validator={}",
latest_account_hash_slot, latest_trusted_validator_account_hash_slot num_slots, latest_account_hash_slot, latest_trusted_validator_account_hash_slot
); );
RpcHealthStatus::Behind RpcHealthStatus::Behind { num_slots }
} }
} else { } else {
// No trusted validator point of reference available, so this validator is healthy // No trusted validator point of reference available, so this validator is healthy

View File

@ -159,7 +159,7 @@ impl RpcRequestMiddleware {
fn health_check(&self) -> &'static str { fn health_check(&self) -> &'static str {
let response = match self.health.check() { let response = match self.health.check() {
RpcHealthStatus::Ok => "ok", RpcHealthStatus::Ok => "ok",
RpcHealthStatus::Behind => "behind", RpcHealthStatus::Behind { num_slots: _ } => "behind",
}; };
info!("health check: {}", response); info!("health check: {}", response);
response response

View File

@ -36,6 +36,7 @@ gives a convenient interface for the RPC methods.
- [getFees](jsonrpc-api.md#getfees) - [getFees](jsonrpc-api.md#getfees)
- [getFirstAvailableBlock](jsonrpc-api.md#getfirstavailableblock) - [getFirstAvailableBlock](jsonrpc-api.md#getfirstavailableblock)
- [getGenesisHash](jsonrpc-api.md#getgenesishash) - [getGenesisHash](jsonrpc-api.md#getgenesishash)
- [getHealth](jsonrpc-api.md#gethealth)
- [getIdentity](jsonrpc-api.md#getidentity) - [getIdentity](jsonrpc-api.md#getidentity)
- [getInflationGovernor](jsonrpc-api.md#getinflationgovernor) - [getInflationGovernor](jsonrpc-api.md#getinflationgovernor)
- [getInflationRate](jsonrpc-api.md#getinflationrate) - [getInflationRate](jsonrpc-api.md#getinflationrate)
@ -1276,6 +1277,54 @@ Result:
{"jsonrpc":"2.0","result":"GH7ome3EiwEr7tu9JuTh2dpYWBJK3z69Xm1ZE3MEE6JC","id":1} {"jsonrpc":"2.0","result":"GH7ome3EiwEr7tu9JuTh2dpYWBJK3z69Xm1ZE3MEE6JC","id":1}
``` ```
### getHealth
Returns the current health of the node.
If one or more `--trusted-validator` arguments are provided to
`solana-validator`, "ok" is returned when the node is within
`HEALTH_CHECK_SLOT_DISTANCE` slots of the most advanced trusted validator;
otherwise an error is returned. "ok" is always returned if no trusted
validators are provided.
#### Parameters:
None
#### Results:
If the node is healthy: "ok"
If the node is unhealthy, a JSON RPC error response is returned indicating how far behind the node is.
#### Example:
Request:
```bash
curl http://localhost:8899 -X POST -H "Content-Type: application/json" -d '
{"jsonrpc":"2.0","id":1, "method":"getHealth"}
'
```
Healthy Result:
```json
{"jsonrpc":"2.0","result": "ok","id":1}
```
Unhealthy Result:
```json
{
"jsonrpc": "2.0",
"error": {
"code": -32005,
"message": "RPC node is behind by 42 slots",
"data": {
"numSlotsBehind": 42
}
},
"id": 1
}
```
### getIdentity ### getIdentity
Returns the identity pubkey for the current node Returns the identity pubkey for the current node

View File

@ -4,7 +4,7 @@ use {
fd_lock::FdLock, fd_lock::FdLock,
indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}, indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle},
solana_clap_utils::{input_parsers::pubkey_of, input_validators::is_pubkey}, solana_clap_utils::{input_parsers::pubkey_of, input_validators::is_pubkey},
solana_client::{client_error, rpc_client::RpcClient}, solana_client::{client_error, rpc_client::RpcClient, rpc_request},
solana_core::rpc::JsonRpcConfig, solana_core::rpc::JsonRpcConfig,
solana_faucet::faucet::{run_local_faucet_with_port, FAUCET_PORT}, solana_faucet::faucet::{run_local_faucet_with_port, FAUCET_PORT},
solana_sdk::{ solana_sdk::{
@ -339,7 +339,7 @@ fn main() {
fn get_validator_stats( fn get_validator_stats(
rpc_client: &RpcClient, rpc_client: &RpcClient,
identity: &Pubkey, identity: &Pubkey,
) -> client_error::Result<(Slot, Slot, Slot, u64, Sol)> { ) -> client_error::Result<(Slot, Slot, Slot, u64, Sol, String)> {
let processed_slot = rpc_client.get_slot_with_commitment(CommitmentConfig::recent())?; let processed_slot = rpc_client.get_slot_with_commitment(CommitmentConfig::recent())?;
let confirmed_slot = let confirmed_slot =
rpc_client.get_slot_with_commitment(CommitmentConfig::single_gossip())?; rpc_client.get_slot_with_commitment(CommitmentConfig::single_gossip())?;
@ -350,12 +350,32 @@ fn main() {
.get_balance_with_commitment(identity, CommitmentConfig::single_gossip())? .get_balance_with_commitment(identity, CommitmentConfig::single_gossip())?
.value; .value;
// Human-readable health summary: "ok", "<n> slots behind" when the node reports how far
// it lags, or a generic "unhealthy" for any other failure of the health request.
let health = match rpc_client.get_health() {
    Ok(()) => "ok".to_string(),
    Err(err) => match &err.kind {
        client_error::ClientErrorKind::RpcError(rpc_request::RpcError::RpcResponseError {
            data: rpc_request::RpcResponseErrorData::NodeUnhealthy { num_slots_behind },
            ..
        }) => format!("{} slots behind", num_slots_behind),
        _ => "unhealthy".to_string(),
    },
};
Ok(( Ok((
processed_slot, processed_slot,
confirmed_slot, confirmed_slot,
finalized_slot, finalized_slot,
transaction_count, transaction_count,
Sol(identity_balance), Sol(identity_balance),
health,
)) ))
} }
@ -373,13 +393,21 @@ fn main() {
finalized_slot, finalized_slot,
transaction_count, transaction_count,
identity_balance, identity_balance,
health,
)) => { )) => {
let uptime = chrono::Duration::from_std(validator_start.elapsed()).unwrap(); let uptime = chrono::Duration::from_std(validator_start.elapsed()).unwrap();
progress_bar.set_message(&format!( progress_bar.set_message(&format!(
"{:02}:{:02}:{:02} | \ "{:02}:{:02}:{:02} \
{}| \
Processed Slot: {} | Confirmed Slot: {} | Finalized Slot: {} | Snapshot Slot: {} | \ Processed Slot: {} | Confirmed Slot: {} | Finalized Slot: {} | Snapshot Slot: {} | \
Transactions: {} | {}", Transactions: {} | {}",
uptime.num_hours(), uptime.num_minutes() % 60, uptime.num_seconds() % 60, uptime.num_hours(), uptime.num_minutes() % 60, uptime.num_seconds() % 60,
if health == "ok" {
"".to_string()
} else {
format!("| {} ", style(health).bold().red())
},
processed_slot, confirmed_slot, finalized_slot, snapshot_slot, processed_slot, confirmed_slot, finalized_slot, snapshot_slot,
transaction_count, identity_balance transaction_count, identity_balance
)); ));