Add `getHealth` RPC method

This commit is contained in:
Michael Vines 2021-01-14 21:45:11 -08:00
parent 4d12cf61cc
commit dacb95083d
9 changed files with 148 additions and 23 deletions

View File

@ -85,6 +85,14 @@ impl RpcSender for HttpSender {
}
}
},
rpc_custom_error::JSON_RPC_SERVER_ERROR_NODE_UNHEALTHLY => {
match serde_json::from_value::<rpc_custom_error::RpcNodeUnhealthyErrorData>(json["error"]["data"].clone()) {
Ok(rpc_custom_error::RpcNodeUnhealthyErrorData { num_slots_behind}) => RpcResponseErrorData::NodeUnhealthy {num_slots_behind},
Err(_err) => {
RpcResponseErrorData::Empty
}
}
},
_ => RpcResponseErrorData::Empty
};

View File

@ -928,6 +928,11 @@ impl RpcClient {
Ok(hash)
}
/// Calls the node's `getHealth` RPC method.
///
/// Returns `Ok(())` when the request succeeds; the string payload of the
/// response is discarded. Any error produced by `send` is returned to the
/// caller as-is.
pub fn get_health(&self) -> ClientResult<()> {
    // Only success vs. failure matters here, so the response body is dropped.
    let _status = self.send::<String>(RpcRequest::GetHealth, Value::Null)?;
    Ok(())
}
pub fn get_token_account(&self, pubkey: &Pubkey) -> ClientResult<Option<UiTokenAccount>> {
Ok(self
.get_token_account_with_commitment(pubkey, self.commitment_config)?

View File

@ -25,13 +25,21 @@ pub enum RpcCustomError {
BlockNotAvailable {
slot: Slot,
},
RpcNodeUnhealthy,
RpcNodeUnhealthy {
num_slots_behind: Slot,
},
TransactionPrecompileVerificationFailure(solana_sdk::transaction::TransactionError),
SlotSkipped {
slot: Slot,
},
}
/// Structured `data` payload attached to the `RpcNodeUnhealthy` JSON RPC error.
///
/// Fields are serialized in camelCase, so `num_slots_behind` appears on the
/// wire as `numSlotsBehind`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct RpcNodeUnhealthyErrorData {
/// Number of slots the node is behind, as reported with the error.
pub num_slots_behind: Slot,
}
impl From<RpcCustomError> for Error {
fn from(e: RpcCustomError) -> Self {
match e {
@ -65,10 +73,12 @@ impl From<RpcCustomError> for Error {
message: format!("Block not available for slot {}", slot),
data: None,
},
RpcCustomError::RpcNodeUnhealthy => Self {
RpcCustomError::RpcNodeUnhealthy { num_slots_behind } => Self {
code: ErrorCode::ServerError(JSON_RPC_SERVER_ERROR_NODE_UNHEALTHLY),
message: "RPC node is unhealthy".to_string(),
data: None,
message: format!("RPC node is behind by {} slots", num_slots_behind),
data: Some(serde_json::json!(RpcNodeUnhealthyErrorData {
num_slots_behind
})),
},
RpcCustomError::TransactionPrecompileVerificationFailure(e) => Self {
code: ErrorCode::ServerError(

View File

@ -1,6 +1,6 @@
use crate::rpc_response::RpcSimulateTransactionResult;
use serde_json::{json, Value};
use solana_sdk::pubkey::Pubkey;
use solana_sdk::{clock::Slot, pubkey::Pubkey};
use std::fmt;
use thiserror::Error;
@ -25,6 +25,7 @@ pub enum RpcRequest {
GetFees,
GetFirstAvailableBlock,
GetGenesisHash,
GetHealth,
GetIdentity,
GetInflationGovernor,
GetInflationRate,
@ -80,6 +81,7 @@ impl fmt::Display for RpcRequest {
RpcRequest::GetFees => "getFees",
RpcRequest::GetFirstAvailableBlock => "getFirstAvailableBlock",
RpcRequest::GetGenesisHash => "getGenesisHash",
RpcRequest::GetHealth => "getHealth",
RpcRequest::GetIdentity => "getIdentity",
RpcRequest::GetInflationGovernor => "getInflationGovernor",
RpcRequest::GetInflationRate => "getInflationRate",
@ -143,6 +145,7 @@ impl RpcRequest {
pub enum RpcResponseErrorData {
Empty,
SendTransactionPreflightFailure(RpcSimulateTransactionResult),
NodeUnhealthy { num_slots_behind: Slot },
}
impl fmt::Display for RpcResponseErrorData {

View File

@ -1760,6 +1760,9 @@ pub trait RpcSol {
#[rpc(meta, name = "getGenesisHash")]
fn get_genesis_hash(&self, meta: Self::Metadata) -> Result<String>;
// `getHealth`: reports the health of this node. The `RpcSolImpl`
// implementation returns the string "ok" when the health check passes and a
// `RpcCustomError::RpcNodeUnhealthy` error (carrying the number of slots the
// node is behind) otherwise.
#[rpc(meta, name = "getHealth")]
fn get_health(&self, meta: Self::Metadata) -> Result<String>;
#[rpc(meta, name = "getLeaderSchedule")]
fn get_leader_schedule(
&self,
@ -2247,6 +2250,15 @@ impl RpcSol for RpcSolImpl {
Ok(meta.genesis_hash.to_string())
}
/// Handler for the `getHealth` RPC method.
///
/// Runs the node health check and maps its status onto the RPC result:
/// `"ok"` when healthy, or a `RpcNodeUnhealthy` custom error carrying the
/// number of slots the node is behind.
fn get_health(&self, meta: Self::Metadata) -> Result<String> {
    match meta.health.check() {
        RpcHealthStatus::Behind { num_slots } => {
            // Surface the lag to the caller through the custom error.
            let unhealthy = RpcCustomError::RpcNodeUnhealthy {
                num_slots_behind: num_slots,
            };
            Err(unhealthy.into())
        }
        RpcHealthStatus::Ok => Ok(String::from("ok")),
    }
}
fn get_leader_schedule(
&self,
meta: Self::Metadata,
@ -2486,9 +2498,15 @@ impl RpcSol for RpcSolImpl {
return Err(e);
}
if meta.health.check() != RpcHealthStatus::Ok {
return Err(RpcCustomError::RpcNodeUnhealthy.into());
match meta.health.check() {
RpcHealthStatus::Ok => (),
RpcHealthStatus::Behind {
num_slots: num_slots_behind,
} => {
return Err(RpcCustomError::RpcNodeUnhealthy { num_slots_behind }.into());
}
}
if let (Err(err), logs) = preflight_bank.simulate_transaction(transaction.clone()) {
return Err(RpcCustomError::SendTransactionPreflightFailure {
message: format!("Transaction simulation failed: {}", err),
@ -4518,7 +4536,7 @@ pub mod tests {
);
// sendTransaction will fail due to poor node health
health.stub_set_health_status(Some(RpcHealthStatus::Behind));
health.stub_set_health_status(Some(RpcHealthStatus::Behind { num_slots: 42 }));
let req = format!(
r#"{{"jsonrpc":"2.0","id":1,"method":"sendTransaction","params":["{}"]}}"#,
bs58::encode(serialize(&bad_transaction).unwrap()).into_string()
@ -4527,7 +4545,7 @@ pub mod tests {
assert_eq!(
res,
Some(
r#"{"jsonrpc":"2.0","error":{"code":-32005,"message":"RPC node is unhealthy"},"id":1}"#.to_string(),
r#"{"jsonrpc":"2.0","error":{"code":-32005,"message":"RPC node is behind by 42 slots","data":{"numSlotsBehind":42}},"id":1}"#.to_string(),
)
);
health.stub_set_health_status(None);

View File

@ -1,15 +1,17 @@
use crate::cluster_info::ClusterInfo;
use solana_sdk::pubkey::Pubkey;
use std::{
collections::HashSet,
sync::atomic::{AtomicBool, Ordering},
sync::Arc,
use {
crate::cluster_info::ClusterInfo,
solana_sdk::{clock::Slot, pubkey::Pubkey},
std::{
collections::HashSet,
sync::atomic::{AtomicBool, Ordering},
sync::Arc,
},
};
#[derive(PartialEq, Clone, Copy)]
pub enum RpcHealthStatus {
Ok,
Behind, // Validator is behind its trusted validators
Behind { num_slots: Slot }, // Validator is behind its trusted validators
}
pub struct RpcHealth {
@ -88,11 +90,13 @@ impl RpcHealth {
{
RpcHealthStatus::Ok
} else {
let num_slots = latest_trusted_validator_account_hash_slot
.saturating_sub(latest_account_hash_slot);
warn!(
"health check: me={}, latest trusted_validator={}",
latest_account_hash_slot, latest_trusted_validator_account_hash_slot
"health check: behind by {} slots: me={}, latest trusted_validator={}",
num_slots, latest_account_hash_slot, latest_trusted_validator_account_hash_slot
);
RpcHealthStatus::Behind
RpcHealthStatus::Behind { num_slots }
}
} else {
// No trusted validator point of reference available, so this validator is healthy

View File

@ -159,7 +159,7 @@ impl RpcRequestMiddleware {
fn health_check(&self) -> &'static str {
let response = match self.health.check() {
RpcHealthStatus::Ok => "ok",
RpcHealthStatus::Behind => "behind",
RpcHealthStatus::Behind { num_slots: _ } => "behind",
};
info!("health check: {}", response);
response

View File

@ -36,6 +36,7 @@ gives a convenient interface for the RPC methods.
- [getFees](jsonrpc-api.md#getfees)
- [getFirstAvailableBlock](jsonrpc-api.md#getfirstavailableblock)
- [getGenesisHash](jsonrpc-api.md#getgenesishash)
- [getHealth](jsonrpc-api.md#gethealth)
- [getIdentity](jsonrpc-api.md#getidentity)
- [getInflationGovernor](jsonrpc-api.md#getinflationgovernor)
- [getInflationRate](jsonrpc-api.md#getinflationrate)
@ -1276,6 +1277,54 @@ Result:
{"jsonrpc":"2.0","result":"GH7ome3EiwEr7tu9JuTh2dpYWBJK3z69Xm1ZE3MEE6JC","id":1}
```
### getHealth
Returns the current health of the node.
If one or more `--trusted-validator` arguments are provided to
`solana-validator`, "ok" is returned when the node is within
`HEALTH_CHECK_SLOT_DISTANCE` slots of the highest trusted validator, otherwise
an error is returned. "ok" is always returned if no trusted validators are
provided.
#### Parameters:
None
#### Results:
If the node is healthy: "ok"
If the node is unhealthy, a JSON RPC error response is returned indicating how far behind the node is.
#### Example:
Request:
```bash
curl http://localhost:8899 -X POST -H "Content-Type: application/json" -d '
{"jsonrpc":"2.0","id":1, "method":"getHealth"}
'
```
Healthy Result:
```json
{"jsonrpc":"2.0","result": "ok","id":1}
```
Unhealthy Result:
```json
{
"jsonrpc": "2.0",
"error": {
"code": -32005,
"message": "RPC node is behind by 42 slots",
"data": {
"numSlotsBehind": 42
}
},
"id": 1
}
```
### getIdentity
Returns the identity pubkey for the current node

View File

@ -4,7 +4,7 @@ use {
fd_lock::FdLock,
indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle},
solana_clap_utils::{input_parsers::pubkey_of, input_validators::is_pubkey},
solana_client::{client_error, rpc_client::RpcClient},
solana_client::{client_error, rpc_client::RpcClient, rpc_request},
solana_core::rpc::JsonRpcConfig,
solana_faucet::faucet::{run_local_faucet_with_port, FAUCET_PORT},
solana_sdk::{
@ -339,7 +339,7 @@ fn main() {
fn get_validator_stats(
rpc_client: &RpcClient,
identity: &Pubkey,
) -> client_error::Result<(Slot, Slot, Slot, u64, Sol)> {
) -> client_error::Result<(Slot, Slot, Slot, u64, Sol, String)> {
let processed_slot = rpc_client.get_slot_with_commitment(CommitmentConfig::recent())?;
let confirmed_slot =
rpc_client.get_slot_with_commitment(CommitmentConfig::single_gossip())?;
@ -350,12 +350,32 @@ fn main() {
.get_balance_with_commitment(identity, CommitmentConfig::single_gossip())?
.value;
let health = match rpc_client.get_health() {
Ok(()) => "ok".to_string(),
Err(err) => {
if let client_error::ClientErrorKind::RpcError(
rpc_request::RpcError::RpcResponseError {
code: _,
message: _,
data:
rpc_request::RpcResponseErrorData::NodeUnhealthy { num_slots_behind },
},
) = &err.kind
{
format!("{} slots behind", num_slots_behind)
} else {
"unhealthy".to_string()
}
}
};
Ok((
processed_slot,
confirmed_slot,
finalized_slot,
transaction_count,
Sol(identity_balance),
health,
))
}
@ -373,13 +393,21 @@ fn main() {
finalized_slot,
transaction_count,
identity_balance,
health,
)) => {
let uptime = chrono::Duration::from_std(validator_start.elapsed()).unwrap();
progress_bar.set_message(&format!(
"{:02}:{:02}:{:02} | \
"{:02}:{:02}:{:02} \
{}| \
Processed Slot: {} | Confirmed Slot: {} | Finalized Slot: {} | Snapshot Slot: {} | \
Transactions: {} | {}",
uptime.num_hours(), uptime.num_minutes() % 60, uptime.num_seconds() % 60,
if health == "ok" {
"".to_string()
} else {
format!("| {} ", style(health).bold().red())
},
processed_slot, confirmed_slot, finalized_slot, snapshot_slot,
transaction_count, identity_balance
));