Add 'unknown' health check state

This commit is contained in:
Michael Vines 2021-03-04 21:18:08 -08:00 committed by mergify[bot]
parent ee621878b0
commit 66b781eec3
5 changed files with 74 additions and 58 deletions

View File

@ -1866,6 +1866,10 @@ pub mod rpc_minimal {
fn get_health(&self, meta: Self::Metadata) -> Result<String> {
match meta.health.check() {
RpcHealthStatus::Ok => Ok("ok".to_string()),
RpcHealthStatus::Unknown => Err(RpcCustomError::NodeUnhealthy {
num_slots_behind: None,
}
.into()),
RpcHealthStatus::Behind { num_slots } => Err(RpcCustomError::NodeUnhealthy {
num_slots_behind: Some(num_slots),
}
@ -2700,6 +2704,12 @@ pub mod rpc_full {
match meta.health.check() {
RpcHealthStatus::Ok => (),
RpcHealthStatus::Unknown => {
return Err(RpcCustomError::NodeUnhealthy {
num_slots_behind: None,
}
.into());
}
RpcHealthStatus::Behind { num_slots } => {
return Err(RpcCustomError::NodeUnhealthy {
num_slots_behind: Some(num_slots),

View File

@ -8,10 +8,11 @@ use {
},
};
#[derive(PartialEq, Clone, Copy)]
#[derive(PartialEq, Clone, Copy, Debug)]
/// Result of an RPC node health check, as produced by `RpcHealth::check()`.
pub enum RpcHealthStatus {
/// The node is considered healthy: either the health check is overridden, or the
/// node's latest account hash slot is within `health_check_slot_distance` of the
/// latest trusted validator's account hash slot.
Ok,
/// `num_slots` is the latest trusted validator account hash slot minus this node's
/// latest account hash slot (saturating at zero).
Behind { num_slots: Slot }, // Validator is behind its trusted validators
/// Health cannot be determined because the latest account hash slot of this node
/// and/or of the trusted validators is not available.
Unknown,
}
pub struct RpcHealth {
@ -51,52 +52,53 @@ impl RpcHealth {
if self.override_health_check.load(Ordering::Relaxed) {
RpcHealthStatus::Ok
} else if let Some(trusted_validators) = &self.trusted_validators {
let (latest_account_hash_slot, latest_trusted_validator_account_hash_slot) = {
match (
self.cluster_info
.get_accounts_hash_for_node(&self.cluster_info.id(), |hashes| {
hashes
.iter()
.max_by(|a, b| a.0.cmp(&b.0))
.map(|slot_hash| slot_hash.0)
})
.flatten(),
trusted_validators
.iter()
.filter_map(|trusted_validator| {
self.cluster_info
.get_accounts_hash_for_node(&trusted_validator, |hashes| {
hashes
.iter()
.max_by(|a, b| a.0.cmp(&b.0))
.map(|slot_hash| slot_hash.0)
})
.flatten()
})
.max(),
) {
(
self.cluster_info
.get_accounts_hash_for_node(&self.cluster_info.id(), |hashes| {
hashes
.iter()
.max_by(|a, b| a.0.cmp(&b.0))
.map(|slot_hash| slot_hash.0)
})
.flatten()
.unwrap_or(0),
trusted_validators
.iter()
.map(|trusted_validator| {
self.cluster_info
.get_accounts_hash_for_node(&trusted_validator, |hashes| {
hashes
.iter()
.max_by(|a, b| a.0.cmp(&b.0))
.map(|slot_hash| slot_hash.0)
})
.flatten()
.unwrap_or(0)
})
.max()
.unwrap_or(0),
)
};
// This validator is considered healthy if its latest account hash slot is within
// `health_check_slot_distance` of the latest trusted validator's account hash slot
if latest_account_hash_slot > 0
&& latest_trusted_validator_account_hash_slot > 0
&& latest_account_hash_slot
> latest_trusted_validator_account_hash_slot
.saturating_sub(self.health_check_slot_distance)
{
RpcHealthStatus::Ok
} else {
let num_slots = latest_trusted_validator_account_hash_slot
.saturating_sub(latest_account_hash_slot);
warn!(
"health check: behind by {} slots: me={}, latest trusted_validator={}",
num_slots, latest_account_hash_slot, latest_trusted_validator_account_hash_slot
);
RpcHealthStatus::Behind { num_slots }
Some(latest_account_hash_slot),
Some(latest_trusted_validator_account_hash_slot),
) => {
// The validator is considered healthy if its latest account hash slot is within
// `health_check_slot_distance` of the latest trusted validator's account hash slot
if latest_account_hash_slot
> latest_trusted_validator_account_hash_slot
.saturating_sub(self.health_check_slot_distance)
{
RpcHealthStatus::Ok
} else {
let num_slots = latest_trusted_validator_account_hash_slot
.saturating_sub(latest_account_hash_slot);
warn!(
"health check: behind by {} slots: me={}, latest trusted_validator={}",
num_slots,
latest_account_hash_slot,
latest_trusted_validator_account_hash_slot
);
RpcHealthStatus::Behind { num_slots }
}
}
_ => RpcHealthStatus::Unknown,
}
} else {
// No trusted validator point of reference available, so this validator is healthy

View File

@ -178,7 +178,8 @@ impl RpcRequestMiddleware {
fn health_check(&self) -> &'static str {
let response = match self.health.check() {
RpcHealthStatus::Ok => "ok",
RpcHealthStatus::Behind { num_slots: _ } => "behind",
RpcHealthStatus::Behind { .. } => "behind",
RpcHealthStatus::Unknown => "unknown",
};
info!("health check: {}", response);
response
@ -696,18 +697,20 @@ mod tests {
let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health);
// No account hashes for this node or any trusted validators == "behind"
assert_eq!(rm.health_check(), "behind");
// No account hashes for this node or any trusted validators
assert_eq!(rm.health_check(), "unknown");
// No account hashes for any trusted validators == "behind"
// No account hashes for any trusted validators
cluster_info.push_accounts_hashes(vec![(1000, Hash::default()), (900, Hash::default())]);
cluster_info.flush_push_queue();
assert_eq!(rm.health_check(), "behind");
assert_eq!(rm.health_check(), "unknown");
// Override health check
override_health_check.store(true, Ordering::Relaxed);
assert_eq!(rm.health_check(), "ok");
override_health_check.store(false, Ordering::Relaxed);
// This node is ahead of the trusted validators == "ok"
// This node is ahead of the trusted validators
cluster_info
.gossip
.write()
@ -727,7 +730,7 @@ mod tests {
.unwrap();
assert_eq!(rm.health_check(), "ok");
// Node is slightly behind the trusted validators == "ok"
// Node is slightly behind the trusted validators
cluster_info
.gossip
.write()
@ -743,7 +746,7 @@ mod tests {
.unwrap();
assert_eq!(rm.health_check(), "ok");
// Node is far behind the trusted validators == "behind"
// Node is far behind the trusted validators
cluster_info
.gossip
.write()

View File

@ -187,11 +187,12 @@ Many methods that take a commitment parameter return an RpcResponse JSON object
Although not a JSON RPC API, a `GET /health` at the RPC HTTP Endpoint provides a
health-check mechanism for use by load balancers or other network
infrastructure. This request will always return an HTTP 200 OK response with a body of
"ok" or "behind" based on the following conditions:
"ok", "behind" or "unknown" based on the following conditions:
1. If one or more `--trusted-validator` arguments are provided to `solana-validator`, "ok" is returned
when the node has within `HEALTH_CHECK_SLOT_DISTANCE` slots of the highest trusted validator,
otherwise "behind" is returned.
when the node is within `HEALTH_CHECK_SLOT_DISTANCE` slots of the highest
trusted validator, otherwise "behind". "unknown" is returned when slot
information from trusted validators is not yet available.
2. "ok" is always returned if no trusted validators are provided.
## JSON RPC API Reference

View File

@ -266,7 +266,7 @@ fn get_validator_stats(
{
format!("{} slots behind", num_slots_behind)
} else {
"unhealthy".to_string()
"health unknown".to_string()
}
}
};