Add 'unknown' health check state
This commit is contained in:
parent
ee621878b0
commit
66b781eec3
|
@ -1866,6 +1866,10 @@ pub mod rpc_minimal {
|
|||
fn get_health(&self, meta: Self::Metadata) -> Result<String> {
|
||||
match meta.health.check() {
|
||||
RpcHealthStatus::Ok => Ok("ok".to_string()),
|
||||
RpcHealthStatus::Unknown => Err(RpcCustomError::NodeUnhealthy {
|
||||
num_slots_behind: None,
|
||||
}
|
||||
.into()),
|
||||
RpcHealthStatus::Behind { num_slots } => Err(RpcCustomError::NodeUnhealthy {
|
||||
num_slots_behind: Some(num_slots),
|
||||
}
|
||||
|
@ -2700,6 +2704,12 @@ pub mod rpc_full {
|
|||
|
||||
match meta.health.check() {
|
||||
RpcHealthStatus::Ok => (),
|
||||
RpcHealthStatus::Unknown => {
|
||||
return Err(RpcCustomError::NodeUnhealthy {
|
||||
num_slots_behind: None,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
RpcHealthStatus::Behind { num_slots } => {
|
||||
return Err(RpcCustomError::NodeUnhealthy {
|
||||
num_slots_behind: Some(num_slots),
|
||||
|
|
|
@ -8,10 +8,11 @@ use {
|
|||
},
|
||||
};
|
||||
|
||||
#[derive(PartialEq, Clone, Copy)]
|
||||
#[derive(PartialEq, Clone, Copy, Debug)]
|
||||
pub enum RpcHealthStatus {
|
||||
Ok,
|
||||
Behind { num_slots: Slot }, // Validator is behind its trusted validators
|
||||
Unknown,
|
||||
}
|
||||
|
||||
pub struct RpcHealth {
|
||||
|
@ -51,52 +52,53 @@ impl RpcHealth {
|
|||
if self.override_health_check.load(Ordering::Relaxed) {
|
||||
RpcHealthStatus::Ok
|
||||
} else if let Some(trusted_validators) = &self.trusted_validators {
|
||||
let (latest_account_hash_slot, latest_trusted_validator_account_hash_slot) = {
|
||||
match (
|
||||
self.cluster_info
|
||||
.get_accounts_hash_for_node(&self.cluster_info.id(), |hashes| {
|
||||
hashes
|
||||
.iter()
|
||||
.max_by(|a, b| a.0.cmp(&b.0))
|
||||
.map(|slot_hash| slot_hash.0)
|
||||
})
|
||||
.flatten(),
|
||||
trusted_validators
|
||||
.iter()
|
||||
.filter_map(|trusted_validator| {
|
||||
self.cluster_info
|
||||
.get_accounts_hash_for_node(&trusted_validator, |hashes| {
|
||||
hashes
|
||||
.iter()
|
||||
.max_by(|a, b| a.0.cmp(&b.0))
|
||||
.map(|slot_hash| slot_hash.0)
|
||||
})
|
||||
.flatten()
|
||||
})
|
||||
.max(),
|
||||
) {
|
||||
(
|
||||
self.cluster_info
|
||||
.get_accounts_hash_for_node(&self.cluster_info.id(), |hashes| {
|
||||
hashes
|
||||
.iter()
|
||||
.max_by(|a, b| a.0.cmp(&b.0))
|
||||
.map(|slot_hash| slot_hash.0)
|
||||
})
|
||||
.flatten()
|
||||
.unwrap_or(0),
|
||||
trusted_validators
|
||||
.iter()
|
||||
.map(|trusted_validator| {
|
||||
self.cluster_info
|
||||
.get_accounts_hash_for_node(&trusted_validator, |hashes| {
|
||||
hashes
|
||||
.iter()
|
||||
.max_by(|a, b| a.0.cmp(&b.0))
|
||||
.map(|slot_hash| slot_hash.0)
|
||||
})
|
||||
.flatten()
|
||||
.unwrap_or(0)
|
||||
})
|
||||
.max()
|
||||
.unwrap_or(0),
|
||||
)
|
||||
};
|
||||
|
||||
// This validator is considered healthy if its latest account hash slot is within
|
||||
// `health_check_slot_distance` of the latest trusted validator's account hash slot
|
||||
if latest_account_hash_slot > 0
|
||||
&& latest_trusted_validator_account_hash_slot > 0
|
||||
&& latest_account_hash_slot
|
||||
> latest_trusted_validator_account_hash_slot
|
||||
.saturating_sub(self.health_check_slot_distance)
|
||||
{
|
||||
RpcHealthStatus::Ok
|
||||
} else {
|
||||
let num_slots = latest_trusted_validator_account_hash_slot
|
||||
.saturating_sub(latest_account_hash_slot);
|
||||
warn!(
|
||||
"health check: behind by {} slots: me={}, latest trusted_validator={}",
|
||||
num_slots, latest_account_hash_slot, latest_trusted_validator_account_hash_slot
|
||||
);
|
||||
RpcHealthStatus::Behind { num_slots }
|
||||
Some(latest_account_hash_slot),
|
||||
Some(latest_trusted_validator_account_hash_slot),
|
||||
) => {
|
||||
// The validator is considered healthy if its latest account hash slot is within
|
||||
// `health_check_slot_distance` of the latest trusted validator's account hash slot
|
||||
if latest_account_hash_slot
|
||||
> latest_trusted_validator_account_hash_slot
|
||||
.saturating_sub(self.health_check_slot_distance)
|
||||
{
|
||||
RpcHealthStatus::Ok
|
||||
} else {
|
||||
let num_slots = latest_trusted_validator_account_hash_slot
|
||||
.saturating_sub(latest_account_hash_slot);
|
||||
warn!(
|
||||
"health check: behind by {} slots: me={}, latest trusted_validator={}",
|
||||
num_slots,
|
||||
latest_account_hash_slot,
|
||||
latest_trusted_validator_account_hash_slot
|
||||
);
|
||||
RpcHealthStatus::Behind { num_slots }
|
||||
}
|
||||
}
|
||||
_ => RpcHealthStatus::Unknown,
|
||||
}
|
||||
} else {
|
||||
// No trusted validator point of reference available, so this validator is healthy
|
||||
|
|
|
@ -178,7 +178,8 @@ impl RpcRequestMiddleware {
|
|||
fn health_check(&self) -> &'static str {
|
||||
let response = match self.health.check() {
|
||||
RpcHealthStatus::Ok => "ok",
|
||||
RpcHealthStatus::Behind { num_slots: _ } => "behind",
|
||||
RpcHealthStatus::Behind { .. } => "behind",
|
||||
RpcHealthStatus::Unknown => "unknown",
|
||||
};
|
||||
info!("health check: {}", response);
|
||||
response
|
||||
|
@ -696,18 +697,20 @@ mod tests {
|
|||
|
||||
let rm = RpcRequestMiddleware::new(PathBuf::from("/"), None, create_bank_forks(), health);
|
||||
|
||||
// No account hashes for this node or any trusted validators == "behind"
|
||||
assert_eq!(rm.health_check(), "behind");
|
||||
// No account hashes for this node or any trusted validators
|
||||
assert_eq!(rm.health_check(), "unknown");
|
||||
|
||||
// No account hashes for any trusted validators == "behind"
|
||||
// No account hashes for any trusted validators
|
||||
cluster_info.push_accounts_hashes(vec![(1000, Hash::default()), (900, Hash::default())]);
|
||||
cluster_info.flush_push_queue();
|
||||
assert_eq!(rm.health_check(), "behind");
|
||||
assert_eq!(rm.health_check(), "unknown");
|
||||
|
||||
// Override health check
|
||||
override_health_check.store(true, Ordering::Relaxed);
|
||||
assert_eq!(rm.health_check(), "ok");
|
||||
override_health_check.store(false, Ordering::Relaxed);
|
||||
|
||||
// This node is ahead of the trusted validators == "ok"
|
||||
// This node is ahead of the trusted validators
|
||||
cluster_info
|
||||
.gossip
|
||||
.write()
|
||||
|
@ -727,7 +730,7 @@ mod tests {
|
|||
.unwrap();
|
||||
assert_eq!(rm.health_check(), "ok");
|
||||
|
||||
// Node is slightly behind the trusted validators == "ok"
|
||||
// Node is slightly behind the trusted validators
|
||||
cluster_info
|
||||
.gossip
|
||||
.write()
|
||||
|
@ -743,7 +746,7 @@ mod tests {
|
|||
.unwrap();
|
||||
assert_eq!(rm.health_check(), "ok");
|
||||
|
||||
// Node is far behind the trusted validators == "behind"
|
||||
// Node is far behind the trusted validators
|
||||
cluster_info
|
||||
.gossip
|
||||
.write()
|
||||
|
|
|
@ -187,11 +187,12 @@ Many methods that take a commitment parameter return an RpcResponse JSON object
|
|||
Although not a JSON RPC API, a `GET /health` at the RPC HTTP Endpoint provides a
|
||||
health-check mechanism for use by load balancers or other network
|
||||
infrastructure. This request will always return an HTTP 200 OK response with a body of
|
||||
"ok" or "behind" based on the following conditions:
|
||||
"ok", "behind" or "unknown" based on the following conditions:
|
||||
|
||||
1. If one or more `--trusted-validator` arguments are provided to `solana-validator`, "ok" is returned
|
||||
when the node has within `HEALTH_CHECK_SLOT_DISTANCE` slots of the highest trusted validator,
|
||||
otherwise "behind" is returned.
|
||||
when the node is within `HEALTH_CHECK_SLOT_DISTANCE` slots of the highest
|
||||
trusted validator, otherwise "behind". "unknown" is returned when slot
|
||||
information from trusted validators is not yet available.
|
||||
2. "ok" is always returned if no trusted validators are provided.
|
||||
|
||||
## JSON RPC API Reference
|
||||
|
|
|
@ -266,7 +266,7 @@ fn get_validator_stats(
|
|||
{
|
||||
format!("{} slots behind", num_slots_behind)
|
||||
} else {
|
||||
"unhealthy".to_string()
|
||||
"health unknown".to_string()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue