zebra/zebra-state/src/service/read/find.rs

637 lines
21 KiB
Rust

//! Finding and reading block hashes and headers, in response to peer requests.
//!
//! In the functions in this module:
//!
//! The block write task commits blocks to the finalized state before updating
//! `chain` with a cached copy of the best non-finalized chain from
//! `NonFinalizedState.chain_set`. Then the block commit task can commit additional blocks to
//! the finalized state after we've cloned the `chain`.
//!
//! This means that some blocks can be in both:
//! - the cached [`Chain`], and
//! - the shared finalized [`ZebraDb`] reference.
use std::{
iter,
ops::{RangeBounds, RangeInclusive},
sync::Arc,
};
use chrono::{DateTime, Utc};
use zebra_chain::{
block::{self, Block, Height},
parameters::Network,
serialization::DateTime32,
work::difficulty::CompactDifficulty,
};
use crate::{
constants,
service::{
block_iter::any_ancestor_blocks,
check::{difficulty::POW_ADJUSTMENT_BLOCK_SPAN, AdjustedDifficulty},
finalized_state::ZebraDb,
non_finalized_state::{Chain, NonFinalizedState},
read::{self, block::block_header, FINALIZED_STATE_QUERY_RETRIES},
},
BoxError,
};
#[cfg(test)]
mod tests;
/// Returns the height and hash of the best chain tip.
///
/// Looks up the best chain in `non_finalized_state`, and passes it to [`tip()`],
/// which falls back to the tip of the finalized `db` when there is no best chain.
pub fn best_tip(
    non_finalized_state: &NonFinalizedState,
    db: &ZebraDb,
) -> Option<(block::Height, block::Hash)> {
    let best_chain = non_finalized_state.best_chain();

    tip(best_chain, db)
}
/// Returns the height and hash of the tip of `chain`.
/// If `chain` is `None`, returns the tip of `db` instead.
pub fn tip<C>(chain: Option<C>, db: &ZebraDb) -> Option<(Height, block::Hash)>
where
    C: AsRef<Chain>,
{
    // # Correctness
    //
    // If there is an overlap between the non-finalized and finalized states,
    // where the finalized tip is above the non-finalized tip,
    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
    // so it is acceptable to return either tip.
    match chain {
        // The finalized state is only queried when there is no non-finalized chain.
        Some(chain) => Some(chain.as_ref().non_finalized_tip()),
        None => db.tip(),
    }
}
/// Returns the [`Height`] of the tip of `chain`.
/// If `chain` is `None`, returns the height of the tip of `db`.
pub fn tip_height<C>(chain: Option<C>, db: &ZebraDb) -> Option<Height>
where
    C: AsRef<Chain>,
{
    let (height, _hash) = tip(chain, db)?;

    Some(height)
}
/// Returns the [`block::Hash`] of the tip of `chain`.
/// If `chain` is `None`, returns the hash of the tip of `db`.
#[allow(dead_code)]
pub fn tip_hash<C>(chain: Option<C>, db: &ZebraDb) -> Option<block::Hash>
where
    C: AsRef<Chain>,
{
    let (_height, hash) = tip(chain, db)?;

    Some(hash)
}
/// Return the depth of block `hash` from the chain tip.
/// Searches `chain` for `hash`, then searches `db`.
///
/// Returns `None` if the state is empty, or if `hash` is not in `chain` or `db`.
///
/// If `hash` is found at a height above the tip returned by [`tip_height()`],
/// the depth is clamped to zero. This can happen when the finalized state has
/// advanced past the cached non-finalized `chain`.
pub fn depth<C>(chain: Option<C>, db: &ZebraDb, hash: block::Hash) -> Option<u32>
where
    C: AsRef<Chain>,
{
    let chain = chain.as_ref();

    // # Correctness
    //
    // It is ok to do this lookup in two different calls. Finalized state updates
    // can only add overlapping blocks, and hashes are unique.
    let tip = tip_height(chain, db)?;
    let height = height_by_hash(chain, db, hash)?;

    // The tip comes from `chain` when there is one, but the block height can come from `db`.
    // If the finalized state has committed blocks above the cached chain tip,
    // `height` can be greater than `tip`, so a plain subtraction could underflow.
    Some(tip.0.saturating_sub(height.0))
}
/// Return the [`Height`] of the block with `hash`, if that block is in `chain` or `db`.
///
/// `chain` is searched first, and `db` is only queried if the hash is not found there.
pub fn height_by_hash<C>(chain: Option<C>, db: &ZebraDb, hash: block::Hash) -> Option<Height>
where
    C: AsRef<Chain>,
{
    // # Correctness
    //
    // Finalized state updates can only add overlapping blocks, and hashes are unique.
    let non_finalized_height = chain.and_then(|chain| chain.as_ref().height_by_hash(hash));

    if non_finalized_height.is_some() {
        return non_finalized_height;
    }

    db.height(hash)
}
/// Return the [`block::Hash`] of the block at `height`, if that height is in `chain` or `db`.
///
/// `chain` is searched first, and `db` is only queried if the height is not found there.
pub fn hash_by_height<C>(chain: Option<C>, db: &ZebraDb, height: Height) -> Option<block::Hash>
where
    C: AsRef<Chain>,
{
    // # Correctness
    //
    // Finalized state updates can only add overlapping blocks, and heights are unique
    // in the current `chain`.
    //
    // If there is an overlap between the non-finalized and finalized states,
    // where the finalized tip is above the non-finalized tip,
    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
    // so it is acceptable to return hashes from either chain.
    let non_finalized_hash = chain.and_then(|chain| chain.as_ref().hash_by_height(height));

    if non_finalized_hash.is_some() {
        return non_finalized_hash;
    }

    db.hash(height)
}
/// Return `true` if the block with `hash` is in `chain` or `db`.
///
/// `chain` is checked first, and `db` is only queried if the hash is not found there.
pub fn chain_contains_hash<C>(chain: Option<C>, db: &ZebraDb, hash: block::Hash) -> bool
where
    C: AsRef<Chain>,
{
    // # Correctness
    //
    // Finalized state updates can only add overlapping blocks, and hashes are unique.
    //
    // If there is an overlap between the non-finalized and finalized states,
    // where the finalized tip is above the non-finalized tip,
    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
    // so it is acceptable to return hashes from either chain.
    if let Some(chain) = chain {
        if chain.as_ref().height_by_hash.contains_key(&hash) {
            return true;
        }
    }

    db.contains_hash(hash)
}
/// Create a block locator from `chain` and `db`.
///
/// A block locator is used to efficiently find an intersection of two node's chains.
/// It contains a list of block hashes at decreasing heights, skipping some blocks,
/// so that any intersection can be located, no matter how long or different the chains are.
///
/// Returns `None` if the state has no tip.
/// Heights from [`block_locator_heights()`] that have no hash in `chain` or `db` are skipped.
pub fn block_locator<C>(chain: Option<C>, db: &ZebraDb) -> Option<Vec<block::Hash>>
where
    C: AsRef<Chain>,
{
    let chain = chain.as_ref();

    // # Correctness
    //
    // It is ok to do these lookups using multiple database calls. Finalized state updates
    // can only add overlapping blocks, and hashes are unique.
    //
    // If there is an overlap between the non-finalized and finalized states,
    // where the finalized tip is above the non-finalized tip,
    // Zebra is receiving a lot of blocks, or this request has been delayed for a long time,
    // so it is acceptable to return a set of hashes from multiple chains.
    //
    // Multiple heights can not map to the same hash, even in different chains,
    // because the block height is covered by the block hash,
    // via the transaction merkle tree commitments.
    let locator_tip = tip_height(chain, db)?;

    let hashes = block_locator_heights(locator_tip)
        .into_iter()
        .filter_map(|height| hash_by_height(chain, db, height))
        .collect();

    Some(hashes)
}
/// Get the heights of the blocks for constructing a block_locator list.
///
/// Zebra uses a decreasing list of block heights, starting at the tip, and skipping some heights.
/// See [`block_locator()`] for details.
pub fn block_locator_heights(tip_height: block::Height) -> Vec<block::Height> {
    // The first height in the returned `Vec` is the tip height,
    // and the last height is `MAX_BLOCK_REORG_HEIGHT` below the tip
    // (or zero, if the tip is lower than that).
    //
    // The initial distance between heights is 1, and it doubles between each subsequent height.
    // So the number of returned heights is approximately `log_2(MAX_BLOCK_REORG_HEIGHT)`.
    let tip = tip_height.0;

    // Limit the maximum locator depth.
    let min_locator_height = tip.saturating_sub(constants::MAX_BLOCK_REORG_HEIGHT);

    // Distances below the tip: 1, 2, 4, 8, ... until the step overflows a `u32`.
    let steps = iter::successors(Some(1u32), |step| step.checked_mul(2));

    // An exponentially decreasing set of heights, skipping any steps that go below zero.
    let heights_below_tip = steps.filter_map(move |step| tip.checked_sub(step));

    // Start at the tip, add decreasing heights, and end MAX_BLOCK_REORG_HEIGHT below the tip.
    let locators: Vec<block::Height> = iter::once(tip)
        .chain(heights_below_tip)
        .take_while(|&height| height > min_locator_height)
        .chain(iter::once(min_locator_height))
        .map(block::Height)
        .collect();

    tracing::debug!(
        ?tip_height,
        ?min_locator_height,
        ?locators,
        "created block locator"
    );

    locators
}
/// Find the first hash that's in the peer's `known_blocks`, and in `chain` or `db`.
///
/// `known_blocks` is searched in order, so earlier hashes take priority.
///
/// Returns `None` if:
/// * there is no matching hash in the chain, or
/// * the state is empty.
fn find_chain_intersection<C>(
    chain: Option<C>,
    db: &ZebraDb,
    known_blocks: Vec<block::Hash>,
) -> Option<block::Hash>
where
    C: AsRef<Chain>,
{
    // We can get a block locator request before we have downloaded the genesis block
    let state_is_empty = chain.is_none() && db.is_empty();
    if state_is_empty {
        return None;
    }

    let chain = chain.as_ref();

    known_blocks
        .into_iter()
        .find(|&hash| chain_contains_hash(chain, db, hash))
}
/// Returns a range of [`Height`]s in the chain,
/// starting after the `intersection` hash on the chain.
///
/// The range starts at genesis if `intersection` is `None`.
/// It ends at the earliest of:
/// * the chain tip,
/// * the height of the `stop` hash, if it is in `chain` or `db`, or
/// * `max_len` heights after the start.
///
/// Returns an empty range if the state is empty,
/// or if `intersection` is `Some` but its height can no longer be found.
///
/// See [`find_chain_hashes()`] for details.
///
/// # Panics
///
/// - If `max_len` is zero.
fn find_chain_height_range<C>(
    chain: Option<C>,
    db: &ZebraDb,
    intersection: Option<block::Hash>,
    stop: Option<block::Hash>,
    max_len: u32,
) -> impl RangeBounds<u32> + Iterator<Item = u32>
where
    C: AsRef<Chain>,
{
    #[allow(clippy::reversed_empty_ranges)]
    const EMPTY_RANGE: RangeInclusive<u32> = 1..=0;

    assert!(max_len > 0, "max_len must be at least 1");

    let chain = chain.as_ref();

    // We can get a block locator request before we have downloaded the genesis block
    let chain_tip_height = if let Some(height) = tip_height(chain, db) {
        height
    } else {
        tracing::debug!(
            response_len = ?0,
            "responding to peer GetBlocks or GetHeaders with empty state",
        );

        return EMPTY_RANGE;
    };

    // Find the intersection height
    let intersection_height = match intersection {
        Some(intersection_hash) => match height_by_hash(chain, db, intersection_hash) {
            Some(intersection_height) => Some(intersection_height),

            // A recently committed block dropped the intersection we previously found
            None => {
                info!(
                    ?intersection,
                    ?stop,
                    ?max_len,
                    "state found intersection but then dropped it, ignoring request",
                );

                return EMPTY_RANGE;
            }
        },

        // There is no intersection
        None => None,
    };

    // Now find the start and maximum heights
    let (start_height, max_height) = match intersection_height {
        // start after the intersection_height, and return max_len hashes or headers
        //
        // `max_len` is supplied by the caller, so the addition saturates instead of overflowing.
        // The maximum height is clamped to the chain tip below,
        // so saturating can never lengthen the response.
        Some(intersection_height) => (
            Height(intersection_height.0 + 1),
            Height(intersection_height.0.saturating_add(max_len)),
        ),

        // start at genesis, and return max_len hashes or headers
        // (`max_len` is at least 1, so this subtraction can't underflow)
        None => (Height(0), Height(max_len - 1)),
    };

    let stop_height = stop.and_then(|hash| height_by_hash(chain, db, hash));

    // Compute the final height, making sure it is:
    //   * at or below our chain tip, and
    //   * at or below the height of the stop hash.
    let final_height = std::cmp::min(max_height, chain_tip_height);
    let final_height = stop_height
        .map(|stop_height| std::cmp::min(final_height, stop_height))
        .unwrap_or(final_height);

    // TODO: implement Step for Height, when Step stabilises
    //       https://github.com/rust-lang/rust/issues/42168
    let height_range = start_height.0..=final_height.0;
    let response_len = height_range.clone().count();

    tracing::debug!(
        ?start_height,
        ?final_height,
        ?response_len,
        ?chain_tip_height,
        ?stop_height,
        ?intersection_height,
        ?intersection,
        ?stop,
        ?max_len,
        "responding to peer GetBlocks or GetHeaders",
    );

    // Check the function implements the Find protocol
    assert!(
        response_len <= max_len.try_into().expect("fits in usize"),
        "a Find response must not exceed the maximum response length",
    );

    height_range
}
/// Returns a list of [`block::Hash`]es in the chain,
/// following the `intersection` with the chain.
///
/// The list stops early at the first height that has no hash in `chain` or `db`.
///
/// See [`find_chain_hashes()`] for details.
///
/// # Panics
///
/// - If the collected list contains the `intersection` hash.
/// - If the `stop` hash is in the collected list, but it is not the final hash.
fn collect_chain_hashes<C>(
    chain: Option<C>,
    db: &ZebraDb,
    intersection: Option<block::Hash>,
    stop: Option<block::Hash>,
    max_len: u32,
) -> Vec<block::Hash>
where
    C: AsRef<Chain>,
{
    let chain = chain.as_ref();

    let height_range = find_chain_height_range(chain, db, intersection, stop, max_len);

    // Looks up the hash at `height`, logging the result of every lookup.
    let hash_at_height = |height: u32| {
        let hash = hash_by_height(chain, db, Height(height));

        // A recently committed block dropped the intersection we previously found.
        if hash.is_none() {
            info!(
                ?intersection,
                ?stop,
                ?max_len,
                "state found height range, but then partially dropped it, returning partial response",
            );
        }

        tracing::trace!(
            ?hash,
            ?height,
            ?intersection,
            ?stop,
            ?max_len,
            "adding hash to peer Find response",
        );

        hash
    };

    // All the hashes should be in the chain.
    // If they are not, we don't want to return them,
    // so the list ends at the first missing hash.
    let hashes: Vec<block::Hash> = height_range.map_while(hash_at_height).collect();

    // Check the function implements the Find protocol
    let contains_intersection = intersection.map_or(false, |hash| hashes.contains(&hash));
    assert!(
        !contains_intersection,
        "the list must not contain the intersection hash",
    );

    if let Some(stop) = stop {
        if let Some((_last, hashes_except_last)) = hashes.split_last() {
            assert!(
                !hashes_except_last.contains(&stop),
                "if the stop hash is in the list, it must be the final hash",
            );
        }
    }

    hashes
}
/// Returns a list of [`block::Header`]s in the chain,
/// following the `intersection` with the chain.
///
/// The list stops early at the first height that has no header in `chain` or `db`.
///
/// See [`find_chain_hashes()`] for details.
fn collect_chain_headers<C>(
    chain: Option<C>,
    db: &ZebraDb,
    intersection: Option<block::Hash>,
    stop: Option<block::Hash>,
    max_len: u32,
) -> Vec<Arc<block::Header>>
where
    C: AsRef<Chain>,
{
    let chain = chain.as_ref();

    let height_range = find_chain_height_range(chain, db, intersection, stop, max_len);

    // We don't check that this function implements the Find protocol,
    // because fetching extra hashes (or re-calculating hashes) is expensive.
    // (This was one of the most expensive and longest-running functions in the state.)

    // Looks up the header at `height`, logging every lookup.
    let header_at_height = |height: u32| {
        let header = block_header(chain, db, Height(height).into());

        // A recently committed block dropped the intersection we previously found
        if header.is_none() {
            info!(
                ?intersection,
                ?stop,
                ?max_len,
                "state found height range, but then partially dropped it, returning partial response",
            );
        }

        tracing::trace!(
            ?height,
            ?intersection,
            ?stop,
            ?max_len,
            "adding header to peer Find response",
        );

        header
    };

    // All the headers should be in the chain.
    // If they are not, we don't want to return them,
    // so the list ends at the first missing header.
    height_range.map_while(header_at_height).collect()
}
/// Finds the first hash that's in the peer's `known_blocks` and the chain.
/// Returns a list of hashes that follow that intersection, from the chain.
///
/// Starts from the first matching hash in the chain, ignoring all other hashes in
/// `known_blocks`. If there is no matching hash in the chain, starts from the genesis
/// hash.
///
/// Includes finalized and non-finalized blocks.
///
/// Stops the list of hashes after:
///   * adding the tip,
///   * adding the `stop` hash to the list, if it is in the chain, or
///   * adding `max_len` hashes to the list.
///
/// Returns an empty list if the state is empty,
/// and a partial or empty list if the found heights are concurrently modified.
pub fn find_chain_hashes<C>(
    chain: Option<C>,
    db: &ZebraDb,
    known_blocks: Vec<block::Hash>,
    stop: Option<block::Hash>,
    max_len: u32,
) -> Vec<block::Hash>
where
    C: AsRef<Chain>,
{
    // # Correctness
    //
    // See the note in `block_locator()`.
    let cached_chain = chain.as_ref();

    // The intersection is looked up first, then used as the starting point of the list.
    let first_known_hash = find_chain_intersection(cached_chain, db, known_blocks);

    collect_chain_hashes(cached_chain, db, first_known_hash, stop, max_len)
}
/// Finds the first hash that's in the peer's `known_blocks` and the chain.
/// Returns a list of headers that follow that intersection, from the chain.
///
/// See [`find_chain_hashes()`] for details.
pub fn find_chain_headers<C>(
    chain: Option<C>,
    db: &ZebraDb,
    known_blocks: Vec<block::Hash>,
    stop: Option<block::Hash>,
    max_len: u32,
) -> Vec<Arc<block::Header>>
where
    C: AsRef<Chain>,
{
    // # Correctness
    //
    // Headers are looked up by their hashes using a unique mapping,
    // so it is not possible for multiple hashes to look up the same header,
    // even across different chains.
    //
    // See also the note in `block_locator()`.
    let cached_chain = chain.as_ref();

    // The intersection is looked up first, then used as the starting point of the list.
    let first_known_hash = find_chain_intersection(cached_chain, db, known_blocks);

    collect_chain_headers(cached_chain, db, first_known_hash, stop, max_len)
}
/// Returns the median-time-past of the *next* block to be added to the best chain in
/// `non_finalized_state` or `db`.
///
/// # Panics
///
/// - If we don't have enough blocks in the state.
pub fn next_median_time_past(
non_finalized_state: &NonFinalizedState,
db: &ZebraDb,
) -> Result<DateTime32, BoxError> {
let mut best_relevant_chain_result = best_relevant_chain(non_finalized_state, db);
// Retry the finalized state query if it was interrupted by a finalizing block.
//
// TODO: refactor this into a generic retry(finalized_closure, process_and_check_closure) fn
for _ in 0..FINALIZED_STATE_QUERY_RETRIES {
if best_relevant_chain_result.is_ok() {
break;
}
best_relevant_chain_result = best_relevant_chain(non_finalized_state, db);
}
Ok(calculate_median_time_past(best_relevant_chain_result?))
}
/// Do a consistency check by checking the state tip before and after all other state queries.
///
/// Returns the `POW_ADJUSTMENT_BLOCK_SPAN` most recent blocks,
/// in reverse height order from the tip.
///
/// # Errors
///
/// Returns an error if:
/// - the state is empty,
/// - the state has fewer than `POW_ADJUSTMENT_BLOCK_SPAN` blocks, or
/// - the tip obtained before and after the block queries is not the same.
///
/// # Panics
///
/// - If the state has no tip on the second tip query, after it had a tip on the first one.
fn best_relevant_chain(
    non_finalized_state: &NonFinalizedState,
    db: &ZebraDb,
) -> Result<[Arc<Block>; POW_ADJUSTMENT_BLOCK_SPAN], BoxError> {
    let tip_before = read::best_tip(non_finalized_state, db).ok_or_else(|| {
        BoxError::from("Zebra's state is empty, wait until it syncs to the chain tip")
    })?;

    // Walk back from the tip hash, keeping only as many blocks as we need.
    let recent_blocks: Vec<_> = any_ancestor_blocks(non_finalized_state, db, tip_before.1)
        .into_iter()
        .take(POW_ADJUSTMENT_BLOCK_SPAN)
        .collect();

    // The conversion to a fixed-size array fails if there were too few blocks.
    let recent_blocks: [Arc<Block>; POW_ADJUSTMENT_BLOCK_SPAN] =
        recent_blocks.try_into().map_err(|_error| {
            "Zebra's state only has a few blocks, wait until it syncs to the chain tip"
        })?;

    let tip_after =
        read::best_tip(non_finalized_state, db).expect("already checked for an empty tip");

    // If the tip changed, the blocks might have come from different versions of the state.
    if tip_before == tip_after {
        Ok(recent_blocks)
    } else {
        Err("Zebra is committing too many blocks to the state, \
             wait until it syncs to the chain tip"
            .into())
    }
}
/// Returns the median-time-past for the provided `relevant_chain`.
///
/// The `relevant_chain` has blocks in reverse height order.
///
/// See [`next_median_time_past()`] for details.
///
/// # Panics
///
/// - If the calculated median-time-past can't be converted to a [`DateTime32`].
fn calculate_median_time_past(
    relevant_chain: [Arc<Block>; POW_ADJUSTMENT_BLOCK_SPAN],
) -> DateTime32 {
    // Extract the difficulty threshold and time from each block header.
    let relevant_data: Vec<(CompactDifficulty, DateTime<Utc>)> = relevant_chain
        .iter()
        .map(|block| {
            let header = &block.header;
            (header.difficulty_threshold, header.time)
        })
        .collect();

    // TODO: split out median-time-past into its own struct?
    //
    // These placeholder values are only needed to construct an `AdjustedDifficulty`.
    let ignored_time = DateTime::default();
    let ignored_height = Height(0);
    let ignored_network = Network::Mainnet;

    // Get the median-time-past, which doesn't depend on the time or the previous block height.
    // `relevant_data` will always have the correct length, because this function takes an array.
    let adjusted_difficulty = AdjustedDifficulty::new_from_header_time(
        ignored_time,
        ignored_height,
        ignored_network,
        relevant_data,
    );
    let median_time_past = adjusted_difficulty.median_time_past();

    // > Define the median-time-past of a block to be the median of the nTime fields of the
    // > preceding PoWMedianBlockSpan blocks (or all preceding blocks if there are fewer than
    // > PoWMedianBlockSpan). The median-time-past of a genesis block is not defined.
    //
    // https://zips.z.cash/protocol/protocol.pdf#blockheader
    DateTime32::try_from(median_time_past).expect("valid blocks have in-range times")
}