// zebra/zebra-state/src/service/finalized_state.rs

//! The primary implementation of the `zebra_state::Service` built upon rocksdb.
//!
//! Zebra's database is implemented in 4 layers:
//! - [`FinalizedState`]: queues, validates, and commits blocks, using...
//! - [`ZebraDb`]: reads and writes [`zebra_chain`] types to the state database, using...
//! - [`DiskDb`]: reads and writes generic types to any column family in the database, using...
//! - [`disk_format`]: converts types to raw database bytes.
//!
//! These layers allow us to split [`zebra_chain`] types for efficient database storage.
//! They reduce the risk of data corruption bugs, runtime inconsistencies, and panics.
//!
//! # Correctness
//!
//! [`crate::constants::state_database_format_version_in_code()`] must be incremented
//! each time the database format (column families, serialization, etc.) changes.
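//!
//! # Example
//!
//! A minimal sketch of how the layers fit together, assuming a [`Config`] and
//! [`Network`] value are already available (and the `elasticsearch` feature is
//! disabled):
//!
//! ```ignore
//! // `FinalizedState` is the top layer; its `db` field is the `ZebraDb` layer,
//! // which wraps `DiskDb` and the `disk_format` serialization code.
//! let state = FinalizedState::new(&config, network);
//! let tip_height = state.db.finalized_tip_height();
//! ```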
use std::{
io::{stderr, stdout, Write},
sync::Arc,
};
use zebra_chain::{block, parallel::tree::NoteCommitmentTrees, parameters::Network};
use crate::{
constants::{state_database_format_version_in_code, STATE_DATABASE_KIND},
request::{FinalizableBlock, FinalizedBlock, Treestate},
service::{check, QueuedCheckpointVerified},
BoxError, CheckpointVerifiedBlock, CloneError, Config,
};
pub mod column_family;
mod disk_db;
mod disk_format;
mod zebra_db;
#[cfg(any(test, feature = "proptest-impl"))]
mod arbitrary;
#[cfg(test)]
mod tests;
#[allow(unused_imports)]
pub use column_family::{TypedColumnFamily, WriteTypedBatch};
#[allow(unused_imports)]
pub use disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk};
#[allow(unused_imports)]
pub use disk_format::{
FromDisk, IntoDisk, OutputIndex, OutputLocation, RawBytes, TransactionIndex,
TransactionLocation, MAX_ON_DISK_HEIGHT,
};
pub use zebra_db::ZebraDb;
#[cfg(feature = "shielded-scan")]
pub use disk_format::{
SaplingScannedDatabaseEntry, SaplingScannedDatabaseIndex, SaplingScannedResult,
SaplingScanningKey,
};
#[cfg(any(test, feature = "proptest-impl"))]
pub use disk_format::KV;
/// The column families supported by the running `zebra-state` database code.
///
/// Existing column families that aren't listed here are preserved when the database is opened.
pub const STATE_COLUMN_FAMILIES_IN_CODE: &[&str] = &[
// Blocks
"hash_by_height",
"height_by_hash",
"block_header_by_height",
// Transactions
"tx_by_loc",
"hash_by_tx_loc",
"tx_loc_by_hash",
// Transparent
"balance_by_transparent_addr",
"tx_loc_by_transparent_addr_loc",
"utxo_by_out_loc",
"utxo_loc_by_transparent_addr_loc",
// Sprout
"sprout_nullifiers",
"sprout_anchors",
"sprout_note_commitment_tree",
// Sapling
"sapling_nullifiers",
"sapling_anchors",
"sapling_note_commitment_tree",
"sapling_note_commitment_subtree",
// Orchard
"orchard_nullifiers",
"orchard_anchors",
"orchard_note_commitment_tree",
"orchard_note_commitment_subtree",
// Chain
"history_tree",
"tip_chain_value_pool",
];
/// The finalized part of the chain state, stored in the db.
///
/// `rocksdb` allows concurrent writes through a shared reference,
/// so clones of the finalized state represent the same database instance.
/// When the final clone is dropped, the database is closed.
///
/// This is different from `NonFinalizedState::clone()`,
/// which returns an independent copy of the chains.
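///
/// For example (a minimal sketch, assuming `state` is an open [`FinalizedState`]):
///
/// ```ignore
/// // Clones share the same underlying RocksDB instance:
/// let reader = state.clone();
/// assert_eq!(reader.db.finalized_tip_hash(), state.db.finalized_tip_hash());
/// ```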
#[derive(Clone, Debug)]
pub struct FinalizedState {
// Configuration
//
// This configuration cannot be modified after the database is initialized,
// because some clones would have different values.
//
/// The configured stop height.
///
/// Commit blocks to the finalized state up to this height, then exit Zebra.
debug_stop_at_height: Option<block::Height>,
// Owned State
//
// Everything contained in this state must be shared by all clones, or read-only.
//
/// The underlying database.
///
/// `rocksdb` allows reads and writes via a shared reference,
/// so this database object can be freely cloned.
/// The last instance that is dropped will close the underlying database.
pub db: ZebraDb,
#[cfg(feature = "elasticsearch")]
/// The elasticsearch handle.
pub elastic_db: Option<elasticsearch::Elasticsearch>,
#[cfg(feature = "elasticsearch")]
/// A collection of blocks to be sent to Elasticsearch in a bulk request.
pub elastic_blocks: Vec<String>,
}
impl FinalizedState {
/// Returns an on-disk database instance for `config`, `network`, and `elastic_db`.
/// If there is no existing database, creates a new database on disk.
pub fn new(
config: &Config,
network: Network,
#[cfg(feature = "elasticsearch")] elastic_db: Option<elasticsearch::Elasticsearch>,
) -> Self {
Self::new_with_debug(
config,
network,
false,
#[cfg(feature = "elasticsearch")]
elastic_db,
false,
)
}
/// Returns an on-disk database instance with the supplied production and debug settings.
/// If there is no existing database, creates a new database on disk.
///
/// This method is intended for use in tests.
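///
/// A minimal sketch of a test-only call, assuming the `elasticsearch` feature is
/// disabled: skip format upgrades and open the database read-only.
///
/// ```ignore
/// let state = FinalizedState::new_with_debug(&config, network, true, true);
/// ```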
pub(crate) fn new_with_debug(
config: &Config,
network: Network,
debug_skip_format_upgrades: bool,
#[cfg(feature = "elasticsearch")] elastic_db: Option<elasticsearch::Elasticsearch>,
read_only: bool,
) -> Self {
let db = ZebraDb::new(
config,
STATE_DATABASE_KIND,
&state_database_format_version_in_code(),
network,
debug_skip_format_upgrades,
STATE_COLUMN_FAMILIES_IN_CODE
.iter()
.map(ToString::to_string),
read_only,
);
#[cfg(feature = "elasticsearch")]
let new_state = Self {
debug_stop_at_height: config.debug_stop_at_height.map(block::Height),
db,
elastic_db,
elastic_blocks: vec![],
};
#[cfg(not(feature = "elasticsearch"))]
let new_state = Self {
debug_stop_at_height: config.debug_stop_at_height.map(block::Height),
db,
};
// TODO: move debug_stop_at_height into a task in the start command (#3442)
if let Some(tip_height) = new_state.db.finalized_tip_height() {
if new_state.is_at_stop_height(tip_height) {
let debug_stop_at_height = new_state
.debug_stop_at_height
.expect("true from `is_at_stop_height` implies `debug_stop_at_height` is Some");
let tip_hash = new_state.db.finalized_tip_hash();
if tip_height > debug_stop_at_height {
tracing::error!(
?debug_stop_at_height,
?tip_height,
?tip_hash,
"previous state height is greater than the stop height",
);
}
tracing::info!(
?debug_stop_at_height,
?tip_height,
?tip_hash,
"state is already at the configured height"
);
// RocksDB can do a cleanup when column families are opened.
// So we want to drop it before we exit.
std::mem::drop(new_state);
// Drops tracing log output that hasn't already been written to stdout,
// since this exits before calling drop on the WorkerGuard for the logger thread.
// This is okay for now because this is test-only code.
//
// TODO: Call ZebradApp.shutdown or drop its Tracing component before calling exit_process to flush logs to stdout
Self::exit_process();
}
}
tracing::info!(tip = ?new_state.db.tip(), "loaded Zebra state cache");
new_state
}
/// Returns the configured network for this database.
pub fn network(&self) -> Network {
self.db.network()
}
/// Commit a checkpoint-verified block to the state.
///
/// It's the caller's responsibility to ensure that blocks are committed in
/// order.
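///
/// A minimal sketch of the calling convention, assuming `verified` is a
/// [`CheckpointVerifiedBlock`] for the next height after the finalized tip:
///
/// ```ignore
/// let (rsp_tx, rsp_rx) = tokio::sync::oneshot::channel();
/// let (committed, trees) = state.commit_finalized((verified, rsp_tx), None)?;
/// // `rsp_rx` also receives the committed block hash (or the error).
/// ```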
pub fn commit_finalized(
&mut self,
ordered_block: QueuedCheckpointVerified,
prev_note_commitment_trees: Option<NoteCommitmentTrees>,
) -> Result<(CheckpointVerifiedBlock, NoteCommitmentTrees), BoxError> {
let (checkpoint_verified, rsp_tx) = ordered_block;
let result = self.commit_finalized_direct(
checkpoint_verified.clone().into(),
prev_note_commitment_trees,
"commit checkpoint-verified request",
);
if result.is_ok() {
metrics::counter!("state.checkpoint.finalized.block.count").increment(1);
metrics::gauge!("state.checkpoint.finalized.block.height")
.set(checkpoint_verified.height.0 as f64);
// This height gauge is updated for both fully verified and checkpoint blocks.
// These updates can't conflict, because the state makes sure that blocks
// are committed in order.
metrics::gauge!("zcash.chain.verified.block.height")
.set(checkpoint_verified.height.0 as f64);
metrics::counter!("zcash.chain.verified.block.total").increment(1);
} else {
metrics::counter!("state.checkpoint.error.block.count").increment(1);
metrics::gauge!("state.checkpoint.error.block.height")
.set(checkpoint_verified.height.0 as f64);
};
// Make the error cloneable, so we can send it to the block verify future,
// and the block write task.
let result = result.map_err(CloneError::from);
let _ = rsp_tx.send(result.clone().map(|(hash, _)| hash).map_err(BoxError::from));
result
.map(|(_hash, note_commitment_trees)| (checkpoint_verified, note_commitment_trees))
.map_err(BoxError::from)
}
/// Immediately commit a `finalized` block to the finalized state.
///
/// This can be called either by the non-finalized state (when finalizing
/// a block) or by the checkpoint verifier.
///
/// Use `source` as the source of the block in log messages.
///
/// # Errors
///
/// - Propagates any errors from writing to the DB
/// - Propagates any errors from updating history and note commitment trees
/// - If `hashFinalSaplingRoot` / `hashLightClientRoot` / `hashBlockCommitments`
/// does not match the expected value
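///
/// A minimal sketch for the checkpoint case, assuming `checkpoint_verified` is a
/// [`CheckpointVerifiedBlock`]; passing `None` lets this method rebuild the
/// treestate from the finalized tip:
///
/// ```ignore
/// let (hash, trees) = state.commit_finalized_direct(
///     FinalizableBlock::Checkpoint { checkpoint_verified },
///     None,
///     "checkpoint verifier",
/// )?;
/// ```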
#[allow(clippy::unwrap_in_result)]
pub fn commit_finalized_direct(
&mut self,
finalizable_block: FinalizableBlock,
prev_note_commitment_trees: Option<NoteCommitmentTrees>,
source: &str,
) -> Result<(block::Hash, NoteCommitmentTrees), BoxError> {
let (height, hash, finalized, prev_note_commitment_trees) = match finalizable_block {
FinalizableBlock::Checkpoint {
checkpoint_verified,
} => {
// Checkpoint-verified blocks don't have an associated treestate, so we retrieve the
// treestate of the finalized tip from the database and update it for the block
// being committed, assuming the retrieved treestate is the parent block's
// treestate. Later on, this function proves this assumption by asserting that the
// finalized tip is the parent block of the block being committed.
let block = checkpoint_verified.block.clone();
let mut history_tree = self.db.history_tree();
let prev_note_commitment_trees = prev_note_commitment_trees
.unwrap_or_else(|| self.db.note_commitment_trees_for_tip());
// Update the note commitment trees.
let mut note_commitment_trees = prev_note_commitment_trees.clone();
note_commitment_trees.update_trees_parallel(&block)?;
// Check the block commitment if the history tree was not
// supplied by the non-finalized state. Note that we don't do
// this check for history trees supplied by the non-finalized
// state because the non-finalized state checks the block
// commitment.
//
// From NU5 onward, the block hash commits only to
// non-authorizing data (see ZIP-244). This checks the
// authorizing data commitment, making sure the entire block
// contents were committed to. The test is done here (and not
// during semantic validation) because it needs the history tree
// root. While it _is_ checked during contextual validation,
// that is not called by the checkpoint verifier, and keeping a
// history tree there would be harder to implement.
//
// TODO: run this CPU-intensive cryptography in a parallel rayon
// thread, if it shows up in profiles
check::block_commitment_is_valid_for_chain_history(
block.clone(),
self.network(),
&history_tree,
)?;
// Update the history tree.
//
// TODO: run this CPU-intensive cryptography in a parallel rayon
// thread, if it shows up in profiles
let history_tree_mut = Arc::make_mut(&mut history_tree);
let sapling_root = note_commitment_trees.sapling.root();
let orchard_root = note_commitment_trees.orchard.root();
history_tree_mut.push(self.network(), block.clone(), sapling_root, orchard_root)?;
let treestate = Treestate {
note_commitment_trees,
history_tree,
};
(
checkpoint_verified.height,
checkpoint_verified.hash,
FinalizedBlock::from_checkpoint_verified(checkpoint_verified, treestate),
Some(prev_note_commitment_trees),
)
}
FinalizableBlock::Contextual {
contextually_verified,
treestate,
} => (
contextually_verified.height,
contextually_verified.hash,
FinalizedBlock::from_contextually_verified(contextually_verified, treestate),
prev_note_commitment_trees,
),
};
let committed_tip_hash = self.db.finalized_tip_hash();
let committed_tip_height = self.db.finalized_tip_height();
// Assert that callers (including unit tests) get the chain order correct
if self.db.is_empty() {
assert_eq!(
committed_tip_hash, finalized.block.header.previous_block_hash,
"the first block added to an empty state must be a genesis block, source: {source}",
);
assert_eq!(
block::Height(0),
height,
"cannot commit genesis: invalid height, source: {source}",
);
} else {
assert_eq!(
committed_tip_height.expect("state must have a genesis block committed") + 1,
Some(height),
"committed block height must be 1 more than the finalized tip height, source: {source}",
);
assert_eq!(
committed_tip_hash, finalized.block.header.previous_block_hash,
"committed block must be a child of the finalized tip, source: {source}",
);
}
#[cfg(feature = "elasticsearch")]
let finalized_inner_block = finalized.block.clone();
let note_commitment_trees = finalized.treestate.note_commitment_trees.clone();
let result = self.db.write_block(
finalized,
prev_note_commitment_trees,
self.network(),
source,
);
if result.is_ok() {
// Save blocks to elasticsearch if the feature is enabled.
#[cfg(feature = "elasticsearch")]
self.elasticsearch(&finalized_inner_block);
// TODO: move the stop height check to the syncer (#3442)
if self.is_at_stop_height(height) {
tracing::info!(
?height,
?hash,
block_source = ?source,
"stopping at configured height, flushing database to disk"
);
// We're just about to do a forced exit, so it's ok to do a forced db shutdown
self.db.shutdown(true);
// Drops tracing log output that hasn't already been written to stdout,
// since this exits before calling drop on the WorkerGuard for the logger thread.
// This is okay for now because this is test-only code.
//
// TODO: Call ZebradApp.shutdown or drop its Tracing component before calling exit_process to flush logs to stdout
Self::exit_process();
}
}
result.map(|hash| (hash, note_commitment_trees))
}
#[cfg(feature = "elasticsearch")]
/// Store finalized blocks in an Elasticsearch database.
///
/// We use the Elasticsearch bulk API to index multiple blocks at a time while we are
/// synchronizing the chain; when we get close to the tip, we index blocks one by one.
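///
/// Each block is pushed to the bulk body as an operation/header line followed by
/// the block itself as JSON, roughly like this (the height shown is illustrative):
///
/// ```text
/// {"index":{"_id":"419200"}}
/// {"header":{...},"transactions":[...]}
/// ```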
pub fn elasticsearch(&mut self, block: &Arc<block::Block>) {
if let Some(client) = self.elastic_db.clone() {
let block_time = block.header.time.timestamp();
let local_time = chrono::Utc::now().timestamp();
// The bulk size is small enough to avoid the Elasticsearch 100 MB content length limitation.
// MAX_BLOCK_BYTES is 2 MB, but each block uses around 4.1 MB as JSON.
// Each block counts as 2 entries, because we send an operation/header line with it,
// so a value of 48 is 24 blocks (24 × ~4.1 MB ≈ 98 MB, just under the limit).
const AWAY_FROM_TIP_BULK_SIZE: usize = 48;
// The number of blocks the bulk will have when we are in sync.
// A value of 2 means only 1 block, because we want to insert blocks as soon as we
// get them, for a near real-time experience. This is the same for mainnet and testnet.
const CLOSE_TO_TIP_BULK_SIZE: usize = 2;
// We consider ourselves in sync when the difference between the local time and the
// block time is less than this number of seconds.
const CLOSE_TO_TIP_SECONDS: i64 = 14400; // 4 hours
let mut blocks_size_to_dump = AWAY_FROM_TIP_BULK_SIZE;
// If we are close to the tip, index one block per bulk call.
if local_time - block_time < CLOSE_TO_TIP_SECONDS {
blocks_size_to_dump = CLOSE_TO_TIP_BULK_SIZE;
}
// Insert the operation line.
let height_number = block.coinbase_height().unwrap_or(block::Height(0)).0;
self.elastic_blocks.push(
serde_json::json!({
"index": {
"_id": height_number.to_string().as_str()
}
})
.to_string(),
);
// Insert the block itself.
self.elastic_blocks
.push(serde_json::json!(block).to_string());
// The bulk is full enough, so send everything we have to Elasticsearch.
if self.elastic_blocks.len() >= blocks_size_to_dump {
let rt = tokio::runtime::Runtime::new()
.expect("runtime creation for elasticsearch should not fail.");
let blocks = self.elastic_blocks.clone();
let network = self.network();
rt.block_on(async move {
let response = client
.bulk(elasticsearch::BulkParts::Index(
format!("zcash_{}", network.to_string().to_lowercase()).as_str(),
))
.body(blocks)
.send()
.await
.expect("ES Request should never fail");
// Make sure the bulk request did not report any errors.
let response_body = response
.json::<serde_json::Value>()
.await
.expect("ES response parsing error. Maybe we are sending more than 100 mb of data (`http.max_content_length`)");
let errors = response_body["errors"].as_bool().unwrap_or(true);
assert!(!errors, "ES error: {response_body}");
});
// Clean the block storage.
self.elastic_blocks.clear();
}
}
}
/// Returns `true` if `block_height` is greater than or equal to the
/// configured debug stop height, so the caller can stop the process.
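///
/// For example, with `debug_stop_at_height` set to `Some(block::Height(100))`,
/// this returns `false` for height 99 and `true` for heights 100 and above.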
fn is_at_stop_height(&self, block_height: block::Height) -> bool {
let debug_stop_at_height = match self.debug_stop_at_height {
Some(debug_stop_at_height) => debug_stop_at_height,
None => return false,
};
if block_height < debug_stop_at_height {
return false;
}
true
}
/// Exit the host process.
///
/// Designed for debugging and tests.
///
/// TODO: move the stop height check to the syncer (#3442)
fn exit_process() -> ! {
tracing::info!("exiting Zebra");
// Some OSes require a flush to send all output to the terminal.
// Zebra's logging doesn't depend on `tokio`, so we flush the stdlib sync streams.
//
// TODO: if this doesn't work, send an empty line as well.
let _ = stdout().lock().flush();
let _ = stderr().lock().flush();
// Give the logger thread some time to flush any remaining lines to stdout,
// and yield so that tests pass on macOS.
std::thread::sleep(std::time::Duration::from_secs(3));
// Exits before calling drop on the WorkerGuard for the logger thread,
// dropping any lines that haven't already been written to stdout.
// This is okay for now because this is test-only code.
std::process::exit(0);
}
}