
564 lines
22 KiB

//! The primary implementation of the `zebra_state::Service` built upon rocksdb.
//! Zebra's database is implemented in 4 layers:
//! - [`FinalizedState`]: queues, validates, and commits blocks, using...
//! - [`ZebraDb`]: reads and writes [`zebra_chain`] types to the state database, using...
//! - [`DiskDb`]: reads and writes generic types to any column family in the database, using...
//! - [`disk_format`]: converts types to raw database bytes.
//! These layers allow us to split [`zebra_chain`] types for efficient database storage.
//! They reduce the risk of data corruption bugs, runtime inconsistencies, and panics.
//! # Correctness
//! [`crate::constants::state_database_format_version_in_code()`] must be incremented
//! each time the database format (column, serialization, etc) changes.
use std::{
io::{stderr, stdout, Write},
use zebra_chain::{block, parallel::tree::NoteCommitmentTrees, parameters::Network};
use crate::{
constants::{state_database_format_version_in_code, STATE_DATABASE_KIND},
request::{FinalizableBlock, FinalizedBlock, Treestate},
service::{check, QueuedCheckpointVerified},
BoxError, CheckpointVerifiedBlock, CloneError, Config,
pub mod column_family;
mod disk_db;
mod disk_format;
mod zebra_db;
#[cfg(any(test, feature = "proptest-impl"))]
mod arbitrary;
mod tests;
pub use column_family::{TypedColumnFamily, WriteTypedBatch};
pub use disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk};
pub use disk_format::{
FromDisk, IntoDisk, OutputIndex, OutputLocation, RawBytes, TransactionIndex,
TransactionLocation, MAX_ON_DISK_HEIGHT,
pub use zebra_db::ZebraDb;
#[cfg(feature = "shielded-scan")]
pub use disk_format::{
SaplingScannedDatabaseEntry, SaplingScannedDatabaseIndex, SaplingScannedResult,
#[cfg(any(test, feature = "proptest-impl"))]
pub use disk_format::KV;
/// The column families supported by the running `zebra-state` database code.
///
/// Each entry is a column family name; the entries are grouped below by the kind of
/// data they store.
///
/// Existing column families that aren't listed here are preserved when the database is opened.
pub const STATE_COLUMN_FAMILIES_IN_CODE: &[&str] = &[
// Blocks
// Transactions
// Transparent
// Sprout
// Sapling
// Orchard
// Chain
/// The finalized part of the chain state, stored in the db.
///
/// `rocksdb` allows concurrent writes through a shared reference,
/// so clones of the finalized state represent the same database instance.
/// When the final clone is dropped, the database is closed.
///
/// This is different from `NonFinalizedState::clone()`,
/// which returns an independent copy of the chains.
#[derive(Clone, Debug)]
pub struct FinalizedState {
// Configuration
//
// This configuration cannot be modified after the database is initialized,
// because some clones would have different values.
/// The configured stop height.
///
/// Commit blocks to the finalized state up to this height, then exit Zebra.
/// When this is `None`, the stop height check never triggers.
debug_stop_at_height: Option<block::Height>,
// Owned State
//
// Everything contained in this state must be shared by all clones, or read-only.
/// The underlying database.
///
/// `rocksdb` allows reads and writes via a shared reference,
/// so this database object can be freely cloned.
/// The last instance that is dropped will close the underlying database.
pub db: ZebraDb,
#[cfg(feature = "elasticsearch")]
/// The elasticsearch handle.
///
/// This field only exists when the `elasticsearch` feature is enabled.
pub elastic_db: Option<elasticsearch::Elasticsearch>,
#[cfg(feature = "elasticsearch")]
/// A collection of blocks to be sent to elasticsearch as a bulk.
///
/// Each block is stored as two consecutive entries: an operation line, followed by
/// the block itself. So the length of this list is twice the number of pending blocks.
pub elastic_blocks: Vec<String>,
impl FinalizedState {
/// Returns an on-disk database instance for `config`, `network`, and `elastic_db`.
/// If there is no existing database, creates a new database on disk.
///
/// The `elastic_db` parameter only exists when the `elasticsearch` feature is enabled.
pub fn new(
config: &Config,
network: Network,
#[cfg(feature = "elasticsearch")] elastic_db: Option<elasticsearch::Elasticsearch>,
) -> Self {
#[cfg(feature = "elasticsearch")]
/// Returns an on-disk database instance with the supplied production and debug settings.
/// If there is no existing database, creates a new database on disk.
///
/// The `elastic_db` parameter only exists when the `elasticsearch` feature is enabled.
///
/// If the finalized tip loaded from disk is already at or above the configured
/// `debug_stop_at_height`, the stop height branch below runs, rather than continuing
/// with a normal startup.
///
/// This method is intended for use in tests.
pub(crate) fn new_with_debug(
config: &Config,
network: Network,
debug_skip_format_upgrades: bool,
#[cfg(feature = "elasticsearch")] elastic_db: Option<elasticsearch::Elasticsearch>,
read_only: bool,
) -> Self {
let db = ZebraDb::new(
// The two `Self` initializers below are alternatives: exactly one is compiled,
// depending on whether the `elasticsearch` feature is enabled.
#[cfg(feature = "elasticsearch")]
let new_state = Self {
// The config stores the stop height as a raw number, so wrap it in `block::Height`.
debug_stop_at_height: config.debug_stop_at_height.map(block::Height),
// Start with no blocks queued for elasticsearch.
elastic_blocks: vec![],
#[cfg(not(feature = "elasticsearch"))]
let new_state = Self {
debug_stop_at_height: config.debug_stop_at_height.map(block::Height),
// TODO: move debug_stop_at_height into a task in the start command (#3442)
// An empty database has no finalized tip, so the stop height check is skipped.
if let Some(tip_height) = new_state.db.finalized_tip_height() {
if new_state.is_at_stop_height(tip_height) {
let debug_stop_at_height = new_state
.expect("true from `is_at_stop_height` implies `debug_stop_at_height` is Some");
let tip_hash = new_state.db.finalized_tip_hash();
// Distinguish a state that is already past the stop height from one that is exactly at it.
if tip_height > debug_stop_at_height {
"previous state height is greater than the stop height",
"state is already at the configured height"
// RocksDB can do a cleanup when column families are opened.
// So we want to drop it before we exit.
// Drops tracing log output that hasn't already been written to stdout
// since this exits before calling drop on the WorkerGuard for the logger thread.
// This is okay for now because this is test-only code
// TODO: Call ZebradApp.shutdown or drop its Tracing component before calling exit_process to flush logs to stdout
tracing::info!(tip = ?new_state.db.tip(), "loaded Zebra state cache");
/// Returns the configured network for this database.
///
/// The network is returned by value.
// NOTE(review): presumably this delegates to the underlying `ZebraDb` — confirm.
pub fn network(&self) -> Network {
/// Commit a checkpoint-verified block to the state.
///
/// `ordered_block` is a pair of the checkpoint-verified block and the channel used to
/// send the commit result back to the requester.
///
/// The commit itself is done by [`FinalizedState::commit_finalized_direct`]. Its result
/// is sent over the response channel as the committed block hash, or the commit error.
///
/// On success, returns the committed block together with its note commitment trees.
///
/// It's the caller's responsibility to ensure that blocks are committed in
/// order.
///
/// # Errors
///
/// Returns any error from [`FinalizedState::commit_finalized_direct`], converted to a
/// cloneable error and then back into a [`BoxError`].
pub fn commit_finalized(
&mut self,
ordered_block: QueuedCheckpointVerified,
prev_note_commitment_trees: Option<NoteCommitmentTrees>,
) -> Result<(CheckpointVerifiedBlock, NoteCommitmentTrees), BoxError> {
let (checkpoint_verified, rsp_tx) = ordered_block;
let result = self.commit_finalized_direct(
"commit checkpoint-verified request",
if result.is_ok() {
.set(checkpoint_verified.height.0 as f64);
// This height gauge is updated for both fully verified and checkpoint blocks.
// These updates can't conflict, because the state makes sure that blocks
// are committed in order.
.set(checkpoint_verified.height.0 as f64);
} else {
.set(checkpoint_verified.height.0 as f64);
// Make the error cloneable, so we can send it to the block verify future,
// and the block write task.
let result = result.map_err(CloneError::from);
// Only the block hash is sent to the requester. The result of the send is
// deliberately discarded.
let _ = rsp_tx.send(result.clone().map(|(hash, _)| hash).map_err(BoxError::from));
// The caller gets the block back, along with the updated note commitment trees.
.map(|(_hash, note_commitment_trees)| (checkpoint_verified, note_commitment_trees))
/// Immediately commit a `finalized` block to the finalized state.
///
/// This can be called either by the non-finalized state (when finalizing
/// a block) or by the checkpoint verifier.
///
/// Use `source` as the source of the block in log messages.
///
/// For checkpoint-verified blocks, `prev_note_commitment_trees` is the parent block's
/// note commitment trees. If it is `None`, the trees for the finalized tip are read
/// from the database instead.
///
/// On success, returns the hash of the committed block, and the note commitment trees
/// from the committed block's treestate.
///
/// # Errors
///
/// - Propagates any errors from writing to the DB
/// - Propagates any errors from updating history and note commitment trees
/// - If `hashFinalSaplingRoot` / `hashLightClientRoot` / `hashBlockCommitments`
/// does not match the expected value
///
/// # Panics
///
/// The chain order checks below are assertions, so committing a block that is not the
/// genesis block of an empty state, or the direct child of the finalized tip, is
/// treated as a caller bug rather than a recoverable error.
pub fn commit_finalized_direct(
&mut self,
finalizable_block: FinalizableBlock,
prev_note_commitment_trees: Option<NoteCommitmentTrees>,
source: &str,
) -> Result<(block::Hash, NoteCommitmentTrees), BoxError> {
// Both kinds of finalizable block are reduced to the same tuple, so the rest of
// this function doesn't need to know how the block was verified.
let (height, hash, finalized, prev_note_commitment_trees) = match finalizable_block {
FinalizableBlock::Checkpoint {
} => {
// Checkpoint-verified blocks don't have an associated treestate, so we retrieve the
// treestate of the finalized tip from the database and update it for the block
// being committed, assuming the retrieved treestate is the parent block's
// treestate. Later on, this function proves this assumption by asserting that the
// finalized tip is the parent block of the block being committed.
let block = checkpoint_verified.block.clone();
let mut history_tree = self.db.history_tree();
// Only read the tip's trees from the database if the caller didn't supply them.
let prev_note_commitment_trees = prev_note_commitment_trees
.unwrap_or_else(|| self.db.note_commitment_trees_for_tip());
// Update the note commitment trees.
let mut note_commitment_trees = prev_note_commitment_trees.clone();
// Check the block commitment if the history tree was not
// supplied by the non-finalized state. Note that we don't do
// this check for history trees supplied by the non-finalized
// state because the non-finalized state checks the block
// commitment.
// For Nu5-onward, the block hash commits only to
// non-authorizing data (see ZIP-244). This checks the
// authorizing data commitment, making sure the entire block
// contents were committed to. The test is done here (and not
// during semantic validation) because it needs the history tree
// root. While it _is_ checked during contextual validation,
// that is not called by the checkpoint verifier, and keeping a
// history tree there would be harder to implement.
// TODO: run this CPU-intensive cryptography in a parallel rayon
// thread, if it shows up in profiles
// Update the history tree.
// TODO: run this CPU-intensive cryptography in a parallel rayon
// thread, if it shows up in profiles
let history_tree_mut = Arc::make_mut(&mut history_tree);
let sapling_root = note_commitment_trees.sapling.root();
let orchard_root = note_commitment_trees.orchard.root();
history_tree_mut.push(self.network(), block.clone(), sapling_root, orchard_root)?;
let treestate = Treestate {
FinalizedBlock::from_checkpoint_verified(checkpoint_verified, treestate),
FinalizableBlock::Contextual {
} => (
FinalizedBlock::from_contextually_verified(contextually_verified, treestate),
let committed_tip_hash = self.db.finalized_tip_hash();
let committed_tip_height = self.db.finalized_tip_height();
// Assert that callers (including unit tests) get the chain order correct
if self.db.is_empty() {
committed_tip_hash, finalized.block.header.previous_block_hash,
"the first block added to an empty state must be a genesis block, source: {source}",
"cannot commit genesis: invalid height, source: {source}",
} else {
committed_tip_height.expect("state must have a genesis block committed") + 1,
"committed block height must be 1 more than the finalized tip height, source: {source}",
committed_tip_hash, finalized.block.header.previous_block_hash,
"committed block must be a child of the finalized tip, source: {source}",
#[cfg(feature = "elasticsearch")]
let finalized_inner_block = finalized.block.clone();
// Keep a copy of the trees to return to the caller after the block is written.
let note_commitment_trees = finalized.treestate.note_commitment_trees.clone();
let result = self.db.write_block(
if result.is_ok() {
// Save blocks to elasticsearch if the feature is enabled.
#[cfg(feature = "elasticsearch")]
// TODO: move the stop height check to the syncer (#3442)
if self.is_at_stop_height(height) {
block_source = ?source,
"stopping at configured height, flushing database to disk"
// We're just about to do a forced exit, so it's ok to do a forced db shutdown
// Drops tracing log output that hasn't already been written to stdout
// since this exits before calling drop on the WorkerGuard for the logger thread.
// This is okay for now because this is test-only code
// TODO: Call ZebradApp.shutdown or drop its Tracing component before calling exit_process to flush logs to stdout
result.map(|hash| (hash, note_commitment_trees))
#[cfg(feature = "elasticsearch")]
/// Store finalized blocks into an elasticsearch database.
///
/// We use the elasticsearch bulk api to index multiple blocks at a time while we are
/// synchronizing the chain, when we get close to tip we index blocks one by one.
///
/// Blocks are only handled when an elasticsearch client is configured in `elastic_db`.
///
/// # Panics
///
/// - If the Tokio runtime used for the bulk request can't be created
/// - If the bulk request fails, or its response can't be parsed
/// - If the response reports errors, or has no boolean `errors` field
pub fn elasticsearch(&mut self, block: &Arc<block::Block>) {
if let Some(client) = self.elastic_db.clone() {
let block_time = block.header.time.timestamp();
let local_time = chrono::Utc::now().timestamp();
// Bulk size is small enough to avoid the elasticsearch 100mb content length limitation.
// MAX_BLOCK_BYTES = 2MB but each block uses around 4.1 MB of JSON.
// Each block counts as 2 as we send them with an operation/header line. A value of 48
// is 24 blocks.
const AWAY_FROM_TIP_BULK_SIZE: usize = 48;
// The number of blocks the bulk will have when we are in sync.
// A value of 2 means only 1 block as we want to insert them as soon as we get
// them for a real time experience. This is the same for mainnet and testnet.
const CLOSE_TO_TIP_BULK_SIZE: usize = 2;
// We consider in sync when the local time and the blockchain time difference is
// less than this number of seconds.
const CLOSE_TO_TIP_SECONDS: i64 = 14400; // 4 hours
let mut blocks_size_to_dump = AWAY_FROM_TIP_BULK_SIZE;
// If we are close to the tip, index one block per bulk call.
if local_time - block_time < CLOSE_TO_TIP_SECONDS {
blocks_size_to_dump = CLOSE_TO_TIP_BULK_SIZE;
// Insert the operation line.
// The document id is the block height. Blocks without a coinbase height use height 0.
let height_number = block.coinbase_height().unwrap_or(block::Height(0)).0;
"index": {
"_id": height_number.to_string().as_str()
// Insert the block itself.
// We are in bulk time, insert to ES all we have.
if self.elastic_blocks.len() >= blocks_size_to_dump {
// A new Tokio runtime is created for each bulk request, and this method blocks
// until the request has finished.
let rt = tokio::runtime::Runtime::new()
.expect("runtime creation for elasticsearch should not fail.");
// The async block takes ownership of its own copies of the queued blocks and the network.
let blocks = self.elastic_blocks.clone();
let network = self.network();
rt.block_on(async move {
let response = client
// The index name is `zcash_` followed by the lowercase network name.
format!("zcash_{}", network.to_string().to_lowercase()).as_str(),
.expect("ES Request should never fail");
// Make sure no errors ever.
let response_body = response
.expect("ES response parsing error. Maybe we are sending more than 100 mb of data (`http.max_content_length`)");
// A missing or non-boolean `errors` field is treated as an error.
let errors = response_body["errors"].as_bool().unwrap_or(true);
assert!(!errors, "{}", format!("ES error: {response_body}"));
// Clean the block storage.
/// Returns whether `block_height` is greater than or equal to the configured
/// stop height.
///
/// Returns `false` if no stop height is configured, or if `block_height` is
/// below the configured stop height.
///
/// This method only performs the check. Callers decide what to do when the stop
/// height has been reached.
fn is_at_stop_height(&self, block_height: block::Height) -> bool {
// Without a configured stop height, the state never stops.
let debug_stop_at_height = match self.debug_stop_at_height {
Some(debug_stop_at_height) => debug_stop_at_height,
None => return false,
// The stop height hasn't been reached yet.
if block_height < debug_stop_at_height {
return false;
/// Exit the host process.
/// Designed for debugging and tests.
/// TODO: move the stop height check to the syncer (#3442)
fn exit_process() -> ! {
tracing::info!("exiting Zebra");
// Some OSes require a flush to send all output to the terminal.
// Zebra's logging doesn't depend on `tokio`, so we flush the stdlib sync streams.
// TODO: if this doesn't work, send an empty line as well.
let _ = stdout().lock().flush();
let _ = stderr().lock().flush();
// Give some time to logger thread to flush out any remaining lines to stdout
// and yield so that tests pass on MacOS
// Exits before calling drop on the WorkerGuard for the logger thread,
// dropping any lines that haven't already been written to stdout.
// This is okay for now because this is test-only code