refactor(state): move all RocksDB API calls to the disk_db module (#3578)

* refactor(state): move RocksDB-specific initialization to a new module
* refactor(state): move RocksDB-specific shutdown to a new module
* refactor(state): temporarily allow RocksDB-specific reads and writes, without a new module
  Unlike the last few commits, this one actually compiles.
* refactor(state): add a DiskWriteBatch wrapper for RocksDB writes
* refactor(state): move finalized state test methods to a test module
This commit is contained in:
parent 8e36686cc3
commit 32017f992b
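The core of the refactor is easiest to see in miniature. The sketch below condenses the `DiskDb` and `DiskWriteBatch` wrappers that the diff introduces in the new `disk_db` module (simplified here: most methods are omitted, and the generic `IntoDisk` keys used by the real `WriteDisk` trait are reduced to raw bytes):

```rust
// Condensed sketch of the wrapper pattern in this commit's disk_db module
// (simplified; the real module also wraps opening, iteration, and shutdown).

/// Wrapper struct to ensure low-level database writes go through the correct API.
pub struct DiskWriteBatch {
    // The inner RocksDB write batch: private, so only this module can touch it.
    batch: rocksdb::WriteBatch,
}

/// Wrapper struct to ensure low-level database access goes through the correct API.
pub struct DiskDb {
    // The inner RocksDB database: private, so only this module can touch it.
    db: rocksdb::DB,
}

impl DiskWriteBatch {
    pub fn new() -> Self {
        DiskWriteBatch {
            batch: rocksdb::WriteBatch::default(),
        }
    }

    /// Queue a serialized (key, value) insert; nothing hits disk yet.
    /// (In the real code this is the `WriteDisk::zs_insert` trait method,
    /// generic over `IntoDisk` keys and values.)
    pub fn zs_insert(&mut self, cf: &rocksdb::ColumnFamily, key: &[u8], value: &[u8]) {
        self.batch.put_cf(cf, key, value);
    }
}

impl DiskDb {
    /// Write a whole batch to RocksDB as one atomic write.
    pub fn write(&self, batch: DiskWriteBatch) -> Result<(), rocksdb::Error> {
        self.db.write(batch.batch)
    }
}
```

Callers collect writes into a `DiskWriteBatch` via `zs_insert`/`zs_delete`, then hand the whole batch to `DiskDb::write`, so each block commit reaches RocksDB as a single atomic write and no other module uses `rocksdb::` types directly. The `finalized_state.rs` hunks below show commit sites swapping `rocksdb::WriteBatch::default()` for `DiskWriteBatch::new()` and `batch.delete_cf(...)` for `batch.zs_delete(...)`.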
@@ -1,8 +1,6 @@
-use std::{convert::TryInto, path::PathBuf};
+use std::path::PathBuf;
 
-use rlimit::increase_nofile_limit;
 use serde::{Deserialize, Serialize};
-use tracing::{info, warn};
 
 use zebra_chain::parameters::Network;
 
@@ -57,34 +55,14 @@ fn gen_temp_path(prefix: &str) -> PathBuf {
 }
 
 impl Config {
-    /// The ideal open file limit for Zebra
-    const IDEAL_OPEN_FILE_LIMIT: u64 = 1024;
-
-    /// The minimum number of open files for Zebra to operate normally. Also used
-    /// as the default open file limit, when the OS doesn't tell us how many
-    /// files we can use.
-    ///
-    /// We want 100+ file descriptors for peers, and 100+ for the database.
-    ///
-    /// On Windows, the default limit is 512 high-level I/O files, and 8192
-    /// low-level I/O files:
-    /// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setmaxstdio?view=msvc-160#remarks
-    const MIN_OPEN_FILE_LIMIT: u64 = 512;
-
-    /// The number of files used internally by Zebra.
-    ///
-    /// Zebra uses file descriptors for OS libraries (10+), polling APIs (10+),
-    /// stdio (3), and other OS facilities (2+).
-    const RESERVED_FILE_COUNT: u64 = 48;
-
-    /// Returns the path and database options for the finalized state database
-    pub(crate) fn db_config(&self, network: Network) -> (PathBuf, rocksdb::Options) {
+    /// Returns the path for the finalized state database
+    pub(crate) fn db_path(&self, network: Network) -> PathBuf {
         let net_dir = match network {
             Network::Mainnet => "mainnet",
             Network::Testnet => "testnet",
         };
 
-        let path = if self.ephemeral {
+        if self.ephemeral {
             gen_temp_path(&format!(
                 "zebra-state-v{}-{}",
                 crate::constants::DATABASE_FORMAT_VERSION,
@@ -95,25 +73,7 @@ impl Config {
                 .join("state")
                 .join(format!("v{}", crate::constants::DATABASE_FORMAT_VERSION))
                 .join(net_dir)
-        };
-
-        let mut opts = rocksdb::Options::default();
-
-        opts.create_if_missing(true);
-        opts.create_missing_column_families(true);
-
-        let open_file_limit = Config::increase_open_file_limit();
-        let db_file_limit = Config::get_db_open_file_limit(open_file_limit);
-
-        // If the current limit is very large, set the DB limit using the ideal limit
-        let ideal_limit = Config::get_db_open_file_limit(Config::IDEAL_OPEN_FILE_LIMIT)
-            .try_into()
-            .expect("ideal open file limit fits in a c_int");
-        let db_file_limit = db_file_limit.try_into().unwrap_or(ideal_limit);
-
-        opts.set_max_open_files(db_file_limit);
-
-        (path, opts)
+        }
     }
 
     /// Construct a config for an ephemeral database
@@ -123,92 +83,6 @@ impl Config {
             ..Config::default()
         }
     }
-
-    /// Calculate the database's share of `open_file_limit`
-    fn get_db_open_file_limit(open_file_limit: u64) -> u64 {
-        // Give the DB half the files, and reserve half the files for peers
-        (open_file_limit - Config::RESERVED_FILE_COUNT) / 2
-    }
-
-    /// Increase the open file limit for this process to `IDEAL_OPEN_FILE_LIMIT`.
-    /// If that fails, try `MIN_OPEN_FILE_LIMIT`.
-    ///
-    /// If the current limit is above `IDEAL_OPEN_FILE_LIMIT`, leaves it
-    /// unchanged.
-    ///
-    /// Returns the current limit, after any successful increases.
-    ///
-    /// # Panics
-    ///
-    /// If the open file limit can not be increased to `MIN_OPEN_FILE_LIMIT`.
-    fn increase_open_file_limit() -> u64 {
-        // `increase_nofile_limit` doesn't do anything on Windows in rlimit 0.7.0.
-        //
-        // On Windows, the default limit is:
-        // - 512 high-level stream I/O files (via the C standard functions), and
-        // - 8192 low-level I/O files (via the Unix C functions).
-        // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setmaxstdio?view=msvc-160#remarks
-        //
-        // If we need more high-level I/O files on Windows,
-        // use `setmaxstdio` and `getmaxstdio` from the `rlimit` crate:
-        // https://docs.rs/rlimit/latest/rlimit/#windows
-        //
-        // Then panic if `setmaxstdio` fails to set the minimum value,
-        // and `getmaxstdio` is below the minimum value.
-
-        // We try setting the ideal limit, then the minimum limit.
-        let current_limit = match increase_nofile_limit(Config::IDEAL_OPEN_FILE_LIMIT) {
-            Ok(current_limit) => current_limit,
-            Err(limit_error) => {
-                info!(
-                    ?limit_error,
-                    min_limit = ?Config::MIN_OPEN_FILE_LIMIT,
-                    ideal_limit = ?Config::IDEAL_OPEN_FILE_LIMIT,
-                    "unable to increase the open file limit, \
-                     assuming Zebra can open a minimum number of files"
-                );
-
-                return Config::MIN_OPEN_FILE_LIMIT;
-            }
-        };
-
-        if current_limit < Config::MIN_OPEN_FILE_LIMIT {
-            panic!(
-                "open file limit too low: \
-                 unable to set the number of open files to {}, \
-                 the minimum number of files required by Zebra. \
-                 Current limit is {:?}. \
-                 Hint: Increase the open file limit to {} before launching Zebra",
-                Config::MIN_OPEN_FILE_LIMIT,
-                current_limit,
-                Config::IDEAL_OPEN_FILE_LIMIT
-            );
-        } else if current_limit < Config::IDEAL_OPEN_FILE_LIMIT {
-            warn!(
-                ?current_limit,
-                min_limit = ?Config::MIN_OPEN_FILE_LIMIT,
-                ideal_limit = ?Config::IDEAL_OPEN_FILE_LIMIT,
-                "the maximum number of open files is below Zebra's ideal limit. \
-                 Hint: Increase the open file limit to {} before launching Zebra",
-                Config::IDEAL_OPEN_FILE_LIMIT
-            );
-        } else if cfg!(windows) {
-            info!(
-                min_limit = ?Config::MIN_OPEN_FILE_LIMIT,
-                ideal_limit = ?Config::IDEAL_OPEN_FILE_LIMIT,
-                "assuming the open file limit is high enough for Zebra",
-            );
-        } else {
-            info!(
-                ?current_limit,
-                min_limit = ?Config::MIN_OPEN_FILE_LIMIT,
-                ideal_limit = ?Config::IDEAL_OPEN_FILE_LIMIT,
-                "the open file limit is high enough for Zebra",
-            );
-        }
-
-        current_limit
-    }
 }
 
 impl Default for Config {
@@ -12,6 +12,9 @@
 #![doc(html_logo_url = "https://www.zfnd.org/images/zebra-icon.png")]
 #![doc(html_root_url = "https://doc.zebra.zfnd.org/zebra_state")]
 
+#[macro_use]
+extern crate tracing;
+
 #[cfg(any(test, feature = "proptest-impl"))]
 mod arbitrary;
 mod config;
@@ -1,4 +1,9 @@
 //! The primary implementation of the `zebra_state::Service` built upon rocksdb
+//!
+//! # Correctness
+//!
+//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
+//! be incremented each time the database format (column, serialization, etc) changes.
 
 use std::{
     borrow::Borrow,
@@ -25,8 +30,8 @@ use crate::{
     service::{
         check,
         finalized_state::{
-            disk_db::{ReadDisk, WriteDisk},
-            disk_format::{FromDisk, IntoDisk, TransactionLocation},
+            disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk},
+            disk_format::{FromDisk, TransactionLocation},
         },
         QueuedFinalized,
     },
@@ -44,87 +49,44 @@ mod tests;
 
 /// The finalized part of the chain state, stored in the db.
 pub struct FinalizedState {
+    /// The underlying database.
+    db: DiskDb,
+
     /// Queued blocks that arrived out of order, indexed by their parent block hash.
     queued_by_prev_hash: HashMap<block::Hash, QueuedFinalized>,
 
     /// A metric tracking the maximum height that's currently in `queued_by_prev_hash`
     ///
     /// Set to `f64::NAN` if `queued_by_prev_hash` is empty, because grafana shows NaNs
     /// as a break in the graph.
     max_queued_height: f64,
 
-    db: rocksdb::DB,
-    ephemeral: bool,
+    /// The configured stop height.
+    ///
     /// Commit blocks to the finalized state up to this height, then exit Zebra.
     debug_stop_at_height: Option<block::Height>,
+
+    /// The configured network.
     network: Network,
 }
 
 impl FinalizedState {
     pub fn new(config: &Config, network: Network) -> Self {
-        let (path, db_options) = config.db_config(network);
-        // Note: The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
-        // be incremented each time the database format (column, serialization, etc) changes.
-        let column_families = vec![
-            rocksdb::ColumnFamilyDescriptor::new("hash_by_height", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("height_by_hash", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("block_by_height", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("tx_by_hash", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("utxo_by_outpoint", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("sprout_nullifiers", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("sapling_nullifiers", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("orchard_nullifiers", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("sprout_anchors", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("sapling_anchors", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("orchard_anchors", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("sprout_note_commitment_tree", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new(
-                "sapling_note_commitment_tree",
-                db_options.clone(),
-            ),
-            rocksdb::ColumnFamilyDescriptor::new(
-                "orchard_note_commitment_tree",
-                db_options.clone(),
-            ),
-            rocksdb::ColumnFamilyDescriptor::new("history_tree", db_options.clone()),
-            rocksdb::ColumnFamilyDescriptor::new("tip_chain_value_pool", db_options.clone()),
-        ];
-        let db_result = rocksdb::DB::open_cf_descriptors(&db_options, &path, column_families);
-
-        let db = match db_result {
-            Ok(d) => {
-                tracing::info!("Opened Zebra state cache at {}", path.display());
-                d
-            }
-            // TODO: provide a different hint if the disk is full, see #1623
-            Err(e) => panic!(
-                "Opening database {:?} failed: {:?}. \
-                 Hint: Check if another zebrad process is running. \
-                 Try changing the state cache_dir in the Zebra config.",
-                path, e,
-            ),
-        };
+        let db = DiskDb::new(config, network);
 
         let new_state = Self {
            queued_by_prev_hash: HashMap::new(),
            max_queued_height: f64::NAN,
            db,
-            ephemeral: config.ephemeral,
            debug_stop_at_height: config.debug_stop_at_height.map(block::Height),
            network,
        };
 
-        // TODO: remove these extra logs once bugs like #2905 are fixed
-        tracing::info!("reading cached tip height");
         if let Some(tip_height) = new_state.finalized_tip_height() {
-            tracing::info!(?tip_height, "loaded cached tip height");
-
             if new_state.is_at_stop_height(tip_height) {
                 let debug_stop_at_height = new_state
                     .debug_stop_at_height
                     .expect("true from `is_at_stop_height` implies `debug_stop_at_height` is Some");
 
-                tracing::info!("reading cached tip hash");
                 let tip_hash = new_state.finalized_tip_hash();
 
                 if tip_height > debug_stop_at_height {
@@ -145,7 +107,6 @@ impl FinalizedState {
 
                 // RocksDB can do a cleanup when column families are opened.
                 // So we want to drop it before we exit.
-                tracing::info!("closing cached state");
                 std::mem::drop(new_state);
 
                 Self::exit_process();
@@ -232,14 +193,6 @@ impl FinalizedState {
         self.tip().map(|(height, _)| height)
     }
 
-    fn is_empty(&self, cf: &rocksdb::ColumnFamily) -> bool {
-        // use iterator to check if it's empty
-        !self
-            .db
-            .iterator_cf(cf, rocksdb::IteratorMode::Start)
-            .valid()
-    }
-
     /// Immediately commit `finalized` to the finalized state.
     ///
     /// This can be called either by the non-finalized state (when finalizing
@@ -285,7 +238,7 @@ impl FinalizedState {
         let tip_chain_value_pool = self.db.cf_handle("tip_chain_value_pool").unwrap();
 
         // Assert that callers (including unit tests) get the chain order correct
-        if self.is_empty(hash_by_height) {
+        if self.db.is_empty(hash_by_height) {
             assert_eq!(
                 GENESIS_PREVIOUS_BLOCK_HASH, finalized.block.header.previous_block_hash,
                 "the first block added to an empty state must be a genesis block, source: {}",
@@ -346,8 +299,8 @@ impl FinalizedState {
         // the genesis case.
         // If the closure returns an error it will be propagated and the batch will not be written
         // to the BD afterwards.
-        let prepare_commit = || -> Result<rocksdb::WriteBatch, BoxError> {
-            let mut batch = rocksdb::WriteBatch::default();
+        let prepare_commit = || -> Result<DiskWriteBatch, BoxError> {
+            let mut batch = DiskWriteBatch::new();
 
             // Index the block
             batch.zs_insert(hash_by_height, height, hash);
@@ -413,7 +366,7 @@ impl FinalizedState {
                         if let Some(utxo) = self.utxo(outpoint) {
                             all_utxos_spent_by_block.insert(*outpoint, utxo);
                         }
-                        batch.delete_cf(utxo_by_outpoint, outpoint.as_bytes());
+                        batch.zs_delete(utxo_by_outpoint, outpoint);
                     }
                     // Coinbase inputs represent new coins,
                    // so there are no UTXOs to mark as spent.
@@ -505,6 +458,7 @@ impl FinalizedState {
 
         tracing::trace!(?source, "committed block from");
 
+        // TODO: move the stop height check to the syncer (#3442)
         if result.is_ok() && self.is_at_stop_height(height) {
             tracing::info!(?source, "committed block from");
             tracing::info!(
@@ -513,9 +467,8 @@ impl FinalizedState {
                 "stopping at configured height, flushing database to disk"
             );
 
-            self.shutdown();
+            self.db.shutdown();
 
-            // TODO: replace with a graceful shutdown (#1678)
             Self::exit_process();
         }
 
@@ -525,7 +478,8 @@ impl FinalizedState {
     /// Exit the host process.
     ///
     /// Designed for debugging and tests.
-    /// TODO: replace with a graceful shutdown (#1678)
+    ///
+    /// TODO: move the stop height check to the syncer (#3442)
     fn exit_process() -> ! {
         tracing::info!("exiting Zebra");
 
@@ -582,7 +536,7 @@ impl FinalizedState {
     pub fn tip(&self) -> Option<(block::Height, block::Hash)> {
         let hash_by_height = self.db.cf_handle("hash_by_height").unwrap();
         self.db
-            .iterator_cf(hash_by_height, rocksdb::IteratorMode::End)
+            .reverse_iterator(hash_by_height)
            .next()
            .map(|(height_bytes, hash_bytes)| {
                let height = block::Height::from_bytes(height_bytes);
@@ -754,32 +708,6 @@ impl FinalizedState {
         }
     }
 
-    /// If the database is `ephemeral`, delete it.
-    fn delete_ephemeral(&self) {
-        if self.ephemeral {
-            let path = self.db.path();
-            tracing::info!(cache_path = ?path, "removing temporary database files");
-
-            // We'd like to use `rocksdb::Env::mem_env` for ephemeral databases,
-            // but the Zcash blockchain might not fit in memory. So we just
-            // delete the database files instead.
-            //
-            // We'd like to call `DB::destroy` here, but calling destroy on a
-            // live DB is undefined behaviour:
-            // https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ#basic-readwrite
-            //
-            // So we assume that all the database files are under `path`, and
-            // delete them using standard filesystem APIs. Deleting open files
-            // might cause errors on non-Unix platforms, so we ignore the result.
-            // (The OS will delete them eventually anyway.)
-            let res = std::fs::remove_dir_all(path);
-
-            // TODO: downgrade to debug once bugs like #2905 are fixed
-            // but leave any errors at "info" level
-            tracing::info!(?res, "removed temporary database files");
-        }
-    }
-
     /// Returns the `Path` where the files used by this database are located.
     #[allow(dead_code)]
     pub fn path(&self) -> &Path {
@@ -793,104 +721,6 @@ impl FinalizedState {
             .zs_get(value_pool_cf, &())
             .unwrap_or_else(ValueBalance::zero)
     }
-
-    /// Allow to set up a fake value pool in the database for testing purposes.
-    #[cfg(any(test, feature = "proptest-impl"))]
-    #[allow(dead_code)]
-    pub fn set_current_value_pool(&self, fake_value_pool: ValueBalance<NonNegative>) {
-        let mut batch = rocksdb::WriteBatch::default();
-        let value_pool_cf = self.db.cf_handle("tip_chain_value_pool").unwrap();
-        batch.zs_insert(value_pool_cf, (), fake_value_pool);
-        self.db.write(batch).unwrap();
-    }
-
-    /// Artificially prime the note commitment tree anchor sets with anchors
-    /// referenced in a block, for testing purposes _only_.
-    #[cfg(test)]
-    pub fn populate_with_anchors(&self, block: &Block) {
-        let mut batch = rocksdb::WriteBatch::default();
-
-        let sprout_anchors = self.db.cf_handle("sprout_anchors").unwrap();
-        let sapling_anchors = self.db.cf_handle("sapling_anchors").unwrap();
-        let orchard_anchors = self.db.cf_handle("orchard_anchors").unwrap();
-
-        for transaction in block.transactions.iter() {
-            // Sprout
-            for joinsplit in transaction.sprout_groth16_joinsplits() {
-                batch.zs_insert(
-                    sprout_anchors,
-                    joinsplit.anchor,
-                    sprout::tree::NoteCommitmentTree::default(),
-                );
-            }
-
-            // Sapling
-            for anchor in transaction.sapling_anchors() {
-                batch.zs_insert(sapling_anchors, anchor, ());
-            }
-
-            // Orchard
-            if let Some(orchard_shielded_data) = transaction.orchard_shielded_data() {
-                batch.zs_insert(orchard_anchors, orchard_shielded_data.shared_anchor, ());
-            }
-        }
-
-        self.db.write(batch).unwrap();
-    }
-
-    /// Shut down the database, cleaning up background tasks and ephemeral data.
-    fn shutdown(&mut self) {
-        // Drop isn't guaranteed to run, such as when we panic, or if the tokio shutdown times out.
-        //
-        // Zebra's data should be fine if we don't clean up, because:
-        // - the database flushes regularly anyway
-        // - Zebra commits each block in a database transaction, any incomplete blocks get rolled back
-        // - ephemeral files are placed in the os temp dir and should be cleaned up automatically eventually
-        tracing::info!("flushing database to disk");
-        self.db.flush().expect("flush is successful");
-
-        // But we should call `cancel_all_background_work` before Zebra exits.
-        // If we don't, we see these kinds of errors:
-        // ```
-        // pthread lock: Invalid argument
-        // pure virtual method called
-        // terminate called without an active exception
-        // pthread destroy mutex: Device or resource busy
-        // Aborted (core dumped)
-        // ```
-        //
-        // The RocksDB wiki says:
-        // > Q: Is it safe to close RocksDB while another thread is issuing read, write or manual compaction requests?
-        // >
-        // > A: No. The users of RocksDB need to make sure all functions have finished before they close RocksDB.
-        // > You can speed up the waiting by calling CancelAllBackgroundWork().
-        //
-        // https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ
-        tracing::info!("stopping background database tasks");
-        self.db.cancel_all_background_work(true);
-
-        // We'd like to drop the database before deleting its files,
-        // because that closes the column families and the database correctly.
-        // But Rust's ownership rules make that difficult,
-        // so we just flush and delete ephemeral data instead.
-        //
-        // The RocksDB wiki says:
-        // > rocksdb::DB instances need to be destroyed before your main function exits.
-        // > RocksDB instances usually depend on some internal static variables.
-        // > Users need to make sure rocksdb::DB instances are destroyed before those static variables.
-        //
-        // https://github.com/facebook/rocksdb/wiki/Known-Issues
-        //
-        // But our current code doesn't seem to cause any issues.
-        // We might want to explicitly drop the database as part of graceful shutdown (#1678).
-        self.delete_ephemeral();
-    }
-}
-
-impl Drop for FinalizedState {
-    fn drop(&mut self) {
-        self.shutdown();
-    }
 }
 
 fn block_precommit_metrics(block: &Block, hash: block::Hash, height: block::Height) {
@@ -6,9 +6,18 @@ use std::sync::Arc;
 
 use proptest::prelude::*;
 
-use zebra_chain::block;
+use zebra_chain::{
+    amount::NonNegative,
+    block::{self, Block},
+    sprout,
+    value_balance::ValueBalance,
+};
 
-use crate::service::finalized_state::disk_format::{FromDisk, IntoDisk, TransactionLocation};
+use crate::service::finalized_state::{
+    disk_db::{DiskWriteBatch, WriteDisk},
+    disk_format::{FromDisk, IntoDisk, TransactionLocation},
+    FinalizedState,
+};
 
 impl Arbitrary for TransactionLocation {
     type Parameters = ();
@@ -84,3 +93,47 @@
     assert_round_trip_arc(Arc::new(input.clone()));
     assert_round_trip(input);
 }
+
+impl FinalizedState {
+    /// Allow to set up a fake value pool in the database for testing purposes.
+    pub fn set_current_value_pool(&self, fake_value_pool: ValueBalance<NonNegative>) {
+        let mut batch = DiskWriteBatch::new();
+        let value_pool_cf = self.db.cf_handle("tip_chain_value_pool").unwrap();
+
+        batch.zs_insert(value_pool_cf, (), fake_value_pool);
+        self.db.write(batch).unwrap();
+    }
+
+    /// Artificially prime the note commitment tree anchor sets with anchors
+    /// referenced in a block, for testing purposes _only_.
+    pub fn populate_with_anchors(&self, block: &Block) {
+        let mut batch = DiskWriteBatch::new();
+
+        let sprout_anchors = self.db.cf_handle("sprout_anchors").unwrap();
+        let sapling_anchors = self.db.cf_handle("sapling_anchors").unwrap();
+        let orchard_anchors = self.db.cf_handle("orchard_anchors").unwrap();
+
+        for transaction in block.transactions.iter() {
+            // Sprout
+            for joinsplit in transaction.sprout_groth16_joinsplits() {
+                batch.zs_insert(
+                    sprout_anchors,
+                    joinsplit.anchor,
+                    sprout::tree::NoteCommitmentTree::default(),
+                );
+            }
+
+            // Sapling
+            for anchor in transaction.sapling_anchors() {
+                batch.zs_insert(sapling_anchors, anchor, ());
+            }
+
+            // Orchard
+            if let Some(orchard_shielded_data) = transaction.orchard_shielded_data() {
+                batch.zs_insert(orchard_anchors, orchard_shielded_data.shared_anchor, ());
+            }
+        }
+
+        self.db.write(batch).unwrap();
+    }
+}
@@ -3,10 +3,39 @@
 //! This module makes sure that:
 //! - all disk writes happen inside a RocksDB transaction, and
 //! - format-specific invariants are maintained.
+//!
+//! # Correctness
+//!
+//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
+//! be incremented each time the database format (column, serialization, etc) changes.
 
-use std::fmt::Debug;
+use std::{fmt::Debug, path::Path};
 
-use crate::service::finalized_state::disk_format::{FromDisk, IntoDisk};
+use rlimit::increase_nofile_limit;
+
+use zebra_chain::parameters::Network;
+
+use crate::{
+    service::finalized_state::disk_format::{FromDisk, IntoDisk},
+    Config,
+};
+
+/// Wrapper struct to ensure low-level database access goes through the correct API.
+pub struct DiskDb {
+    /// The inner RocksDB database.
+    db: rocksdb::DB,
+
+    /// The configured temporary database setting.
+    ///
+    /// If true, the database files are deleted on drop.
+    ephemeral: bool,
+}
+
+/// Wrapper struct to ensure low-level database writes go through the correct API.
+pub struct DiskWriteBatch {
+    /// The inner RocksDB write batch.
+    batch: rocksdb::WriteBatch,
+}
 
 /// Helper trait for inserting (Key, Value) pairs into rocksdb with a consistently
 /// defined format
@@ -24,7 +53,7 @@ pub trait WriteDisk {
         K: IntoDisk + Debug;
 }
 
-impl WriteDisk for rocksdb::WriteBatch {
+impl WriteDisk for DiskWriteBatch {
     fn zs_insert<K, V>(&mut self, cf: &rocksdb::ColumnFamily, key: K, value: V)
     where
         K: IntoDisk + Debug,
@@ -32,7 +61,7 @@ impl WriteDisk for rocksdb::WriteBatch {
     {
         let key_bytes = key.as_bytes();
         let value_bytes = value.as_bytes();
-        self.put_cf(cf, key_bytes, value_bytes);
+        self.batch.put_cf(cf, key_bytes, value_bytes);
     }
 
     fn zs_delete<K>(&mut self, cf: &rocksdb::ColumnFamily, key: K)
@@ -40,7 +69,7 @@ impl WriteDisk for rocksdb::WriteBatch {
         K: IntoDisk + Debug,
     {
         let key_bytes = key.as_bytes();
-        self.delete_cf(cf, key_bytes);
+        self.batch.delete_cf(cf, key_bytes);
     }
 }
 
@@ -59,7 +88,7 @@ pub trait ReadDisk {
         K: IntoDisk;
 }
 
-impl ReadDisk for rocksdb::DB {
+impl ReadDisk for DiskDb {
     fn zs_get<K, V>(&self, cf: &rocksdb::ColumnFamily, key: &K) -> Option<V>
     where
         K: IntoDisk,
@@ -71,6 +100,7 @@ impl ReadDisk for rocksdb::DB {
         // value, because we're going to deserialize it anyways, which avoids an
         // extra copy
         let value_bytes = self
+            .db
             .get_pinned_cf(cf, key_bytes)
             .expect("expected that disk errors would not occur");
 
@@ -85,8 +115,312 @@ impl ReadDisk for rocksdb::DB {
 
         // We use `get_pinned_cf` to avoid taking ownership of the serialized
        // value, because we don't use the value at all. This avoids an extra copy.
-        self.get_pinned_cf(cf, key_bytes)
+        self.db
+            .get_pinned_cf(cf, key_bytes)
             .expect("expected that disk errors would not occur")
            .is_some()
     }
 }
+
+impl DiskWriteBatch {
+    pub fn new() -> Self {
+        DiskWriteBatch {
+            batch: rocksdb::WriteBatch::default(),
+        }
+    }
+}
+
+impl DiskDb {
+    /// The ideal open file limit for Zebra
+    const IDEAL_OPEN_FILE_LIMIT: u64 = 1024;
+
+    /// The minimum number of open files for Zebra to operate normally. Also used
+    /// as the default open file limit, when the OS doesn't tell us how many
+    /// files we can use.
+    ///
+    /// We want 100+ file descriptors for peers, and 100+ for the database.
+    ///
+    /// On Windows, the default limit is 512 high-level I/O files, and 8192
+    /// low-level I/O files:
+    /// https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setmaxstdio?view=msvc-160#remarks
+    const MIN_OPEN_FILE_LIMIT: u64 = 512;
+
+    /// The number of files used internally by Zebra.
+    ///
+    /// Zebra uses file descriptors for OS libraries (10+), polling APIs (10+),
+    /// stdio (3), and other OS facilities (2+).
+    const RESERVED_FILE_COUNT: u64 = 48;
+
+    pub fn new(config: &Config, network: Network) -> DiskDb {
+        let path = config.db_path(network);
+        let db_options = DiskDb::options();
+
+        let column_families = vec![
+            rocksdb::ColumnFamilyDescriptor::new("hash_by_height", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("height_by_hash", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("block_by_height", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("tx_by_hash", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("utxo_by_outpoint", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("sprout_nullifiers", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("sapling_nullifiers", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("orchard_nullifiers", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("sprout_anchors", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("sapling_anchors", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("orchard_anchors", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("sprout_note_commitment_tree", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new(
+                "sapling_note_commitment_tree",
+                db_options.clone(),
+            ),
+            rocksdb::ColumnFamilyDescriptor::new(
+                "orchard_note_commitment_tree",
+                db_options.clone(),
+            ),
+            rocksdb::ColumnFamilyDescriptor::new("history_tree", db_options.clone()),
+            rocksdb::ColumnFamilyDescriptor::new("tip_chain_value_pool", db_options.clone()),
+        ];
+
+        // TODO: move opening the database to a blocking thread (#2188)
+        let db_result = rocksdb::DB::open_cf_descriptors(&db_options, &path, column_families);
+
+        match db_result {
+            Ok(db) => {
+                info!("Opened Zebra state cache at {}", path.display());
+
+                DiskDb {
+                    db,
+                    ephemeral: config.ephemeral,
+                }
+            }
+            // TODO: provide a different hint if the disk is full, see #1623
+            Err(e) => panic!(
+                "Opening database {:?} failed: {:?}. \
+                 Hint: Check if another zebrad process is running. \
+                 Try changing the state cache_dir in the Zebra config.",
+                path, e,
+            ),
+        }
+    }
+
+    /// Returns the `Path` where the files used by this database are located.
+    pub fn path(&self) -> &Path {
+        self.db.path()
+    }
+
+    /// Returns the column family handle for `cf_name`.
+    pub fn cf_handle(&self, cf_name: &str) -> Option<&rocksdb::ColumnFamily> {
+        self.db.cf_handle(cf_name)
+    }
+
+    /// Returns an iterator over the keys in `cf_name`, starting from the first key.
+    pub fn forward_iterator(&self, cf_handle: &rocksdb::ColumnFamily) -> rocksdb::DBIterator {
+        self.db.iterator_cf(cf_handle, rocksdb::IteratorMode::Start)
+    }
+
+    /// Returns a reverse iterator over the keys in `cf_name`, starting from the last key.
+    pub fn reverse_iterator(&self, cf_handle: &rocksdb::ColumnFamily) -> rocksdb::DBIterator {
+        self.db.iterator_cf(cf_handle, rocksdb::IteratorMode::End)
+    }
+
+    /// Returns true if `cf` does not contain any entries.
+    pub fn is_empty(&self, cf_handle: &rocksdb::ColumnFamily) -> bool {
+        // Empty column families return invalid iterators.
+        !self.forward_iterator(cf_handle).valid()
+    }
+
+    /// Writes `batch` to the database.
+    pub fn write(&self, batch: DiskWriteBatch) -> Result<(), rocksdb::Error> {
+        // TODO: move writing to the database to a blocking thread (#2188)
+        self.db.write(batch.batch)
+    }
+
+    /// Returns the database options for the finalized state database.
+    fn options() -> rocksdb::Options {
+        let mut opts = rocksdb::Options::default();
+
+        opts.create_if_missing(true);
+        opts.create_missing_column_families(true);
+
+        let open_file_limit = DiskDb::increase_open_file_limit();
+        let db_file_limit = DiskDb::get_db_open_file_limit(open_file_limit);
+
+        // If the current limit is very large, set the DB limit using the ideal limit
+        let ideal_limit = DiskDb::get_db_open_file_limit(DiskDb::IDEAL_OPEN_FILE_LIMIT)
+            .try_into()
+            .expect("ideal open file limit fits in a c_int");
+        let db_file_limit = db_file_limit.try_into().unwrap_or(ideal_limit);
+
+        opts.set_max_open_files(db_file_limit);
+
+        opts
+    }
+
+    /// Calculate the database's share of `open_file_limit`
+    fn get_db_open_file_limit(open_file_limit: u64) -> u64 {
+        // Give the DB half the files, and reserve half the files for peers
+        (open_file_limit - DiskDb::RESERVED_FILE_COUNT) / 2
+    }
+
+    /// Increase the open file limit for this process to `IDEAL_OPEN_FILE_LIMIT`.
+    /// If that fails, try `MIN_OPEN_FILE_LIMIT`.
+    ///
+    /// If the current limit is above `IDEAL_OPEN_FILE_LIMIT`, leaves it
+    /// unchanged.
+    ///
+    /// Returns the current limit, after any successful increases.
+    ///
+    /// # Panics
+    ///
+    /// If the open file limit can not be increased to `MIN_OPEN_FILE_LIMIT`.
+    fn increase_open_file_limit() -> u64 {
+        // `increase_nofile_limit` doesn't do anything on Windows in rlimit 0.7.0.
+        //
+        // On Windows, the default limit is:
+        // - 512 high-level stream I/O files (via the C standard functions), and
+        // - 8192 low-level I/O files (via the Unix C functions).
+        // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/setmaxstdio?view=msvc-160#remarks
+        //
+        // If we need more high-level I/O files on Windows,
+        // use `setmaxstdio` and `getmaxstdio` from the `rlimit` crate:
+        // https://docs.rs/rlimit/latest/rlimit/#windows
+        //
+        // Then panic if `setmaxstdio` fails to set the minimum value,
+        // and `getmaxstdio` is below the minimum value.
+
+        // We try setting the ideal limit, then the minimum limit.
+        let current_limit = match increase_nofile_limit(DiskDb::IDEAL_OPEN_FILE_LIMIT) {
+            Ok(current_limit) => current_limit,
+            Err(limit_error) => {
+                info!(
+                    ?limit_error,
+                    min_limit = ?DiskDb::MIN_OPEN_FILE_LIMIT,
+                    ideal_limit = ?DiskDb::IDEAL_OPEN_FILE_LIMIT,
+                    "unable to increase the open file limit, \
+                     assuming Zebra can open a minimum number of files"
+                );
+
+                return DiskDb::MIN_OPEN_FILE_LIMIT;
+            }
+        };
+
+        if current_limit < DiskDb::MIN_OPEN_FILE_LIMIT {
+            panic!(
+                "open file limit too low: \
+                 unable to set the number of open files to {}, \
+                 the minimum number of files required by Zebra. \
+                 Current limit is {:?}. \
+                 Hint: Increase the open file limit to {} before launching Zebra",
+                DiskDb::MIN_OPEN_FILE_LIMIT,
+                current_limit,
+                DiskDb::IDEAL_OPEN_FILE_LIMIT
+            );
+        } else if current_limit < DiskDb::IDEAL_OPEN_FILE_LIMIT {
+            warn!(
+                ?current_limit,
+                min_limit = ?DiskDb::MIN_OPEN_FILE_LIMIT,
+                ideal_limit = ?DiskDb::IDEAL_OPEN_FILE_LIMIT,
+                "the maximum number of open files is below Zebra's ideal limit. \
+                 Hint: Increase the open file limit to {} before launching Zebra",
+                DiskDb::IDEAL_OPEN_FILE_LIMIT
+            );
+        } else if cfg!(windows) {
+            info!(
+                min_limit = ?DiskDb::MIN_OPEN_FILE_LIMIT,
+                ideal_limit = ?DiskDb::IDEAL_OPEN_FILE_LIMIT,
+                "assuming the open file limit is high enough for Zebra",
+            );
+        } else {
+            info!(
+                ?current_limit,
+                min_limit = ?DiskDb::MIN_OPEN_FILE_LIMIT,
+                ideal_limit = ?DiskDb::IDEAL_OPEN_FILE_LIMIT,
+                "the open file limit is high enough for Zebra",
+            );
+        }
+
+        current_limit
+    }
+
+    /// Shut down the database, cleaning up background tasks and ephemeral data.
+    ///
+    /// TODO: make private after the stop height check has moved to the syncer (#3442)
+    ///       move shutting down the database to a blocking thread (#2188)
+    pub(crate) fn shutdown(&mut self) {
+        // Drop isn't guaranteed to run, such as when we panic, or if the tokio shutdown times out.
+        //
+        // Zebra's data should be fine if we don't clean up, because:
+        // - the database flushes regularly anyway
+        // - Zebra commits each block in a database transaction, any incomplete blocks get rolled back
+        // - ephemeral files are placed in the os temp dir and should be cleaned up automatically eventually
+        info!("flushing database to disk");
+        self.db.flush().expect("flush is successful");
+
+        // But we should call `cancel_all_background_work` before Zebra exits.
+        // If we don't, we see these kinds of errors:
+        // ```
+        // pthread lock: Invalid argument
+        // pure virtual method called
+        // terminate called without an active exception
+        // pthread destroy mutex: Device or resource busy
+        // Aborted (core dumped)
+        // ```
+        //
+        // The RocksDB wiki says:
+        // > Q: Is it safe to close RocksDB while another thread is issuing read, write or manual compaction requests?
+        // >
+        // > A: No. The users of RocksDB need to make sure all functions have finished before they close RocksDB.
+        // > You can speed up the waiting by calling CancelAllBackgroundWork().
+        //
+        // https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ
+        info!("stopping background database tasks");
+        self.db.cancel_all_background_work(true);
+
+        // We'd like to drop the database before deleting its files,
+        // because that closes the column families and the database correctly.
+        // But Rust's ownership rules make that difficult,
+        // so we just flush and delete ephemeral data instead.
+        //
+        // The RocksDB wiki says:
+        // > rocksdb::DB instances need to be destroyed before your main function exits.
+        // > RocksDB instances usually depend on some internal static variables.
+        // > Users need to make sure rocksdb::DB instances are destroyed before those static variables.
+        //
+        // https://github.com/facebook/rocksdb/wiki/Known-Issues
+        //
+        // But our current code doesn't seem to cause any issues.
+        // We might want to explicitly drop the database as part of graceful shutdown (#1678).
+        self.delete_ephemeral();
+    }
+
+    /// If the database is `ephemeral`, delete it.
+    fn delete_ephemeral(&self) {
+        if self.ephemeral {
+            let path = self.path();
+            info!(cache_path = ?path, "removing temporary database files");
+
+            // We'd like to use `rocksdb::Env::mem_env` for ephemeral databases,
+            // but the Zcash blockchain might not fit in memory. So we just
+            // delete the database files instead.
+            //
+            // We'd like to call `DB::destroy` here, but calling destroy on a
+            // live DB is undefined behaviour:
+            // https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ#basic-readwrite
+            //
+            // So we assume that all the database files are under `path`, and
+            // delete them using standard filesystem APIs. Deleting open files
+            // might cause errors on non-Unix platforms, so we ignore the result.
+            // (The OS will delete them eventually anyway.)
+            let res = std::fs::remove_dir_all(path);
+
+            // TODO: downgrade to debug once bugs like #2905 are fixed
+            // but leave any errors at "info" level
+            info!(?res, "removed temporary database files");
+        }
+    }
+}
+
+impl Drop for DiskDb {
+    fn drop(&mut self) {
+        self.shutdown();
+    }
+}
@@ -1,4 +1,9 @@
 //! Module defining the serialization format for finalized data.
+//!
+//! # Correctness
+//!
+//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
+//! be incremented each time the database format (column, serialization, etc) changes.
 
 use std::{collections::BTreeMap, convert::TryInto, fmt::Debug, sync::Arc};
 