change(db): Make the first stable release forward-compatible with planned state changes (#6813)
* Implement minor and patch database format versions
* Log and update database format versions when opening database
* Refactor the current list of column families into a constant
* Open all available column families, including from future Zebra versions
* Refactor note commitment tree lookups to go through the height methods
* Make Sapling/Orchard note commitment tree lookup forwards compatible
* Ignore errors reading column family lists from disk
* Update format version comments and TODOs
* Correctly log newly created database formats

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
This commit is contained in:
parent
815c77870d
commit
355f1233f5
|
@ -5962,6 +5962,7 @@ dependencies = [
|
|||
"regex",
|
||||
"rlimit",
|
||||
"rocksdb",
|
||||
"semver 1.0.17",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"spandoc",
|
||||
|
|
|
@ -46,6 +46,7 @@ mset = "0.1.1"
|
|||
regex = "1.8.4"
|
||||
rlimit = "0.9.1"
|
||||
rocksdb = { version = "0.21.0", default_features = false, features = ["lz4"] }
|
||||
semver = "1.0.17"
|
||||
serde = { version = "1.0.163", features = ["serde_derive"] }
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
|
|
|
@ -1,16 +1,26 @@
|
|||
//! Cached state configuration for Zebra.
|
||||
|
||||
use std::{
|
||||
fs::{canonicalize, remove_dir_all, DirEntry, ReadDir},
|
||||
fs::{self, canonicalize, remove_dir_all, DirEntry, ReadDir},
|
||||
io::ErrorKind,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use semver::Version;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::task::{spawn_blocking, JoinHandle};
|
||||
use tracing::Span;
|
||||
|
||||
use zebra_chain::parameters::Network;
|
||||
|
||||
use crate::{
|
||||
constants::{
|
||||
DATABASE_FORMAT_MINOR_VERSION, DATABASE_FORMAT_PATCH_VERSION, DATABASE_FORMAT_VERSION,
|
||||
DATABASE_FORMAT_VERSION_FILE_NAME,
|
||||
},
|
||||
BoxError,
|
||||
};
|
||||
|
||||
/// Configuration for the state service.
|
||||
#[derive(Clone, Debug, Deserialize, Serialize)]
|
||||
#[serde(deny_unknown_fields, default)]
|
||||
|
@ -125,6 +135,15 @@ impl Config {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the path of the database format version file.
|
||||
pub fn version_file_path(&self, network: Network) -> PathBuf {
|
||||
let mut version_path = self.db_path(network);
|
||||
|
||||
version_path.push(DATABASE_FORMAT_VERSION_FILE_NAME);
|
||||
|
||||
version_path
|
||||
}
|
||||
|
||||
/// Construct a config for an ephemeral database
|
||||
pub fn ephemeral() -> Config {
|
||||
Config {
|
||||
|
@ -267,8 +286,83 @@ fn parse_dir_name(entry: &DirEntry) -> Option<String> {
|
|||
/// Parse the state version number from `dir_name`.
///
/// The expected format is a `v` prefix followed by one or more ASCII decimal digits,
/// for example `v25`.
///
/// Returns `None` if parsing fails, or the directory name is not in the expected format.
/// This includes names with a sign after the prefix (`v+25`, `v-1`), names with no digits (`v`),
/// and numbers that don't fit in a `u64`.
fn parse_version_number(dir_name: &str) -> Option<u64> {
    let digits = dir_name.strip_prefix('v')?;

    // Integer parsing accepts an optional leading `+` sign, so `v+25` would otherwise be
    // treated as a version directory. Only plain decimal digits are part of the format.
    if digits.is_empty() || !digits.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }

    // This can still fail if the number overflows a `u64`.
    digits.parse().ok()
}
|
||||
|
||||
/// Returns the full semantic version of the currently running database format code.
|
||||
///
|
||||
/// This is the version implemented by the Zebra code that's currently running,
|
||||
/// the minor and patch versions on disk can be different.
|
||||
pub fn database_format_version_in_code() -> Version {
|
||||
Version::new(
|
||||
DATABASE_FORMAT_VERSION,
|
||||
DATABASE_FORMAT_MINOR_VERSION,
|
||||
DATABASE_FORMAT_PATCH_VERSION,
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the full semantic version of the on-disk database.
|
||||
/// If there is no existing on-disk database, returns `Ok(None)`.
|
||||
///
|
||||
/// This is the format of the data on disk, the minor and patch versions
|
||||
/// implemented by the running Zebra code can be different.
|
||||
pub fn database_format_version_on_disk(
|
||||
config: &Config,
|
||||
network: Network,
|
||||
) -> Result<Option<Version>, BoxError> {
|
||||
let version_path = config.version_file_path(network);
|
||||
|
||||
let version = match fs::read_to_string(version_path) {
|
||||
Ok(version) => version,
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => {
|
||||
// If the version file doesn't exist, don't guess the version.
|
||||
// (It will end up being the version in code, once the database is created.)
|
||||
return Ok(None);
|
||||
}
|
||||
Err(e) => Err(e)?,
|
||||
};
|
||||
|
||||
let (minor, patch) = version
|
||||
.split_once('.')
|
||||
.ok_or("invalid database format version file")?;
|
||||
|
||||
Ok(Some(Version::new(
|
||||
DATABASE_FORMAT_VERSION,
|
||||
minor.parse()?,
|
||||
patch.parse()?,
|
||||
)))
|
||||
}
|
||||
|
||||
/// Writes the currently running semantic database version to the on-disk database.
|
||||
///
|
||||
/// # Correctness
|
||||
///
|
||||
/// This should only be called after all running format upgrades are complete.
|
||||
///
|
||||
/// # Concurrency
|
||||
///
|
||||
/// This must only be called while RocksDB has an open database for `config`.
|
||||
/// Otherwise, multiple Zebra processes could write the version at the same time,
|
||||
/// corrupting the file.
|
||||
pub fn write_database_format_version_to_disk(
|
||||
config: &Config,
|
||||
network: Network,
|
||||
) -> Result<(), BoxError> {
|
||||
let version_path = config.version_file_path(network);
|
||||
|
||||
// The major version is already in the directory path.
|
||||
let version = format!(
|
||||
"{}.{}",
|
||||
DATABASE_FORMAT_MINOR_VERSION, DATABASE_FORMAT_PATCH_VERSION
|
||||
);
|
||||
|
||||
// # Concurrency
|
||||
//
|
||||
// The caller handles locking for this file write.
|
||||
fs::write(version_path, version.as_bytes())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -1,4 +1,11 @@
|
|||
//! Definitions of constants.
|
||||
//! Constants that impact state behaviour.
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
|
||||
// For doc comment links
|
||||
#[allow(unused_imports)]
|
||||
use crate::config::{database_format_version_in_code, database_format_version_on_disk};
|
||||
|
||||
pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY;
|
||||
|
||||
|
@ -19,13 +26,42 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY;
|
|||
// TODO: change to HeightDiff
|
||||
pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1;
|
||||
|
||||
/// The database format version, incremented each time the database format changes.
|
||||
pub const DATABASE_FORMAT_VERSION: u32 = 25;
|
||||
/// The major version of the on-disk database format.
///
/// Increment this version whenever the on-disk format has a breaking data format change.
///
/// A change is breaking if it:
/// - deletes a column family, or
/// - changes the data format of a column family in an incompatible way.
///
/// A breaking change can be released as a minor version change instead, if:
/// - compatibility code for it was added earlier, and
/// - every supported Zebra version contains that compatibility code.
///
/// To get the full semantic format version, use [`database_format_version_in_code()`] or
/// [`database_format_version_on_disk()`].
pub const DATABASE_FORMAT_VERSION: u64 = 25;
|
||||
|
||||
/// The minor version of the on-disk database format.
///
/// Increment this version whenever the on-disk format has a significant data format change.
///
/// A change is significant if it:
/// - adds new column families,
/// - changes the format of a column family in a compatible way, or
/// - is a breaking change, but all supported Zebra versions have compatibility code for it.
pub const DATABASE_FORMAT_MINOR_VERSION: u64 = 0;
|
||||
|
||||
/// The patch version of the on-disk database format.
///
/// Increment this version whenever the on-disk format has a significant format
/// compatibility fix.
pub const DATABASE_FORMAT_PATCH_VERSION: u64 = 1;
|
||||
|
||||
/// The file name of the file that stores the minor and patch versions of the database format.
pub const DATABASE_FORMAT_VERSION_FILE_NAME: &str = "version";
|
||||
|
||||
/// The limit on how many blocks are checked for NU5 transactions,
/// before assuming that the chain is a pre-NU5 legacy chain.
///
/// On mainnet, Zebra usually only needs to check back a few blocks.
/// But on testnet, there can be a long time between v5 transactions.
pub const MAX_LEGACY_CHAIN_BLOCKS: usize = 100_000;
|
||||
|
||||
/// The maximum number of non-finalized chain forks Zebra will track.
|
||||
|
@ -58,9 +94,6 @@ const MAX_FIND_BLOCK_HEADERS_RESULTS_FOR_PROTOCOL: u32 = 160;
|
|||
pub const MAX_FIND_BLOCK_HEADERS_RESULTS_FOR_ZEBRA: u32 =
|
||||
MAX_FIND_BLOCK_HEADERS_RESULTS_FOR_PROTOCOL - 2;
|
||||
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
|
||||
lazy_static! {
|
||||
/// Regex that matches the RocksDB error when its lock file is already open.
|
||||
pub static ref LOCK_FILE_ERROR: Regex = Regex::new("(lock file).*(temporarily unavailable)|(in use)|(being used by another process)").expect("regex is valid");
|
||||
|
|
|
@ -10,13 +10,18 @@
|
|||
//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
|
||||
//! be incremented each time the database format (column, serialization, etc) changes.
|
||||
|
||||
use std::{fmt::Debug, path::Path, sync::Arc};
|
||||
use std::{cmp::Ordering, fmt::Debug, path::Path, sync::Arc};
|
||||
|
||||
use itertools::Itertools;
|
||||
use rlimit::increase_nofile_limit;
|
||||
|
||||
use zebra_chain::parameters::Network;
|
||||
|
||||
use crate::{
|
||||
config::{
|
||||
database_format_version_in_code, database_format_version_on_disk,
|
||||
write_database_format_version_to_disk,
|
||||
},
|
||||
service::finalized_state::disk_format::{FromDisk, IntoDisk},
|
||||
Config,
|
||||
};
|
||||
|
@ -386,61 +391,93 @@ impl DiskDb {
|
|||
/// <https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ#configuration-and-tuning>
|
||||
const MEMTABLE_RAM_CACHE_MEGABYTES: usize = 128;
|
||||
|
||||
/// The names of the column families that the running database code supports.
///
/// When the database is opened, these names are added after any column families
/// that were listed from the existing database on disk, with duplicates removed.
//
// NOTE(review): keep the explicit `'static` if this stays an associated constant,
// rustc lints against elided lifetimes in that position.
const COLUMN_FAMILIES_IN_CODE: &[&'static str] = &[
    // Column families for blocks
    "hash_by_height",
    "height_by_hash",
    "block_header_by_height",
    // Column families for transactions
    "tx_by_loc",
    "hash_by_tx_loc",
    "tx_loc_by_hash",
    // Column families for transparent data
    "balance_by_transparent_addr",
    "tx_loc_by_transparent_addr_loc",
    "utxo_by_out_loc",
    "utxo_loc_by_transparent_addr_loc",
    // Column families for Sprout
    "sprout_nullifiers",
    "sprout_anchors",
    "sprout_note_commitment_tree",
    // Column families for Sapling
    "sapling_nullifiers",
    "sapling_anchors",
    "sapling_note_commitment_tree",
    // Column families for Orchard
    "orchard_nullifiers",
    "orchard_anchors",
    "orchard_note_commitment_tree",
    // Column families for chain-wide data
    "history_tree",
    "tip_chain_value_pool",
];
|
||||
|
||||
/// Opens or creates the database at `config.path` for `network`,
|
||||
/// and returns a shared low-level database wrapper.
|
||||
pub fn new(config: &Config, network: Network) -> DiskDb {
|
||||
let path = config.db_path(network);
|
||||
|
||||
let running_version = database_format_version_in_code();
|
||||
let disk_version = database_format_version_on_disk(config, network)
|
||||
.expect("unable to read database format version file");
|
||||
|
||||
match disk_version.as_ref().map(|disk| disk.cmp(&running_version)) {
|
||||
// TODO: if the on-disk format is older, actually run the upgrade task after the
|
||||
// database has been opened (#6642)
|
||||
Some(Ordering::Less) => info!(
|
||||
?running_version,
|
||||
?disk_version,
|
||||
"trying to open older database format: launching upgrade task"
|
||||
),
|
||||
// TODO: if the on-disk format is newer, downgrade the version after the
|
||||
// database has been opened (#6642)
|
||||
Some(Ordering::Greater) => info!(
|
||||
?running_version,
|
||||
?disk_version,
|
||||
"trying to open newer database format: data should be compatible"
|
||||
),
|
||||
Some(Ordering::Equal) => info!(
|
||||
?running_version,
|
||||
"trying to open compatible database format"
|
||||
),
|
||||
None => info!(
|
||||
?running_version,
|
||||
"creating new database with the current format"
|
||||
),
|
||||
}
|
||||
|
||||
let db_options = DiskDb::options();
|
||||
|
||||
let column_families = vec![
|
||||
// Blocks
|
||||
rocksdb::ColumnFamilyDescriptor::new("hash_by_height", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new("height_by_hash", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new("block_header_by_height", db_options.clone()),
|
||||
// Transactions
|
||||
rocksdb::ColumnFamilyDescriptor::new("tx_by_loc", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new("hash_by_tx_loc", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new("tx_loc_by_hash", db_options.clone()),
|
||||
// Transparent
|
||||
rocksdb::ColumnFamilyDescriptor::new("balance_by_transparent_addr", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new(
|
||||
"tx_loc_by_transparent_addr_loc",
|
||||
db_options.clone(),
|
||||
),
|
||||
rocksdb::ColumnFamilyDescriptor::new("utxo_by_out_loc", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new(
|
||||
"utxo_loc_by_transparent_addr_loc",
|
||||
db_options.clone(),
|
||||
),
|
||||
// Sprout
|
||||
rocksdb::ColumnFamilyDescriptor::new("sprout_nullifiers", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new("sprout_anchors", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new("sprout_note_commitment_tree", db_options.clone()),
|
||||
// Sapling
|
||||
rocksdb::ColumnFamilyDescriptor::new("sapling_nullifiers", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new("sapling_anchors", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new(
|
||||
"sapling_note_commitment_tree",
|
||||
db_options.clone(),
|
||||
),
|
||||
// Orchard
|
||||
rocksdb::ColumnFamilyDescriptor::new("orchard_nullifiers", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new("orchard_anchors", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new(
|
||||
"orchard_note_commitment_tree",
|
||||
db_options.clone(),
|
||||
),
|
||||
// Chain
|
||||
rocksdb::ColumnFamilyDescriptor::new("history_tree", db_options.clone()),
|
||||
rocksdb::ColumnFamilyDescriptor::new("tip_chain_value_pool", db_options.clone()),
|
||||
];
|
||||
// When opening the database in read/write mode, all column families must be opened.
|
||||
//
|
||||
// To make Zebra forward-compatible with databases updated by later versions,
|
||||
// we read any existing column families off the disk, then add any new column families
|
||||
// from the current implementation.
|
||||
//
|
||||
// <https://github.com/facebook/rocksdb/wiki/Column-Families#reference>
|
||||
let column_families_on_disk = DB::list_cf(&db_options, &path).unwrap_or_default();
|
||||
let column_families_in_code = Self::COLUMN_FAMILIES_IN_CODE
|
||||
.iter()
|
||||
.map(ToString::to_string);
|
||||
|
||||
// TODO: move opening the database to a blocking thread (#2188)
|
||||
let db_result = rocksdb::DBWithThreadMode::<DBThreadMode>::open_cf_descriptors(
|
||||
&db_options,
|
||||
&path,
|
||||
column_families,
|
||||
);
|
||||
let column_families = column_families_on_disk
|
||||
.into_iter()
|
||||
.chain(column_families_in_code)
|
||||
.unique()
|
||||
.map(|cf_name| rocksdb::ColumnFamilyDescriptor::new(cf_name, db_options.clone()));
|
||||
|
||||
let db_result = DB::open_cf_descriptors(&db_options, &path, column_families);
|
||||
|
||||
match db_result {
|
||||
Ok(db) => {
|
||||
|
@ -453,6 +490,27 @@ impl DiskDb {
|
|||
|
||||
db.assert_default_cf_is_empty();
|
||||
|
||||
// Now we've checked that the database format is up-to-date,
|
||||
// mark it as updated on disk.
|
||||
//
|
||||
// # Concurrency
|
||||
//
|
||||
// The version must only be updated while RocksDB is holding the database
|
||||
// directory lock. This prevents multiple Zebra instances corrupting the version
|
||||
// file.
|
||||
//
|
||||
// # TODO
|
||||
//
|
||||
// - only update the version at the end of the format upgrade task (#6642)
|
||||
// - add a note to the format upgrade task code to update the version constants
|
||||
// whenever the format changes
|
||||
// - add a test that the format upgrade runs exactly once when:
|
||||
// 1. if an older cached state format is opened, the format is upgraded,
|
||||
// then if Zebra is launched again the format is not upgraded
|
||||
// 2. if the current cached state format is opened, the format is not upgraded
|
||||
write_database_format_version_to_disk(config, network)
|
||||
.expect("unable to write database format version file to disk");
|
||||
|
||||
db
|
||||
}
|
||||
|
||||
|
|
|
@ -107,24 +107,41 @@ impl ZebraDb {
|
|||
None => return Default::default(),
|
||||
};
|
||||
|
||||
let sapling_nct_handle = self.db.cf_handle("sapling_note_commitment_tree").unwrap();
|
||||
|
||||
self.db
|
||||
.zs_get(&sapling_nct_handle, &height)
|
||||
.map(Arc::new)
|
||||
self.sapling_note_commitment_tree_by_height(&height)
|
||||
.expect("Sapling note commitment tree must exist if there is a finalized tip")
|
||||
}
|
||||
|
||||
/// Returns the Sapling note commitment tree matching the given block height.
|
||||
#[allow(dead_code)]
|
||||
/// Returns the Sapling note commitment tree matching the given block height,
|
||||
/// or `None` if the height is above the finalized tip.
|
||||
#[allow(clippy::unwrap_in_result)]
|
||||
pub fn sapling_note_commitment_tree_by_height(
|
||||
&self,
|
||||
height: &Height,
|
||||
) -> Option<Arc<sapling::tree::NoteCommitmentTree>> {
|
||||
let tip_height = self.finalized_tip_height()?;
|
||||
|
||||
// If we're above the tip, searching backwards would always return the tip tree.
|
||||
// But the correct answer is "we don't know that tree yet".
|
||||
if *height > tip_height {
|
||||
return None;
|
||||
}
|
||||
|
||||
let sapling_trees = self.db.cf_handle("sapling_note_commitment_tree").unwrap();
|
||||
|
||||
self.db.zs_get(&sapling_trees, height).map(Arc::new)
|
||||
// If we know there must be a tree, search backwards for it.
|
||||
//
|
||||
// # Compatibility
|
||||
//
|
||||
// Allow older Zebra versions to read future database formats, after note commitment trees
|
||||
// have been deduplicated. See ticket #6642 for details.
|
||||
let (_first_duplicate_height, tree) = self
|
||||
.db
|
||||
.zs_prev_key_value_back_from(&sapling_trees, height)
|
||||
.expect(
|
||||
"Sapling note commitment trees must exist for all heights below the finalized tip",
|
||||
);
|
||||
|
||||
Some(Arc::new(tree))
|
||||
}
|
||||
|
||||
/// Returns the Orchard note commitment tree of the finalized tip
|
||||
|
@ -135,24 +152,38 @@ impl ZebraDb {
|
|||
None => return Default::default(),
|
||||
};
|
||||
|
||||
let orchard_nct_handle = self.db.cf_handle("orchard_note_commitment_tree").unwrap();
|
||||
|
||||
self.db
|
||||
.zs_get(&orchard_nct_handle, &height)
|
||||
.map(Arc::new)
|
||||
self.orchard_note_commitment_tree_by_height(&height)
|
||||
.expect("Orchard note commitment tree must exist if there is a finalized tip")
|
||||
}
|
||||
|
||||
/// Returns the Orchard note commitment tree matching the given block height.
|
||||
#[allow(dead_code)]
|
||||
/// Returns the Orchard note commitment tree matching the given block height,
|
||||
/// or `None` if the height is above the finalized tip.
|
||||
#[allow(clippy::unwrap_in_result)]
|
||||
pub fn orchard_note_commitment_tree_by_height(
|
||||
&self,
|
||||
height: &Height,
|
||||
) -> Option<Arc<orchard::tree::NoteCommitmentTree>> {
|
||||
let tip_height = self.finalized_tip_height()?;
|
||||
|
||||
// If we're above the tip, searching backwards would always return the tip tree.
|
||||
// But the correct answer is "we don't know that tree yet".
|
||||
if *height > tip_height {
|
||||
return None;
|
||||
}
|
||||
|
||||
let orchard_trees = self.db.cf_handle("orchard_note_commitment_tree").unwrap();
|
||||
|
||||
self.db.zs_get(&orchard_trees, height).map(Arc::new)
|
||||
// # Compatibility
|
||||
//
|
||||
// Allow older Zebra versions to read future database formats. See ticket #6642 for details.
|
||||
let (_first_duplicate_height, tree) = self
|
||||
.db
|
||||
.zs_prev_key_value_back_from(&orchard_trees, height)
|
||||
.expect(
|
||||
"Orchard note commitment trees must exist for all heights below the finalized tip",
|
||||
);
|
||||
|
||||
Some(Arc::new(tree))
|
||||
}
|
||||
|
||||
/// Returns the shielded note commitment trees of the finalized tip
|
||||
|
|
Loading…
Reference in New Issue