change(db): Make the first stable release forward-compatible with planned state changes (#6813)

* Implement minor and patch database format versions

* Log and update database format versions when opening database

* Refactor the current list of column families into a constant

* Open all available column families, including from future Zebra versions

* Refactor note commitment tree lookups to go through the height methods

* Make Sapling/Orchard note commitment tree lookup forwards compatible

* Ignore errors reading column family lists from disk

* Update format version comments and TODOs

* Correctly log newly created database formats

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
This commit is contained in:
teor 2023-06-07 07:18:57 +10:00 committed by GitHub
parent 815c77870d
commit 355f1233f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 292 additions and 74 deletions

View File

@ -5962,6 +5962,7 @@ dependencies = [
"regex",
"rlimit",
"rocksdb",
"semver 1.0.17",
"serde",
"serde_json",
"spandoc",

View File

@ -46,6 +46,7 @@ mset = "0.1.1"
regex = "1.8.4"
rlimit = "0.9.1"
rocksdb = { version = "0.21.0", default_features = false, features = ["lz4"] }
semver = "1.0.17"
serde = { version = "1.0.163", features = ["serde_derive"] }
tempfile = "3.5.0"
thiserror = "1.0.40"

View File

@ -1,16 +1,26 @@
//! Cached state configuration for Zebra.
use std::{
fs::{canonicalize, remove_dir_all, DirEntry, ReadDir},
fs::{self, canonicalize, remove_dir_all, DirEntry, ReadDir},
io::ErrorKind,
path::{Path, PathBuf},
};
use semver::Version;
use serde::{Deserialize, Serialize};
use tokio::task::{spawn_blocking, JoinHandle};
use tracing::Span;
use zebra_chain::parameters::Network;
use crate::{
constants::{
DATABASE_FORMAT_MINOR_VERSION, DATABASE_FORMAT_PATCH_VERSION, DATABASE_FORMAT_VERSION,
DATABASE_FORMAT_VERSION_FILE_NAME,
},
BoxError,
};
/// Configuration for the state service.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields, default)]
@ -125,6 +135,15 @@ impl Config {
}
}
/// Returns the location of the file that records the database format version.
///
/// The file is named [`DATABASE_FORMAT_VERSION_FILE_NAME`], and sits directly inside
/// the path returned by `db_path` for `network`.
pub fn version_file_path(&self, network: Network) -> PathBuf {
    self.db_path(network).join(DATABASE_FORMAT_VERSION_FILE_NAME)
}
/// Construct a config for an ephemeral database
pub fn ephemeral() -> Config {
Config {
@ -267,8 +286,83 @@ fn parse_dir_name(entry: &DirEntry) -> Option<String> {
/// Parse the state version number from `dir_name`.
///
/// Returns `None` if parsing fails, or the directory name is not in the expected format.
fn parse_version_number(dir_name: &str) -> Option<u32> {
fn parse_version_number(dir_name: &str) -> Option<u64> {
dir_name
.strip_prefix('v')
.and_then(|version| version.parse().ok())
}
/// Builds the full semantic version of the database format implemented by this code.
///
/// The result combines the major, minor, and patch database format constants.
/// An existing on-disk database can have different minor and patch versions.
pub fn database_format_version_in_code() -> Version {
    let (major, minor, patch) = (
        DATABASE_FORMAT_VERSION,
        DATABASE_FORMAT_MINOR_VERSION,
        DATABASE_FORMAT_PATCH_VERSION,
    );

    Version::new(major, minor, patch)
}
/// Returns the full semantic version of the on-disk database.
/// If the database format version file does not exist, returns `Ok(None)`.
///
/// The major version always comes from [`DATABASE_FORMAT_VERSION`]. Only the minor
/// and patch versions are read from the version file, which stores them in
/// `minor.patch` form.
///
/// This is the format of the data on disk. The minor and patch versions
/// implemented by the running Zebra code can be different.
///
/// # Errors
///
/// Returns an error if the version file exists but can't be read, if its contents
/// don't contain a `.` separator, or if either component is not a valid unsigned
/// integer.
pub fn database_format_version_on_disk(
    config: &Config,
    network: Network,
) -> Result<Option<Version>, BoxError> {
    let version_path = config.version_file_path(network);

    let version = match fs::read_to_string(version_path) {
        Ok(version) => version,
        Err(e) if e.kind() == ErrorKind::NotFound => {
            // If the version file doesn't exist, don't guess the version.
            // (It will end up being the version in code, once the database is created.)
            return Ok(None);
        }
        Err(e) => return Err(e.into()),
    };

    // Ignore leading and trailing whitespace, so a trailing newline
    // (for example, after the file was edited by hand) doesn't make
    // the patch version unparseable.
    let (minor, patch) = version
        .trim()
        .split_once('.')
        .ok_or("invalid database format version file")?;

    Ok(Some(Version::new(
        DATABASE_FORMAT_VERSION,
        minor.parse()?,
        patch.parse()?,
    )))
}
/// Records the database format version implemented by the running code in the
/// on-disk version file.
///
/// Only the minor and patch versions are stored, as `minor.patch`.
///
/// # Correctness
///
/// This should only be called after all running format upgrades are complete.
///
/// # Concurrency
///
/// This must only be called while RocksDB has an open database for `config`.
/// Otherwise, multiple Zebra processes could write the version at the same time,
/// corrupting the file.
pub fn write_database_format_version_to_disk(
    config: &Config,
    network: Network,
) -> Result<(), BoxError> {
    // The directory path already contains the major version,
    // so the file only holds the remaining two components.
    let minor_and_patch = [DATABASE_FORMAT_MINOR_VERSION, DATABASE_FORMAT_PATCH_VERSION]
        .map(|component| component.to_string())
        .join(".");

    // # Concurrency
    //
    // The caller handles locking for this file write.
    fs::write(config.version_file_path(network), minor_and_patch)?;

    Ok(())
}

View File

@ -1,4 +1,11 @@
//! Definitions of constants.
//! Constants that impact state behaviour.
use lazy_static::lazy_static;
use regex::Regex;
// For doc comment links
#[allow(unused_imports)]
use crate::config::{database_format_version_in_code, database_format_version_on_disk};
pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY;
@ -19,13 +26,42 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY;
// TODO: change to HeightDiff
pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1;
/// The database format version, incremented each time the database format changes.
pub const DATABASE_FORMAT_VERSION: u32 = 25;
/// The database format major version, incremented each time the on-disk database format has a
/// breaking data format change.
///
/// Breaking changes include:
/// - deleting a column family, or
/// - changing a column family's data format in an incompatible way.
///
/// Breaking changes become minor version changes if:
/// - we previously added compatibility code, and
/// - it's available in all supported Zebra versions.
///
/// Use [`database_format_version_in_code()`] or [`database_format_version_on_disk()`]
/// to get the full semantic format version.
pub const DATABASE_FORMAT_VERSION: u64 = 25;
/// The database format minor version, incremented each time the on-disk database format has a
/// significant data format change.
///
/// Significant changes include:
/// - adding new column families,
/// - changing the format of a column family in a compatible way, or
/// - breaking changes with compatibility code in all supported Zebra versions.
pub const DATABASE_FORMAT_MINOR_VERSION: u64 = 0;
/// The database format patch version, incremented each time the on-disk database format has a
/// significant format compatibility fix.
pub const DATABASE_FORMAT_PATCH_VERSION: u64 = 1;
/// The name of the file containing the minor and patch database versions.
pub const DATABASE_FORMAT_VERSION_FILE_NAME: &str = "version";
/// The maximum number of blocks to check for NU5 transactions,
/// before we assume we are on a pre-NU5 legacy chain.
///
/// Zebra usually only has to check back a few blocks, but on testnet it can be a long time between v5 transactions.
/// Zebra usually only has to check back a few blocks on mainnet, but on testnet it can be a long
/// time between v5 transactions.
pub const MAX_LEGACY_CHAIN_BLOCKS: usize = 100_000;
/// The maximum number of non-finalized chain forks Zebra will track.
@ -58,9 +94,6 @@ const MAX_FIND_BLOCK_HEADERS_RESULTS_FOR_PROTOCOL: u32 = 160;
pub const MAX_FIND_BLOCK_HEADERS_RESULTS_FOR_ZEBRA: u32 =
MAX_FIND_BLOCK_HEADERS_RESULTS_FOR_PROTOCOL - 2;
use lazy_static::lazy_static;
use regex::Regex;
lazy_static! {
/// Regex that matches the RocksDB error when its lock file is already open.
pub static ref LOCK_FILE_ERROR: Regex = Regex::new("(lock file).*(temporarily unavailable)|(in use)|(being used by another process)").expect("regex is valid");

View File

@ -10,13 +10,18 @@
//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
//! be incremented each time the database format (column, serialization, etc) changes.
use std::{fmt::Debug, path::Path, sync::Arc};
use std::{cmp::Ordering, fmt::Debug, path::Path, sync::Arc};
use itertools::Itertools;
use rlimit::increase_nofile_limit;
use zebra_chain::parameters::Network;
use crate::{
config::{
database_format_version_in_code, database_format_version_on_disk,
write_database_format_version_to_disk,
},
service::finalized_state::disk_format::{FromDisk, IntoDisk},
Config,
};
@ -386,61 +391,93 @@ impl DiskDb {
/// <https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ#configuration-and-tuning>
const MEMTABLE_RAM_CACHE_MEGABYTES: usize = 128;
/// The column families supported by the running database code.
///
/// Both `'static` lifetimes are written out because this is an associated constant:
/// eliding a lifetime here is reported by rustc's
/// `elided_lifetimes_in_associated_constant` lint.
const COLUMN_FAMILIES_IN_CODE: &'static [&'static str] = &[
    // Blocks
    "hash_by_height",
    "height_by_hash",
    "block_header_by_height",
    // Transactions
    "tx_by_loc",
    "hash_by_tx_loc",
    "tx_loc_by_hash",
    // Transparent
    "balance_by_transparent_addr",
    "tx_loc_by_transparent_addr_loc",
    "utxo_by_out_loc",
    "utxo_loc_by_transparent_addr_loc",
    // Sprout
    "sprout_nullifiers",
    "sprout_anchors",
    "sprout_note_commitment_tree",
    // Sapling
    "sapling_nullifiers",
    "sapling_anchors",
    "sapling_note_commitment_tree",
    // Orchard
    "orchard_nullifiers",
    "orchard_anchors",
    "orchard_note_commitment_tree",
    // Chain
    "history_tree",
    "tip_chain_value_pool",
];
/// Opens or creates the database at `config.path` for `network`,
/// and returns a shared low-level database wrapper.
pub fn new(config: &Config, network: Network) -> DiskDb {
let path = config.db_path(network);
let running_version = database_format_version_in_code();
let disk_version = database_format_version_on_disk(config, network)
.expect("unable to read database format version file");
match disk_version.as_ref().map(|disk| disk.cmp(&running_version)) {
// TODO: if the on-disk format is older, actually run the upgrade task after the
// database has been opened (#6642)
Some(Ordering::Less) => info!(
?running_version,
?disk_version,
"trying to open older database format: launching upgrade task"
),
// TODO: if the on-disk format is newer, downgrade the version after the
// database has been opened (#6642)
Some(Ordering::Greater) => info!(
?running_version,
?disk_version,
"trying to open newer database format: data should be compatible"
),
Some(Ordering::Equal) => info!(
?running_version,
"trying to open compatible database format"
),
None => info!(
?running_version,
"creating new database with the current format"
),
}
let db_options = DiskDb::options();
let column_families = vec![
// Blocks
rocksdb::ColumnFamilyDescriptor::new("hash_by_height", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("height_by_hash", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("block_header_by_height", db_options.clone()),
// Transactions
rocksdb::ColumnFamilyDescriptor::new("tx_by_loc", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("hash_by_tx_loc", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("tx_loc_by_hash", db_options.clone()),
// Transparent
rocksdb::ColumnFamilyDescriptor::new("balance_by_transparent_addr", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new(
"tx_loc_by_transparent_addr_loc",
db_options.clone(),
),
rocksdb::ColumnFamilyDescriptor::new("utxo_by_out_loc", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new(
"utxo_loc_by_transparent_addr_loc",
db_options.clone(),
),
// Sprout
rocksdb::ColumnFamilyDescriptor::new("sprout_nullifiers", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("sprout_anchors", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("sprout_note_commitment_tree", db_options.clone()),
// Sapling
rocksdb::ColumnFamilyDescriptor::new("sapling_nullifiers", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("sapling_anchors", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new(
"sapling_note_commitment_tree",
db_options.clone(),
),
// Orchard
rocksdb::ColumnFamilyDescriptor::new("orchard_nullifiers", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("orchard_anchors", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new(
"orchard_note_commitment_tree",
db_options.clone(),
),
// Chain
rocksdb::ColumnFamilyDescriptor::new("history_tree", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("tip_chain_value_pool", db_options.clone()),
];
// When opening the database in read/write mode, all column families must be opened.
//
// To make Zebra forward-compatible with databases updated by later versions,
// we read any existing column families off the disk, then add any new column families
// from the current implementation.
//
// <https://github.com/facebook/rocksdb/wiki/Column-Families#reference>
let column_families_on_disk = DB::list_cf(&db_options, &path).unwrap_or_default();
let column_families_in_code = Self::COLUMN_FAMILIES_IN_CODE
.iter()
.map(ToString::to_string);
// TODO: move opening the database to a blocking thread (#2188)
let db_result = rocksdb::DBWithThreadMode::<DBThreadMode>::open_cf_descriptors(
&db_options,
&path,
column_families,
);
let column_families = column_families_on_disk
.into_iter()
.chain(column_families_in_code)
.unique()
.map(|cf_name| rocksdb::ColumnFamilyDescriptor::new(cf_name, db_options.clone()));
let db_result = DB::open_cf_descriptors(&db_options, &path, column_families);
match db_result {
Ok(db) => {
@ -453,6 +490,27 @@ impl DiskDb {
db.assert_default_cf_is_empty();
// Now we've checked that the database format is up-to-date,
// mark it as updated on disk.
//
// # Concurrency
//
// The version must only be updated while RocksDB is holding the database
// directory lock. This prevents multiple Zebra instances corrupting the version
// file.
//
// # TODO
//
// - only update the version at the end of the format upgrade task (#6642)
// - add a note to the format upgrade task code to update the version constants
// whenever the format changes
// - add a test that the format upgrade runs exactly once when:
// 1. if an older cached state format is opened, the format is upgraded,
// then if Zebra is launched again the format is not upgraded
// 2. if the current cached state format is opened, the format is not upgraded
write_database_format_version_to_disk(config, network)
.expect("unable to write database format version file to disk");
db
}

View File

@ -107,24 +107,41 @@ impl ZebraDb {
None => return Default::default(),
};
let sapling_nct_handle = self.db.cf_handle("sapling_note_commitment_tree").unwrap();
self.db
.zs_get(&sapling_nct_handle, &height)
.map(Arc::new)
self.sapling_note_commitment_tree_by_height(&height)
.expect("Sapling note commitment tree must exist if there is a finalized tip")
}
/// Returns the Sapling note commitment tree matching the given block height.
#[allow(dead_code)]
/// Returns the Sapling note commitment tree matching the given block height,
/// or `None` if the height is above the finalized tip.
#[allow(clippy::unwrap_in_result)]
pub fn sapling_note_commitment_tree_by_height(
&self,
height: &Height,
) -> Option<Arc<sapling::tree::NoteCommitmentTree>> {
let tip_height = self.finalized_tip_height()?;
// If we're above the tip, searching backwards would always return the tip tree.
// But the correct answer is "we don't know that tree yet".
if *height > tip_height {
return None;
}
let sapling_trees = self.db.cf_handle("sapling_note_commitment_tree").unwrap();
self.db.zs_get(&sapling_trees, height).map(Arc::new)
// If we know there must be a tree, search backwards for it.
//
// # Compatibility
//
// Allow older Zebra versions to read future database formats, after note commitment trees
// have been deduplicated. See ticket #6642 for details.
let (_first_duplicate_height, tree) = self
.db
.zs_prev_key_value_back_from(&sapling_trees, height)
.expect(
"Sapling note commitment trees must exist for all heights below the finalized tip",
);
Some(Arc::new(tree))
}
/// Returns the Orchard note commitment tree of the finalized tip
@ -135,24 +152,38 @@ impl ZebraDb {
None => return Default::default(),
};
let orchard_nct_handle = self.db.cf_handle("orchard_note_commitment_tree").unwrap();
self.db
.zs_get(&orchard_nct_handle, &height)
.map(Arc::new)
self.orchard_note_commitment_tree_by_height(&height)
.expect("Orchard note commitment tree must exist if there is a finalized tip")
}
/// Returns the Orchard note commitment tree matching the given block height.
#[allow(dead_code)]
/// Returns the Orchard note commitment tree matching the given block height,
/// or `None` if the height is above the finalized tip.
#[allow(clippy::unwrap_in_result)]
pub fn orchard_note_commitment_tree_by_height(
&self,
height: &Height,
) -> Option<Arc<orchard::tree::NoteCommitmentTree>> {
let tip_height = self.finalized_tip_height()?;
// If we're above the tip, searching backwards would always return the tip tree.
// But the correct answer is "we don't know that tree yet".
if *height > tip_height {
return None;
}
let orchard_trees = self.db.cf_handle("orchard_note_commitment_tree").unwrap();
self.db.zs_get(&orchard_trees, height).map(Arc::new)
// # Compatibility
//
// Allow older Zebra versions to read future database formats. See ticket #6642 for details.
let (_first_duplicate_height, tree) = self
.db
.zs_prev_key_value_back_from(&orchard_trees, height)
.expect(
"Orchard note commitment trees must exist for all heights below the finalized tip",
);
Some(Arc::new(tree))
}
/// Returns the shielded note commitment trees of the finalized tip