change(db): Upgrade subtrees from the tip backwards, for compatibility with wallet syncing (#7531)
* Avoid manual handling of previous sapling trees by using iterator windows instead * Avoid manual sapling subtree index handling by comparing prev and current subtree indexes instead * Simplify adding notes by using the exact number of remaining notes * Simplify by skipping the first block, because it can't complete a subtree * Re-use existing tree update code * Apply the sapling changes to orchard subtree updates * add a reverse database column family iterator function * Make skipping the lowest tree independent of iteration order * Move new subtree checks into the iterator, rename to end_height * Split subtree calculation into a new method * Split the calculate and write methods * Quickly check the first subtree before running the full upgrade * Do the quick checks every time Zebra runs, and refactor slow check error handling * Do quick checks for orchard as well * Make orchard tree upgrade match sapling upgrade code * Upgrade subtrees in reverse height order * Bump the database patch version so the upgrade runs again * Reset previous subtree upgrade data before doing this one * Add extra checks to subtree calculation to diagnose errors * Use correct heights for subtrees completed at the end of a block * Add even more checks to diagnose issues * Instrument upgrade methods to improve diagnostics * Prevent modification of re-used trees * Debug with subtree positions as well * Fix an off-by-one error with completed subtrees * Fix typos and confusing comments Co-authored-by: Marek <mail@marek.onl> * Fix mistaken previous tree handling and end tree comments * Remove unnecessary subtraction in remaining leaves calc * Log heights when assertions fail * Fix new subtree detection filter * Move new subtree check into a method, cleanup unused code * Remove redundant assertions * Wait for subtree upgrade before testing RPCs * Fix subtree search in quick check * Temporarily upgrade subtrees in forward height order * Clarify some comments * Fix missing test imports * Fix subtree logging * Add a comment about a potential hang with future upgrades * Fix zebrad var ownership * Log more info when add_subtrees.rs fails * cargo fmt --all * Fix unrelated clippy::unnecessary_unwrap * cargo clippy --fix --all-features --all-targets; cargo fmt --all * Stop the quick check depending on tree de-duplication * Refactor waiting for the upgrade into functions * Wait for state upgrades whenever the cached state is updated * Wait for the testnet upgrade in the right place * Fix unused variable * Fix a subtree detection bug and comments * Remove an early reference to reverse direction * Stop skipping subtrees completed at the end of blocks * Actually fix new subtree code * Upgrade subtrees in reverse height order Reverts "Temporarily upgrade subtrees in forward height order" This reverts commita9558be214
. * Bump the database patch version to re-run the upgrade (for testing) * Revert "Remove an early reference to reverse direction" This reverts commitc206404377
. --------- Co-authored-by: Marek <mail@marek.onl>
This commit is contained in:
parent
b6a18e9bab
commit
d651ee3c16
|
@ -53,13 +53,13 @@ pub(crate) const DATABASE_FORMAT_MINOR_VERSION: u64 = 2;
|
|||
|
||||
/// The database format patch version, incremented each time the on-disk database format has a
|
||||
/// significant format compatibility fix.
|
||||
pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 1;
|
||||
pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 2;
|
||||
|
||||
/// Returns the highest database version that modifies the subtree index format.
|
||||
///
|
||||
/// This version is used by tests to wait for the subtree upgrade to finish.
|
||||
pub fn latest_version_for_adding_subtrees() -> Version {
|
||||
Version::parse("25.2.1").expect("Hardcoded version string should be valid.")
|
||||
Version::parse("25.2.2").expect("Hardcoded version string should be valid.")
|
||||
}
|
||||
|
||||
/// The name of the file containing the minor and patch database versions.
|
||||
|
|
|
@ -258,6 +258,7 @@ impl PartialEq for DiskDb {
|
|||
self.ephemeral, other.ephemeral,
|
||||
"database with same path but different ephemeral configs",
|
||||
);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -431,6 +432,46 @@ impl DiskDb {
|
|||
///
|
||||
/// Holding this iterator open might delay block commit transactions.
|
||||
pub fn zs_range_iter<C, K, V, R>(&self, cf: &C, range: R) -> impl Iterator<Item = (K, V)> + '_
|
||||
where
|
||||
C: rocksdb::AsColumnFamilyRef,
|
||||
K: IntoDisk + FromDisk,
|
||||
V: FromDisk,
|
||||
R: RangeBounds<K>,
|
||||
{
|
||||
self.zs_range_iter_with_direction(cf, range, false)
|
||||
}
|
||||
|
||||
/// Returns a reverse iterator over the items in `cf` in `range`.
|
||||
///
|
||||
/// Holding this iterator open might delay block commit transactions.
|
||||
///
|
||||
/// This code is copied from `zs_range_iter()`, but with the mode reversed.
|
||||
pub fn zs_reverse_range_iter<C, K, V, R>(
|
||||
&self,
|
||||
cf: &C,
|
||||
range: R,
|
||||
) -> impl Iterator<Item = (K, V)> + '_
|
||||
where
|
||||
C: rocksdb::AsColumnFamilyRef,
|
||||
K: IntoDisk + FromDisk,
|
||||
V: FromDisk,
|
||||
R: RangeBounds<K>,
|
||||
{
|
||||
self.zs_range_iter_with_direction(cf, range, true)
|
||||
}
|
||||
|
||||
/// Returns an iterator over the items in `cf` in `range`.
|
||||
///
|
||||
/// RocksDB iterators are ordered by increasing key bytes by default.
|
||||
/// Otherwise, if `reverse` is `true`, the iterator is ordered by decreasing key bytes.
|
||||
///
|
||||
/// Holding this iterator open might delay block commit transactions.
|
||||
fn zs_range_iter_with_direction<C, K, V, R>(
|
||||
&self,
|
||||
cf: &C,
|
||||
range: R,
|
||||
reverse: bool,
|
||||
) -> impl Iterator<Item = (K, V)> + '_
|
||||
where
|
||||
C: rocksdb::AsColumnFamilyRef,
|
||||
K: IntoDisk + FromDisk,
|
||||
|
@ -451,40 +492,67 @@ impl DiskDb {
|
|||
|
||||
let start_bound = map_to_vec(range.start_bound());
|
||||
let end_bound = map_to_vec(range.end_bound());
|
||||
let range = (start_bound.clone(), end_bound);
|
||||
let range = (start_bound, end_bound);
|
||||
|
||||
let start_bound_vec =
|
||||
if let Included(ref start_bound) | Excluded(ref start_bound) = start_bound {
|
||||
start_bound.clone()
|
||||
} else {
|
||||
// Actually unused
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
let start_mode = if matches!(start_bound, Unbounded) {
|
||||
// Unbounded iterators start at the first item
|
||||
rocksdb::IteratorMode::Start
|
||||
} else {
|
||||
rocksdb::IteratorMode::From(start_bound_vec.as_slice(), rocksdb::Direction::Forward)
|
||||
};
|
||||
let mode = Self::zs_iter_mode(&range, reverse);
|
||||
|
||||
// Reading multiple items from iterators has caused database hangs,
|
||||
// in previous RocksDB versions
|
||||
self.db
|
||||
.iterator_cf(cf, start_mode)
|
||||
.iterator_cf(cf, mode)
|
||||
.map(|result| result.expect("unexpected database failure"))
|
||||
.map(|(key, value)| (key.to_vec(), value))
|
||||
// Skip excluded start bound and empty ranges. The `start_mode` already skips keys
|
||||
// before the start bound.
|
||||
// Skip excluded "from" bound and empty ranges. The `mode` already skips keys
|
||||
// strictly before the "from" bound.
|
||||
.skip_while({
|
||||
let range = range.clone();
|
||||
move |(key, _value)| !range.contains(key)
|
||||
})
|
||||
// Take until the excluded end bound is reached, or we're after the included end bound.
|
||||
// Take until the excluded "to" bound is reached,
|
||||
// or we're after the included "to" bound.
|
||||
.take_while(move |(key, _value)| range.contains(key))
|
||||
.map(|(key, value)| (K::from_bytes(key), V::from_bytes(value)))
|
||||
}
|
||||
|
||||
/// Returns the RocksDB iterator "from" mode for `range`.
|
||||
///
|
||||
/// RocksDB iterators are ordered by increasing key bytes by default.
|
||||
/// Otherwise, if `reverse` is `true`, the iterator is ordered by decreasing key bytes.
|
||||
fn zs_iter_mode<R>(range: &R, reverse: bool) -> rocksdb::IteratorMode
|
||||
where
|
||||
R: RangeBounds<Vec<u8>>,
|
||||
{
|
||||
use std::ops::Bound::*;
|
||||
|
||||
let from_bound = if reverse {
|
||||
range.end_bound()
|
||||
} else {
|
||||
range.start_bound()
|
||||
};
|
||||
|
||||
match from_bound {
|
||||
Unbounded => {
|
||||
if reverse {
|
||||
// Reversed unbounded iterators start from the last item
|
||||
rocksdb::IteratorMode::End
|
||||
} else {
|
||||
// Unbounded iterators start from the first item
|
||||
rocksdb::IteratorMode::Start
|
||||
}
|
||||
}
|
||||
|
||||
Included(bound) | Excluded(bound) => {
|
||||
let direction = if reverse {
|
||||
rocksdb::Direction::Reverse
|
||||
} else {
|
||||
rocksdb::Direction::Forward
|
||||
};
|
||||
|
||||
rocksdb::IteratorMode::From(bound.as_slice(), direction)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The ideal open file limit for Zebra
|
||||
const IDEAL_OPEN_FILE_LIMIT: u64 = 1024;
|
||||
|
||||
|
|
|
@ -21,6 +21,9 @@ use crate::service::finalized_state::{
|
|||
|
||||
/// Runs disk format upgrade for adding Sapling and Orchard note commitment subtrees to database.
|
||||
///
|
||||
/// Trees are added to the database in reverse height order, so that wallets can sync correctly
|
||||
/// while the upgrade is running.
|
||||
///
|
||||
/// Returns `Ok` if the upgrade completed, and `Err` if it was cancelled.
|
||||
#[allow(clippy::unwrap_in_result)]
|
||||
#[instrument(skip(upgrade_db, cancel_receiver))]
|
||||
|
@ -38,13 +41,21 @@ pub fn run(
|
|||
// block can't complete multiple level 16 subtrees (or complete an entire subtree by itself).
|
||||
// Currently, with 2MB blocks and v4/v5 sapling and orchard output sizes, the subtree index can
|
||||
// increase by at most 1 every ~20 blocks.
|
||||
//
|
||||
// # Compatibility
|
||||
//
|
||||
// Because wallets search backwards from the chain tip, subtrees need to be added to the
|
||||
// database in reverse height order. (Tip first, genesis last.)
|
||||
//
|
||||
// Otherwise, wallets that sync during the upgrade will be missing some notes.
|
||||
|
||||
// Generate a list of sapling subtree inputs: previous and current trees, and their end heights.
|
||||
let subtrees = upgrade_db
|
||||
.sapling_tree_by_height_range(..=initial_tip_height)
|
||||
.sapling_tree_by_reversed_height_range(..=initial_tip_height)
|
||||
// We need both the tree and its previous tree for each shielded block.
|
||||
.tuple_windows()
|
||||
.map(|((prev_end_height, prev_tree), (end_height, tree))| {
|
||||
// Because the iterator is reversed, the larger tree is first.
|
||||
.map(|((end_height, tree), (prev_end_height, prev_tree))| {
|
||||
(prev_end_height, prev_tree, end_height, tree)
|
||||
})
|
||||
// Find new subtrees.
|
||||
|
@ -65,10 +76,11 @@ pub fn run(
|
|||
|
||||
// Generate a list of orchard subtree inputs: previous and current trees, and their end heights.
|
||||
let subtrees = upgrade_db
|
||||
.orchard_tree_by_height_range(..=initial_tip_height)
|
||||
.orchard_tree_by_reversed_height_range(..=initial_tip_height)
|
||||
// We need both the tree and its previous tree for each shielded block.
|
||||
.tuple_windows()
|
||||
.map(|((prev_end_height, prev_tree), (end_height, tree))| {
|
||||
// Because the iterator is reversed, the larger tree is first.
|
||||
.map(|((end_height, tree), (prev_end_height, prev_tree))| {
|
||||
(prev_end_height, prev_tree, end_height, tree)
|
||||
})
|
||||
// Find new subtrees.
|
||||
|
|
|
@ -198,6 +198,19 @@ impl ZebraDb {
|
|||
self.db.zs_range_iter(&sapling_trees, range)
|
||||
}
|
||||
|
||||
/// Returns the Sapling note commitment trees in the reversed range, in decreasing height order.
|
||||
#[allow(clippy::unwrap_in_result)]
|
||||
pub fn sapling_tree_by_reversed_height_range<R>(
|
||||
&self,
|
||||
range: R,
|
||||
) -> impl Iterator<Item = (Height, Arc<sapling::tree::NoteCommitmentTree>)> + '_
|
||||
where
|
||||
R: std::ops::RangeBounds<Height>,
|
||||
{
|
||||
let sapling_trees = self.db.cf_handle("sapling_note_commitment_tree").unwrap();
|
||||
self.db.zs_reverse_range_iter(&sapling_trees, range)
|
||||
}
|
||||
|
||||
/// Returns the Sapling note commitment subtree at this `index`.
|
||||
///
|
||||
/// # Correctness
|
||||
|
@ -331,6 +344,19 @@ impl ZebraDb {
|
|||
self.db.zs_range_iter(&orchard_trees, range)
|
||||
}
|
||||
|
||||
/// Returns the Orchard note commitment trees in the reversed range, in decreasing height order.
|
||||
#[allow(clippy::unwrap_in_result)]
|
||||
pub fn orchard_tree_by_reversed_height_range<R>(
|
||||
&self,
|
||||
range: R,
|
||||
) -> impl Iterator<Item = (Height, Arc<orchard::tree::NoteCommitmentTree>)> + '_
|
||||
where
|
||||
R: std::ops::RangeBounds<Height>,
|
||||
{
|
||||
let orchard_trees = self.db.cf_handle("orchard_note_commitment_tree").unwrap();
|
||||
self.db.zs_reverse_range_iter(&orchard_trees, range)
|
||||
}
|
||||
|
||||
/// Returns the Orchard note commitment subtree at this `index`.
|
||||
///
|
||||
/// # Correctness
|
||||
|
|
Loading…
Reference in New Issue