change(db): Upgrade subtrees from the tip backwards, for compatibility with wallet syncing (#7531)

* Avoid manual handling of previous sapling trees by using iterator windows instead

* Avoid manual sapling subtree index handling by comparing prev and current subtree indexes instead

* Simplify adding notes by using the exact number of remaining notes

* Simplify by skipping the first block, because it can't complete a subtree

* Re-use existing tree update code

* Apply the sapling changes to orchard subtree updates

* add a reverse database column family iterator function

* Make skipping the lowest tree independent of iteration order

* Move new subtree checks into the iterator, rename to end_height

* Split subtree calculation into a new method

* Split the calculate and write methods

* Quickly check the first subtree before running the full upgrade

* Do the quick checks every time Zebra runs, and refactor slow check error handling

* Do quick checks for orchard as well

* Make orchard tree upgrade match sapling upgrade code

* Upgrade subtrees in reverse height order

* Bump the database patch version so the upgrade runs again

* Reset previous subtree upgrade data before doing this one

* Add extra checks to subtree calculation to diagnose errors

* Use correct heights for subtrees completed at the end of a block

* Add even more checks to diagnose issues

* Instrument upgrade methods to improve diagnostics

* Prevent modification of re-used trees

* Debug with subtree positions as well

* Fix an off-by-one error with completed subtrees

* Fix typos and confusing comments

Co-authored-by: Marek <mail@marek.onl>

* Fix mistaken previous tree handling and end tree comments

* Remove unnecessary subtraction in remaining leaves calc

* Log heights when assertions fail

* Fix new subtree detection filter

* Move new subtree check into a method, cleanup unused code

* Remove redundant assertions

* Wait for subtree upgrade before testing RPCs

* Fix subtree search in quick check

* Temporarily upgrade subtrees in forward height order

* Clarify some comments

* Fix missing test imports

* Fix subtree logging

* Add a comment about a potential hang with future upgrades

* Fix zebrad var ownership

* Log more info when add_subtrees.rs fails

* cargo fmt --all

* Fix unrelated clippy::unnecessary_unwrap

* cargo clippy --fix --all-features --all-targets; cargo fmt --all

* Stop the quick check depending on tree de-duplication

* Refactor waiting for the upgrade into functions

* Wait for state upgrades whenever the cached state is updated

* Wait for the testnet upgrade in the right place

* Fix unused variable

* Fix a subtree detection bug and comments

* Remove an early reference to reverse direction

* Stop skipping subtrees completed at the end of blocks

* Actually fix new subtree code

* Upgrade subtrees in reverse height order

Reverts "Temporarily upgrade subtrees in forward height order"
This reverts commit a9558be214.

* Bump the database patch version to re-run the upgrade (for testing)

* Revert "Remove an early reference to reverse direction"

This reverts commit c206404377.

---------

Co-authored-by: Marek <mail@marek.onl>
This commit is contained in:
teor 2023-09-21 09:41:28 +10:00 committed by GitHub
parent b6a18e9bab
commit d651ee3c16
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 131 additions and 25 deletions

View File

@ -53,13 +53,13 @@ pub(crate) const DATABASE_FORMAT_MINOR_VERSION: u64 = 2;
/// The database format patch version, incremented each time the on-disk database format has a
/// significant format compatibility fix.
pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 1;
pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 2;
/// Returns the highest database version that modifies the subtree index format.
///
/// This version is used by tests to wait for the subtree upgrade to finish.
pub fn latest_version_for_adding_subtrees() -> Version {
Version::parse("25.2.1").expect("Hardcoded version string should be valid.")
Version::parse("25.2.2").expect("Hardcoded version string should be valid.")
}
/// The name of the file containing the minor and patch database versions.

View File

@ -258,6 +258,7 @@ impl PartialEq for DiskDb {
self.ephemeral, other.ephemeral,
"database with same path but different ephemeral configs",
);
return true;
}
@ -431,6 +432,46 @@ impl DiskDb {
///
/// Holding this iterator open might delay block commit transactions.
pub fn zs_range_iter<C, K, V, R>(&self, cf: &C, range: R) -> impl Iterator<Item = (K, V)> + '_
where
C: rocksdb::AsColumnFamilyRef,
K: IntoDisk + FromDisk,
V: FromDisk,
R: RangeBounds<K>,
{
self.zs_range_iter_with_direction(cf, range, false)
}
/// Returns a reverse iterator over the items in `cf` in `range`.
///
/// Holding this iterator open might delay block commit transactions.
///
/// This code is copied from `zs_range_iter()`, but with the mode reversed.
pub fn zs_reverse_range_iter<C, K, V, R>(
&self,
cf: &C,
range: R,
) -> impl Iterator<Item = (K, V)> + '_
where
C: rocksdb::AsColumnFamilyRef,
K: IntoDisk + FromDisk,
V: FromDisk,
R: RangeBounds<K>,
{
self.zs_range_iter_with_direction(cf, range, true)
}
/// Returns an iterator over the items in `cf` in `range`.
///
/// RocksDB iterators are ordered by increasing key bytes by default.
/// Otherwise, if `reverse` is `true`, the iterator is ordered by decreasing key bytes.
///
/// Holding this iterator open might delay block commit transactions.
fn zs_range_iter_with_direction<C, K, V, R>(
&self,
cf: &C,
range: R,
reverse: bool,
) -> impl Iterator<Item = (K, V)> + '_
where
C: rocksdb::AsColumnFamilyRef,
K: IntoDisk + FromDisk,
@ -451,40 +492,67 @@ impl DiskDb {
let start_bound = map_to_vec(range.start_bound());
let end_bound = map_to_vec(range.end_bound());
let range = (start_bound.clone(), end_bound);
let range = (start_bound, end_bound);
let start_bound_vec =
if let Included(ref start_bound) | Excluded(ref start_bound) = start_bound {
start_bound.clone()
} else {
// Actually unused
Vec::new()
};
let start_mode = if matches!(start_bound, Unbounded) {
// Unbounded iterators start at the first item
rocksdb::IteratorMode::Start
} else {
rocksdb::IteratorMode::From(start_bound_vec.as_slice(), rocksdb::Direction::Forward)
};
let mode = Self::zs_iter_mode(&range, reverse);
// Reading multiple items from iterators has caused database hangs,
// in previous RocksDB versions
self.db
.iterator_cf(cf, start_mode)
.iterator_cf(cf, mode)
.map(|result| result.expect("unexpected database failure"))
.map(|(key, value)| (key.to_vec(), value))
// Skip excluded start bound and empty ranges. The `start_mode` already skips keys
// before the start bound.
// Skip excluded "from" bound and empty ranges. The `mode` already skips keys
// strictly before the "from" bound.
.skip_while({
let range = range.clone();
move |(key, _value)| !range.contains(key)
})
// Take until the excluded end bound is reached, or we're after the included end bound.
// Take until the excluded "to" bound is reached,
// or we're after the included "to" bound.
.take_while(move |(key, _value)| range.contains(key))
.map(|(key, value)| (K::from_bytes(key), V::from_bytes(value)))
}
/// Returns the RocksDB iterator "from" mode for `range`.
///
/// RocksDB iterators are ordered by increasing key bytes by default.
/// Otherwise, if `reverse` is `true`, the iterator is ordered by decreasing key bytes.
fn zs_iter_mode<R>(range: &R, reverse: bool) -> rocksdb::IteratorMode
where
R: RangeBounds<Vec<u8>>,
{
use std::ops::Bound::*;
let from_bound = if reverse {
range.end_bound()
} else {
range.start_bound()
};
match from_bound {
Unbounded => {
if reverse {
// Reversed unbounded iterators start from the last item
rocksdb::IteratorMode::End
} else {
// Unbounded iterators start from the first item
rocksdb::IteratorMode::Start
}
}
Included(bound) | Excluded(bound) => {
let direction = if reverse {
rocksdb::Direction::Reverse
} else {
rocksdb::Direction::Forward
};
rocksdb::IteratorMode::From(bound.as_slice(), direction)
}
}
}
/// The ideal open file limit for Zebra
const IDEAL_OPEN_FILE_LIMIT: u64 = 1024;

View File

@ -21,6 +21,9 @@ use crate::service::finalized_state::{
/// Runs disk format upgrade for adding Sapling and Orchard note commitment subtrees to database.
///
/// Trees are added to the database in reverse height order, so that wallets can sync correctly
/// while the upgrade is running.
///
/// Returns `Ok` if the upgrade completed, and `Err` if it was cancelled.
#[allow(clippy::unwrap_in_result)]
#[instrument(skip(upgrade_db, cancel_receiver))]
@ -38,13 +41,21 @@ pub fn run(
// block can't complete multiple level 16 subtrees (or complete an entire subtree by itself).
// Currently, with 2MB blocks and v4/v5 sapling and orchard output sizes, the subtree index can
// increase by at most 1 every ~20 blocks.
//
// # Compatibility
//
// Because wallets search backwards from the chain tip, subtrees need to be added to the
// database in reverse height order. (Tip first, genesis last.)
//
// Otherwise, wallets that sync during the upgrade will be missing some notes.
// Generate a list of sapling subtree inputs: previous and current trees, and their end heights.
let subtrees = upgrade_db
.sapling_tree_by_height_range(..=initial_tip_height)
.sapling_tree_by_reversed_height_range(..=initial_tip_height)
// We need both the tree and its previous tree for each shielded block.
.tuple_windows()
.map(|((prev_end_height, prev_tree), (end_height, tree))| {
// Because the iterator is reversed, the larger tree is first.
.map(|((end_height, tree), (prev_end_height, prev_tree))| {
(prev_end_height, prev_tree, end_height, tree)
})
// Find new subtrees.
@ -65,10 +76,11 @@ pub fn run(
// Generate a list of orchard subtree inputs: previous and current trees, and their end heights.
let subtrees = upgrade_db
.orchard_tree_by_height_range(..=initial_tip_height)
.orchard_tree_by_reversed_height_range(..=initial_tip_height)
// We need both the tree and its previous tree for each shielded block.
.tuple_windows()
.map(|((prev_end_height, prev_tree), (end_height, tree))| {
// Because the iterator is reversed, the larger tree is first.
.map(|((end_height, tree), (prev_end_height, prev_tree))| {
(prev_end_height, prev_tree, end_height, tree)
})
// Find new subtrees.

View File

@ -198,6 +198,19 @@ impl ZebraDb {
self.db.zs_range_iter(&sapling_trees, range)
}
/// Returns the Sapling note commitment trees in the reversed range, in decreasing height order.
#[allow(clippy::unwrap_in_result)]
pub fn sapling_tree_by_reversed_height_range<R>(
&self,
range: R,
) -> impl Iterator<Item = (Height, Arc<sapling::tree::NoteCommitmentTree>)> + '_
where
R: std::ops::RangeBounds<Height>,
{
let sapling_trees = self.db.cf_handle("sapling_note_commitment_tree").unwrap();
self.db.zs_reverse_range_iter(&sapling_trees, range)
}
/// Returns the Sapling note commitment subtree at this `index`.
///
/// # Correctness
@ -331,6 +344,19 @@ impl ZebraDb {
self.db.zs_range_iter(&orchard_trees, range)
}
/// Returns the Orchard note commitment trees in the reversed range, in decreasing height order.
#[allow(clippy::unwrap_in_result)]
pub fn orchard_tree_by_reversed_height_range<R>(
&self,
range: R,
) -> impl Iterator<Item = (Height, Arc<orchard::tree::NoteCommitmentTree>)> + '_
where
R: std::ops::RangeBounds<Height>,
{
let orchard_trees = self.db.cf_handle("orchard_note_commitment_tree").unwrap();
self.db.zs_reverse_range_iter(&orchard_trees, range)
}
/// Returns the Orchard note commitment subtree at this `index`.
///
/// # Correctness