change(db): Upgrade subtrees from the tip backwards, for compatibility with wallet syncing (#7531)

* Avoid manual handling of previous sapling trees by using iterator windows instead * Avoid manual sapling subtree index handling by comparing prev and current subtree indexes instead * Simplify adding notes by using the exact number of remaining notes * Simplify by skipping the first block, because it can't complete a subtree * Re-use existing tree update code * Apply the sapling changes to orchard subtree updates * add a reverse database column family iterator function * Make skipping the lowest tree independent of iteration order * Move new subtree checks into the iterator, rename to end_height * Split subtree calculation into a new method * Split the calculate and write methods * Quickly check the first subtree before running the full upgrade * Do the quick checks every time Zebra runs, and refactor slow check error handling * Do quick checks for orchard as well * Make orchard tree upgrade match sapling upgrade code * Upgrade subtrees in reverse height order * Bump the database patch version so the upgrade runs again * Reset previous subtree upgrade data before doing this one * Add extra checks to subtree calculation to diagnose errors * Use correct heights for subtrees completed at the end of a block * Add even more checks to diagnose issues * Instrument upgrade methods to improve diagnostics * Prevent modification of re-used trees * Debug with subtree positions as well * Fix an off-by-one error with completed subtrees * Fix typos and confusing comments Co-authored-by: Marek <mail@marek.onl> * Fix mistaken previous tree handling and end tree comments * Remove unnecessary subtraction in remaining leaves calc * Log heights when assertions fail * Fix new subtree detection filter * Move new subtree check into a method, cleanup unused code * Remove redundant assertions * Wait for subtree upgrade before testing RPCs * Fix subtree search in quick check * Temporarily upgrade subtrees in forward height order * Clarify some comments * Fix missing test imports * Fix subtree logging * Add a comment about a potential hang with future upgrades * Fix zebrad var ownership * Log more info when add_subtrees.rs fails * cargo fmt --all * Fix unrelated clippy::unnecessary_unwrap * cargo clippy --fix --all-features --all-targets; cargo fmt --all * Stop the quick check depending on tree de-duplication * Refactor waiting for the upgrade into functions * Wait for state upgrades whenever the cached state is updated * Wait for the testnet upgrade in the right place * Fix unused variable * Fix a subtree detection bug and comments * Remove an early reference to reverse direction * Stop skipping subtrees completed at the end of blocks * Actually fix new subtree code * Upgrade subtrees in reverse height order Reverts "Temporarily upgrade subtrees in forward height order" This reverts commit a9558be214. * Bump the database patch version to re-run the upgrade (for testing) * Revert "Remove an early reference to reverse direction" This reverts commit c206404377. --------- Co-authored-by: Marek <mail@marek.onl>
2023-09-21 09:41:28 +10:00 · 2023-09-21 09:41:28 +10:00 · d651ee3c16
parent b6a18e9bab
commit d651ee3c16
4 changed files with 131 additions and 25 deletions
--- a/zebra-state/src/constants.rs
+++ b/zebra-state/src/constants.rs
@ -53,13 +53,13 @@ pub(crate) const DATABASE_FORMAT_MINOR_VERSION: u64 = 2;

 /// The database format patch version, incremented each time the on-disk database format has a
 /// significant format compatibility fix.
-pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 1;
+pub(crate) const DATABASE_FORMAT_PATCH_VERSION: u64 = 2;

 /// Returns the highest database version that modifies the subtree index format.
 ///
 /// This version is used by tests to wait for the subtree upgrade to finish.
 pub fn latest_version_for_adding_subtrees() -> Version {
-    Version::parse("25.2.1").expect("Hardcoded version string should be valid.")
+    Version::parse("25.2.2").expect("Hardcoded version string should be valid.")
 }

 /// The name of the file containing the minor and patch database versions.
--- a/zebra-state/src/service/finalized_state/disk_db.rs
+++ b/zebra-state/src/service/finalized_state/disk_db.rs
@ -258,6 +258,7 @@ impl PartialEq for DiskDb {
                self.ephemeral, other.ephemeral,
                "database with same path but different ephemeral configs",
            );
+
            return true;
        }

@ -431,6 +432,46 @@ impl DiskDb {
    ///
    /// Holding this iterator open might delay block commit transactions.
    pub fn zs_range_iter<C, K, V, R>(&self, cf: &C, range: R) -> impl Iterator<Item = (K, V)> + '_
+    where
+        C: rocksdb::AsColumnFamilyRef,
+        K: IntoDisk + FromDisk,
+        V: FromDisk,
+        R: RangeBounds<K>,
+    {
+        self.zs_range_iter_with_direction(cf, range, false)
+    }
+
+    /// Returns a reverse iterator over the items in `cf` in `range`.
+    ///
+    /// Holding this iterator open might delay block commit transactions.
+    ///
+    /// This code is copied from `zs_range_iter()`, but with the mode reversed.
+    pub fn zs_reverse_range_iter<C, K, V, R>(
+        &self,
+        cf: &C,
+        range: R,
+    ) -> impl Iterator<Item = (K, V)> + '_
+    where
+        C: rocksdb::AsColumnFamilyRef,
+        K: IntoDisk + FromDisk,
+        V: FromDisk,
+        R: RangeBounds<K>,
+    {
+        self.zs_range_iter_with_direction(cf, range, true)
+    }
+
+    /// Returns an iterator over the items in `cf` in `range`.
+    ///
+    /// RocksDB iterators are ordered by increasing key bytes by default.
+    /// Otherwise, if `reverse` is `true`, the iterator is ordered by decreasing key bytes.
+    ///
+    /// Holding this iterator open might delay block commit transactions.
+    fn zs_range_iter_with_direction<C, K, V, R>(
+        &self,
+        cf: &C,
+        range: R,
+        reverse: bool,
+    ) -> impl Iterator<Item = (K, V)> + '_
    where
        C: rocksdb::AsColumnFamilyRef,
        K: IntoDisk + FromDisk,
@ -451,40 +492,67 @@ impl DiskDb {

        let start_bound = map_to_vec(range.start_bound());
        let end_bound = map_to_vec(range.end_bound());
-        let range = (start_bound.clone(), end_bound);
+        let range = (start_bound, end_bound);

-        let start_bound_vec =
-            if let Included(ref start_bound) | Excluded(ref start_bound) = start_bound {
-                start_bound.clone()
-            } else {
-                // Actually unused
-                Vec::new()
-            };
-
-        let start_mode = if matches!(start_bound, Unbounded) {
-            // Unbounded iterators start at the first item
-            rocksdb::IteratorMode::Start
-        } else {
-            rocksdb::IteratorMode::From(start_bound_vec.as_slice(), rocksdb::Direction::Forward)
-        };
+        let mode = Self::zs_iter_mode(&range, reverse);

        // Reading multiple items from iterators has caused database hangs,
        // in previous RocksDB versions
        self.db
-            .iterator_cf(cf, start_mode)
+            .iterator_cf(cf, mode)
            .map(|result| result.expect("unexpected database failure"))
            .map(|(key, value)| (key.to_vec(), value))
-            // Skip excluded start bound and empty ranges. The `start_mode` already skips keys
-            // before the start bound.
+            // Skip excluded "from" bound and empty ranges. The `mode` already skips keys
+            // strictly before the "from" bound.
            .skip_while({
                let range = range.clone();
                move |(key, _value)| !range.contains(key)
            })
-            // Take until the excluded end bound is reached, or we're after the included end bound.
+            // Take until the excluded "to" bound is reached,
+            // or we're after the included "to" bound.
            .take_while(move |(key, _value)| range.contains(key))
            .map(|(key, value)| (K::from_bytes(key), V::from_bytes(value)))
    }

+    /// Returns the RocksDB iterator "from" mode for `range`.
+    ///
+    /// RocksDB iterators are ordered by increasing key bytes by default.
+    /// Otherwise, if `reverse` is `true`, the iterator is ordered by decreasing key bytes.
+    fn zs_iter_mode<R>(range: &R, reverse: bool) -> rocksdb::IteratorMode
+    where
+        R: RangeBounds<Vec<u8>>,
+    {
+        use std::ops::Bound::*;
+
+        let from_bound = if reverse {
+            range.end_bound()
+        } else {
+            range.start_bound()
+        };
+
+        match from_bound {
+            Unbounded => {
+                if reverse {
+                    // Reversed unbounded iterators start from the last item
+                    rocksdb::IteratorMode::End
+                } else {
+                    // Unbounded iterators start from the first item
+                    rocksdb::IteratorMode::Start
+                }
+            }
+
+            Included(bound) | Excluded(bound) => {
+                let direction = if reverse {
+                    rocksdb::Direction::Reverse
+                } else {
+                    rocksdb::Direction::Forward
+                };
+
+                rocksdb::IteratorMode::From(bound.as_slice(), direction)
+            }
+        }
+    }
+
    /// The ideal open file limit for Zebra
    const IDEAL_OPEN_FILE_LIMIT: u64 = 1024;

--- a/zebra-state/src/service/finalized_state/disk_format/upgrade/add_subtrees.rs
+++ b/zebra-state/src/service/finalized_state/disk_format/upgrade/add_subtrees.rs
@ -21,6 +21,9 @@ use crate::service::finalized_state::{

 /// Runs disk format upgrade for adding Sapling and Orchard note commitment subtrees to database.
 ///
+/// Trees are added to the database in reverse height order, so that wallets can sync correctly
+/// while the upgrade is running.
+///
 /// Returns `Ok` if the upgrade completed, and `Err` if it was cancelled.
 #[allow(clippy::unwrap_in_result)]
 #[instrument(skip(upgrade_db, cancel_receiver))]
@ -38,13 +41,21 @@ pub fn run(
    // block can't complete multiple level 16 subtrees (or complete an entire subtree by itself).
    // Currently, with 2MB blocks and v4/v5 sapling and orchard output sizes, the subtree index can
    // increase by at most 1 every ~20 blocks.
+    //
+    // # Compatibility
+    //
+    // Because wallets search backwards from the chain tip, subtrees need to be added to the
+    // database in reverse height order. (Tip first, genesis last.)
+    //
+    // Otherwise, wallets that sync during the upgrade will be missing some notes.

    // Generate a list of sapling subtree inputs: previous and current trees, and their end heights.
    let subtrees = upgrade_db
-        .sapling_tree_by_height_range(..=initial_tip_height)
+        .sapling_tree_by_reversed_height_range(..=initial_tip_height)
        // We need both the tree and its previous tree for each shielded block.
        .tuple_windows()
-        .map(|((prev_end_height, prev_tree), (end_height, tree))| {
+        // Because the iterator is reversed, the larger tree is first.
+        .map(|((end_height, tree), (prev_end_height, prev_tree))| {
            (prev_end_height, prev_tree, end_height, tree)
        })
        // Find new subtrees.
@ -65,10 +76,11 @@ pub fn run(

    // Generate a list of orchard subtree inputs: previous and current trees, and their end heights.
    let subtrees = upgrade_db
-        .orchard_tree_by_height_range(..=initial_tip_height)
+        .orchard_tree_by_reversed_height_range(..=initial_tip_height)
        // We need both the tree and its previous tree for each shielded block.
        .tuple_windows()
-        .map(|((prev_end_height, prev_tree), (end_height, tree))| {
+        // Because the iterator is reversed, the larger tree is first.
+        .map(|((end_height, tree), (prev_end_height, prev_tree))| {
            (prev_end_height, prev_tree, end_height, tree)
        })
        // Find new subtrees.
--- a/zebra-state/src/service/finalized_state/zebra_db/shielded.rs
+++ b/zebra-state/src/service/finalized_state/zebra_db/shielded.rs
@ -198,6 +198,19 @@ impl ZebraDb {
        self.db.zs_range_iter(&sapling_trees, range)
    }

+    /// Returns the Sapling note commitment trees in the reversed range, in decreasing height order.
+    #[allow(clippy::unwrap_in_result)]
+    pub fn sapling_tree_by_reversed_height_range<R>(
+        &self,
+        range: R,
+    ) -> impl Iterator<Item = (Height, Arc<sapling::tree::NoteCommitmentTree>)> + '_
+    where
+        R: std::ops::RangeBounds<Height>,
+    {
+        let sapling_trees = self.db.cf_handle("sapling_note_commitment_tree").unwrap();
+        self.db.zs_reverse_range_iter(&sapling_trees, range)
+    }
+
    /// Returns the Sapling note commitment subtree at this `index`.
    ///
    /// # Correctness
@ -331,6 +344,19 @@ impl ZebraDb {
        self.db.zs_range_iter(&orchard_trees, range)
    }

+    /// Returns the Orchard note commitment trees in the reversed range, in decreasing height order.
+    #[allow(clippy::unwrap_in_result)]
+    pub fn orchard_tree_by_reversed_height_range<R>(
+        &self,
+        range: R,
+    ) -> impl Iterator<Item = (Height, Arc<orchard::tree::NoteCommitmentTree>)> + '_
+    where
+        R: std::ops::RangeBounds<Height>,
+    {
+        let orchard_trees = self.db.cf_handle("orchard_note_commitment_tree").unwrap();
+        self.db.zs_reverse_range_iter(&orchard_trees, range)
+    }
+
    /// Returns the Orchard note commitment subtree at this `index`.
    ///
    /// # Correctness