change(state): Stop using iterators on column families with many deletions (#7663)

Co-authored-by: Arya <aryasolhi@gmail.com>
This commit is contained in:
teor 2023-10-05 07:36:06 +10:00 committed by GitHub
parent a9bd127121
commit fcc7bf4e33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 103 additions and 63 deletions

View File

@ -10,8 +10,12 @@ use std::collections::BTreeMap;
use bincode::Options;
use zebra_chain::{
amount::NonNegative, block::Height, history_tree::NonEmptyHistoryTree, parameters::Network,
primitives::zcash_history, value_balance::ValueBalance,
amount::NonNegative,
block::Height,
history_tree::{HistoryTree, NonEmptyHistoryTree},
parameters::Network,
primitives::zcash_history,
value_balance::ValueBalance,
};
use crate::service::finalized_state::disk_format::{FromDisk, IntoDisk};
@ -78,3 +82,10 @@ impl FromDisk for NonEmptyHistoryTree {
.expect("deserialization format should match the serialization format used by IntoDisk")
}
}
// We don't write empty history trees to disk, so we know this one is non-empty.
impl FromDisk for HistoryTree {
fn from_bytes(bytes: impl AsRef<[u8]>) -> Self {
NonEmptyHistoryTree::from_bytes(bytes).into()
}
}

View File

@ -14,59 +14,74 @@
use std::{borrow::Borrow, collections::HashMap, sync::Arc};
use zebra_chain::{
amount::NonNegative,
history_tree::{HistoryTree, NonEmptyHistoryTree},
transparent,
value_balance::ValueBalance,
amount::NonNegative, history_tree::HistoryTree, transparent, value_balance::ValueBalance,
};
use crate::{
request::SemanticallyVerifiedBlockWithTrees,
service::finalized_state::{
disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk},
disk_format::RawBytes,
zebra_db::ZebraDb,
},
BoxError, SemanticallyVerifiedBlock,
};
impl ZebraDb {
/// Returns the ZIP-221 history tree of the finalized tip or `None`
/// if it does not exist yet in the state (pre-Heartwood).
/// Returns the ZIP-221 history tree of the finalized tip.
///
/// If history trees have not been activated yet (pre-Heartwood), or the state is empty,
/// returns an empty history tree.
pub fn history_tree(&self) -> Arc<HistoryTree> {
if self.finalized_tip_height().is_some() {
let history_tree_cf = self.db.cf_handle("history_tree").unwrap();
// # Concurrency
//
// There is only one tree in this column family, which is atomically updated by a block
// write batch (database transaction). If this update runs between the height read and
// the tree read, the height will be wrong, and the tree will be missing.
// That could cause consensus bugs.
//
// Instead, always read the last tree in the column family, regardless of height.
//
// See ticket #7581 for more details.
//
// TODO: this concurrency bug will be permanently fixed in PR #7392,
// by changing the block update to overwrite the tree, rather than deleting it.
//
// # Forwards Compatibility
//
// This code can read the column family format in 1.2.0 and earlier (tip height key),
// and after PR #7392 is merged (empty key).
let history_tree: Option<NonEmptyHistoryTree> = self
.db
.zs_last_key_value(&history_tree_cf)
// RawBytes will deserialize both Height and `()` (empty) keys.
.map(|(_key, value): (RawBytes, _)| value);
if let Some(non_empty_tree) = history_tree {
return Arc::new(HistoryTree::from(non_empty_tree));
}
if self.is_empty() {
return Arc::<HistoryTree>::default();
}
Default::default()
// # Performance
//
// Using `zs_last_key_value()` on this column family significantly reduces sync performance
// (#7618). But it seems to work for other column families. This is probably because
// `zs_delete()` is also used on the same column family:
// <https://tracker.ceph.com/issues/55324>
// <https://jira.mariadb.org/browse/MDEV-19670>
//
// See also the performance notes in:
// <https://github.com/facebook/rocksdb/wiki/Iterator#iterating-upper-bound-and-lower-bound>
//
// This bug will be fixed by PR #7392, because it changes this column family to update the
// existing key, rather than deleting old keys.
let history_tree_cf = self.db.cf_handle("history_tree").unwrap();
// # Forwards Compatibility
//
// This code can read the column family format in 1.2.0 and earlier (tip height key),
// and after PR #7392 is merged (empty key). The height-based code can be removed when
// versions 1.2.0 and earlier are no longer supported.
//
// # Concurrency
//
// There is only one tree in this column family, which is atomically updated by a block
// write batch (database transaction). If this update runs between the height read and
// the tree read, the height will be wrong, and the tree will be missing.
// That could cause consensus bugs.
//
// Instead, try reading the new empty-key format (from PR #7392) first,
// then read the old format if needed.
//
// See ticket #7581 for more details.
//
// TODO: this concurrency bug will be permanently fixed in PR #7392,
// by changing the block update to overwrite the tree, rather than deleting it.
let mut history_tree: Option<Arc<HistoryTree>> = self.db.zs_get(&history_tree_cf, &());
if history_tree.is_none() {
let tip_height = self
.finalized_tip_height()
.expect("just checked for an empty database");
history_tree = self.db.zs_get(&history_tree_cf, &tip_height);
}
history_tree.unwrap_or_default()
}
/// Returns the stored `ValueBalance` for the best chain at the finalized tip height.

View File

@ -30,7 +30,6 @@ use crate::{
request::SemanticallyVerifiedBlockWithTrees,
service::finalized_state::{
disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk},
disk_format::RawBytes,
zebra_db::ZebraDb,
},
BoxError, SemanticallyVerifiedBlock,
@ -85,34 +84,49 @@ impl ZebraDb {
/// Returns the Sprout note commitment tree of the finalized tip
/// or the empty tree if the state is empty.
pub fn sprout_tree(&self) -> Arc<sprout::tree::NoteCommitmentTree> {
if self.finalized_tip_height().is_none() {
return Default::default();
if self.is_empty() {
return Arc::<sprout::tree::NoteCommitmentTree>::default();
}
let sprout_nct_handle = self.db.cf_handle("sprout_note_commitment_tree").unwrap();
// # Performance
//
// Using `zs_last_key_value()` on this column family significantly reduces sync performance
// (#7618). This is probably because `zs_delete()` is also used on the same column family.
// See the comment in `ZebraDb::history_tree()` for details.
//
// This bug will be fixed by PR #7392, because it changes this column family to update the
// existing key, rather than deleting old keys.
let sprout_tree_cf = self.db.cf_handle("sprout_note_commitment_tree").unwrap();
// # Concurrency
//
// There is only one tree in this column family, which is atomically updated by a block
// write batch (database transaction). If this update runs between the height read and the
// tree read, the height will be wrong, and the tree will be missing.
//
// Instead, always read the last tree in the column family, regardless of height.
//
// See ticket #7581 for more details.
//
// TODO: this concurrency bug will be permanently fixed in PR #7392,
// by changing the block update to overwrite the tree, rather than deleting it.
//
// # Forwards Compatibility
//
// This code can read the column family format in 1.2.0 and earlier (tip height key),
// and after PR #7392 is merged (empty key).
self.db
.zs_last_key_value(&sprout_nct_handle)
// RawBytes will deserialize both Height and `()` (empty) keys.
.map(|(_key, value): (RawBytes, _)| Arc::new(value))
.expect("Sprout note commitment tree must exist if there is a finalized tip")
// and after PR #7392 is merged (empty key). The height-based code can be removed when
// versions 1.2.0 and earlier are no longer supported.
//
// # Concurrency
//
// There is only one tree in this column family, which is atomically updated by a block
// write batch (database transaction). If this update runs between the height read and
// the tree read, the height will be wrong, and the tree will be missing.
// That could cause consensus bugs.
//
// See the comment in `ZebraDb::history_tree()` for details.
//
// TODO: this concurrency bug will be permanently fixed in PR #7392,
// by changing the block update to overwrite the tree, rather than deleting it.
let mut sprout_tree: Option<Arc<sprout::tree::NoteCommitmentTree>> =
self.db.zs_get(&sprout_tree_cf, &());
if sprout_tree.is_none() {
let tip_height = self
.finalized_tip_height()
.expect("just checked for an empty database");
sprout_tree = self.db.zs_get(&sprout_tree_cf, &tip_height);
}
sprout_tree.expect("Sprout note commitment tree must exist if there is a finalized tip")
}
/// Returns the Sprout note commitment tree matching the given anchor.