zebra/zebra-scan/src/storage/db/sapling.rs

274 lines
11 KiB
Rust

//! Sapling-specific database reading and writing.
//!
//! The sapling scanner database has the following format:
//!
//! | name | Reading & Writing Key/Values |
//! |--------------------|-------------------------------------------------|
//! | [`SAPLING_TX_IDS`] | [`SaplingTxIdsCf`] & [`WriteSaplingTxIdsBatch`] |
//!
//! And types:
//! `SaplingScannedResult`: same as `transaction::Hash`, but with bytes in display order.
//! `None` is stored as a zero-length array of bytes.
//!
//! `SaplingScannedDatabaseIndex` = `SaplingScanningKey` | `TransactionLocation`
//! `TransactionLocation` = `Height` | `TransactionIndex`
//!
//! This format allows us to efficiently find all the results for each key, and the latest height
//! for each key.
//!
//! If there are no results for a height, we store `None` as the result for the coinbase
//! transaction. This allows is to scan each key from the next height after we restart. We also use
//! this mechanism to store key birthday heights, by storing the height before the birthday as the
//! "last scanned" block.
use std::{
collections::{BTreeMap, HashMap},
ops::RangeBounds,
};
use itertools::Itertools;
use zebra_chain::block::Height;
use zebra_state::{
DiskWriteBatch, SaplingScannedDatabaseEntry, SaplingScannedDatabaseIndex, SaplingScannedResult,
SaplingScanningKey, TransactionIndex, TransactionLocation, TypedColumnFamily, WriteTypedBatch,
};
use crate::storage::{Storage, INSERT_CONTROL_INTERVAL};
/// The name of the sapling transaction IDs result column family.
///
/// This constant should be used so the compiler can detect typos.
pub const SAPLING_TX_IDS: &str = "sapling_tx_ids";
/// The type for reading sapling transaction IDs results from the database.
///
/// This constant should be used so the compiler can detect incorrectly typed accesses to the
/// column family.
pub type SaplingTxIdsCf<'cf> =
TypedColumnFamily<'cf, SaplingScannedDatabaseIndex, Option<SaplingScannedResult>>;
/// The type for writing sapling transaction IDs results from the database.
///
/// This constant should be used so the compiler can detect incorrectly typed accesses to the
/// column family.
pub type WriteSaplingTxIdsBatch<'cf> =
WriteTypedBatch<'cf, SaplingScannedDatabaseIndex, Option<SaplingScannedResult>, DiskWriteBatch>;
impl Storage {
// Reading Sapling database entries
/// Returns the result for a specific database index (key, block height, transaction index).
/// Returns `None` if the result is missing or an empty marker for a birthday or progress
/// height.
pub fn sapling_result_for_index(
&self,
index: &SaplingScannedDatabaseIndex,
) -> Option<SaplingScannedResult> {
self.sapling_tx_ids_cf().zs_get(index).flatten()
}
/// Returns the results for a specific key and block height.
pub fn sapling_results_for_key_and_height(
&self,
sapling_key: &SaplingScanningKey,
height: Height,
) -> BTreeMap<TransactionIndex, Option<SaplingScannedResult>> {
let kh_min = SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, height);
let kh_max = SaplingScannedDatabaseIndex::max_for_key_and_height(sapling_key, height);
self.sapling_results_in_range(kh_min..=kh_max)
.into_iter()
.map(|(result_index, txid)| (result_index.tx_loc.index, txid))
.collect()
}
/// Returns all the results for a specific key, indexed by height.
pub fn sapling_results_for_key(
&self,
sapling_key: &SaplingScanningKey,
) -> BTreeMap<Height, Vec<SaplingScannedResult>> {
let k_min = SaplingScannedDatabaseIndex::min_for_key(sapling_key);
let k_max = SaplingScannedDatabaseIndex::max_for_key(sapling_key);
// Get an iterator of individual transaction results, and turn it into a HashMap by height
let results: HashMap<Height, Vec<Option<SaplingScannedResult>>> = self
.sapling_results_in_range(k_min..=k_max)
.into_iter()
.map(|(index, result)| (index.tx_loc.height, result))
.into_group_map();
// But we want Vec<SaplingScannedResult>, with empty Vecs instead of [None, None, ...]
results
.into_iter()
.map(|(index, vector)| -> (Height, Vec<SaplingScannedResult>) {
(index, vector.into_iter().flatten().collect())
})
.collect()
}
/// Returns all the keys and their last scanned heights.
pub fn sapling_keys_and_last_scanned_heights(&self) -> HashMap<SaplingScanningKey, Height> {
let sapling_tx_ids = self.sapling_tx_ids_cf();
let mut keys = HashMap::new();
let mut last_stored_record = sapling_tx_ids.zs_last_key_value();
while let Some((last_stored_record_index, _result)) = last_stored_record {
let sapling_key = last_stored_record_index.sapling_key.clone();
let height = last_stored_record_index.tx_loc.height;
let prev_height = keys.insert(sapling_key.clone(), height);
assert_eq!(
prev_height, None,
"unexpected duplicate key: keys must only be inserted once \
last_stored_record_index: {last_stored_record_index:?}",
);
// Skip all the results until the next key.
last_stored_record = sapling_tx_ids.zs_prev_key_value_strictly_before(
&SaplingScannedDatabaseIndex::min_for_key(&sapling_key),
);
}
keys
}
/// Returns the Sapling indexes and results in the supplied range.
///
/// Convenience method for accessing raw data with the correct types.
fn sapling_results_in_range(
&self,
range: impl RangeBounds<SaplingScannedDatabaseIndex>,
) -> BTreeMap<SaplingScannedDatabaseIndex, Option<SaplingScannedResult>> {
self.sapling_tx_ids_cf().zs_items_in_range_ordered(range)
}
// Column family convenience methods
/// Returns a typed handle to the `sapling_tx_ids` column family.
pub(crate) fn sapling_tx_ids_cf(&self) -> SaplingTxIdsCf {
SaplingTxIdsCf::new(&self.db, SAPLING_TX_IDS)
.expect("column family was created when database was created")
}
// Writing database entries
//
// To avoid exposing internal types, and accidentally forgetting to write a batch,
// each pub(crate) write method should write an entire batch.
/// Inserts a batch of scanned sapling result for a key and height.
/// If a result already exists for that key, height, and index, it is replaced.
pub(crate) fn insert_sapling_results(
&mut self,
sapling_key: &SaplingScanningKey,
height: Height,
sapling_results: BTreeMap<TransactionIndex, SaplingScannedResult>,
) {
// We skip key heights that have one or more results, so the results for each key height
// must be in a single batch.
let mut batch = self.sapling_tx_ids_cf().new_batch_for_writing();
// Every `INSERT_CONTROL_INTERVAL` we add a new entry to the scanner database for each key
// so we can track progress made in the last interval even if no transaction was yet found.
let needs_control_entry =
height.0 % INSERT_CONTROL_INTERVAL == 0 && sapling_results.is_empty();
// Add scanner progress tracking entry for key.
// Defensive programming: add the tracking entry first, so that we don't accidentally
// overwrite real results with it. (This is currently prevented by the empty check.)
if needs_control_entry {
batch = batch.insert_sapling_height(sapling_key, height);
}
for (index, sapling_result) in sapling_results {
let index = SaplingScannedDatabaseIndex {
sapling_key: sapling_key.clone(),
tx_loc: TransactionLocation::from_parts(height, index),
};
let entry = SaplingScannedDatabaseEntry {
index,
value: Some(sapling_result),
};
batch = batch.zs_insert(&entry.index, &entry.value);
}
batch
.write_batch()
.expect("unexpected database write failure");
}
/// Insert a sapling scanning `key`, and mark all heights before `birthday_height` so they
/// won't be scanned.
///
/// If a result already exists for the coinbase transaction at the height before the birthday,
/// it is replaced with an empty result. This can happen if the user increases the birthday
/// height.
///
/// TODO: ignore incorrect changes to birthday heights
pub(crate) fn insert_sapling_key(
&mut self,
sapling_key: &SaplingScanningKey,
birthday_height: Option<Height>,
) {
let min_birthday_height = self.min_sapling_birthday_height();
// The birthday height must be at least the minimum height for that pool.
let birthday_height = birthday_height
.unwrap_or(min_birthday_height)
.max(min_birthday_height);
// And we want to skip up to the height before it.
let skip_up_to_height = birthday_height.previous().unwrap_or(Height::MIN);
// It's ok to write some keys and not others during shutdown, so each key can get its own
// batch. (They will be re-written on startup anyway.)
//
// TODO: ignore incorrect changes to birthday heights,
// and redundant birthday heights
self.sapling_tx_ids_cf()
.new_batch_for_writing()
.insert_sapling_height(sapling_key, skip_up_to_height)
.write_batch()
.expect("unexpected database write failure");
}
/// Delete the results of sapling scanning `keys`, if they exist
pub(crate) fn delete_sapling_results(&mut self, keys: Vec<SaplingScanningKey>) {
let mut batch = self.sapling_tx_ids_cf().new_batch_for_writing();
for key in &keys {
let from = SaplingScannedDatabaseIndex::min_for_key(key);
let until_strictly_before = SaplingScannedDatabaseIndex::max_for_key(key);
batch = batch
.zs_delete_range(&from, &until_strictly_before)
// TODO: convert zs_delete_range() to take std::ops::RangeBounds
.zs_delete(&until_strictly_before);
}
batch
.write_batch()
.expect("unexpected database write failure");
}
}
/// Utility trait for inserting sapling heights into a WriteSaplingTxIdsBatch.
trait InsertSaplingHeight {
fn insert_sapling_height(self, sapling_key: &SaplingScanningKey, height: Height) -> Self;
}
impl<'cf> InsertSaplingHeight for WriteSaplingTxIdsBatch<'cf> {
/// Insert sapling height with no results.
///
/// If a result already exists for the coinbase transaction at that height,
/// it is replaced with an empty result. This should never happen.
fn insert_sapling_height(self, sapling_key: &SaplingScanningKey, height: Height) -> Self {
let index = SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, height);
// TODO: assert that we don't overwrite any entries here.
self.zs_insert(&index, &None)
}
}