zebra/zebra-scan/src/scan.rs

//! The scanner task and scanning APIs.
use std::{
collections::{BTreeMap, HashMap},
sync::{
mpsc::{Receiver, TryRecvError},
Arc,
},
time::Duration,
};
use color_eyre::{eyre::eyre, Report};
use itertools::Itertools;
use tokio::task::JoinHandle;
use tower::{buffer::Buffer, util::BoxService, Service, ServiceExt};
use tracing::Instrument;
use zcash_client_backend::{
data_api::ScannedBlock,
encoding::decode_extended_full_viewing_key,
proto::compact_formats::{
ChainMetadata, CompactBlock, CompactSaplingOutput, CompactSaplingSpend, CompactTx,
},
scanning::{ScanError, ScanningKey},
};
use zcash_primitives::{
constants::*,
sapling::SaplingIvk,
zip32::{AccountId, DiversifiableFullViewingKey, Scope},
};
use zebra_chain::{
block::{Block, Height},
chain_tip::ChainTip,
diagnostic::task::WaitForPanics,
parameters::Network,
serialization::ZcashSerialize,
transaction::Transaction,
};
use zebra_state::{ChainTipChange, SaplingScannedResult, TransactionIndex};
use crate::{
init::ScanTaskCommand,
storage::{SaplingScanningKey, Storage},
Config, ScanTask,
};
/// The generic state type used by the scanner.
pub type State = Buffer<
BoxService<zebra_state::Request, zebra_state::Response, zebra_state::BoxError>,
zebra_state::Request,
>;
/// Wait a few seconds at startup for some blocks to get verified.
///
/// But if the network is slow, the state might still be empty after this wait.
const INITIAL_WAIT: Duration = Duration::from_secs(15);
/// The amount of time between checking for new blocks and starting new scans.
///
/// This is just under half the target block interval.
const CHECK_INTERVAL: Duration = Duration::from_secs(30);
/// We log a progress message at the info level after this many blocks.
const INFO_LOG_INTERVAL: u32 = 10_000;
/// Start a scan task that reads blocks from `state`, scans them with the configured keys in
/// `storage`, and then writes the results to `storage`.
pub async fn start(
state: State,
chain_tip_change: ChainTipChange,
storage: Storage,
cmd_receiver: Receiver<ScanTaskCommand>,
) -> Result<(), Report> {
let network = storage.network();
let sapling_activation_height = storage.min_sapling_birthday_height();
// Do not scan or notify while the tip is below the Sapling activation height.
loop {
let tip_height = tip_height(state.clone()).await?;
if tip_height < sapling_activation_height {
info!("scanner is waiting for sapling activation. Current tip: {}, Sapling activation: {}", tip_height.0, sapling_activation_height.0);
tokio::time::sleep(CHECK_INTERVAL).await;
continue;
}
break;
}
// Read keys from the storage on disk, which can block async execution.
let key_storage = storage.clone();
let key_heights = tokio::task::spawn_blocking(move || key_storage.sapling_keys_last_heights())
.wait_for_panics()
.await;
let key_heights = Arc::new(key_heights);
let mut height = get_min_height(&key_heights).unwrap_or(sapling_activation_height);
// Parse and convert keys once, then use them to scan all blocks.
// There is some cryptography here, but it should be fast even with thousands of keys.
let parsed_keys: HashMap<
SaplingScanningKey,
(Vec<DiversifiableFullViewingKey>, Vec<SaplingIvk>),
> = key_heights
.keys()
.map(|key| {
let parsed_keys = sapling_key_to_scan_block_keys(key, network)?;
Ok::<_, Report>((key.clone(), parsed_keys))
})
.try_collect()?;
let mut parsed_keys = Arc::new(parsed_keys);
// Give empty states time to verify some blocks before we start scanning.
tokio::time::sleep(INITIAL_WAIT).await;
loop {
parsed_keys = ScanTask::process_msgs(&cmd_receiver, parsed_keys)?;
let scanned_height = scan_height_and_store_results(
height,
state.clone(),
chain_tip_change.clone(),
storage.clone(),
key_heights.clone(),
parsed_keys.clone(),
)
.await?;
// If we've reached the tip, sleep for a while, then try to get the same block again.
if scanned_height.is_none() {
tokio::time::sleep(CHECK_INTERVAL).await;
continue;
}
height = height
.next()
.expect("a valid blockchain never reaches the max height");
}
}
impl ScanTask {
/// Accepts the scan task's `parsed_keys` collection and a reference to the command channel
/// receiver.
///
/// Processes messages in the scan task channel, updating `parsed_keys` if required.
///
/// Returns the updated `parsed_keys`.
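///
/// # Example
///
/// A minimal sketch of the sending side of this channel, assuming `cmd_sender` is the
/// `std::sync::mpsc` sender paired with `cmd_receiver`, and that `done_tx` is a Tokio oneshot
/// sender (matching the single `done_tx.send(())` notification below):
///
/// ```ignore
/// // Ask the scan task to stop scanning with `key`.
/// let (done_tx, done_rx) = tokio::sync::oneshot::channel();
/// cmd_sender.send(ScanTaskCommand::RemoveKeys {
///     keys: vec![key],
///     done_tx,
/// })?;
///
/// // The next pass of the scan loop calls `process_msgs()`, removes the key, and notifies us.
/// done_rx.await?;
/// ```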
fn process_msgs(
cmd_receiver: &Receiver<ScanTaskCommand>,
mut parsed_keys: Arc<
HashMap<SaplingScanningKey, (Vec<DiversifiableFullViewingKey>, Vec<SaplingIvk>)>,
>,
) -> Result<
Arc<HashMap<SaplingScanningKey, (Vec<DiversifiableFullViewingKey>, Vec<SaplingIvk>)>>,
Report,
> {
loop {
let cmd = match cmd_receiver.try_recv() {
Ok(cmd) => cmd,
Err(TryRecvError::Empty) => break,
Err(TryRecvError::Disconnected) => {
// Return early if the sender has been dropped.
return Err(eyre!("command channel disconnected"));
}
};
match cmd {
ScanTaskCommand::RemoveKeys { done_tx, keys } => {
// TODO: Replace with Arc::unwrap_or_clone() when it stabilises:
// https://github.com/rust-lang/rust/issues/93610
let mut updated_parsed_keys =
Arc::try_unwrap(parsed_keys).unwrap_or_else(|arc| (*arc).clone());
for key in keys {
updated_parsed_keys.remove(&key);
}
parsed_keys = Arc::new(updated_parsed_keys);
// Ignore send errors for the done notification
let _ = done_tx.send(());
}
_ => continue,
}
}
Ok(parsed_keys)
}
}
/// Get the block at `height` from `state`, scan it with the keys in `parsed_keys`, and store the
/// results in `storage`. Keys whose entry in `key_last_scanned_heights` is at or above `height`
/// have already scanned this block, so they are skipped.
///
/// Returns:
/// - `Ok(Some(height))` if the height was scanned,
/// - `Ok(None)` if the height was not in the state, and
/// - `Err(error)` on fatal errors.
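///
/// # Example
///
/// A minimal sketch of how a caller drives this function; it mirrors the loop in `start()`
/// above, using the same variable names:
///
/// ```ignore
/// match scan_height_and_store_results(
///     height,
///     state.clone(),
///     chain_tip_change.clone(),
///     storage.clone(),
///     key_heights.clone(),
///     parsed_keys.clone(),
/// )
/// .await?
/// {
///     // The height was scanned: move on to the next height.
///     Some(scanned) => height = scanned.next().expect("not the maximum height"),
///     // The height is not in the state yet: wait, then retry the same height.
///     None => tokio::time::sleep(CHECK_INTERVAL).await,
/// }
/// ```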
pub async fn scan_height_and_store_results(
height: Height,
mut state: State,
chain_tip_change: ChainTipChange,
storage: Storage,
key_last_scanned_heights: Arc<HashMap<SaplingScanningKey, Height>>,
parsed_keys: Arc<
HashMap<SaplingScanningKey, (Vec<DiversifiableFullViewingKey>, Vec<SaplingIvk>)>,
>,
) -> Result<Option<Height>, Report> {
let network = storage.network();
// Only log at the info level once every `INFO_LOG_INTERVAL` blocks.
//
// TODO: also log progress every 5 minutes once we reach the tip?
let is_info_log = height.0 % INFO_LOG_INTERVAL == 0;
// Get a block from the state.
// We can't use ServiceExt::oneshot() here, because it causes lifetime errors in init().
let block = state
.ready()
.await
.map_err(|e| eyre!(e))?
.call(zebra_state::Request::Block(height.into()))
.await
.map_err(|e| eyre!(e))?;
let block = match block {
zebra_state::Response::Block(Some(block)) => block,
zebra_state::Response::Block(None) => return Ok(None),
_ => unreachable!("unmatched response to a state::Block request"),
};
// Scan it with all the keys.
//
// TODO: scan each key in parallel (after MVP?)
for (key_num, (sapling_key, last_scanned_height)) in key_last_scanned_heights.iter().enumerate()
{
// Only scan heights that have not already been scanned for this key
if height <= *last_scanned_height {
continue;
}
// # Security
//
// We can't log `sapling_key` here because it is a private viewing key. Anyone who reads
// the logs could use the key to view those transactions.
if is_info_log {
info!(
"Scanning the blockchain for key {}, started at block {:?}, now at block {:?}, current tip {:?}",
key_num, last_scanned_height.next().expect("height is not maximum").as_usize(),
height.as_usize(),
chain_tip_change.latest_chain_tip().best_tip_height().expect("we should have a tip to scan").as_usize(),
);
}
// Get the pre-parsed keys for this configured key.
let (dfvks, ivks) = parsed_keys.get(sapling_key).cloned().unwrap_or_default();
let sapling_key = sapling_key.clone();
let block = block.clone();
let mut storage = storage.clone();
// We use a dummy size for the Sapling note commitment tree.
//
// We can't set the size to zero, because the underlying scanning function would return
// `zcash_client_backend::scanning::ScanError::TreeSizeUnknown`.
//
// We also can't set it close to zero, because the scanner subtracts the number of notes
// in the block, and panics with "attempt to subtract with overflow". The number of
// notes in a block must be less than this value; this is a consensus rule.
//
// TODO: use the real sapling tree size: `zs::Response::SaplingTree().position() + 1`
let sapling_tree_size = 1 << 16;
tokio::task::spawn_blocking(move || {
let dfvk_res =
scan_block(network, &block, sapling_tree_size, &dfvks).map_err(|e| eyre!(e))?;
let ivk_res =
scan_block(network, &block, sapling_tree_size, &ivks).map_err(|e| eyre!(e))?;
let dfvk_res = scanned_block_to_db_result(dfvk_res);
let ivk_res = scanned_block_to_db_result(ivk_res);
storage.add_sapling_results(&sapling_key, height, dfvk_res);
storage.add_sapling_results(&sapling_key, height, ivk_res);
Ok::<_, Report>(())
})
.wait_for_panics()
.await?;
}
Ok(Some(height))
}
/// Returns the transactions from `block` belonging to the given `scanning_keys`.
/// This list of keys should come from a single configured `SaplingScanningKey`.
///
/// For example, there are two individual viewing keys for most shielded transfers:
/// - the payment (external) key, and
/// - the change (internal) key.
///
/// # Performance / Hangs
///
/// This function does CPU-intensive trial decryption of every output with every key, so it must
/// be called inside spawn_blocking() in async code.
///
/// TODO:
/// - Pass the real `sapling_tree_size` parameter from the state.
/// - Add other prior block metadata.
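///
/// # Example
///
/// A minimal sketch, assuming `block` is a verified block from the state, `dfvk` is a parsed
/// `DiversifiableFullViewingKey`, and the tree size is the same placeholder the scan task uses:
///
/// ```ignore
/// let scanned = scan_block(Network::Mainnet, &block, 1 << 16, &[dfvk])?;
///
/// for tx in scanned.transactions() {
///     // `tx.txid` identifies a transaction that is relevant to `dfvk`.
///     info!("found relevant transaction at index {}", tx.index);
/// }
/// ```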
pub fn scan_block<K: ScanningKey>(
network: Network,
block: &Block,
sapling_tree_size: u32,
scanning_keys: &[K],
) -> Result<ScannedBlock<K::Nf>, ScanError> {
// TODO: Implement a check that returns early when the block height is below the Sapling
// activation height.
let network: zcash_primitives::consensus::Network = network.into();
let chain_metadata = ChainMetadata {
sapling_commitment_tree_size: sapling_tree_size,
// Orchard is not supported at the moment so the tree size can be 0.
orchard_commitment_tree_size: 0,
};
// Use a dummy `AccountId` as we don't use accounts yet.
let dummy_account = AccountId::from(0);
let scanning_keys: Vec<_> = scanning_keys
.iter()
.map(|key| (&dummy_account, key))
.collect();
zcash_client_backend::scanning::scan_block(
&network,
block_to_compact(block, chain_metadata),
scanning_keys.as_slice(),
// Ignore whether notes are change from a viewer's own spends for now.
&[],
// Ignore previous blocks for now.
None,
)
}
/// Converts a Zebra-format scanning key into some `scan_block()` keys.
///
/// Currently only accepts extended full viewing keys, and returns both their diversifiable full
/// viewing key and their individual viewing key, for testing purposes.
///
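/// # Example
///
/// A minimal sketch, assuming `key` holds a Bech32-encoded Sapling extended full viewing key
/// for the configured network (for example, a key exported from a wallet):
///
/// ```ignore
/// let (dfvks, ivks) = sapling_key_to_scan_block_keys(&key, Network::Mainnet)?;
///
/// // One diversifiable full viewing key, plus its external and internal individual viewing keys.
/// assert_eq!(dfvks.len(), 1);
/// assert_eq!(ivks.len(), 2);
/// ```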
// TODO: work out what string format is used for SaplingIvk, if any, and support it here
// performance: stop returning both the dfvk and ivk for the same key
// TODO: use `ViewingKey::parse` from zebra-chain instead
pub fn sapling_key_to_scan_block_keys(
sapling_key: &SaplingScanningKey,
network: Network,
) -> Result<(Vec<DiversifiableFullViewingKey>, Vec<SaplingIvk>), Report> {
let hrp = if network.is_a_test_network() {
// Assume custom testnets use the same HRP as the default testnet
//
// TODO: add the regtest HRP here
testnet::HRP_SAPLING_EXTENDED_FULL_VIEWING_KEY
} else {
mainnet::HRP_SAPLING_EXTENDED_FULL_VIEWING_KEY
};
let efvk = decode_extended_full_viewing_key(hrp, sapling_key).map_err(|e| eyre!(e))?;
// Just return all the keys for now, so we can be sure our code supports them.
let dfvk = efvk.to_diversifiable_full_viewing_key();
let eivk = dfvk.to_ivk(Scope::External);
let iivk = dfvk.to_ivk(Scope::Internal);
Ok((vec![dfvk], vec![eivk, iivk]))
}
/// Converts a zebra block and chain metadata into a compact block.
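///
/// # Example
///
/// A minimal sketch, assuming `block` is a verified block and using the same placeholder chain
/// metadata as the scan task:
///
/// ```ignore
/// let compact = block_to_compact(
///     &block,
///     ChainMetadata {
///         sapling_commitment_tree_size: 1 << 16,
///         orchard_commitment_tree_size: 0,
///     },
/// );
///
/// // Every transaction in the block has a compact counterpart.
/// assert_eq!(compact.vtx.len(), block.transactions.len());
/// ```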
pub fn block_to_compact(block: &Block, chain_metadata: ChainMetadata) -> CompactBlock {
CompactBlock {
height: block
.coinbase_height()
.expect("verified block should have a valid height")
.0
.into(),
// TODO: performance: look up the block hash from the state rather than recalculating it
hash: block.hash().bytes_in_display_order().to_vec(),
prev_hash: block
.header
.previous_block_hash
.bytes_in_display_order()
.to_vec(),
time: block
.header
.time
.timestamp()
.try_into()
.expect("unsigned 32-bit times should work until 2105"),
header: block
.header
.zcash_serialize_to_vec()
.expect("verified block should serialize"),
vtx: block
.transactions
.iter()
.cloned()
.enumerate()
.map(transaction_to_compact)
.collect(),
chain_metadata: Some(chain_metadata),
// The protocol version is used for the gRPC wire format, so it isn't needed here.
proto_version: 0,
}
}
/// Converts a zebra transaction into a compact transaction.
fn transaction_to_compact((index, tx): (usize, Arc<Transaction>)) -> CompactTx {
CompactTx {
index: index
.try_into()
.expect("tx index in block should fit in u64"),
// TODO: performance: look up the tx hash from the state rather than recalculating it
hash: tx.hash().bytes_in_display_order().to_vec(),
// `fee` is not checked by the `scan_block` function. It is allowed to be unset.
// <https://docs.rs/zcash_client_backend/latest/zcash_client_backend/proto/compact_formats/struct.CompactTx.html#structfield.fee>
fee: 0,
spends: tx
.sapling_nullifiers()
.map(|nf| CompactSaplingSpend {
nf: <[u8; 32]>::from(*nf).to_vec(),
})
.collect(),
// > output encodes the cmu field, ephemeralKey field, and a 52-byte prefix of the encCiphertext field of a Sapling Output
//
// <https://docs.rs/zcash_client_backend/latest/zcash_client_backend/proto/compact_formats/struct.CompactSaplingOutput.html>
outputs: tx
.sapling_outputs()
.map(|output| CompactSaplingOutput {
cmu: output.cm_u.to_bytes().to_vec(),
ephemeral_key: output
.ephemeral_key
.zcash_serialize_to_vec()
.expect("verified output should serialize successfully"),
ciphertext: output
.enc_ciphertext
.zcash_serialize_to_vec()
.expect("verified output should serialize successfully")
.into_iter()
.take(52)
.collect(),
})
.collect(),
// `actions` is not checked by the `scan_block` function.
actions: vec![],
}
}
/// Converts a scanned block into a map of scanner database results, keyed by transaction index.
fn scanned_block_to_db_result<Nf>(
scanned_block: ScannedBlock<Nf>,
) -> BTreeMap<TransactionIndex, SaplingScannedResult> {
scanned_block
.transactions()
.iter()
.map(|tx| {
(
TransactionIndex::from_usize(tx.index),
SaplingScannedResult::from_bytes_in_display_order(*tx.txid.as_ref()),
)
})
.collect()
}
/// Returns the minimum height in a `key_heights` map, if there is one.
fn get_min_height(map: &HashMap<String, Height>) -> Option<Height> {
map.values().cloned().min()
}
/// Returns the state tip height, or the genesis block height if the state has no tip.
async fn tip_height(mut state: State) -> Result<Height, Report> {
let tip = state
.ready()
.await
.map_err(|e| eyre!(e))?
.call(zebra_state::Request::Tip)
.await
.map_err(|e| eyre!(e))?;
match tip {
zebra_state::Response::Tip(Some((height, _hash))) => Ok(height),
zebra_state::Response::Tip(None) => Ok(Height(0)),
_ => unreachable!("unmatched response to a state::Tip request"),
}
}
/// Initialize the scanner based on its config, and spawn a task for it.
///
/// TODO: add a test for this function.
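///
/// # Example
///
/// A minimal sketch of how a caller might start the scanner, assuming it already has a state
/// service, a `ChainTipChange`, and the sending half of the command channel (the names
/// `cmd_sender` and `scan_task_handle` are illustrative):
///
/// ```ignore
/// let (cmd_sender, cmd_receiver) = std::sync::mpsc::channel();
/// let scan_task_handle = spawn_init(&config, network, state, chain_tip_change, cmd_receiver);
/// ```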
pub fn spawn_init(
config: &Config,
network: Network,
state: State,
chain_tip_change: ChainTipChange,
cmd_receiver: Receiver<ScanTaskCommand>,
) -> JoinHandle<Result<(), Report>> {
let config = config.clone();
// TODO: spawn an entirely new executor here, to avoid timing attacks.
tokio::spawn(init(config, network, state, chain_tip_change, cmd_receiver).in_current_span())
}
/// Initialize the scanner based on its config.
///
/// TODO: add a test for this function.
pub async fn init(
config: Config,
network: Network,
state: State,
chain_tip_change: ChainTipChange,
cmd_receiver: Receiver<ScanTaskCommand>,
) -> Result<(), Report> {
let storage = tokio::task::spawn_blocking(move || Storage::new(&config, network, false))
.wait_for_panics()
.await;
// TODO: add more tasks here?
start(state, chain_tip_change, storage, cmd_receiver).await
}