Merge branch 'main' into send-trees-to-from-nonfin-to-fin-state

Marek 2022-07-26 22:15:00 +02:00
commit d6c53c5ea9
13 changed files with 615 additions and 192 deletions

View File

@ -63,6 +63,12 @@ jobs:
steps:
- run: 'echo "No build required"'
test-update-sync:
name: Zebra tip update / Run update-to-tip test
runs-on: ubuntu-latest
steps:
- run: 'echo "No build required"'
lightwalletd-rpc-test:
name: Zebra tip JSON-RPC / Run fully-synced-rpc test
runs-on: ubuntu-latest

View File

@ -309,6 +309,33 @@ jobs:
disk_suffix: tip
height_grep_text: 'current_height.*=.*Height'
# Test that Zebra can sync to the chain tip, using a cached Zebra tip state,
# without launching `lightwalletd`.
#
# Runs:
# - after every PR is merged to `main`
# - on every PR update
#
# If the state version has changed, waits for the new cached state to be created.
# Otherwise, if the state rebuild was skipped, runs immediately after the build job.
test-update-sync:
name: Zebra tip update
needs: test-full-sync
uses: ./.github/workflows/deploy-gcp-tests.yml
if: ${{ !cancelled() && !failure() && github.event.inputs.regenerate-disks != 'true' && github.event.inputs.run-full-sync != 'true' }}
with:
app_name: zebrad
test_id: update-to-tip
test_description: Test syncing to tip with a Zebra tip state
test_variables: '-e TEST_UPDATE_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache'
needs_zebra_state: true
# TODO: do we want to update the disk on every PR, to increase CI speed?
saves_to_disk: false
disk_suffix: tip
root_state_path: '/var/cache'
# TODO: do we also want to test the `zebrad` part of the `lwd-cache`? (But not update it.)
zebra_state_dir: 'zebrad-cache'
# Test that Zebra can answer a synthetic RPC call, using a cached Zebra tip state
#
# Runs:
@ -410,6 +437,7 @@ jobs:
test_variables: '-e TEST_LWD_UPDATE_SYNC=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache -e LIGHTWALLETD_DATA_DIR=/var/cache/lwd-cache'
needs_zebra_state: true
needs_lwd_state: true
# TODO: do we want to update the disk on every PR, to increase CI speed?
saves_to_disk: false
disk_prefix: lwd-cache
disk_suffix: tip

View File

@ -38,6 +38,12 @@ case "$1" in
cargo test --locked --release --features "test_sync_to_mandatory_checkpoint_${NETWORK,,},lightwalletd-grpc-tests" --package zebrad --test acceptance -- --nocapture --include-ignored "sync_to_mandatory_checkpoint_${NETWORK,,}"
# TODO: replace with $ZEBRA_CACHED_STATE_DIR in Rust and workflows
ls -lh "/zebrad-cache"/*/* || (echo "No /zebrad-cache/*/*"; ls -lhR "/zebrad-cache" | head -50 || echo "No /zebrad-cache directory")
elif [[ "$TEST_UPDATE_SYNC" -eq "1" ]]; then
# Run a Zebra sync starting at the cached tip, and syncing to the latest tip.
#
# List directory used by test
ls -lh "$ZEBRA_CACHED_STATE_DIR"/*/* || (echo "No $ZEBRA_CACHED_STATE_DIR/*/*"; ls -lhR "$ZEBRA_CACHED_STATE_DIR" | head -50 || echo "No $ZEBRA_CACHED_STATE_DIR directory")
cargo test --locked --release --features lightwalletd-grpc-tests --package zebrad --test acceptance -- --nocapture --include-ignored zebrad_update_sync
elif [[ "$TEST_CHECKPOINT_SYNC" -eq "1" ]]; then
# Run a Zebra sync starting at the cached mandatory checkpoint, and syncing past it.
#

View File

@ -56,6 +56,7 @@ humantime = "2.1.0"
displaydoc = "0.2.3"
static_assertions = "1.1.0"
thiserror = "1.0.31"
tracing = "0.1.31"
# Serialization
hex = { version = "0.4.3", features = ["serde"] }

View File

@ -0,0 +1,120 @@
//! Tracing the execution time of functions.
//!
//! TODO: also trace polling time for futures, using a `Future` wrapper
use std::time::{Duration, Instant};
use crate::fmt::duration_short;
/// The default minimum duration for info-level messages.
pub const DEFAULT_MIN_INFO_TIME: Duration = Duration::from_secs(5);
/// The default minimum duration for warning messages.
pub const DEFAULT_MIN_WARN_TIME: Duration = Duration::from_secs(20);
/// A guard that logs code execution time when dropped.
#[derive(Debug)]
pub struct CodeTimer {
/// The time that the code started executing.
start: Instant,
/// The minimum duration for info-level messages.
min_info_time: Duration,
/// The minimum duration for warning messages.
min_warn_time: Duration,
/// `true` if this timer has finished.
has_finished: bool,
}
impl CodeTimer {
/// Start timing the execution of a function, method, or other code region.
///
/// Returns a guard that finishes timing the code when dropped,
/// or when [`CodeTimer::finish()`] is called.
#[track_caller]
pub fn start() -> Self {
let start = Instant::now();
trace!(?start, "starting code timer");
Self {
start,
min_info_time: DEFAULT_MIN_INFO_TIME,
min_warn_time: DEFAULT_MIN_WARN_TIME,
has_finished: false,
}
}
/// Finish timing the execution of a function, method, or other code region.
#[track_caller]
pub fn finish<S>(
mut self,
module_path: &'static str,
line: u32,
description: impl Into<Option<S>>,
) where
S: ToString,
{
self.finish_inner(Some(module_path), Some(line), description);
}
/// Finish timing the execution of a function, method, or other code region.
///
/// This private method can be called from [`CodeTimer::finish()`] or `drop()`.
#[track_caller]
fn finish_inner<S>(
&mut self,
module_path: impl Into<Option<&'static str>>,
line: impl Into<Option<u32>>,
description: impl Into<Option<S>>,
) where
S: ToString,
{
if self.has_finished {
return;
}
self.has_finished = true;
let execution = self.start.elapsed();
let execution_time = duration_short(execution);
let module_path = module_path.into();
let line = line.into();
let description = description
.into()
.map(|desc| desc.to_string() + " ")
.unwrap_or_default();
if execution >= self.min_warn_time {
warn!(
?execution_time,
?module_path,
?line,
"{description}code took a long time to execute",
);
} else if execution >= self.min_info_time {
info!(
?execution_time,
?module_path,
?line,
"{description}code took longer than expected to execute",
);
} else {
trace!(
?execution_time,
?module_path,
?line,
"{description}code timer finished",
);
}
}
}
impl Drop for CodeTimer {
#[track_caller]
fn drop(&mut self) {
self.finish_inner(None, None, "(dropped, cancelled, or aborted)")
}
}
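For context, a minimal usage sketch of this timer (not part of this commit; `do_expensive_work` is a hypothetical stand-in). It mirrors how the state service later in this diff calls `finish()`:

    use zebra_chain::diagnostic::CodeTimer;

    fn load_state() {
        // Start timing this code region.
        let timer = CodeTimer::start();

        do_expensive_work();

        // Log the elapsed time: trace-level normally, info-level above
        // DEFAULT_MIN_INFO_TIME (5s), warn-level above DEFAULT_MIN_WARN_TIME (20s).
        timer.finish(module_path!(), line!(), "loading state");
    }

If `finish()` is never called, the `Drop` impl still reports the elapsed time, using the description "(dropped, cancelled, or aborted)".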

View File

@ -8,7 +8,8 @@ use proptest::prelude::*;
use proptest_derive::Arbitrary;
pub mod time;
pub use time::{humantime_milliseconds, humantime_seconds};
pub use time::{duration_short, humantime_milliseconds, humantime_seconds};
/// Wrapper to override `Debug`, redirecting it to only output the type's name.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]

View File

@ -2,6 +2,22 @@
use std::time::Duration;
/// The minimum amount of time displayed with only seconds (no milliseconds).
pub const MIN_SECONDS_ONLY_TIME: Duration = Duration::from_secs(5);
/// Returns a human-friendly formatted string for `duration`:
/// whole seconds for durations of at least `MIN_SECONDS_ONLY_TIME`, and milliseconds otherwise.
pub fn duration_short(duration: impl Into<Duration>) -> String {
let duration = duration.into();
if duration >= MIN_SECONDS_ONLY_TIME {
humantime_seconds(duration)
} else {
humantime_milliseconds(duration)
}
}
// TODO: rename these functions to duration_*
/// Returns a human-friendly formatted string for the whole number of seconds in `duration`.
pub fn humantime_seconds(duration: impl Into<Duration>) -> String {
let duration = duration.into();
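As a worked example of the `MIN_SECONDS_ONLY_TIME` threshold above (a hypothetical snippet, not part of this commit; the exact output strings depend on `humantime` formatting):

    use std::time::Duration;

    use zebra_chain::fmt::duration_short;

    fn main() {
        // Below the 5 second threshold: formatted by `humantime_milliseconds`,
        // so sub-second detail is kept (for example "2s 500ms").
        println!("{}", duration_short(Duration::from_millis(2_500)));

        // At or above the threshold: formatted by `humantime_seconds`,
        // which keeps only whole seconds (for example "42s").
        println!("{}", duration_short(Duration::from_secs(42)));
    }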

View File

@ -9,15 +9,19 @@
// Required by bitvec! macro
#![recursion_limit = "256"]
#[macro_use]
extern crate bitflags;
#[macro_use]
extern crate serde;
#[macro_use]
extern crate bitflags;
#[macro_use]
extern crate tracing;
pub mod amount;
pub mod block;
pub mod chain_tip;
pub mod diagnostic;
pub mod fmt;
pub mod history_tree;
pub mod orchard;

View File

@ -1137,7 +1137,7 @@ async fn send_periodic_heartbeats_with_shutdown_handle(
// slow rate, and shutdown is a oneshot. If both futures
// are ready, we want the shutdown to take priority over
// sending a useless heartbeat.
let result = match future::select(shutdown_rx, heartbeat_run_loop).await {
match future::select(shutdown_rx, heartbeat_run_loop).await {
Either::Left((Ok(CancelHeartbeatTask), _unused_run_loop)) => {
tracing::trace!("shutting down because Client requested shut down");
handle_heartbeat_shutdown(
@ -1164,9 +1164,7 @@ async fn send_periodic_heartbeats_with_shutdown_handle(
result
}
};
result
}
}
/// Send periodic heartbeats to `server_tx`, and update the peer status through
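The comment above relies on `future::select` polling its first argument first. A self-contained sketch of that priority behaviour (not from this commit; the channel and loop body are hypothetical):

    use futures::{
        channel::oneshot,
        future::{self, Either},
        pin_mut,
    };

    async fn demo() {
        let (shutdown_tx, shutdown_rx) = oneshot::channel::<()>();
        let heartbeat_run_loop = async {
            // ... send heartbeats at a slow rate ...
        };
        pin_mut!(heartbeat_run_loop);

        // Request shutdown, then race the two futures.
        let _ = shutdown_tx.send(());

        // `shutdown_rx` is polled first, so when both futures are ready,
        // the shutdown branch wins and no extra heartbeat is sent.
        match future::select(shutdown_rx, heartbeat_run_loop).await {
            Either::Left((_shutdown_signal, _unused_run_loop)) => {
                // The client requested shutdown.
            }
            Either::Right((_run_loop_output, _unused_shutdown)) => {
                // The heartbeat loop ended on its own.
            }
        }
    }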

View File

@ -27,13 +27,14 @@ use std::{
use futures::future::FutureExt;
use tokio::sync::{oneshot, watch};
use tower::{util::BoxService, Service};
use tracing::instrument;
use tracing::{instrument, Instrument, Span};
#[cfg(any(test, feature = "proptest-impl"))]
use tower::buffer::Buffer;
use zebra_chain::{
block,
diagnostic::CodeTimer,
parameters::{Network, NetworkUpgrade},
transparent,
};
@ -166,12 +167,19 @@ impl StateService {
config: Config,
network: Network,
) -> (Self, ReadStateService, LatestChainTip, ChainTipChange) {
let timer = CodeTimer::start();
let disk = FinalizedState::new(&config, network);
timer.finish(module_path!(), line!(), "opening finalized state database");
let timer = CodeTimer::start();
let initial_tip = disk
.db()
.tip_block()
.map(FinalizedBlock::from)
.map(ChainTipBlock::from);
timer.finish(module_path!(), line!(), "fetching database tip");
let timer = CodeTimer::start();
let (chain_tip_sender, latest_chain_tip, chain_tip_change) =
ChainTipSender::new(initial_tip, network);
@ -192,8 +200,11 @@ impl StateService {
chain_tip_sender,
best_chain_sender,
};
timer.finish(module_path!(), line!(), "initializing state service");
tracing::info!("starting legacy chain check");
let timer = CodeTimer::start();
if let Some(tip) = state.best_tip() {
if let Some(nu5_activation_height) = NetworkUpgrade::Nu5.activation_height(network) {
if check::legacy_chain(
@ -216,6 +227,7 @@ impl StateService {
}
}
tracing::info!("no legacy chain found");
timer.finish(module_path!(), line!(), "legacy chain check");
(state, read_only_service, latest_chain_tip, chain_tip_change)
}
@ -754,6 +766,8 @@ impl Service<Request> for StateService {
"type" => "commit_block",
);
let timer = CodeTimer::start();
self.assert_block_can_be_validated(&prepared);
self.pending_utxos
@ -768,10 +782,16 @@ impl Service<Request> for StateService {
// Since each block is spawned into its own task,
// there shouldn't be any other code running in the same task,
// so we don't need to worry about blocking it:
// https://docs.rs/tokio/latest/tokio/task/fn.block_in_place.html#
let rsp_rx =
tokio::task::block_in_place(|| self.queue_and_commit_non_finalized(prepared));
// https://docs.rs/tokio/latest/tokio/task/fn.block_in_place.html
let span = Span::current();
let rsp_rx = tokio::task::block_in_place(move || {
span.in_scope(|| self.queue_and_commit_non_finalized(prepared))
});
// The work is all done, the future just waits on a channel for the result
timer.finish(module_path!(), line!(), "CommitBlock");
let span = Span::current();
async move {
rsp_rx
.await
@ -784,6 +804,7 @@ impl Service<Request> for StateService {
.map(Response::Committed)
.map_err(Into::into)
}
.instrument(span)
.boxed()
}
Request::CommitFinalizedBlock(finalized) => {
@ -794,6 +815,8 @@ impl Service<Request> for StateService {
"type" => "commit_finalized_block",
);
let timer = CodeTimer::start();
self.pending_utxos.check_against(&finalized.new_outputs);
// # Performance
@ -802,9 +825,15 @@ impl Service<Request> for StateService {
// and written to disk.
//
// See the note in `CommitBlock` for more details.
let rsp_rx =
tokio::task::block_in_place(|| self.queue_and_commit_finalized(finalized));
let span = Span::current();
let rsp_rx = tokio::task::block_in_place(move || {
span.in_scope(|| self.queue_and_commit_finalized(finalized))
});
// The work is all done, the future just waits on a channel for the result
timer.finish(module_path!(), line!(), "CommitFinalizedBlock");
let span = Span::current();
async move {
rsp_rx
.await
@ -819,6 +848,7 @@ impl Service<Request> for StateService {
.map(Response::Committed)
.map_err(Into::into)
}
.instrument(span)
.boxed()
}
Request::Depth(hash) => {
@ -829,7 +859,14 @@ impl Service<Request> for StateService {
"type" => "depth",
);
let timer = CodeTimer::start();
// TODO: move this work into the future, like Block and Transaction?
let rsp = Ok(Response::Depth(self.best_depth(hash)));
// The work is all done, the future just returns the result.
timer.finish(module_path!(), line!(), "Depth");
async move { rsp }.boxed()
}
// TODO: consider spawning small reads into blocking tasks,
@ -842,7 +879,14 @@ impl Service<Request> for StateService {
"type" => "tip",
);
let timer = CodeTimer::start();
// TODO: move this work into the future, like Block and Transaction?
let rsp = Ok(Response::Tip(self.best_tip()));
// The work is all done, the future just returns the result.
timer.finish(module_path!(), line!(), "Tip");
async move { rsp }.boxed()
}
Request::BlockLocator => {
@ -853,9 +897,16 @@ impl Service<Request> for StateService {
"type" => "block_locator",
);
let timer = CodeTimer::start();
// TODO: move this work into the future, like Block and Transaction?
let rsp = Ok(Response::BlockLocator(
self.block_locator().unwrap_or_default(),
));
// The work is all done, the future just returns the result.
timer.finish(module_path!(), line!(), "BlockLocator");
async move { rsp }.boxed()
}
Request::Transaction(hash) => {
@ -866,6 +917,8 @@ impl Service<Request> for StateService {
"type" => "transaction",
);
let timer = CodeTimer::start();
// Prepare data for concurrent execution
let best_chain = self.mem.best_chain().cloned();
let db = self.disk.db().clone();
@ -873,10 +926,16 @@ impl Service<Request> for StateService {
// # Performance
//
// Allow other async tasks to make progress while the transaction is being read from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let rsp = read::transaction(best_chain, &db, hash);
span.in_scope(|| {
let rsp = read::transaction(best_chain, &db, hash);
Ok(Response::Transaction(rsp.map(|(tx, _height)| tx)))
// The work is done in the future.
timer.finish(module_path!(), line!(), "Transaction");
Ok(Response::Transaction(rsp.map(|(tx, _height)| tx)))
})
})
.map(|join_result| join_result.expect("panic in Request::Transaction"))
.boxed()
@ -889,6 +948,8 @@ impl Service<Request> for StateService {
"type" => "block",
);
let timer = CodeTimer::start();
// Prepare data for concurrent execution
let best_chain = self.mem.best_chain().cloned();
let db = self.disk.db().clone();
@ -896,10 +957,16 @@ impl Service<Request> for StateService {
// # Performance
//
// Allow other async tasks to make progress while the block is being read from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let rsp = read::block(best_chain, &db, hash_or_height);
span.in_scope(move || {
let rsp = read::block(best_chain, &db, hash_or_height);
Ok(Response::Block(rsp))
// The work is done in the future.
timer.finish(module_path!(), line!(), "Block");
Ok(Response::Block(rsp))
})
})
.map(|join_result| join_result.expect("panic in Request::Block"))
.boxed()
@ -912,13 +979,19 @@ impl Service<Request> for StateService {
"type" => "await_utxo",
);
let timer = CodeTimer::start();
let span = Span::current();
let fut = self.pending_utxos.queue(outpoint);
if let Some(utxo) = self.any_utxo(&outpoint) {
self.pending_utxos.respond(&outpoint, utxo);
}
fut.boxed()
// The future waits on a channel for a response.
timer.finish(module_path!(), line!(), "AwaitUtxo");
fut.instrument(span).boxed()
}
Request::FindBlockHashes { known_blocks, stop } => {
metrics::counter!(
@ -929,8 +1002,16 @@ impl Service<Request> for StateService {
);
const MAX_FIND_BLOCK_HASHES_RESULTS: usize = 500;
let timer = CodeTimer::start();
// TODO: move this work into the future, like Block and Transaction?
let res =
self.find_best_chain_hashes(known_blocks, stop, MAX_FIND_BLOCK_HASHES_RESULTS);
// The work is all done, the future just returns the result.
timer.finish(module_path!(), line!(), "FindBlockHashes");
async move { Ok(Response::BlockHashes(res)) }.boxed()
}
Request::FindBlockHeaders { known_blocks, stop } => {
@ -951,6 +1032,11 @@ impl Service<Request> for StateService {
//
// https://github.com/bitcoin/bitcoin/pull/4468/files#r17026905
let count = MAX_FIND_BLOCK_HEADERS_RESULTS - 2;
let timer = CodeTimer::start();
// TODO: move this work into the future, like Block and Transaction?
// return heights instead, to improve lookup performance?
let res = self.find_best_chain_hashes(known_blocks, stop, count);
// And prepare data for concurrent execution
@ -961,18 +1047,25 @@ impl Service<Request> for StateService {
//
// Now we have the chain hashes, we can read the headers concurrently,
// which allows other async tasks to make progress while data is being read from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let res = res
.iter()
.map(|&hash| {
let header = read::block_header(best_chain.clone(), &db, hash.into())
.expect("block header for found hash is in the best chain");
span.in_scope(move || {
let res = res
.iter()
.map(|&hash| {
let header =
read::block_header(best_chain.clone(), &db, hash.into())
.expect("block header for found hash is in the best chain");
block::CountedHeader { header }
})
.collect();
block::CountedHeader { header }
})
.collect();
Ok(Response::BlockHeaders(res))
// Some of the work is done in the future.
timer.finish(module_path!(), line!(), "FindBlockHeaders");
Ok(Response::BlockHeaders(res))
})
})
.map(|join_result| join_result.expect("panic in Request::FindBlockHeaders"))
.boxed()
@ -1003,17 +1096,25 @@ impl Service<ReadRequest> for ReadStateService {
"type" => "block",
);
let timer = CodeTimer::start();
let state = self.clone();
// # Performance
//
// Allow other async tasks to make progress while concurrently reading blocks from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let block = state.best_chain_receiver.with_watch_data(|best_chain| {
read::block(best_chain, &state.db, hash_or_height)
});
span.in_scope(move || {
let block = state.best_chain_receiver.with_watch_data(|best_chain| {
read::block(best_chain, &state.db, hash_or_height)
});
Ok(ReadResponse::Block(block))
// The work is done in the future.
timer.finish(module_path!(), line!(), "ReadRequest::Block");
Ok(ReadResponse::Block(block))
})
})
.map(|join_result| join_result.expect("panic in ReadRequest::Block"))
.boxed()
@ -1028,18 +1129,26 @@ impl Service<ReadRequest> for ReadStateService {
"type" => "transaction",
);
let timer = CodeTimer::start();
let state = self.clone();
// # Performance
//
// Allow other async tasks to make progress while concurrently reading transactions from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let transaction_and_height =
state.best_chain_receiver.with_watch_data(|best_chain| {
read::transaction(best_chain, &state.db, hash)
});
span.in_scope(move || {
let transaction_and_height =
state.best_chain_receiver.with_watch_data(|best_chain| {
read::transaction(best_chain, &state.db, hash)
});
Ok(ReadResponse::Transaction(transaction_and_height))
// The work is done in the future.
timer.finish(module_path!(), line!(), "ReadRequest::Transaction");
Ok(ReadResponse::Transaction(transaction_and_height))
})
})
.map(|join_result| join_result.expect("panic in ReadRequest::Transaction"))
.boxed()
@ -1053,17 +1162,26 @@ impl Service<ReadRequest> for ReadStateService {
"type" => "sapling_tree",
);
let timer = CodeTimer::start();
let state = self.clone();
// # Performance
//
// Allow other async tasks to make progress while concurrently reading trees from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let sapling_tree = state.best_chain_receiver.with_watch_data(|best_chain| {
read::sapling_tree(best_chain, &state.db, hash_or_height)
});
span.in_scope(move || {
let sapling_tree =
state.best_chain_receiver.with_watch_data(|best_chain| {
read::sapling_tree(best_chain, &state.db, hash_or_height)
});
Ok(ReadResponse::SaplingTree(sapling_tree))
// The work is done in the future.
timer.finish(module_path!(), line!(), "ReadRequest::SaplingTree");
Ok(ReadResponse::SaplingTree(sapling_tree))
})
})
.map(|join_result| join_result.expect("panic in ReadRequest::SaplingTree"))
.boxed()
@ -1077,17 +1195,26 @@ impl Service<ReadRequest> for ReadStateService {
"type" => "orchard_tree",
);
let timer = CodeTimer::start();
let state = self.clone();
// # Performance
//
// Allow other async tasks to make progress while concurrently reading trees from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let orchard_tree = state.best_chain_receiver.with_watch_data(|best_chain| {
read::orchard_tree(best_chain, &state.db, hash_or_height)
});
span.in_scope(move || {
let orchard_tree =
state.best_chain_receiver.with_watch_data(|best_chain| {
read::orchard_tree(best_chain, &state.db, hash_or_height)
});
Ok(ReadResponse::OrchardTree(orchard_tree))
// The work is done in the future.
timer.finish(module_path!(), line!(), "ReadRequest::OrchardTree");
Ok(ReadResponse::OrchardTree(orchard_tree))
})
})
.map(|join_result| join_result.expect("panic in ReadRequest::OrchardTree"))
.boxed()
@ -1105,17 +1232,29 @@ impl Service<ReadRequest> for ReadStateService {
"type" => "transaction_ids_by_addresses",
);
let timer = CodeTimer::start();
let state = self.clone();
// # Performance
//
// Allow other async tasks to make progress while concurrently reading transaction IDs from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let tx_ids = state.best_chain_receiver.with_watch_data(|best_chain| {
read::transparent_tx_ids(best_chain, &state.db, addresses, height_range)
});
span.in_scope(move || {
let tx_ids = state.best_chain_receiver.with_watch_data(|best_chain| {
read::transparent_tx_ids(best_chain, &state.db, addresses, height_range)
});
tx_ids.map(ReadResponse::AddressesTransactionIds)
// The work is done in the future.
timer.finish(
module_path!(),
line!(),
"ReadRequest::TransactionIdsByAddresses",
);
tx_ids.map(ReadResponse::AddressesTransactionIds)
})
})
.map(|join_result| {
join_result.expect("panic in ReadRequest::TransactionIdsByAddresses")
@ -1132,17 +1271,25 @@ impl Service<ReadRequest> for ReadStateService {
"type" => "address_balance",
);
let timer = CodeTimer::start();
let state = self.clone();
// # Performance
//
// Allow other async tasks to make progress while concurrently reading balances from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let balance = state.best_chain_receiver.with_watch_data(|best_chain| {
read::transparent_balance(best_chain, &state.db, addresses)
})?;
span.in_scope(move || {
let balance = state.best_chain_receiver.with_watch_data(|best_chain| {
read::transparent_balance(best_chain, &state.db, addresses)
})?;
Ok(ReadResponse::AddressBalance(balance))
// The work is done in the future.
timer.finish(module_path!(), line!(), "ReadRequest::AddressBalance");
Ok(ReadResponse::AddressBalance(balance))
})
})
.map(|join_result| join_result.expect("panic in ReadRequest::AddressBalance"))
.boxed()
@ -1157,17 +1304,25 @@ impl Service<ReadRequest> for ReadStateService {
"type" => "utxos_by_addresses",
);
let timer = CodeTimer::start();
let state = self.clone();
// # Performance
//
// Allow other async tasks to make progress while concurrently reading UTXOs from disk.
let span = Span::current();
tokio::task::spawn_blocking(move || {
let utxos = state.best_chain_receiver.with_watch_data(|best_chain| {
read::transparent_utxos(state.network, best_chain, &state.db, addresses)
});
span.in_scope(move || {
let utxos = state.best_chain_receiver.with_watch_data(|best_chain| {
read::transparent_utxos(state.network, best_chain, &state.db, addresses)
});
utxos.map(ReadResponse::Utxos)
// The work is done in the future.
timer.finish(module_path!(), line!(), "ReadRequest::UtxosByAddresses");
utxos.map(ReadResponse::Utxos)
})
})
.map(|join_result| join_result.expect("panic in ReadRequest::UtxosByAddresses"))
.boxed()

View File

@ -1199,10 +1199,17 @@ fn lightwalletd_integration() -> Result<()> {
lightwalletd_integration_test(LaunchWithEmptyState)
}
/// Make sure `lightwalletd` can sync from Zebra, in update sync mode.
/// Make sure `zebrad` can sync from peers, but don't actually launch `lightwalletd`.
///
/// If `LIGHTWALLETD_DATA_DIR` is set, runs a quick sync, then a full sync.
/// If `LIGHTWALLETD_DATA_DIR` is not set, just runs a full sync.
/// This test only runs when the `ZEBRA_CACHED_STATE_DIR` env var is set.
///
/// This test might work on Windows.
#[test]
fn zebrad_update_sync() -> Result<()> {
lightwalletd_integration_test(UpdateZebraCachedStateNoRpc)
}
/// Make sure `lightwalletd` can sync from Zebra, in update sync mode.
///
/// This test only runs when the `ZEBRA_TEST_LIGHTWALLETD`,
/// `ZEBRA_CACHED_STATE_DIR`, and `LIGHTWALLETD_DATA_DIR` env vars are set.
@ -1249,6 +1256,9 @@ fn lightwalletd_full_sync() -> Result<()> {
async fn lightwalletd_test_suite() -> Result<()> {
lightwalletd_integration_test(LaunchWithEmptyState)?;
// Only runs when ZEBRA_CACHED_STATE_DIR is set.
lightwalletd_integration_test(UpdateZebraCachedStateNoRpc)?;
// Only runs when ZEBRA_CACHED_STATE_DIR is set.
// When manually running the test suite, allow cached state in the full sync test.
lightwalletd_integration_test(FullSyncFromGenesis {
@ -1271,26 +1281,29 @@ async fn lightwalletd_test_suite() -> Result<()> {
/// Run a lightwalletd integration test with a configuration for `test_type`.
///
/// Set `allow_cached_state_for_full_sync` to speed up manual full sync tests.
/// Set `FullSyncFromGenesis { allow_lightwalletd_cached_state: true }` to speed up manual full sync tests.
///
/// The random ports in this test can cause [rare port conflicts.](#Note on port conflict)
#[cfg(not(target_os = "windows"))]
fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()> {
zebra_test::init();
// Skip the test unless the user specifically asked for it
if zebra_skip_lightwalletd_tests() {
//
// TODO: pass test_type to zebra_skip_lightwalletd_tests() and check for lightwalletd launch in there
if test_type.launches_lightwalletd() && zebra_skip_lightwalletd_tests() {
return Ok(());
}
// Get the zebrad and lightwalletd configs
// TODO: split the zebrad and lightwalletd launches and checks into separate functions?
// Get the zebrad config
// Handle the Zebra state directory based on the test type:
// - LaunchWithEmptyState: ignore the state directory
// - FullSyncFromGenesis & UpdateCachedState:
// - FullSyncFromGenesis, UpdateCachedState, UpdateZebraCachedStateNoRpc:
// skip the test if it is not available, timeout if it is not populated
// Write a configuration that has RPC listen_addr set.
// Write a configuration that has RPC listen_addr set (if needed).
// If the state path env var is set, use it in the config.
let config = if let Some(config) =
test_type.zebrad_config("lightwalletd_integration_test".to_string())
@ -1301,7 +1314,7 @@ fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()>
};
// Handle the lightwalletd state directory based on the test type:
// - LaunchWithEmptyState: ignore the state directory
// - LaunchWithEmptyState, UpdateZebraCachedStateNoRpc: ignore the state directory
// - FullSyncFromGenesis: use it if available, timeout if it is already populated
// - UpdateCachedState: skip the test if it is not available, timeout if it is not populated
let lightwalletd_state_path =
@ -1321,9 +1334,6 @@ fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()>
// Get the lists of process failure logs
let (zebrad_failure_messages, zebrad_ignore_messages) = test_type.zebrad_failure_messages();
let (lightwalletd_failure_messages, lightwalletd_ignore_messages) =
test_type.lightwalletd_failure_messages();
// Launch zebrad
let zdir = testdir()?.with_exact_config(&config)?;
let mut zebrad = zdir
@ -1339,109 +1349,121 @@ fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()>
zebrad.expect_stdout_line_matches("loaded Zebra state cache .*tip.*=.*None")?;
}
// Wait until `zebrad` has opened the RPC endpoint
zebrad.expect_stdout_line_matches(regex::escape(
format!("Opened RPC endpoint at {}", config.rpc.listen_addr.unwrap()).as_str(),
))?;
// Launch lightwalletd, if needed
let mut lightwalletd = if test_type.launches_lightwalletd() {
// Wait until `zebrad` has opened the RPC endpoint
zebrad.expect_stdout_line_matches(regex::escape(
format!("Opened RPC endpoint at {}", config.rpc.listen_addr.unwrap()).as_str(),
))?;
// Launch lightwalletd
// Write a fake zcashd configuration that has the rpcbind and rpcport options set
let ldir = testdir()?;
let ldir = ldir.with_lightwalletd_config(config.rpc.listen_addr.unwrap())?;
// Write a fake zcashd configuration that has the rpcbind and rpcport options set
let ldir = testdir()?;
let ldir = ldir.with_lightwalletd_config(config.rpc.listen_addr.unwrap())?;
let (lightwalletd_failure_messages, lightwalletd_ignore_messages) =
test_type.lightwalletd_failure_messages();
// Launch the lightwalletd process
let lightwalletd = if test_type == LaunchWithEmptyState {
ldir.spawn_lightwalletd_child(None, args![])?
// Launch the lightwalletd process
let lightwalletd = if test_type == LaunchWithEmptyState {
ldir.spawn_lightwalletd_child(None, args![])?
} else {
ldir.spawn_lightwalletd_child(lightwalletd_state_path, args![])?
};
let mut lightwalletd = lightwalletd
.with_timeout(test_type.lightwalletd_timeout())
.with_failure_regex_iter(lightwalletd_failure_messages, lightwalletd_ignore_messages);
// Wait until `lightwalletd` has launched
lightwalletd.expect_stdout_line_matches(regex::escape("Starting gRPC server"))?;
// Check that `lightwalletd` is calling the expected Zebra RPCs
// getblockchaininfo
if test_type.needs_zebra_cached_state() {
lightwalletd.expect_stdout_line_matches(
"Got sapling height 419200 block height [0-9]{7} chain main branchID [0-9a-f]{8}",
)?;
} else {
// Timeout the test if we're somehow accidentally using a cached state in our temp dir
lightwalletd.expect_stdout_line_matches(
"Got sapling height 419200 block height [0-9]{1,6} chain main branchID 00000000",
)?;
}
if test_type.needs_lightwalletd_cached_state() {
lightwalletd.expect_stdout_line_matches("Found [0-9]{7} blocks in cache")?;
} else if !test_type.allow_lightwalletd_cached_state() {
// Timeout the test if we're somehow accidentally using a cached state in our temp dir
lightwalletd.expect_stdout_line_matches("Found 0 blocks in cache")?;
}
// getblock with the first Sapling block in Zebra's state
//
// zcash/lightwalletd calls getbestblockhash here, but
// adityapk00/lightwalletd calls getblock
//
// The log also depends on what is in Zebra's state:
//
// # Cached Zebra State
//
// lightwalletd ingests blocks into its cache.
//
// # Empty Zebra State
//
// lightwalletd tries to download the Sapling activation block, but it's not in the state.
//
// Until the Sapling activation block has been downloaded,
// lightwalletd will keep retrying getblock.
if !test_type.allow_lightwalletd_cached_state() {
if test_type.needs_zebra_cached_state() {
lightwalletd.expect_stdout_line_matches(
"([Aa]dding block to cache)|([Ww]aiting for block)",
)?;
} else {
lightwalletd.expect_stdout_line_matches(regex::escape(
"Waiting for zcashd height to reach Sapling activation height (419200)",
))?;
}
}
Some(lightwalletd)
} else {
ldir.spawn_lightwalletd_child(lightwalletd_state_path, args![])?
None
};
let mut lightwalletd = lightwalletd
.with_timeout(test_type.lightwalletd_timeout())
.with_failure_regex_iter(lightwalletd_failure_messages, lightwalletd_ignore_messages);
// Wait until `lightwalletd` has launched
lightwalletd.expect_stdout_line_matches(regex::escape("Starting gRPC server"))?;
// Check that `lightwalletd` is calling the expected Zebra RPCs
// getblockchaininfo
if test_type.needs_zebra_cached_state() {
lightwalletd.expect_stdout_line_matches(
"Got sapling height 419200 block height [0-9]{7} chain main branchID [0-9a-f]{8}",
)?;
} else {
// Timeout the test if we're somehow accidentally using a cached state in our temp dir
lightwalletd.expect_stdout_line_matches(
"Got sapling height 419200 block height [0-9]{1,6} chain main branchID 00000000",
)?;
}
if test_type.needs_lightwalletd_cached_state() {
lightwalletd.expect_stdout_line_matches("Found [0-9]{7} blocks in cache")?;
} else if !test_type.allow_lightwalletd_cached_state() {
// Timeout the test if we're somehow accidentally using a cached state in our temp dir
lightwalletd.expect_stdout_line_matches("Found 0 blocks in cache")?;
}
// getblock with the first Sapling block in Zebra's state
//
// zcash/lightwalletd calls getbestblockhash here, but
// adityapk00/lightwalletd calls getblock
//
// The log also depends on what is in Zebra's state:
//
// # Cached Zebra State
//
// lightwalletd ingests blocks into its cache.
//
// # Empty Zebra State
//
// lightwalletd tries to download the Sapling activation block, but it's not in the state.
//
// Until the Sapling activation block has been downloaded,
// lightwalletd will keep retrying getblock.
if !test_type.allow_lightwalletd_cached_state() {
if test_type.needs_zebra_cached_state() {
lightwalletd
.expect_stdout_line_matches("([Aa]dding block to cache)|([Ww]aiting for block)")?;
} else {
lightwalletd.expect_stdout_line_matches(regex::escape(
"Waiting for zcashd height to reach Sapling activation height (419200)",
))?;
}
}
if matches!(test_type, UpdateCachedState | FullSyncFromGenesis { .. }) {
// Wait for Zebra to sync its cached state to the chain tip
zebrad.expect_stdout_line_matches(SYNC_FINISHED_REGEX)?;
// Wait for lightwalletd to sync some blocks
lightwalletd
.expect_stdout_line_matches("([Aa]dding block to cache)|([Ww]aiting for block)")?;
if let Some(ref mut lightwalletd) = lightwalletd {
lightwalletd
.expect_stdout_line_matches("([Aa]dding block to cache)|([Ww]aiting for block)")?;
// Wait for lightwalletd to sync to Zebra's tip.
//
// TODO: after the lightwalletd hangs are fixed, fail the test on errors or timeouts
if cfg!(lightwalletd_hang_fix) {
lightwalletd.expect_stdout_line_matches("[Ww]aiting for block")?;
} else {
// To work around a hang bug, we run the test until:
// - lightwalletd starts waiting for blocks (best case scenario)
// - lightwalletd syncs to near the tip (workaround, cached state image is usable)
// - the test times out with an error, but we ignore it
// (workaround, cached state might be usable, slow, or might fail other tests)
// Wait for lightwalletd to sync to Zebra's tip.
//
// TODO: update the regex to `1[8-9][0-9]{5}` when mainnet reaches block 1_800_000
let log_result = lightwalletd.expect_stdout_line_matches(
"([Aa]dding block to cache 1[7-9][0-9]{5})|([Ww]aiting for block)",
);
if log_result.is_err() {
tracing::warn!(
?log_result,
"ignoring a lightwalletd test failure, to work around a lightwalletd hang bug",
// TODO: after the lightwalletd hangs are fixed, fail the test on errors or timeouts
if cfg!(lightwalletd_hang_fix) {
lightwalletd.expect_stdout_line_matches("[Ww]aiting for block")?;
} else {
// To work around a hang bug, we run the test until:
// - lightwalletd starts waiting for blocks (best case scenario)
// - lightwalletd syncs to near the tip (workaround, cached state image is usable)
// - the test times out with an error, but we ignore it
// (workaround, cached state might be usable, slow, or might fail other tests)
//
// TODO: update the regex to `1[8-9][0-9]{5}` when mainnet reaches block 1_800_000
let log_result = lightwalletd.expect_stdout_line_matches(
"([Aa]dding block to cache 1[7-9][0-9]{5})|([Ww]aiting for block)",
);
if log_result.is_err() {
// This error takes up about 100 lines, and looks like a panic message
tracing::warn!(
multi_line_error = ?log_result,
"ignoring a lightwalletd test failure, to work around a lightwalletd hang bug",
);
}
}
}
@ -1455,28 +1477,36 @@ fn lightwalletd_integration_test(test_type: LightwalletdTestType) -> Result<()>
//
// TODO: re-enable this code when lightwalletd hangs are fixed
if cfg!(lightwalletd_hang_fix) {
lightwalletd.expect_stdout_line_matches(regex::escape(
"Block hash changed, clearing mempool clients",
))?;
lightwalletd.expect_stdout_line_matches(regex::escape("Adding new mempool txid"))?;
if let Some(ref mut lightwalletd) = lightwalletd {
lightwalletd.expect_stdout_line_matches(regex::escape(
"Block hash changed, clearing mempool clients",
))?;
lightwalletd
.expect_stdout_line_matches(regex::escape("Adding new mempool txid"))?;
}
}
}
// Cleanup both processes
lightwalletd.kill()?;
zebrad.kill()?;
let lightwalletd_output = lightwalletd.wait_with_output()?.assert_failure()?;
let zebrad_output = zebrad.wait_with_output()?.assert_failure()?;
//
// If the test fails here, see the [note on port conflict](#Note on port conflict)
//
// zcash/lightwalletd exits by itself, but
// adityapk00/lightwalletd keeps on going, so it gets killed by the test harness.
zebrad.kill()?;
if let Some(mut lightwalletd) = lightwalletd {
lightwalletd.kill()?;
let lightwalletd_output = lightwalletd.wait_with_output()?.assert_failure()?;
lightwalletd_output
.assert_was_killed()
.wrap_err("Possible port conflict. Are there other acceptance tests running?")?;
}
let zebrad_output = zebrad.wait_with_output()?.assert_failure()?;
lightwalletd_output
.assert_was_killed()
.wrap_err("Possible port conflict. Are there other acceptance tests running?")?;
zebrad_output
.assert_was_killed()
.wrap_err("Possible port conflict. Are there other acceptance tests running?")?;

View File

@ -45,7 +45,11 @@ pub const BETWEEN_NODES_DELAY: Duration = Duration::from_secs(2);
/// The amount of time we wait for lightwalletd to update to the tip.
///
/// The cached tip can be a few days old, and Zebra needs time to activate its mempool.
pub const LIGHTWALLETD_UPDATE_TIP_DELAY: Duration = Duration::from_secs(20 * 60);
///
/// Currently, `zebrad` syncs are slower than `lightwalletd` syncs, so we re-use its timeout.
///
/// TODO: reduce to 20 minutes when `zebrad` sync performance improves
pub const LIGHTWALLETD_UPDATE_TIP_DELAY: Duration = LIGHTWALLETD_FULL_SYNC_TIP_DELAY;
/// The amount of time we wait for lightwalletd to do a full sync to the tip.
///
@ -53,6 +57,14 @@ pub const LIGHTWALLETD_UPDATE_TIP_DELAY: Duration = Duration::from_secs(20 * 60)
/// and Zebra needs time to activate its mempool.
pub const LIGHTWALLETD_FULL_SYNC_TIP_DELAY: Duration = Duration::from_secs(90 * 60);
/// The amount of extra time we wait for Zebra to sync to the tip,
/// after we ignore a lightwalletd failure.
///
/// Zebra logs a status entry every minute, so there should be at least 4 in this interval.
///
/// TODO: remove this extra time when lightwalletd hangs are fixed
pub const ZEBRAD_EXTRA_DELAY_FOR_LIGHTWALLETD_WORKAROUND: Duration = Duration::from_secs(5 * 60);
/// Extension trait for methods on `tempfile::TempDir` for using it as a test
/// directory for `zebrad`.
pub trait ZebradTestDirExt

View File

@ -28,7 +28,7 @@ use super::{
},
launch::{
ZebradTestDirExt, LIGHTWALLETD_DELAY, LIGHTWALLETD_FULL_SYNC_TIP_DELAY,
LIGHTWALLETD_UPDATE_TIP_DELAY,
LIGHTWALLETD_UPDATE_TIP_DELAY, ZEBRAD_EXTRA_DELAY_FOR_LIGHTWALLETD_WORKAROUND,
},
};
@ -67,7 +67,7 @@ pub fn zebra_skip_lightwalletd_tests() -> bool {
// TODO: check if the lightwalletd binary is in the PATH?
// (this doesn't seem to be implemented in the standard library)
//
// See is_command_available in zebra-test/tests/command.rs for one way to do this.
// See is_command_available() in zebra-test/src/tests/command.rs for one way to do this.
if env::var_os(ZEBRA_TEST_LIGHTWALLETD).is_none() {
// This message is captured by the test runner, use
@ -236,6 +236,14 @@ pub enum LightwalletdTestType {
///
/// This test requires a cached Zebra and lightwalletd state.
UpdateCachedState,
/// Launch `zebrad` and sync it to the tip, but don't launch `lightwalletd`.
///
/// If this test fails, the failure is in `zebrad` without RPCs or `lightwalletd`.
/// If it succeeds, but the RPC tests fail, the problem is caused by RPCs or `lightwalletd`.
///
/// This test requires a cached Zebra state.
UpdateZebraCachedStateNoRpc,
}
impl LightwalletdTestType {
@ -243,26 +251,36 @@ impl LightwalletdTestType {
pub fn needs_zebra_cached_state(&self) -> bool {
match self {
LaunchWithEmptyState => false,
FullSyncFromGenesis { .. } | UpdateCachedState => true,
FullSyncFromGenesis { .. } | UpdateCachedState | UpdateZebraCachedStateNoRpc => true,
}
}
/// Does this test need a lightwalletd cached state?
/// Does this test launch `lightwalletd`?
pub fn launches_lightwalletd(&self) -> bool {
match self {
UpdateZebraCachedStateNoRpc => false,
LaunchWithEmptyState | FullSyncFromGenesis { .. } | UpdateCachedState => true,
}
}
/// Does this test need a `lightwalletd` cached state?
pub fn needs_lightwalletd_cached_state(&self) -> bool {
match self {
LaunchWithEmptyState | FullSyncFromGenesis { .. } => false,
LaunchWithEmptyState | FullSyncFromGenesis { .. } | UpdateZebraCachedStateNoRpc => {
false
}
UpdateCachedState => true,
}
}
/// Does this test allow a lightwalletd cached state, even if it is not required?
/// Does this test allow a `lightwalletd` cached state, even if it is not required?
pub fn allow_lightwalletd_cached_state(&self) -> bool {
match self {
LaunchWithEmptyState => false,
FullSyncFromGenesis {
allow_lightwalletd_cached_state,
} => *allow_lightwalletd_cached_state,
UpdateCachedState => true,
UpdateCachedState | UpdateZebraCachedStateNoRpc => true,
}
}
@ -287,13 +305,19 @@ impl LightwalletdTestType {
/// Returns `None` if the test should be skipped,
/// and `Some(Err(_))` if the config could not be created.
pub fn zebrad_config(&self, test_name: String) -> Option<Result<ZebradConfig>> {
let config = if self.launches_lightwalletd() {
random_known_rpc_port_config()
} else {
default_test_config()
};
if !self.needs_zebra_cached_state() {
return Some(random_known_rpc_port_config());
return Some(config);
}
let zebra_state_path = self.zebrad_state_path(test_name)?;
let mut config = match random_known_rpc_port_config() {
let mut config = match config {
Ok(config) => config,
Err(error) => return Some(Err(error)),
};
@ -307,8 +331,17 @@ impl LightwalletdTestType {
Some(Ok(config))
}
/// Returns the lightwalletd state path for this test, if set.
/// Returns the `lightwalletd` state path for this test, if set, and if allowed for this test.
pub fn lightwalletd_state_path(&self, test_name: String) -> Option<PathBuf> {
if !self.launches_lightwalletd() {
tracing::info!(
"running {test_name:?} {self:?} lightwalletd test, \
ignoring any cached state in the {LIGHTWALLETD_DATA_DIR:?} environment variable",
);
return None;
}
match env::var_os(LIGHTWALLETD_DATA_DIR) {
Some(path) => Some(path.into()),
None => {
@ -331,21 +364,29 @@ impl LightwalletdTestType {
/// Returns the `zebrad` timeout for this test type.
pub fn zebrad_timeout(&self) -> Duration {
// We use the same timeouts as lightwalletd,
let base_timeout = match self {
LaunchWithEmptyState => LIGHTWALLETD_DELAY,
FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY,
UpdateCachedState | UpdateZebraCachedStateNoRpc => LIGHTWALLETD_UPDATE_TIP_DELAY,
};
// If lightwalletd hangs and times out, Zebra needs a bit of extra time to finish
base_timeout + ZEBRAD_EXTRA_DELAY_FOR_LIGHTWALLETD_WORKAROUND
}
/// Returns the `lightwalletd` timeout for this test type.
#[track_caller]
pub fn lightwalletd_timeout(&self) -> Duration {
if !self.launches_lightwalletd() {
panic!("lightwalletd must not be launched in the {self:?} test");
}
// We use the same timeouts for zebrad and lightwalletd,
// because the tests swap between checking zebrad and lightwalletd.
match self {
LaunchWithEmptyState => LIGHTWALLETD_DELAY,
FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY,
UpdateCachedState => LIGHTWALLETD_UPDATE_TIP_DELAY,
}
}
/// Returns the `lightwalletd` timeout for this test type.
pub fn lightwalletd_timeout(&self) -> Duration {
match self {
LaunchWithEmptyState => LIGHTWALLETD_DELAY,
FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY,
UpdateCachedState => LIGHTWALLETD_UPDATE_TIP_DELAY,
UpdateCachedState | UpdateZebraCachedStateNoRpc => LIGHTWALLETD_UPDATE_TIP_DELAY,
}
}
@ -375,7 +416,12 @@ impl LightwalletdTestType {
/// Returns `lightwalletd` log regexes that indicate the tests have failed,
/// and regexes of any failures that should be ignored.
#[track_caller]
pub fn lightwalletd_failure_messages(&self) -> (Vec<String>, Vec<String>) {
if !self.launches_lightwalletd() {
panic!("lightwalletd must not be launched in the {self:?} test");
}
let mut lightwalletd_failure_messages: Vec<String> = LIGHTWALLETD_FAILURE_MESSAGES
.iter()
.chain(PROCESS_FAILURE_MESSAGES)