591 lines
24 KiB
Rust
591 lines
24 KiB
Rust
//! `start` subcommand - entry point for starting a zebra node
|
|
//!
|
|
//! ## Application Structure
|
|
//!
|
|
//! A zebra node consists of the following services and tasks:
|
|
//!
|
|
//! Peers:
|
|
//! * Peer Connection Pool Service
|
|
//! * primary external interface for outbound requests from this node to remote peers
|
|
//! * accepts requests from services and tasks in this node, and sends them to remote peers
|
|
//! * Peer Discovery Service
|
|
//! * maintains a list of peer addresses, and connection priority metadata
|
|
//! * discovers new peer addresses from existing peer connections
|
|
//! * initiates new outbound peer connections in response to demand from tasks within this node
|
|
//!
|
|
//! Blocks & Mempool Transactions:
|
|
//! * Consensus Service
|
|
//! * handles all validation logic for the node
|
|
//! * verifies blocks using zebra-chain, then stores verified blocks in zebra-state
|
|
//! * verifies mempool and block transactions using zebra-chain and zebra-script,
|
|
//! and returns verified mempool transactions for mempool storage
|
|
//! * Groth16 Parameters Download Task
|
|
//! * downloads the Sprout and Sapling Groth16 circuit parameter files
|
|
//! * finishes when the download is complete and the download file hashes have been checked
|
|
//! * Inbound Service
|
|
//! * primary external interface for inbound peer requests to this node
|
|
//! * handles requests from peers for network data, chain data, and mempool transactions
|
|
//! * spawns download and verify tasks for each gossiped block
|
|
//! * sends gossiped transactions to the mempool service
|
|
//!
|
|
//! Blocks:
|
|
//! * Sync Task
|
|
//! * runs in the background and continuously queries the network for
|
|
//! new blocks to be verified and added to the local state
|
|
//! * spawns download and verify tasks for each crawled block
|
|
//! * State Service
|
|
//! * contextually verifies blocks
|
|
//! * handles in-memory storage of multiple non-finalized chains
|
|
//! * handles permanent storage of the best finalized chain
|
|
//! * Block Gossip Task
|
|
//! * runs in the background and continuously queries the state for
|
|
//! newly committed blocks to be gossiped to peers
|
|
//! * Progress Task
|
|
//! * logs progress towards the chain tip
|
|
//!
|
|
//! Mempool Transactions:
|
|
//! * Mempool Service
|
|
//! * activates when the syncer is near the chain tip
|
|
//! * spawns download and verify tasks for each crawled or gossiped transaction
|
|
//! * handles in-memory storage of unmined transactions
|
|
//! * Queue Checker Task
|
|
//! * runs in the background, polling the mempool to store newly verified transactions
|
|
//! * Transaction Gossip Task
|
|
//! * runs in the background and gossips newly added mempool transactions
|
|
//! to peers
|
|
//!
|
|
//! Remote Procedure Calls:
|
|
//! * JSON-RPC Service
|
|
//! * answers RPC client requests using the State Service and Mempool Service
|
|
//! * submits client transactions to the node's mempool
|
|
|
|
use std::{cmp::max, ops::Add, time::Duration};
|
|
|
|
use abscissa_core::{config, Command, FrameworkError, Options, Runnable};
|
|
use chrono::Utc;
|
|
use color_eyre::eyre::{eyre, Report};
|
|
use futures::FutureExt;
|
|
use num_integer::div_ceil;
|
|
use tokio::{pin, select, sync::oneshot};
|
|
use tower::{builder::ServiceBuilder, util::BoxService};
|
|
use tracing_futures::Instrument;
|
|
|
|
use zebra_chain::{
|
|
block::Height,
|
|
chain_tip::ChainTip,
|
|
parameters::{Network, NetworkUpgrade, POST_BLOSSOM_POW_TARGET_SPACING},
|
|
};
|
|
use zebra_consensus::CheckpointList;
|
|
|
|
use zebra_rpc::server::RpcServer;
|
|
|
|
use crate::{
|
|
application::app_version,
|
|
components::{
|
|
inbound::{self, InboundSetupData},
|
|
mempool::{self, Mempool},
|
|
sync::{self, SyncStatus},
|
|
tokio::{RuntimeRun, TokioComponent},
|
|
ChainSync, Inbound,
|
|
},
|
|
config::ZebradConfig,
|
|
prelude::*,
|
|
};
|
|
|
|
/// `start` subcommand
|
|
#[derive(Command, Debug, Options)]
|
|
pub struct StartCmd {
|
|
/// Filter strings which override the config file and defaults
|
|
#[options(free, help = "tracing filters which override the zebrad.toml config")]
|
|
filters: Vec<String>,
|
|
}
|
|
|
|
impl StartCmd {
|
|
async fn start(&self) -> Result<(), Report> {
|
|
let config = app_config().clone();
|
|
info!(?config);
|
|
|
|
info!("initializing node state");
|
|
let (state_service, _read_only_state_service, latest_chain_tip, chain_tip_change) =
|
|
zebra_state::init(config.state.clone(), config.network.network);
|
|
let state = ServiceBuilder::new()
|
|
.buffer(Self::state_buffer_bound())
|
|
.service(state_service);
|
|
|
|
info!("initializing network");
|
|
// The service that our node uses to respond to requests by peers. The
|
|
// load_shed middleware ensures that we reduce the size of the peer set
|
|
// in response to excess load.
|
|
let (setup_tx, setup_rx) = oneshot::channel();
|
|
let inbound = ServiceBuilder::new()
|
|
.load_shed()
|
|
.buffer(inbound::downloads::MAX_INBOUND_CONCURRENCY)
|
|
.service(Inbound::new(setup_rx));
|
|
|
|
let (peer_set, address_book) =
|
|
zebra_network::init(config.network.clone(), inbound, latest_chain_tip.clone()).await;
|
|
|
|
info!("initializing verifiers");
|
|
let (chain_verifier, tx_verifier, mut groth16_download_handle) =
|
|
zebra_consensus::chain::init(
|
|
config.consensus.clone(),
|
|
config.network.network,
|
|
state.clone(),
|
|
config.consensus.debug_skip_parameter_preload,
|
|
)
|
|
.await;
|
|
|
|
info!("initializing syncer");
|
|
let (syncer, sync_status) = ChainSync::new(
|
|
&config,
|
|
peer_set.clone(),
|
|
chain_verifier.clone(),
|
|
state.clone(),
|
|
latest_chain_tip.clone(),
|
|
);
|
|
|
|
info!("initializing mempool");
|
|
let (mempool, mempool_transaction_receiver) = Mempool::new(
|
|
&config.mempool,
|
|
peer_set.clone(),
|
|
state.clone(),
|
|
tx_verifier,
|
|
sync_status.clone(),
|
|
latest_chain_tip.clone(),
|
|
chain_tip_change.clone(),
|
|
);
|
|
let mempool = BoxService::new(mempool);
|
|
let mempool = ServiceBuilder::new()
|
|
.buffer(mempool::downloads::MAX_INBOUND_CONCURRENCY)
|
|
.service(mempool);
|
|
|
|
// Launch RPC server
|
|
let rpc_task_handle = RpcServer::spawn(
|
|
config.rpc,
|
|
app_version().to_string(),
|
|
mempool.clone(),
|
|
state.clone(),
|
|
);
|
|
|
|
let setup_data = InboundSetupData {
|
|
address_book,
|
|
block_download_peer_set: peer_set.clone(),
|
|
block_verifier: chain_verifier,
|
|
mempool: mempool.clone(),
|
|
state,
|
|
latest_chain_tip: latest_chain_tip.clone(),
|
|
};
|
|
setup_tx
|
|
.send(setup_data)
|
|
.map_err(|_| eyre!("could not send setup data to inbound service"))?;
|
|
|
|
let syncer_task_handle = tokio::spawn(syncer.sync().in_current_span());
|
|
|
|
let mut block_gossip_task_handle = tokio::spawn(
|
|
sync::gossip_best_tip_block_hashes(
|
|
sync_status.clone(),
|
|
chain_tip_change.clone(),
|
|
peer_set.clone(),
|
|
)
|
|
.in_current_span(),
|
|
);
|
|
|
|
let mempool_crawler_task_handle = mempool::Crawler::spawn(
|
|
&config.mempool,
|
|
peer_set.clone(),
|
|
mempool.clone(),
|
|
sync_status.clone(),
|
|
chain_tip_change,
|
|
);
|
|
|
|
let mempool_queue_checker_task_handle = mempool::QueueChecker::spawn(mempool.clone());
|
|
|
|
let tx_gossip_task_handle = tokio::spawn(
|
|
mempool::gossip_mempool_transaction_id(mempool_transaction_receiver, peer_set)
|
|
.in_current_span(),
|
|
);
|
|
|
|
let progress_task_handle = tokio::spawn(
|
|
Self::update_progress(config.network.network, latest_chain_tip, sync_status)
|
|
.in_current_span(),
|
|
);
|
|
|
|
info!("spawned initial Zebra tasks");
|
|
|
|
// TODO: put tasks into an ongoing FuturesUnordered and a startup FuturesUnordered?
|
|
|
|
// ongoing tasks
|
|
pin!(rpc_task_handle);
|
|
pin!(syncer_task_handle);
|
|
pin!(mempool_crawler_task_handle);
|
|
pin!(mempool_queue_checker_task_handle);
|
|
pin!(tx_gossip_task_handle);
|
|
pin!(progress_task_handle);
|
|
|
|
// startup tasks
|
|
let groth16_download_handle_fused = (&mut groth16_download_handle).fuse();
|
|
pin!(groth16_download_handle_fused);
|
|
|
|
// Wait for tasks to finish
|
|
let exit_status = loop {
|
|
let mut exit_when_task_finishes = true;
|
|
|
|
let result = select! {
|
|
rpc_result = &mut rpc_task_handle => {
|
|
rpc_result
|
|
.expect("unexpected panic in the rpc task");
|
|
info!("rpc task exited");
|
|
Ok(())
|
|
}
|
|
|
|
sync_result = &mut syncer_task_handle => sync_result
|
|
.expect("unexpected panic in the syncer task")
|
|
.map(|_| info!("syncer task exited")),
|
|
|
|
block_gossip_result = &mut block_gossip_task_handle => block_gossip_result
|
|
.expect("unexpected panic in the chain tip block gossip task")
|
|
.map(|_| info!("chain tip block gossip task exited"))
|
|
.map_err(|e| eyre!(e)),
|
|
|
|
mempool_crawl_result = &mut mempool_crawler_task_handle => mempool_crawl_result
|
|
.expect("unexpected panic in the mempool crawler")
|
|
.map(|_| info!("mempool crawler task exited"))
|
|
.map_err(|e| eyre!(e)),
|
|
|
|
mempool_queue_result = &mut mempool_queue_checker_task_handle => mempool_queue_result
|
|
.expect("unexpected panic in the mempool queue checker")
|
|
.map(|_| info!("mempool queue checker task exited"))
|
|
.map_err(|e| eyre!(e)),
|
|
|
|
tx_gossip_result = &mut tx_gossip_task_handle => tx_gossip_result
|
|
.expect("unexpected panic in the transaction gossip task")
|
|
.map(|_| info!("transaction gossip task exited"))
|
|
.map_err(|e| eyre!(e)),
|
|
|
|
progress_result = &mut progress_task_handle => {
|
|
progress_result
|
|
.expect("unexpected panic in the chain progress task");
|
|
info!("chain progress task exited");
|
|
Ok(())
|
|
}
|
|
|
|
// Unlike other tasks, we expect the download task to finish while Zebra is running.
|
|
groth16_download_result = &mut groth16_download_handle_fused => {
|
|
groth16_download_result
|
|
.unwrap_or_else(|_| panic!(
|
|
"unexpected panic in the Groth16 pre-download and check task. {}",
|
|
zebra_consensus::groth16::Groth16Parameters::failure_hint())
|
|
);
|
|
|
|
exit_when_task_finishes = false;
|
|
Ok(())
|
|
}
|
|
};
|
|
|
|
// Stop Zebra if a task finished and returned an error,
|
|
// or if an ongoing task exited.
|
|
if let Err(err) = result {
|
|
break Err(err);
|
|
}
|
|
|
|
if exit_when_task_finishes {
|
|
break Ok(());
|
|
}
|
|
};
|
|
|
|
info!("exiting Zebra because an ongoing task exited: stopping other tasks");
|
|
|
|
// ongoing tasks
|
|
syncer_task_handle.abort();
|
|
block_gossip_task_handle.abort();
|
|
mempool_crawler_task_handle.abort();
|
|
mempool_queue_checker_task_handle.abort();
|
|
tx_gossip_task_handle.abort();
|
|
rpc_task_handle.abort();
|
|
|
|
// startup tasks
|
|
groth16_download_handle.abort();
|
|
|
|
exit_status
|
|
}
|
|
|
|
/// Returns the bound for the state service buffer,
|
|
/// based on the configurations of the services that use the state concurrently.
|
|
fn state_buffer_bound() -> usize {
|
|
let config = app_config().clone();
|
|
|
|
// TODO: do we also need to account for concurrent use across services?
|
|
// we could multiply the maximum by 3/2, or add a fixed constant
|
|
max(
|
|
config.sync.max_concurrent_block_requests,
|
|
max(
|
|
inbound::downloads::MAX_INBOUND_CONCURRENCY,
|
|
mempool::downloads::MAX_INBOUND_CONCURRENCY,
|
|
),
|
|
)
|
|
}
|
|
|
|
/// Logs Zebra's estimated progress towards the chain tip.
|
|
async fn update_progress(
|
|
network: Network,
|
|
latest_chain_tip: impl ChainTip,
|
|
sync_status: SyncStatus,
|
|
) {
|
|
// The amount of time between progress logs.
|
|
const LOG_INTERVAL: Duration = Duration::from_secs(60);
|
|
|
|
// The number of blocks we consider to be close to the tip.
|
|
//
|
|
// Most chain forks are 1-7 blocks long.
|
|
const MAX_CLOSE_TO_TIP_BLOCKS: i32 = 1;
|
|
|
|
// Skip slow sync warnings when we are this close to the tip.
|
|
//
|
|
// In testing, we've seen warnings around 30 blocks.
|
|
//
|
|
// TODO: replace with `MAX_CLOSE_TO_TIP_BLOCKS` after fixing slow syncing near tip (#3375)
|
|
const MIN_SYNC_WARNING_BLOCKS: i32 = 60;
|
|
|
|
// The number of fractional digits in sync percentages.
|
|
const SYNC_PERCENT_FRAC_DIGITS: usize = 3;
|
|
|
|
// The minimum number of extra blocks mined between updating a checkpoint list,
|
|
// and running an automated test that depends on that list.
|
|
//
|
|
// Makes sure that the block finalization code always runs in sync tests,
|
|
// even if the miner or test node clock is wrong by a few minutes.
|
|
//
|
|
// This is an estimate based on the time it takes to:
|
|
// - get the tip height from `zcashd`,
|
|
// - run `zebra-checkpoints` to update the checkpoint list,
|
|
// - submit a pull request, and
|
|
// - run a CI test that logs progress based on the new checkpoint height.
|
|
//
|
|
// We might add tests that sync from a cached tip state,
|
|
// so we only allow a few extra blocks here.
|
|
const MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE: i32 = 10;
|
|
|
|
// The minimum number of extra blocks after the highest checkpoint, based on:
|
|
// - the non-finalized state limit, and
|
|
// - the minimum number of extra blocks mined between a checkpoint update,
|
|
// and the automated tests for that update.
|
|
let min_after_checkpoint_blocks = i32::try_from(zebra_state::MAX_BLOCK_REORG_HEIGHT)
|
|
.expect("constant fits in i32")
|
|
+ MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE;
|
|
|
|
// The minimum height of the valid best chain, based on:
|
|
// - the hard-coded checkpoint height,
|
|
// - the minimum number of blocks after the highest checkpoint.
|
|
let after_checkpoint_height = CheckpointList::new(network)
|
|
.max_height()
|
|
.add(min_after_checkpoint_blocks)
|
|
.expect("hard-coded checkpoint height is far below Height::MAX");
|
|
|
|
let target_block_spacing = NetworkUpgrade::target_spacing_for_height(network, Height::MAX);
|
|
let max_block_spacing =
|
|
NetworkUpgrade::minimum_difficulty_spacing_for_height(network, Height::MAX);
|
|
|
|
// We expect the state height to increase at least once in this interval.
|
|
//
|
|
// Most chain forks are 1-7 blocks long.
|
|
//
|
|
// TODO: remove the target_block_spacing multiplier,
|
|
// after fixing slow syncing near tip (#3375)
|
|
let min_state_block_interval = max_block_spacing.unwrap_or(target_block_spacing * 4) * 2;
|
|
|
|
// Formatted string for logging.
|
|
let max_block_spacing = max_block_spacing
|
|
.map(|duration| duration.to_string())
|
|
.unwrap_or_else(|| "None".to_string());
|
|
|
|
// The last time we downloaded and verified at least one block.
|
|
//
|
|
// Initialized to the start time to simplify the code.
|
|
let mut last_state_change_time = Utc::now();
|
|
|
|
// The state tip height, when we last downloaded and verified at least one block.
|
|
//
|
|
// Initialized to the genesis height to simplify the code.
|
|
let mut last_state_change_height = Height(0);
|
|
|
|
loop {
|
|
let now = Utc::now();
|
|
let is_syncer_stopped = sync_status.is_close_to_tip();
|
|
|
|
if let Some(estimated_height) =
|
|
latest_chain_tip.estimate_network_chain_tip_height(network, now)
|
|
{
|
|
// The estimate/actual race doesn't matter here,
|
|
// because we're only using it for metrics and logging.
|
|
let current_height = latest_chain_tip
|
|
.best_tip_height()
|
|
.expect("unexpected empty state: estimate requires a block height");
|
|
|
|
// Work out the sync progress towards the estimated tip.
|
|
let sync_progress = f64::from(current_height.0) / f64::from(estimated_height.0);
|
|
let sync_percent = format!(
|
|
"{:.frac$} %",
|
|
sync_progress * 100.0,
|
|
frac = SYNC_PERCENT_FRAC_DIGITS,
|
|
);
|
|
|
|
let remaining_sync_blocks = estimated_height - current_height;
|
|
|
|
// Work out how long it has been since the state height has increased.
|
|
//
|
|
// Non-finalized forks can decrease the height, we only want to track increases.
|
|
if current_height > last_state_change_height {
|
|
last_state_change_height = current_height;
|
|
last_state_change_time = now;
|
|
}
|
|
|
|
let time_since_last_state_block = last_state_change_time.signed_duration_since(now);
|
|
|
|
// TODO:
|
|
// - log progress, remaining blocks, and remaining time to next network upgrade
|
|
// - add some of this info to the metrics
|
|
|
|
if time_since_last_state_block > min_state_block_interval {
|
|
// The state tip height hasn't increased for a long time.
|
|
//
|
|
// Block verification can fail if the local node's clock is wrong.
|
|
warn!(
|
|
%sync_percent,
|
|
?current_height,
|
|
%time_since_last_state_block,
|
|
%target_block_spacing,
|
|
%max_block_spacing,
|
|
?is_syncer_stopped,
|
|
"chain updates have stalled, \
|
|
state height has not increased for {} minutes. \
|
|
Hint: check your network connection, \
|
|
and your computer clock and time zone",
|
|
time_since_last_state_block.num_minutes(),
|
|
);
|
|
} else if is_syncer_stopped && remaining_sync_blocks > MIN_SYNC_WARNING_BLOCKS {
|
|
// We've stopped syncing blocks, but we estimate we're a long way from the tip.
|
|
//
|
|
// TODO: warn after fixing slow syncing near tip (#3375)
|
|
info!(
|
|
%sync_percent,
|
|
?current_height,
|
|
?remaining_sync_blocks,
|
|
?after_checkpoint_height,
|
|
%time_since_last_state_block,
|
|
"initial sync is very slow, or estimated tip is wrong. \
|
|
Hint: check your network connection, \
|
|
and your computer clock and time zone",
|
|
);
|
|
} else if is_syncer_stopped && current_height <= after_checkpoint_height {
|
|
// We've stopped syncing blocks,
|
|
// but we're below the minimum height estimated from our checkpoints.
|
|
let min_minutes_after_checkpoint_update: i64 = div_ceil(
|
|
i64::from(MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE)
|
|
* POST_BLOSSOM_POW_TARGET_SPACING,
|
|
60,
|
|
);
|
|
|
|
warn!(
|
|
%sync_percent,
|
|
?current_height,
|
|
?remaining_sync_blocks,
|
|
?after_checkpoint_height,
|
|
%time_since_last_state_block,
|
|
"initial sync is very slow, and state is below the highest checkpoint. \
|
|
Hint: check your network connection, \
|
|
and your computer clock and time zone. \
|
|
Dev Hint: were the checkpoints updated in the last {} minutes?",
|
|
min_minutes_after_checkpoint_update,
|
|
);
|
|
} else if is_syncer_stopped {
|
|
// We've stayed near the tip for a while, and we've stopped syncing lots of blocks.
|
|
// So we're mostly using gossiped blocks now.
|
|
info!(
|
|
%sync_percent,
|
|
?current_height,
|
|
?remaining_sync_blocks,
|
|
%time_since_last_state_block,
|
|
"finished initial sync to chain tip, using gossiped blocks",
|
|
);
|
|
} else if remaining_sync_blocks <= MAX_CLOSE_TO_TIP_BLOCKS {
|
|
// We estimate we're near the tip, but we have been syncing lots of blocks recently.
|
|
// We might also be using some gossiped blocks.
|
|
info!(
|
|
%sync_percent,
|
|
?current_height,
|
|
?remaining_sync_blocks,
|
|
%time_since_last_state_block,
|
|
"close to finishing initial sync, \
|
|
confirming using syncer and gossiped blocks",
|
|
);
|
|
} else {
|
|
// We estimate we're far from the tip, and we've been syncing lots of blocks.
|
|
info!(
|
|
%sync_percent,
|
|
?current_height,
|
|
?remaining_sync_blocks,
|
|
%time_since_last_state_block,
|
|
"estimated progress to chain tip",
|
|
);
|
|
}
|
|
} else {
|
|
let sync_percent = format!("{:.frac$} %", 0.0f64, frac = SYNC_PERCENT_FRAC_DIGITS,);
|
|
|
|
if is_syncer_stopped {
|
|
// We've stopped syncing blocks,
|
|
// but we haven't downloaded and verified the genesis block.
|
|
warn!(
|
|
%sync_percent,
|
|
current_height = %"None",
|
|
"initial sync can't download and verify the genesis block. \
|
|
Hint: check your network connection, \
|
|
and your computer clock and time zone",
|
|
);
|
|
} else {
|
|
// We're waiting for the genesis block to be committed to the state,
|
|
// before we can estimate the best chain tip.
|
|
info!(
|
|
%sync_percent,
|
|
current_height = %"None",
|
|
"initial sync is waiting to download the genesis block",
|
|
);
|
|
}
|
|
}
|
|
|
|
tokio::time::sleep(LOG_INTERVAL).await;
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Runnable for StartCmd {
|
|
/// Start the application.
|
|
fn run(&self) {
|
|
info!("Starting zebrad");
|
|
let rt = app_writer()
|
|
.state_mut()
|
|
.components
|
|
.get_downcast_mut::<TokioComponent>()
|
|
.expect("TokioComponent should be available")
|
|
.rt
|
|
.take();
|
|
|
|
rt.expect("runtime should not already be taken")
|
|
.run(self.start());
|
|
|
|
info!("stopping zebrad");
|
|
}
|
|
}
|
|
|
|
impl config::Override<ZebradConfig> for StartCmd {
|
|
// Process the given command line options, overriding settings from
|
|
// a configuration file using explicit flags taken from command-line
|
|
// arguments.
|
|
fn override_config(&self, mut config: ZebradConfig) -> Result<ZebradConfig, FrameworkError> {
|
|
if !self.filters.is_empty() {
|
|
config.tracing.filter = Some(self.filters.join(","));
|
|
}
|
|
|
|
Ok(config)
|
|
}
|
|
}
|