Chain Tip Estimate Test: Log chain progress while Zebra is syncing (#3495)

* Log chain progress while Zebra is syncing

This helps test if the chain tip estimate is accurate,
and helps diagnose problems during full sync tests.

* Update to the latest chain tip estimate API
This commit is contained in:
teor 2022-02-13 09:43:12 +10:00 committed by GitHub
parent fd8c727c31
commit 20ac7b1cae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 107 additions and 4 deletions

View File

@ -40,6 +40,8 @@
//! * Block Gossip Task //! * Block Gossip Task
//! * runs in the background and continuously queries the state for //! * runs in the background and continuously queries the state for
//! newly committed blocks to be gossiped to peers //! newly committed blocks to be gossiped to peers
//! * Progress Task
//! * logs progress towards the chain tip
//! //!
//! Mempool Transactions: //! Mempool Transactions:
//! * Mempool Service //! * Mempool Service
@ -52,20 +54,23 @@
//! * runs in the background and gossips newly added mempool transactions //! * runs in the background and gossips newly added mempool transactions
//! to peers //! to peers
use std::cmp::max; use std::{cmp::max, time::Duration};
use abscissa_core::{config, Command, FrameworkError, Options, Runnable}; use abscissa_core::{config, Command, FrameworkError, Options, Runnable};
use chrono::Utc;
use color_eyre::eyre::{eyre, Report}; use color_eyre::eyre::{eyre, Report};
use futures::FutureExt; use futures::FutureExt;
use tokio::{pin, select, sync::oneshot}; use tokio::{pin, select, sync::oneshot};
use tower::{builder::ServiceBuilder, util::BoxService}; use tower::{builder::ServiceBuilder, util::BoxService};
use tracing_futures::Instrument; use tracing_futures::Instrument;
use zebra_chain::{chain_tip::ChainTip, parameters::Network};
use crate::{ use crate::{
components::{ components::{
inbound::{self, InboundSetupData}, inbound::{self, InboundSetupData},
mempool::{self, Mempool}, mempool::{self, Mempool},
sync, sync::{self, SyncStatus},
tokio::{RuntimeRun, TokioComponent}, tokio::{RuntimeRun, TokioComponent},
ChainSync, Inbound, ChainSync, Inbound,
}, },
@ -146,7 +151,7 @@ impl StartCmd {
block_verifier: chain_verifier, block_verifier: chain_verifier,
mempool: mempool.clone(), mempool: mempool.clone(),
state, state,
latest_chain_tip, latest_chain_tip: latest_chain_tip.clone(),
}; };
setup_tx setup_tx
.send(setup_data) .send(setup_data)
@ -167,7 +172,7 @@ impl StartCmd {
&config.mempool, &config.mempool,
peer_set.clone(), peer_set.clone(),
mempool.clone(), mempool.clone(),
sync_status, sync_status.clone(),
chain_tip_change, chain_tip_change,
); );
@ -178,6 +183,11 @@ impl StartCmd {
.in_current_span(), .in_current_span(),
); );
let progress_task_handle = tokio::spawn(
Self::update_progress(config.network.network, latest_chain_tip, sync_status)
.in_current_span(),
);
info!("spawned initial Zebra tasks"); info!("spawned initial Zebra tasks");
// TODO: put tasks into an ongoing FuturesUnordered and a startup FuturesUnordered? // TODO: put tasks into an ongoing FuturesUnordered and a startup FuturesUnordered?
@ -187,6 +197,7 @@ impl StartCmd {
pin!(mempool_crawler_task_handle); pin!(mempool_crawler_task_handle);
pin!(mempool_queue_checker_task_handle); pin!(mempool_queue_checker_task_handle);
pin!(tx_gossip_task_handle); pin!(tx_gossip_task_handle);
pin!(progress_task_handle);
// startup tasks // startup tasks
let groth16_download_handle_fused = (&mut groth16_download_handle).fuse(); let groth16_download_handle_fused = (&mut groth16_download_handle).fuse();
@ -221,6 +232,13 @@ impl StartCmd {
.map(|_| info!("transaction gossip task exited")) .map(|_| info!("transaction gossip task exited"))
.map_err(|e| eyre!(e)), .map_err(|e| eyre!(e)),
progress_result = &mut progress_task_handle => {
progress_result
.expect("unexpected panic in the chain progress task");
info!("chain progress task exited");
Ok(())
}
// Unlike other tasks, we expect the download task to finish while Zebra is running. // Unlike other tasks, we expect the download task to finish while Zebra is running.
groth16_download_result = &mut groth16_download_handle_fused => { groth16_download_result = &mut groth16_download_handle_fused => {
groth16_download_result groth16_download_result
@ -275,6 +293,91 @@ impl StartCmd {
), ),
) )
} }
/// Logs Zebra's estimated progress towards the chain tip.
async fn update_progress(
network: Network,
latest_chain_tip: impl ChainTip,
sync_status: SyncStatus,
) {
// The amount of time between progress logs.
const LOG_INTERVAL: Duration = Duration::from_secs(60);
// The number of blocks we consider to be close to the tip.
//
// Most chain forks are 1-7 blocks long.
const MAX_CLOSE_TO_TIP_BLOCKS: i32 = 1;
// Skip slow sync warnings when we are this close to the tip.
//
// In testing, we've seen warnings around 30 blocks.
//
// TODO: replace with `MAX_CLOSE_TO_TIP_BLOCKS` after fixing slow syncing near tip (#3375)
const MIN_SYNC_WARNING_BLOCKS: i32 = 60;
loop {
let now = Utc::now();
let is_close_to_tip = sync_status.is_close_to_tip();
if let Some(estimated_height) =
latest_chain_tip.estimate_network_chain_tip_height(network, now)
{
// The estimate/actual race doesn't matter here,
// because we're only using it for metrics and logging.
let current_height = latest_chain_tip
.best_tip_height()
.expect("unexpected empty state: estimate requires a block height");
let sync_progress = f64::from(current_height.0) / f64::from(estimated_height.0);
let sync_percent = format!("{:.3}", sync_progress * 100.0);
let remaining_sync_blocks = estimated_height - current_height;
// TODO:
// - estimate the remaining sync time
// - log progress, remaining blocks, and remaining time to next network upgrade
// - also add this info to the metrics
if is_close_to_tip && remaining_sync_blocks > MIN_SYNC_WARNING_BLOCKS {
// We've stopped syncing blocks, but we estimate we're a long way from the tip.
//
// TODO: warn after fixing slow syncing near tip (#3375)
info!(
%sync_percent,
?remaining_sync_blocks,
"initial sync might have stalled, or estimated tip is wrong. \
Hint: check that your computer's clock and time zone are correct"
);
} else if is_close_to_tip {
// We've stayed near the tip for a while, and we've stopped syncing lots of blocks.
//
// TODO: reduce this log level after full sync tests are merged,
// and have been stable for a while (#1592)
info!(
%sync_percent,
?remaining_sync_blocks,
"finished initial sync to chain tip"
);
} else if remaining_sync_blocks <= MAX_CLOSE_TO_TIP_BLOCKS {
// We estimate we're near the tip, but we have been syncing lots of blocks recently.
info!(
%sync_percent,
?remaining_sync_blocks,
"finished initial sync to chain tip"
);
} else {
// We estimate we're far from the tip, and we've been syncing lots of blocks.
info!(
%sync_percent,
?remaining_sync_blocks,
"estimated progress to chain tip"
);
}
}
tokio::time::sleep(LOG_INTERVAL).await;
}
}
} }
impl Runnable for StartCmd { impl Runnable for StartCmd {