Ignore AlreadyInChain error in the syncer (#2890)
* Ignore AlreadyInChain error in the syncer * Split Cancelled errors; add them to should_restart_sync exceptions * Also filter 'block is already comitted'; try to detect a wrong downcast
This commit is contained in:
parent
a5d1467624
commit
84f2c07fbc
|
@ -48,6 +48,7 @@ pub struct BlockVerifier<S, V> {
|
||||||
|
|
||||||
// TODO: dedupe with crate::error::BlockError
|
// TODO: dedupe with crate::error::BlockError
|
||||||
#[non_exhaustive]
|
#[non_exhaustive]
|
||||||
|
#[allow(missing_docs)]
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
pub enum VerifyBlockError {
|
pub enum VerifyBlockError {
|
||||||
#[error("unable to verify depth for block {hash} from chain state during block verification")]
|
#[error("unable to verify depth for block {hash} from chain state during block verification")]
|
||||||
|
|
|
@ -836,6 +836,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Error)]
|
#[derive(Debug, Error)]
|
||||||
|
#[allow(missing_docs)]
|
||||||
pub enum VerifyCheckpointError {
|
pub enum VerifyCheckpointError {
|
||||||
#[error("checkpoint request after the final checkpoint has been verified")]
|
#[error("checkpoint request after the final checkpoint has been verified")]
|
||||||
Finished,
|
Finished,
|
||||||
|
|
|
@ -53,9 +53,12 @@ pub mod chain;
|
||||||
#[allow(missing_docs)]
|
#[allow(missing_docs)]
|
||||||
pub mod error;
|
pub mod error;
|
||||||
|
|
||||||
|
pub use block::VerifyBlockError;
|
||||||
|
pub use checkpoint::VerifyCheckpointError;
|
||||||
pub use checkpoint::MAX_CHECKPOINT_BYTE_COUNT;
|
pub use checkpoint::MAX_CHECKPOINT_BYTE_COUNT;
|
||||||
pub use checkpoint::MAX_CHECKPOINT_HEIGHT_GAP;
|
pub use checkpoint::MAX_CHECKPOINT_HEIGHT_GAP;
|
||||||
pub use config::Config;
|
pub use config::Config;
|
||||||
|
pub use error::BlockError;
|
||||||
|
|
||||||
/// A boxed [`std::error::Error`].
|
/// A boxed [`std::error::Error`].
|
||||||
pub type BoxError = Box<dyn std::error::Error + Send + Sync + 'static>;
|
pub type BoxError = Box<dyn std::error::Error + Send + Sync + 'static>;
|
||||||
|
|
|
@ -15,10 +15,15 @@ use zebra_chain::{
|
||||||
block::{self, Block},
|
block::{self, Block},
|
||||||
parameters::genesis_hash,
|
parameters::genesis_hash,
|
||||||
};
|
};
|
||||||
|
use zebra_consensus::{
|
||||||
|
chain::VerifyChainError, BlockError, VerifyBlockError, VerifyCheckpointError,
|
||||||
|
};
|
||||||
use zebra_network as zn;
|
use zebra_network as zn;
|
||||||
use zebra_state as zs;
|
use zebra_state as zs;
|
||||||
|
|
||||||
use crate::{config::ZebradConfig, BoxError};
|
use crate::{
|
||||||
|
components::sync::downloads::BlockDownloadVerifyError, config::ZebradConfig, BoxError,
|
||||||
|
};
|
||||||
|
|
||||||
mod downloads;
|
mod downloads;
|
||||||
mod gossip;
|
mod gossip;
|
||||||
|
@ -315,10 +320,7 @@ where
|
||||||
tracing::trace!(?hash, "verified and committed block to state");
|
tracing::trace!(?hash, "verified and committed block to state");
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
if format!("{:?}", e).contains("AlreadyVerified {") {
|
if Self::should_restart_sync(e) {
|
||||||
tracing::debug!(error = ?e, "block was already verified, possibly from a previous sync run, continuing");
|
|
||||||
} else {
|
|
||||||
tracing::warn!(?e, "error downloading and verifying block");
|
|
||||||
continue 'sync;
|
continue 'sync;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -352,10 +354,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
if format!("{:?}", e).contains("AlreadyVerified {") {
|
if Self::should_restart_sync(e) {
|
||||||
tracing::debug!(error = ?e, "block was already verified, possibly from a previous sync run, continuing");
|
|
||||||
} else {
|
|
||||||
tracing::warn!(?e, "error downloading and verifying block");
|
|
||||||
continue 'sync;
|
continue 'sync;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -715,4 +714,62 @@ where
|
||||||
self.downloads.in_flight() as f64
|
self.downloads.in_flight() as f64
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return if the sync should be restarted based on the given error
|
||||||
|
/// from the block downloader and verifier stream.
|
||||||
|
fn should_restart_sync(e: BlockDownloadVerifyError) -> bool {
|
||||||
|
match e {
|
||||||
|
BlockDownloadVerifyError::Invalid(VerifyChainError::Checkpoint(
|
||||||
|
VerifyCheckpointError::AlreadyVerified { .. },
|
||||||
|
)) => {
|
||||||
|
tracing::debug!(error = ?e, "block was already verified, possibly from a previous sync run, continuing");
|
||||||
|
false
|
||||||
|
}
|
||||||
|
BlockDownloadVerifyError::Invalid(VerifyChainError::Block(
|
||||||
|
VerifyBlockError::Block {
|
||||||
|
source: BlockError::AlreadyInChain(_, _),
|
||||||
|
},
|
||||||
|
)) => {
|
||||||
|
tracing::debug!(error = ?e, "block is already in chain, possibly from a previous sync run, continuing");
|
||||||
|
false
|
||||||
|
}
|
||||||
|
BlockDownloadVerifyError::Invalid(VerifyChainError::Block(
|
||||||
|
VerifyBlockError::Commit(ref source),
|
||||||
|
)) if format!("{:?}", source).contains("block is already committed to the state") => {
|
||||||
|
// TODO: improve this by checking the type
|
||||||
|
// https://github.com/ZcashFoundation/zebra/issues/2908
|
||||||
|
tracing::debug!(error = ?e, "block is already committed, possibly from a previous sync run, continuing");
|
||||||
|
false
|
||||||
|
}
|
||||||
|
BlockDownloadVerifyError::CancelledDuringDownload
|
||||||
|
| BlockDownloadVerifyError::CancelledDuringVerification => {
|
||||||
|
tracing::debug!(error = ?e, "block verification was cancelled, continuing");
|
||||||
|
false
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// download_and_verify downcasts errors from the block verifier
|
||||||
|
// into VerifyChainError, and puts the result inside one of the
|
||||||
|
// BlockDownloadVerifyError enumerations. This downcast could
|
||||||
|
// become incorrect e.g. after some refactoring, and it is difficult
|
||||||
|
// to write a test to check it. The test below is a best-effort
|
||||||
|
// attempt to catch if that happens and log it.
|
||||||
|
// TODO: add a proper test and remove this
|
||||||
|
// https://github.com/ZcashFoundation/zebra/issues/2909
|
||||||
|
let err_str = format!("{:?}", e);
|
||||||
|
if err_str.contains("AlreadyVerified")
|
||||||
|
|| err_str.contains("AlreadyInChain")
|
||||||
|
|| err_str.contains("block is already committed to the state")
|
||||||
|
{
|
||||||
|
tracing::error!(?e,
|
||||||
|
"a BlockDownloadVerifyError that should have been filtered out was detected, \
|
||||||
|
which possibly indicates a programming error in the downcast inside \
|
||||||
|
zebrad::components::sync::downloads::Downloads::download_and_verify"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::warn!(?e, "error downloading and verifying block");
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,7 @@ use futures::{
|
||||||
stream::{FuturesUnordered, Stream},
|
stream::{FuturesUnordered, Stream},
|
||||||
};
|
};
|
||||||
use pin_project::pin_project;
|
use pin_project::pin_project;
|
||||||
|
use thiserror::Error;
|
||||||
use tokio::{sync::oneshot, task::JoinHandle};
|
use tokio::{sync::oneshot, task::JoinHandle};
|
||||||
use tower::{hedge, Service, ServiceExt};
|
use tower::{hedge, Service, ServiceExt};
|
||||||
use tracing_futures::Instrument;
|
use tracing_futures::Instrument;
|
||||||
|
@ -33,6 +34,26 @@ impl<Request: Clone> hedge::Policy<Request> for AlwaysHedge {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Errors that can occur while downloading and verifying a block.
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub enum BlockDownloadVerifyError {
|
||||||
|
#[error("error downloading block")]
|
||||||
|
DownloadFailed(#[source] BoxError),
|
||||||
|
|
||||||
|
#[error("error from the verifier service")]
|
||||||
|
VerifierError(#[source] BoxError),
|
||||||
|
|
||||||
|
#[error("block did not pass consensus validation")]
|
||||||
|
Invalid(#[from] zebra_consensus::chain::VerifyChainError),
|
||||||
|
|
||||||
|
#[error("block download / verification was cancelled during download")]
|
||||||
|
CancelledDuringDownload,
|
||||||
|
|
||||||
|
#[error("block download / verification was cancelled during verification")]
|
||||||
|
CancelledDuringVerification,
|
||||||
|
}
|
||||||
|
|
||||||
/// Represents a [`Stream`] of download and verification tasks during chain sync.
|
/// Represents a [`Stream`] of download and verification tasks during chain sync.
|
||||||
#[pin_project]
|
#[pin_project]
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
@ -54,7 +75,8 @@ where
|
||||||
// Internal downloads state
|
// Internal downloads state
|
||||||
/// A list of pending block download and verify tasks.
|
/// A list of pending block download and verify tasks.
|
||||||
#[pin]
|
#[pin]
|
||||||
pending: FuturesUnordered<JoinHandle<Result<block::Hash, (BoxError, block::Hash)>>>,
|
pending:
|
||||||
|
FuturesUnordered<JoinHandle<Result<block::Hash, (BlockDownloadVerifyError, block::Hash)>>>,
|
||||||
|
|
||||||
/// A list of channels that can be used to cancel pending block download and
|
/// A list of channels that can be used to cancel pending block download and
|
||||||
/// verify tasks.
|
/// verify tasks.
|
||||||
|
@ -68,7 +90,7 @@ where
|
||||||
ZV: Service<Arc<Block>, Response = block::Hash, Error = BoxError> + Send + Clone + 'static,
|
ZV: Service<Arc<Block>, Response = block::Hash, Error = BoxError> + Send + Clone + 'static,
|
||||||
ZV::Future: Send,
|
ZV::Future: Send,
|
||||||
{
|
{
|
||||||
type Item = Result<block::Hash, BoxError>;
|
type Item = Result<block::Hash, BlockDownloadVerifyError>;
|
||||||
|
|
||||||
fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
|
fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
|
||||||
let this = self.project();
|
let this = self.project();
|
||||||
|
@ -163,9 +185,9 @@ where
|
||||||
_ = &mut cancel_rx => {
|
_ = &mut cancel_rx => {
|
||||||
tracing::trace!("task cancelled prior to download completion");
|
tracing::trace!("task cancelled prior to download completion");
|
||||||
metrics::counter!("sync.cancelled.download.count", 1);
|
metrics::counter!("sync.cancelled.download.count", 1);
|
||||||
return Err("canceled block_fetch_verify".into())
|
return Err(BlockDownloadVerifyError::CancelledDuringDownload)
|
||||||
}
|
}
|
||||||
rsp = block_req => rsp?,
|
rsp = block_req => rsp.map_err(BlockDownloadVerifyError::DownloadFailed)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
let block = if let zn::Response::Blocks(blocks) = rsp {
|
let block = if let zn::Response::Blocks(blocks) = rsp {
|
||||||
|
@ -178,14 +200,18 @@ where
|
||||||
};
|
};
|
||||||
metrics::counter!("sync.downloaded.block.count", 1);
|
metrics::counter!("sync.downloaded.block.count", 1);
|
||||||
|
|
||||||
let rsp = verifier.ready_and().await?.call(block);
|
let rsp = verifier
|
||||||
|
.ready_and()
|
||||||
|
.await
|
||||||
|
.map_err(BlockDownloadVerifyError::VerifierError)?
|
||||||
|
.call(block);
|
||||||
// TODO: if the verifier and cancel are both ready, which should
|
// TODO: if the verifier and cancel are both ready, which should
|
||||||
// we prefer? (Currently, select! chooses one at random.)
|
// we prefer? (Currently, select! chooses one at random.)
|
||||||
let verification = tokio::select! {
|
let verification = tokio::select! {
|
||||||
_ = &mut cancel_rx => {
|
_ = &mut cancel_rx => {
|
||||||
tracing::trace!("task cancelled prior to verification");
|
tracing::trace!("task cancelled prior to verification");
|
||||||
metrics::counter!("sync.cancelled.verify.count", 1);
|
metrics::counter!("sync.cancelled.verify.count", 1);
|
||||||
return Err("canceled block_fetch_verify".into())
|
return Err(BlockDownloadVerifyError::CancelledDuringVerification)
|
||||||
}
|
}
|
||||||
verification = rsp => verification,
|
verification = rsp => verification,
|
||||||
};
|
};
|
||||||
|
@ -193,7 +219,12 @@ where
|
||||||
metrics::counter!("sync.verified.block.count", 1);
|
metrics::counter!("sync.verified.block.count", 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
verification
|
verification.map_err(|err| {
|
||||||
|
match err.downcast::<zebra_consensus::chain::VerifyChainError>() {
|
||||||
|
Ok(e) => BlockDownloadVerifyError::Invalid(*e),
|
||||||
|
Err(e) => BlockDownloadVerifyError::VerifierError(e),
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
.in_current_span()
|
.in_current_span()
|
||||||
// Tack the hash onto the error so we can remove the cancel handle
|
// Tack the hash onto the error so we can remove the cancel handle
|
||||||
|
|
Loading…
Reference in New Issue