From 146c08d5af7d6fb8128b305381b4685cd75ce042 Mon Sep 17 00:00:00 2001 From: Leo Date: Mon, 10 Jan 2022 20:13:33 +0100 Subject: [PATCH] node/pkg/ethereum: correctly log "not found" error commit-id:23a5fd80 --- node/pkg/ethereum/watcher.go | 46 +++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/node/pkg/ethereum/watcher.go b/node/pkg/ethereum/watcher.go index 2423b45c..a2b13a3d 100644 --- a/node/pkg/ethereum/watcher.go +++ b/node/pkg/ethereum/watcher.go @@ -306,6 +306,7 @@ func (e *Watcher) Run(ctx context.Context) error { zap.Stringer("current_blockhash", currentHash), zap.String("eth_network", e.networkName), ) + ethMessagesOrphaned.WithLabelValues(e.networkName, "timeout").Inc() delete(e.pending, key) continue } @@ -315,7 +316,32 @@ func (e *Watcher) Run(ctx context.Context) error { timeout, cancel = context.WithTimeout(ctx, 15*time.Second) tx, err := c.TransactionReceipt(timeout, pLock.message.TxHash) cancel() - if err != nil && err != rpc.ErrNoResult { + + // If the node returns an error after waiting expectedConfirmation blocks, + // it means the chain reorged and the transaction was orphaned. The + // TransactionReceipt call is using the same websocket connection than the + // head notifications, so it's guaranteed to be atomic. + // + // Check multiple possible error cases - the node seems to return a + // "not found" error most of the time, but it could conceivably also + // return a nil tx or rpc.ErrNoResult. + if tx == nil || err == rpc.ErrNoResult || (err != nil && err.Error() == "not found") { + logger.Info("tx was orphaned", + zap.Stringer("tx", pLock.message.TxHash), + zap.Stringer("blockhash", key.BlockHash), + zap.Stringer("emitter_address", key.EmitterAddress), + zap.Uint64("sequence", key.Sequence), + zap.Stringer("current_block", ev.Number), + zap.Stringer("current_blockhash", currentHash), + zap.String("eth_network", e.networkName), + zap.Error(err)) + delete(e.pending, key) + ethMessagesOrphaned.WithLabelValues(e.networkName, "not_found").Inc() + continue + } + + // Any error other than "not found" is likely transient - we retry next block. + if err != nil { logger.Warn("transaction could not be fetched", zap.Stringer("tx", pLock.message.TxHash), zap.Stringer("blockhash", key.BlockHash), @@ -327,19 +353,10 @@ func (e *Watcher) Run(ctx context.Context) error { zap.Error(err)) continue } - if tx == nil || err == rpc.ErrNoResult { - logger.Info("tx was orphaned", - zap.Stringer("tx", pLock.message.TxHash), - zap.Stringer("blockhash", key.BlockHash), - zap.Stringer("emitter_address", key.EmitterAddress), - zap.Uint64("sequence", key.Sequence), - zap.Stringer("current_block", ev.Number), - zap.Stringer("current_blockhash", currentHash), - zap.String("eth_network", e.networkName)) - delete(e.pending, key) - ethMessagesOrphaned.WithLabelValues(e.networkName, "not_found").Inc() - continue - } + + // It's possible for a transaction to be orphaned and then included in a different block + // but with the same tx hash. Drop the observation (it will be re-observed and needs to + // wait for the full confirmation time again). if tx.BlockHash != key.BlockHash { logger.Info("tx got dropped and mined in a different block; the message should have been reobserved", zap.Stringer("tx", pLock.message.TxHash), @@ -353,6 +370,7 @@ func (e *Watcher) Run(ctx context.Context) error { ethMessagesOrphaned.WithLabelValues(e.networkName, "blockhash_mismatch").Inc() continue } + logger.Info("observation confirmed", zap.Stringer("tx", pLock.message.TxHash), zap.Stringer("blockhash", key.BlockHash),