From dd2b8e2bd2f542c53da4e84b6ff996d4b438c508 Mon Sep 17 00:00:00 2001 From: bruce-riley <96066700+bruce-riley@users.noreply.github.com> Date: Thu, 6 Oct 2022 00:19:31 -0500 Subject: [PATCH] node: poller timeout (#1670) * node: poller timeout Change-Id: Ia324f1ac482fa9c5bea2b501970f0b22b16e67ce * Add a comment explaining readiness change * Add comment explaining why we are using a timeout * Retry if polling fails --- node/cmd/guardiand/node.go | 11 ++++++----- node/pkg/watchers/evm/connectors/poller.go | 22 ++++++++++++++++++---- node/pkg/watchers/evm/watcher.go | 4 ++++ 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/node/cmd/guardiand/node.go b/node/cmd/guardiand/node.go index fc6d48095..06bbc8e5f 100644 --- a/node/cmd/guardiand/node.go +++ b/node/cmd/guardiand/node.go @@ -604,11 +604,12 @@ func runNode(cmd *cobra.Command, args []string) { if *injectiveContract == "" { logger.Fatal("Please specify --injectiveContract") } - if *arbitrumRPC == "" { - logger.Fatal("Please specify --arbitrumRPC") - } - if *arbitrumContract == "" { - logger.Fatal("Please specify --arbitrumContract") + if *arbitrumRPC != "" { + if *arbitrumContract == "" { + logger.Fatal("If --arbitrumRPC is specified, then --arbitrumContract is required") + } + } else if *arbitrumContract != "" { + logger.Fatal("If --arbitrumContract is specified, then --arbitrumRPC is required") } if *xplaWS == "" { logger.Fatal("Please specify --xplaWS") diff --git a/node/pkg/watchers/evm/connectors/poller.go b/node/pkg/watchers/evm/connectors/poller.go index 3531af01d..15683a9f7 100644 --- a/node/pkg/watchers/evm/connectors/poller.go +++ b/node/pkg/watchers/evm/connectors/poller.go @@ -64,7 +64,14 @@ func (b *BlockPollConnector) run(ctx context.Context) error { timer.Stop() return ctx.Err() case <-timer.C: - lastBlock, err = b.pollBlocks(ctx, logger, lastBlock) + for count := 0; count < 3; count++ { + lastBlock, err = b.pollBlocks(ctx, logger, lastBlock) + if err == nil { + break + } + 
logger.Error("polling encountered an error", zap.Error(err)) + } + if err != nil { b.errFeed.Send("polling encountered an error") } @@ -74,12 +81,19 @@ func (b *BlockPollConnector) run(ctx context.Context) error { } func (b *BlockPollConnector) pollBlocks(ctx context.Context, logger *zap.Logger, lastBlock *NewBlock) (lastPublishedBlock *NewBlock, retErr error) { + // Some of the testnet providers (like the one we are using for Arbitrum) limit how many transactions we can do. When that happens, the call hangs. + // Use a timeout so that the call will fail and the runnable will get restarted. This should not happen in mainnet, but if it does, we will need to + // investigate why the runnable is dying and fix the underlying problem. + + timeout, cancel := context.WithTimeout(ctx, 15*time.Second) + defer cancel() + lastPublishedBlock = lastBlock // Fetch the latest block on the chain // We could do this on every iteration such that if a new block is created while this function is being executed, // it would automatically fetch new blocks but in order to reduce API load this will be done on the next iteration. 
- latestBlock, err := b.getBlock(ctx, logger, nil) + latestBlock, err := b.getBlock(timeout, logger, nil) if err != nil { logger.Error("failed to look up latest block", zap.Uint64("lastSeenBlock", lastBlock.Number.Uint64()), zap.Error(err)) @@ -93,7 +107,7 @@ func (b *BlockPollConnector) pollBlocks(ctx context.Context, logger *zap.Logger, // Try to fetch the next block between lastBlock and latestBlock nextBlockNumber := new(big.Int).Add(lastPublishedBlock.Number, big.NewInt(1)) - block, err := b.getBlock(ctx, logger, nextBlockNumber) + block, err := b.getBlock(timeout, logger, nextBlockNumber) if err != nil { logger.Error("failed to fetch next block", zap.Uint64("block", nextBlockNumber.Uint64()), zap.Error(err)) @@ -101,7 +115,7 @@ func (b *BlockPollConnector) pollBlocks(ctx context.Context, logger *zap.Logger, } if b.finalizer != nil { - finalized, err := b.finalizer.IsBlockFinalized(ctx, block) + finalized, err := b.finalizer.IsBlockFinalized(timeout, block) if err != nil { logger.Error("failed to check block finalization", zap.Uint64("block", block.Number.Uint64()), zap.Error(err)) diff --git a/node/pkg/watchers/evm/watcher.go b/node/pkg/watchers/evm/watcher.go index ebcc5749a..ca1d273e1 100644 --- a/node/pkg/watchers/evm/watcher.go +++ b/node/pkg/watchers/evm/watcher.go @@ -634,6 +634,10 @@ func (w *Watcher) Run(ctx context.Context) error { } }() + // Now that the init is complete, peg readiness. That will also happen when we process a new head, but chains + // that wait for finality may take a while to receive the first block and we don't want to hold up the init. + readiness.SetReady(w.readiness) + select { case <-ctx.Done(): return ctx.Err()