node: poller timeout (#1670)
* node: poller timeout Change-Id: Ia324f1ac482fa9c5bea2b501970f0b22b16e67ce
* Add a comment explaining the readiness change
* Add a comment explaining why we are using a timeout
* Retry if polling fails
This commit is contained in:
parent
9657f41561
commit
dd2b8e2bd2
|
@ -604,11 +604,12 @@ func runNode(cmd *cobra.Command, args []string) {
|
|||
if *injectiveContract == "" {
|
||||
logger.Fatal("Please specify --injectiveContract")
|
||||
}
|
||||
if *arbitrumRPC == "" {
|
||||
logger.Fatal("Please specify --arbitrumRPC")
|
||||
}
|
||||
if *arbitrumContract == "" {
|
||||
logger.Fatal("Please specify --arbitrumContract")
|
||||
if *arbitrumRPC != "" {
|
||||
if *arbitrumContract == "" {
|
||||
logger.Fatal("If --arbitrumRPC is specified, then --arbitrumContract is required")
|
||||
}
|
||||
} else if *arbitrumContract != "" {
|
||||
logger.Fatal("If --arbitrumContract is specified, then --arbitrumRPC is required")
|
||||
}
|
||||
if *xplaWS == "" {
|
||||
logger.Fatal("Please specify --xplaWS")
|
||||
|
|
|
@ -64,7 +64,14 @@ func (b *BlockPollConnector) run(ctx context.Context) error {
|
|||
timer.Stop()
|
||||
return ctx.Err()
|
||||
case <-timer.C:
|
||||
lastBlock, err = b.pollBlocks(ctx, logger, lastBlock)
|
||||
for count := 0; count < 3; count++ {
|
||||
lastBlock, err = b.pollBlocks(ctx, logger, lastBlock)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
logger.Error("polling encountered an error", zap.Error(err))
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
b.errFeed.Send("polling encountered an error")
|
||||
}
|
||||
|
@ -74,12 +81,19 @@ func (b *BlockPollConnector) run(ctx context.Context) error {
|
|||
}
|
||||
|
||||
func (b *BlockPollConnector) pollBlocks(ctx context.Context, logger *zap.Logger, lastBlock *NewBlock) (lastPublishedBlock *NewBlock, retErr error) {
|
||||
// Some of the testnet providers (like the one we are using for Arbitrum) limit how many transactions we can do. When that happens, the call hangs.
|
||||
// Use a timeout so that the call will fail and the runnable will get restarted. This should not happen in mainnet, but if it does, we will need to
|
||||
// investigate why the runnable is dying and fix the underlying problem.
|
||||
|
||||
timeout, cancel := context.WithTimeout(ctx, 15*time.Second)
|
||||
defer cancel()
|
||||
|
||||
lastPublishedBlock = lastBlock
|
||||
|
||||
// Fetch the latest block on the chain
|
||||
// We could do this on every iteration such that if a new block is created while this function is being executed,
|
||||
// it would automatically fetch new blocks but in order to reduce API load this will be done on the next iteration.
|
||||
latestBlock, err := b.getBlock(ctx, logger, nil)
|
||||
latestBlock, err := b.getBlock(timeout, logger, nil)
|
||||
if err != nil {
|
||||
logger.Error("failed to look up latest block",
|
||||
zap.Uint64("lastSeenBlock", lastBlock.Number.Uint64()), zap.Error(err))
|
||||
|
@ -93,7 +107,7 @@ func (b *BlockPollConnector) pollBlocks(ctx context.Context, logger *zap.Logger,
|
|||
|
||||
// Try to fetch the next block between lastBlock and latestBlock
|
||||
nextBlockNumber := new(big.Int).Add(lastPublishedBlock.Number, big.NewInt(1))
|
||||
block, err := b.getBlock(ctx, logger, nextBlockNumber)
|
||||
block, err := b.getBlock(timeout, logger, nextBlockNumber)
|
||||
if err != nil {
|
||||
logger.Error("failed to fetch next block",
|
||||
zap.Uint64("block", nextBlockNumber.Uint64()), zap.Error(err))
|
||||
|
@ -101,7 +115,7 @@ func (b *BlockPollConnector) pollBlocks(ctx context.Context, logger *zap.Logger,
|
|||
}
|
||||
|
||||
if b.finalizer != nil {
|
||||
finalized, err := b.finalizer.IsBlockFinalized(ctx, block)
|
||||
finalized, err := b.finalizer.IsBlockFinalized(timeout, block)
|
||||
if err != nil {
|
||||
logger.Error("failed to check block finalization",
|
||||
zap.Uint64("block", block.Number.Uint64()), zap.Error(err))
|
||||
|
|
|
@ -634,6 +634,10 @@ func (w *Watcher) Run(ctx context.Context) error {
|
|||
}
|
||||
}()
|
||||
|
||||
// Now that the init is complete, peg readiness. That will also happen when we process a new head, but chains
|
||||
// that wait for finality may take a while to receive the first block and we don't want to hold up the init.
|
||||
readiness.SetReady(w.readiness)
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
|
|
Loading…
Reference in New Issue