node/p2p: Enforce connection to bootstrap node on startup
This commit is contained in:
parent
372beb01fe
commit
c8fca0f5b9
|
@ -833,8 +833,10 @@ func runNode(cmd *cobra.Command, args []string) {
|
||||||
logger.Info("Error resolving guardian-0.guardian. Trying again...")
|
logger.Info("Error resolving guardian-0.guardian. Trying again...")
|
||||||
time.Sleep(time.Second)
|
time.Sleep(time.Second)
|
||||||
}
|
}
|
||||||
// TODO this is a hack. If this is not the bootstrap Guardian, we wait 5s such that the bootstrap Guardian has enough time to start.
|
// TODO this is a hack. If this is not the bootstrap Guardian, we wait 10s such that the bootstrap Guardian has enough time to start.
|
||||||
logger.Info("This is not a bootstrap Guardian. Waiting another 10 seconds so the bootstrap guardian to come online.")
|
// This may no longer be necessary because p2p.go now ensures that it can connect to at least one bootstrap peer and will
|
||||||
|
// exit the whole guardian if it is unable to. Sleeping here for a bit may reduce overall startup time by preventing unnecessary restarts, though.
|
||||||
|
logger.Info("This is not a bootstrap Guardian. Waiting another 10 seconds for the bootstrap guardian to come online.")
|
||||||
time.Sleep(time.Second * 10)
|
time.Sleep(time.Second * 10)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -542,6 +542,9 @@ func testConsensus(t *testing.T, testCases []testCase, numGuardians int) {
|
||||||
for i := 0; i < numGuardians; i++ {
|
for i := 0; i < numGuardians; i++ {
|
||||||
gRun := mockGuardianRunnable(gs, uint(i), obsDb)
|
gRun := mockGuardianRunnable(gs, uint(i), obsDb)
|
||||||
err := supervisor.Run(ctx, fmt.Sprintf("g-%d", i), gRun)
|
err := supervisor.Run(ctx, fmt.Sprintf("g-%d", i), gRun)
|
||||||
|
if i == 0 && numGuardians > 1 {
|
||||||
|
time.Sleep(time.Second) // give the bootstrap guardian some time to start up
|
||||||
|
}
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
}
|
}
|
||||||
logger.Info("All Guardians initiated.")
|
logger.Info("All Guardians initiated.")
|
||||||
|
|
|
@ -255,6 +255,49 @@ func Run(
|
||||||
return fmt.Errorf("failed to subscribe topic: %w", err)
|
return fmt.Errorf("failed to subscribe topic: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Make sure we connect to at least 1 bootstrap node (this is particularly important in a local devnet and CI
|
||||||
|
// as peer discovery can take a long time).
|
||||||
|
|
||||||
|
// Count number of successful connection attempts. If we fail to connect to any bootstrap peer, kill the service
|
||||||
|
// TODO: Currently, returning from this function will lead to rootCtxCancel() being called in the defer() above.
|
||||||
|
// The service will then be restarted by Tilt/Kubernetes
|
||||||
|
successes := 0
|
||||||
|
// Are we a bootstrap node? If so, it's okay to not have any peers.
|
||||||
|
bootstrapNode := false
|
||||||
|
|
||||||
|
for _, addr := range strings.Split(bootstrapPeers, ",") {
|
||||||
|
if addr == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ma, err := multiaddr.NewMultiaddr(addr)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("Invalid bootstrap address", zap.String("peer", addr), zap.Error(err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
pi, err := peer.AddrInfoFromP2pAddr(ma)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error("Invalid bootstrap address", zap.String("peer", addr), zap.Error(err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if pi.ID == h.ID() {
|
||||||
|
logger.Info("We're a bootstrap node")
|
||||||
|
bootstrapNode = true
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if err = h.Connect(ctx, *pi); err != nil {
|
||||||
|
logger.Error("Failed to connect to bootstrap peer", zap.String("peer", addr), zap.Error(err))
|
||||||
|
} else {
|
||||||
|
successes += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if successes == 0 && !bootstrapNode {
|
||||||
|
return fmt.Errorf("failed to connect to any bootstrap peer")
|
||||||
|
}
|
||||||
|
logger.Info("Connected to bootstrap peers", zap.Int("num", successes))
|
||||||
|
|
||||||
logger.Info("Node has been started", zap.String("peer_id", h.ID().String()),
|
logger.Info("Node has been started", zap.String("peer_id", h.ID().String()),
|
||||||
zap.String("addrs", fmt.Sprintf("%v", h.Addrs())))
|
zap.String("addrs", fmt.Sprintf("%v", h.Addrs())))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue