Node: Gossip Topic Split (#4000)

* WIP: topic split

* Add cutover support

* Remove measurements that were moved to PR#3988

* Code review rework

* Code review rework

---------

Co-authored-by: Evan Gray <battledingo@gmail.com>
This commit is contained in:
bruce-riley 2024-08-05 11:28:10 -05:00 committed by GitHub
parent c2496cd2af
commit d3533aa2ca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 924 additions and 357 deletions

View File

@ -20,8 +20,14 @@ import (
)
const (
// gossipSendBufferSize configures the size of the gossip network send buffer
gossipSendBufferSize = 5000
// gossipControlSendBufferSize configures the size of the send buffer for outbound control gossip messages (gossipControlSendC)
gossipControlSendBufferSize = 100
// gossipAttestationSendBufferSize configures the size of the send buffer for outbound attestation gossip messages (gossipAttestationSendC)
gossipAttestationSendBufferSize = 5000
// gossipVaaSendBufferSize configures the size of the send buffer for outbound VAA gossip messages (gossipVaaSendC)
gossipVaaSendBufferSize = 5000
// inboundObservationBufferSize configures the size of the obsvC channel that contains observations from other Guardians.
// One observation takes roughly 0.1ms to process on one core, so the whole queue could be processed in 1s
@ -69,8 +75,10 @@ type G struct {
runnables map[string]supervisor.Runnable
// various channels
// Outbound gossip message queue (needs to be read/write because p2p needs read/write)
gossipSendC chan []byte
// Outbound gossip message queues (need to be read/write because p2p needs read/write)
gossipControlSendC chan []byte
gossipAttestationSendC chan []byte
gossipVaaSendC chan []byte
// Inbound observations. This is read/write because the processor also writes to it as a fast-path when handling locally made observations.
obsvC chan *common.MsgWithTimeStamp[gossipv1.SignedObservation]
// Finalized guardian observations aggregated across all chains
@ -109,7 +117,9 @@ func (g *G) initializeBasic(rootCtxCancel context.CancelFunc) {
g.rootCtxCancel = rootCtxCancel
// Setup various channels...
g.gossipSendC = make(chan []byte, gossipSendBufferSize)
g.gossipControlSendC = make(chan []byte, gossipControlSendBufferSize)
g.gossipAttestationSendC = make(chan []byte, gossipAttestationSendBufferSize)
g.gossipVaaSendC = make(chan []byte, gossipVaaSendBufferSize)
g.obsvC = make(chan *common.MsgWithTimeStamp[gossipv1.SignedObservation], inboundObservationBufferSize)
g.msgC = makeChannelPair[*common.MessagePublication](0)
g.setC = makeChannelPair[*common.GuardianSet](1) // This needs to be a buffered channel because of a circular dependency between processor and accountant during startup.

View File

@ -67,7 +67,9 @@ func GuardianOptionP2P(p2pKey libp2p_crypto.PrivKey, networkId, bootstrapPeers,
g.obsvC,
g.signedInC.writeC,
g.obsvReqC.writeC,
g.gossipSendC,
g.gossipControlSendC,
g.gossipAttestationSendC,
g.gossipVaaSendC,
g.obsvReqSendC.readC,
g.acct,
g.gov,
@ -564,7 +566,8 @@ func GuardianOptionProcessor() *GuardianOption {
g.db,
g.msgC.readC,
g.setC.readC,
g.gossipSendC,
g.gossipAttestationSendC,
g.gossipVaaSendC,
g.obsvC,
g.obsvReqSendC.writeC,
g.signedInC.readC,

View File

@ -0,0 +1,87 @@
package p2p
import (
"fmt"
"strings"
"sync/atomic"
"time"
"go.uber.org/zap"
)
const (
	// Per-network gossip cut over times. An empty string means no cut over time is configured for that network.
	// The format of these times is very picky. Please use the exact format specified by cutOverFmtStr!
	mainnetCutOverTimeStr = ""
	testnetCutOverTimeStr = ""
	devnetCutOverTimeStr  = ""

	// cutOverFmtStr is the Go reference-time layout used to parse and format the cut over times.
	cutOverFmtStr = "2006-01-02T15:04:05-0700"
)
// gossipCutoverCompleteFlag records whether the gossip cutover time has passed. Its zero value is false,
// so the cutover is treated as not complete until something stores true.
var gossipCutoverCompleteFlag atomic.Bool

// GossipCutoverComplete reports whether the cutover time has passed, meaning we should publish on the new topics.
// It is an atomic read of the global flag.
func GossipCutoverComplete() bool {
	cutoverDone := gossipCutoverCompleteFlag.Load()
	return cutoverDone
}
// evaluateGossipCutOver looks up the cut over time for the given network ID, checks it against the current time,
// and stores the result in the global cutover flag. If the cut over time is still in the future, it starts a
// goroutine that sleeps until that time and then sets the flag to true. Any error from evaluating the cut over
// time is returned unchanged, in which case the flag is left untouched.
func evaluateGossipCutOver(logger *zap.Logger, networkID string) error {
	cutOverTimeStr := getCutOverTimeStr(networkID)

	complete, wait, err := evaluateGossipCutOverImpl(logger, cutOverTimeStr, time.Now())
	if err != nil {
		return err
	}

	gossipCutoverCompleteFlag.Store(complete)
	logger.Info("evaluated cutover flag", zap.Bool("cutOverFlag", GossipCutoverComplete()), zap.String("cutOverTime", cutOverTimeStr), zap.String("component", "p2pco"))

	// A zero delay means there is nothing to wait for.
	if wait == time.Duration(0) {
		return nil
	}

	// Wait for the cut over time in the background and then flip the flag.
	go func() {
		time.Sleep(wait)
		logger.Info("time to cut over to new gossip topics", zap.String("cutOverTime", cutOverTimeStr), zap.String("component", "p2pco"))
		gossipCutoverCompleteFlag.Store(true)
	}()

	return nil
}
// evaluateGossipCutOverImpl performs the actual cut over check. It is a separate function for testing purposes.
//
// It returns (cutoverComplete, delay, err):
//   - If cutOverTimeStr is empty, no cut over is configured: (false, 0, nil).
//   - If cutOverTimeStr does not parse using cutOverFmtStr, a wrapped parse error is returned.
//   - If the cut over time is at or before now, the cutover is complete: (true, 0, nil).
//   - Otherwise the cutover is pending: (false, d, nil), where d > 0 is the time remaining until the cut over.
func evaluateGossipCutOverImpl(logger *zap.Logger, cutOverTimeStr string, now time.Time) (bool, time.Duration, error) {
	if cutOverTimeStr == "" {
		return false, 0, nil
	}

	cutOverTime, err := time.Parse(cutOverFmtStr, cutOverTimeStr)
	if err != nil {
		return false, 0, fmt.Errorf(`failed to parse cut over time: %w`, err)
	}

	// Use "not after" rather than "before" so that a cut over time exactly equal to now counts as passed.
	// Otherwise we would report "not complete" together with a zero delay, which is indistinguishable from
	// the "no cut over configured" result, and a caller that only waits on a non-zero delay would never cut over.
	if !cutOverTime.After(now) {
		logger.Info("cut over time has passed, should use new gossip topics", zap.String("cutOverTime", cutOverTime.Format(cutOverFmtStr)), zap.String("now", now.Format(cutOverFmtStr)), zap.String("component", "p2pco"))
		return true, 0, nil
	}

	// If we get here, we need to wait for the cutover and then switch the global flag.
	// The delay is strictly positive because cutOverTime is after now.
	delay := cutOverTime.Sub(now)
	logger.Info("still waiting for cut over time",
		zap.Stringer("cutOverTime", cutOverTime),
		zap.String("now", now.Format(cutOverFmtStr)),
		zap.Stringer("delay", delay),
		zap.String("component", "p2pco"))

	return false, delay, nil
}
// getCutOverTimeStr selects the cut over time string for the given network ID. A network ID containing
// "/mainnet/" gets the mainnet value, one containing "/testnet/" gets the testnet value, and anything
// else falls back to the devnet value.
func getCutOverTimeStr(networkID string) string { //nolint:unparam
	switch {
	case strings.Contains(networkID, "/mainnet/"):
		return mainnetCutOverTimeStr
	case strings.Contains(networkID, "/testnet/"):
		return testnetCutOverTimeStr
	default:
		return devnetCutOverTimeStr
	}
}

View File

@ -0,0 +1,81 @@
package p2p
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"
)
// TestVerifyCutOverTime checks that every configured (non-empty) cut over time constant parses with cutOverFmtStr.
func TestVerifyCutOverTime(t *testing.T) {
	configured := []string{mainnetCutOverTimeStr, testnetCutOverTimeStr, devnetCutOverTimeStr}
	for _, timeStr := range configured {
		// An empty string means no cut over time is set, so there is nothing to validate.
		if timeStr == "" {
			continue
		}

		_, err := time.Parse(cutOverFmtStr, timeStr)
		require.NoError(t, err)
	}
}
func TestGetCutOverTimeStr(t *testing.T) {
assert.Equal(t, mainnetCutOverTimeStr, getCutOverTimeStr("blah/blah/mainnet/blah"))
assert.Equal(t, testnetCutOverTimeStr, getCutOverTimeStr("blah/blah/testnet/blah"))
assert.Equal(t, devnetCutOverTimeStr, getCutOverTimeStr("blah/blah/devnet/blah"))
}
func TestCutOverDisabled(t *testing.T) {
logger := zap.NewNop()
cutOverTimeStr := ""
now, err := time.Parse(cutOverFmtStr, "2023-10-06T18:19:00-0000")
require.NoError(t, err)
cuttingOver, delay, err := evaluateGossipCutOverImpl(logger, cutOverTimeStr, now)
require.NoError(t, err)
assert.False(t, cuttingOver)
assert.Equal(t, time.Duration(0), delay)
}
func TestCutOverInvalidTime(t *testing.T) {
logger := zap.NewNop()
cutOverTimeStr := "Hello World"
now, err := time.Parse(cutOverFmtStr, "2023-10-06T18:19:00-0000")
require.NoError(t, err)
_, _, err = evaluateGossipCutOverImpl(logger, cutOverTimeStr, now)
require.EqualError(t, err, `failed to parse cut over time: parsing time "Hello World" as "2006-01-02T15:04:05-0700": cannot parse "Hello World" as "2006"`)
}
func TestCutOverAlreadyHappened(t *testing.T) {
logger := zap.NewNop()
cutOverTimeStr := "2023-10-06T18:18:00-0000"
now, err := time.Parse(cutOverFmtStr, "2023-10-06T18:19:00-0000")
require.NoError(t, err)
cuttingOver, delay, err := evaluateGossipCutOverImpl(logger, cutOverTimeStr, now)
require.NoError(t, err)
assert.True(t, cuttingOver)
assert.Equal(t, time.Duration(0), delay)
}
func TestCutOverDelayRequired(t *testing.T) {
logger := zap.NewNop()
cutOverTimeStr := "2023-10-06T18:18:00-0000"
now, err := time.Parse(cutOverFmtStr, "2023-10-06T17:18:00-0000")
require.NoError(t, err)
cuttingOver, delay, err := evaluateGossipCutOverImpl(logger, cutOverTimeStr, now)
require.NoError(t, err)
assert.False(t, cuttingOver)
assert.Equal(t, time.Duration(60*time.Minute), delay)
}

File diff suppressed because it is too large Load Diff

View File

@ -23,40 +23,42 @@ type (
gst *common.GuardianSetState
rootCtxCancel context.CancelFunc
// obsvC is optional and can be set with `WithSignedObservationListener`.
obsvC chan<- *common.MsgWithTimeStamp[gossipv1.SignedObservation]
// obsvRecvC is optional and can be set with `WithSignedObservationListener`.
obsvRecvC chan<- *common.MsgWithTimeStamp[gossipv1.SignedObservation]
// obsvReqC is optional and can be set with `WithObservationRequestListener`.
obsvReqC chan<- *gossipv1.ObservationRequest
// obsvReqRecvC is optional and can be set with `WithObservationRequestListener`.
obsvReqRecvC chan<- *gossipv1.ObservationRequest
// signedInC is optional and can be set with `WithSignedVAAListener`.
signedInC chan<- *gossipv1.SignedVAAWithQuorum
// signedIncomingVaaRecvC is optional and can be set with `WithSignedVAAListener`.
signedIncomingVaaRecvC chan<- *gossipv1.SignedVAAWithQuorum
// signedGovCfg is optional and can be set with `WithChainGovernorConfigListener`.
signedGovCfg chan *gossipv1.SignedChainGovernorConfig
// signedGovCfgRecvC is optional and can be set with `WithChainGovernorConfigListener`.
signedGovCfgRecvC chan *gossipv1.SignedChainGovernorConfig
// WithChainGovernorStatusListener is optional and can be set with `WithChainGovernorStatusListener`.
signedGovSt chan *gossipv1.SignedChainGovernorStatus
// signedGovStatusRecvC is optional and can be set with `WithChainGovernorStatusListener`.
signedGovStatusRecvC chan *gossipv1.SignedChainGovernorStatus
// disableHeartbeatVerify is optional and can be set with `WithDisableHeartbeatVerify` or `WithGuardianOptions`.
disableHeartbeatVerify bool
// The following options are guardian specific. Set with `WithGuardianOptions`.
nodeName string
gk *ecdsa.PrivateKey
gossipSendC chan []byte
obsvReqSendC <-chan *gossipv1.ObservationRequest
acct *accountant.Accountant
gov *governor.ChainGovernor
components *Components
ibcFeaturesFunc func() string
gatewayRelayerEnabled bool
ccqEnabled bool
signedQueryReqC chan<- *gossipv1.SignedQueryRequest
queryResponseReadC <-chan *query.QueryResponsePublication
ccqBootstrapPeers string
ccqPort uint
ccqAllowedPeers string
nodeName string
gk *ecdsa.PrivateKey
gossipControlSendC chan []byte
gossipAttestationSendC chan []byte
gossipVaaSendC chan []byte
obsvReqSendC <-chan *gossipv1.ObservationRequest
acct *accountant.Accountant
gov *governor.ChainGovernor
components *Components
ibcFeaturesFunc func() string
gatewayRelayerEnabled bool
ccqEnabled bool
signedQueryReqC chan<- *gossipv1.SignedQueryRequest
queryResponseReadC <-chan *query.QueryResponsePublication
ccqBootstrapPeers string
ccqPort uint
ccqAllowedPeers string
}
// RunOpt is used to specify optional parameters.
@ -96,41 +98,41 @@ func NewRunParams(
}
// WithSignedObservationListener is used to set the channel to receive `SignedObservation` messages.
func WithSignedObservationListener(obsvC chan<- *common.MsgWithTimeStamp[gossipv1.SignedObservation]) RunOpt {
func WithSignedObservationListener(obsvRecvC chan<- *common.MsgWithTimeStamp[gossipv1.SignedObservation]) RunOpt {
return func(p *RunParams) error {
p.obsvC = obsvC
p.obsvRecvC = obsvRecvC
return nil
}
}
// WithSignedVAAListener is used to set the channel to receive `SignedVAAWithQuorum` messages.
func WithSignedVAAListener(signedInC chan<- *gossipv1.SignedVAAWithQuorum) RunOpt {
func WithSignedVAAListener(signedIncomingVaaRecvC chan<- *gossipv1.SignedVAAWithQuorum) RunOpt {
return func(p *RunParams) error {
p.signedInC = signedInC
p.signedIncomingVaaRecvC = signedIncomingVaaRecvC
return nil
}
}
// WithObservationRequestListener is used to set the channel to receive `ObservationRequest` messages.
func WithObservationRequestListener(obsvReqC chan<- *gossipv1.ObservationRequest) RunOpt {
func WithObservationRequestListener(obsvReqRecvC chan<- *gossipv1.ObservationRequest) RunOpt {
return func(p *RunParams) error {
p.obsvReqC = obsvReqC
p.obsvReqRecvC = obsvReqRecvC
return nil
}
}
// WithChainGovernorConfigListener is used to set the channel to receive `SignedChainGovernorConfig` messages.
func WithChainGovernorConfigListener(signedGovCfg chan *gossipv1.SignedChainGovernorConfig) RunOpt {
func WithChainGovernorConfigListener(signedGovCfgRecvC chan *gossipv1.SignedChainGovernorConfig) RunOpt {
return func(p *RunParams) error {
p.signedGovCfg = signedGovCfg
p.signedGovCfgRecvC = signedGovCfgRecvC
return nil
}
}
// WithChainGovernorStatusListener is used to set the channel to receive `SignedChainGovernorStatus` messages.
func WithChainGovernorStatusListener(signedGovSt chan *gossipv1.SignedChainGovernorStatus) RunOpt {
func WithChainGovernorStatusListener(signedGovStatusRecvC chan *gossipv1.SignedChainGovernorStatus) RunOpt {
return func(p *RunParams) error {
p.signedGovSt = signedGovSt
p.signedGovStatusRecvC = signedGovStatusRecvC
return nil
}
}
@ -147,10 +149,12 @@ func WithDisableHeartbeatVerify(disableHeartbeatVerify bool) RunOpt {
func WithGuardianOptions(
nodeName string,
gk *ecdsa.PrivateKey,
obsvC chan<- *common.MsgWithTimeStamp[gossipv1.SignedObservation],
signedInC chan<- *gossipv1.SignedVAAWithQuorum,
obsvReqC chan<- *gossipv1.ObservationRequest,
gossipSendC chan []byte,
obsvRecvC chan<- *common.MsgWithTimeStamp[gossipv1.SignedObservation],
signedIncomingVaaRecvC chan<- *gossipv1.SignedVAAWithQuorum,
obsvReqRecvC chan<- *gossipv1.ObservationRequest,
gossipControlSendC chan []byte,
gossipAttestationSendC chan []byte,
gossipVaaSendC chan []byte,
obsvReqSendC <-chan *gossipv1.ObservationRequest,
acct *accountant.Accountant,
gov *governor.ChainGovernor,
@ -168,10 +172,12 @@ func WithGuardianOptions(
return func(p *RunParams) error {
p.nodeName = nodeName
p.gk = gk
p.obsvC = obsvC
p.signedInC = signedInC
p.obsvReqC = obsvReqC
p.gossipSendC = gossipSendC
p.obsvRecvC = obsvRecvC
p.signedIncomingVaaRecvC = signedIncomingVaaRecvC
p.obsvReqRecvC = obsvReqRecvC
p.gossipControlSendC = gossipControlSendC
p.gossipAttestationSendC = gossipAttestationSendC
p.gossipVaaSendC = gossipVaaSendC
p.obsvReqSendC = obsvReqSendC
p.acct = acct
p.gov = gov

View File

@ -143,7 +143,9 @@ func TestRunParamsWithGuardianOptions(t *testing.T) {
obsvC := make(chan<- *common.MsgWithTimeStamp[gossipv1.SignedObservation], 42)
signedInC := make(chan<- *gossipv1.SignedVAAWithQuorum, 42)
obsvReqC := make(chan<- *gossipv1.ObservationRequest, 42)
gossipSendC := make(chan []byte, 42)
gossipControlSendC := make(chan []byte, 42)
gossipAttestationSendC := make(chan []byte, 42)
gossipVaaSendC := make(chan []byte, 42)
obsvReqSendC := make(<-chan *gossipv1.ObservationRequest, 42)
acct := &accountant.Accountant{}
@ -172,7 +174,9 @@ func TestRunParamsWithGuardianOptions(t *testing.T) {
obsvC,
signedInC,
obsvReqC,
gossipSendC,
gossipControlSendC,
gossipAttestationSendC,
gossipVaaSendC,
obsvReqSendC,
acct,
gov,
@ -191,10 +195,12 @@ func TestRunParamsWithGuardianOptions(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, params)
assert.Equal(t, nodeName, params.nodeName)
assert.Equal(t, obsvC, params.obsvC)
assert.Equal(t, signedInC, params.signedInC)
assert.Equal(t, obsvReqC, params.obsvReqC)
assert.Equal(t, gossipSendC, params.gossipSendC)
assert.Equal(t, obsvC, params.obsvRecvC)
assert.Equal(t, signedInC, params.signedIncomingVaaRecvC)
assert.Equal(t, obsvReqC, params.obsvReqRecvC)
assert.Equal(t, gossipControlSendC, params.gossipControlSendC)
assert.Equal(t, gossipAttestationSendC, params.gossipAttestationSendC)
assert.Equal(t, gossipVaaSendC, params.gossipVaaSendC)
assert.Equal(t, obsvReqSendC, params.obsvReqSendC)
assert.Equal(t, acct, params.acct)
assert.Equal(t, gov, params.gov)

View File

@ -30,7 +30,9 @@ type G struct {
obsvC chan *node_common.MsgWithTimeStamp[gossipv1.SignedObservation]
obsvReqC chan *gossipv1.ObservationRequest
obsvReqSendC chan *gossipv1.ObservationRequest
sendC chan []byte
controlSendC chan []byte
attestationSendC chan []byte
vaaSendC chan []byte
signedInC chan *gossipv1.SignedVAAWithQuorum
priv p2pcrypto.PrivKey
gk *ecdsa.PrivateKey
@ -67,7 +69,9 @@ func NewG(t *testing.T, nodeName string) *G {
obsvC: make(chan *node_common.MsgWithTimeStamp[gossipv1.SignedObservation], cs),
obsvReqC: make(chan *gossipv1.ObservationRequest, cs),
obsvReqSendC: make(chan *gossipv1.ObservationRequest, cs),
sendC: make(chan []byte, cs),
controlSendC: make(chan []byte, cs),
attestationSendC: make(chan []byte, cs),
vaaSendC: make(chan []byte, cs),
signedInC: make(chan *gossipv1.SignedVAAWithQuorum, cs),
priv: p2ppriv,
gk: guardianpriv,
@ -91,7 +95,9 @@ func NewG(t *testing.T, nodeName string) *G {
case <-g.signedInC:
case <-g.signedGovCfg:
case <-g.signedGovSt:
case <-g.sendC:
case <-g.controlSendC:
case <-g.attestationSendC:
case <-g.vaaSendC:
}
}()
@ -178,7 +184,9 @@ func startGuardian(t *testing.T, ctx context.Context, g *G) {
g.obsvC,
g.signedInC,
g.obsvReqC,
g.sendC,
g.controlSendC,
g.attestationSendC,
g.vaaSendC,
g.obsvReqSendC,
g.acct,
g.gov,

View File

@ -57,7 +57,7 @@ func (p *Processor) broadcastSignature(
}
// Broadcast the observation.
p.gossipSendC <- msg
p.gossipAttestationSendC <- msg
observationsBroadcast.Inc()
hash := hex.EncodeToString(digest.Bytes())
@ -106,7 +106,7 @@ func (p *Processor) broadcastSignedVAA(v *vaa.VAA) {
}
// Broadcast the signed VAA.
p.gossipSendC <- msg
p.gossipVaaSendC <- msg
signedVAAsBroadcast.Inc()
if p.gatewayRelayer != nil {

View File

@ -228,7 +228,8 @@ func (p *Processor) handleCleanup(ctx context.Context) {
if err := common.PostObservationRequest(p.obsvReqSendC, req); err != nil {
p.logger.Warn("failed to broadcast re-observation request", zap.String("message_id", s.LoggingID()), zap.Error(err))
}
p.gossipSendC <- s.ourMsg
p.gossipAttestationSendC <- s.ourMsg
s.retryCtr++
s.nextRetry = time.Now().Add(nextRetryDuration(s.retryCtr))
aggregationStateRetries.Inc()

View File

@ -103,8 +103,13 @@ type Processor struct {
msgC <-chan *common.MessagePublication
// setC is a channel of guardian set updates
setC <-chan *common.GuardianSet
// gossipSendC is a channel of outbound messages to broadcast on p2p
gossipSendC chan<- []byte
// gossipAttestationSendC is a channel of outbound observation messages to broadcast on p2p
gossipAttestationSendC chan<- []byte
// gossipVaaSendC is a channel of outbound VAA messages to broadcast on p2p
gossipVaaSendC chan<- []byte
// obsvC is a channel of inbound decoded observations from p2p
obsvC chan *common.MsgWithTimeStamp[gossipv1.SignedObservation]
@ -162,7 +167,8 @@ func NewProcessor(
db *db.Database,
msgC <-chan *common.MessagePublication,
setC <-chan *common.GuardianSet,
gossipSendC chan<- []byte,
gossipAttestationSendC chan<- []byte,
gossipVaaSendC chan<- []byte,
obsvC chan *common.MsgWithTimeStamp[gossipv1.SignedObservation],
obsvReqSendC chan<- *gossipv1.ObservationRequest,
signedInC <-chan *gossipv1.SignedVAAWithQuorum,
@ -175,15 +181,16 @@ func NewProcessor(
) *Processor {
return &Processor{
msgC: msgC,
setC: setC,
gossipSendC: gossipSendC,
obsvC: obsvC,
obsvReqSendC: obsvReqSendC,
signedInC: signedInC,
gk: gk,
gst: gst,
db: db,
msgC: msgC,
setC: setC,
gossipAttestationSendC: gossipAttestationSendC,
gossipVaaSendC: gossipVaaSendC,
obsvC: obsvC,
obsvReqSendC: obsvReqSendC,
signedInC: signedInC,
gk: gk,
gst: gst,
db: db,
logger: supervisor.Logger(ctx),
state: &aggregationState{observationMap{}},