Node/CCQ: Load testing tweaks (#3857)

This commit is contained in:
bruce-riley 2024-03-26 13:02:41 -05:00 committed by GitHub
parent 2d680058cf
commit aa22a2b950
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 30 additions and 13 deletions

View File

@ -57,7 +57,7 @@ func (p *PendingResponses) Add(r *PendingResponse) bool {
return false return false
} }
p.pendingResponses[signature] = r p.pendingResponses[signature] = r
p.updateMetricsAlreadyLocked() p.updateMetricsAlreadyLocked(nil)
return true return true
} }
@ -75,6 +75,7 @@ func (p *PendingResponses) Remove(r *PendingResponse) {
p.mu.Lock() p.mu.Lock()
defer p.mu.Unlock() defer p.mu.Unlock()
delete(p.pendingResponses, signature) delete(p.pendingResponses, signature)
p.updateMetricsAlreadyLocked(r)
} }
func (p *PendingResponses) NumPending() int { func (p *PendingResponses) NumPending() int {
@ -83,8 +84,14 @@ func (p *PendingResponses) NumPending() int {
return len(p.pendingResponses) return len(p.pendingResponses)
} }
func (p *PendingResponses) updateMetricsAlreadyLocked() { func (p *PendingResponses) updateMetricsAlreadyLocked(reqRemoved *PendingResponse) {
counts := make(map[vaa.ChainID]float64) counts := make(map[vaa.ChainID]float64)
if reqRemoved != nil {
// We may have removed the last request for a chain. Make sure we always update that chain.
for _, pcr := range reqRemoved.queryRequest.PerChainQueries {
counts[pcr.ChainId] = 0
}
}
for _, pr := range p.pendingResponses { for _, pr := range p.pendingResponses {
for _, pcr := range pr.queryRequest.PerChainQueries { for _, pcr := range pr.queryRequest.PerChainQueries {
counts[pcr.ChainId] = counts[pcr.ChainId] + 1 counts[pcr.ChainId] = counts[pcr.ChainId] + 1

View File

@ -120,8 +120,8 @@ func (g *G) initializeBasic(rootCtxCancel context.CancelFunc) {
// Cross Chain Query Handler channels // Cross Chain Query Handler channels
g.chainQueryReqC = make(map[vaa.ChainID]chan *query.PerChainQueryInternal) g.chainQueryReqC = make(map[vaa.ChainID]chan *query.PerChainQueryInternal)
g.signedQueryReqC = makeChannelPair[*gossipv1.SignedQueryRequest](query.SignedQueryRequestChannelSize) g.signedQueryReqC = makeChannelPair[*gossipv1.SignedQueryRequest](query.SignedQueryRequestChannelSize)
g.queryResponseC = makeChannelPair[*query.PerChainQueryResponseInternal](0) g.queryResponseC = makeChannelPair[*query.PerChainQueryResponseInternal](query.QueryResponseBufferSize)
g.queryResponsePublicationC = makeChannelPair[*query.QueryResponsePublication](0) g.queryResponsePublicationC = makeChannelPair[*query.QueryResponsePublication](query.QueryResponsePublicationChannelSize)
// Guardian set state managed by processor // Guardian set state managed by processor
g.gst = common.NewGuardianSetState(nil) g.gst = common.NewGuardianSetState(nil)

View File

@ -355,7 +355,7 @@ func GuardianOptionWatchers(watcherConfigs []watchers.WatcherConfig, ibcWatcherC
// aggregate per-chain msgC into msgC. // aggregate per-chain msgC into msgC.
// SECURITY defense-in-depth: This way we enforce that a watcher must set the msg.EmitterChain to its chainId, which makes the code easier to audit // SECURITY defense-in-depth: This way we enforce that a watcher must set the msg.EmitterChain to its chainId, which makes the code easier to audit
for _, chainId := range vaa.GetAllNetworkIDs() { for _, chainId := range vaa.GetAllNetworkIDs() {
chainQueryResponseC[chainId] = make(chan *query.PerChainQueryResponseInternal) chainQueryResponseC[chainId] = make(chan *query.PerChainQueryResponseInternal, query.QueryResponseBufferSize)
go func(c <-chan *query.PerChainQueryResponseInternal, chainId vaa.ChainID) { go func(c <-chan *query.PerChainQueryResponseInternal, chainId vaa.ChainID) {
for { for {
select { select {

View File

@ -29,10 +29,16 @@ const (
AuditInterval = time.Second AuditInterval = time.Second
// SignedQueryRequestChannelSize is the buffer size of the incoming query request channel. // SignedQueryRequestChannelSize is the buffer size of the incoming query request channel.
SignedQueryRequestChannelSize = 50 SignedQueryRequestChannelSize = 500
// QueryRequestBufferSize is the buffer size of the per-network query request channel. // QueryRequestBufferSize is the buffer size of the per-network query request channel.
QueryRequestBufferSize = 25 QueryRequestBufferSize = 250
// QueryResponseBufferSize is the buffer size of the single query response channel from the watchers.
QueryResponseBufferSize = 500
// QueryResponsePublicationChannelSize is the buffer size of the single query response channel back to the P2P publisher.
QueryResponsePublicationChannelSize = 500
) )
func NewQueryHandler( func NewQueryHandler(

View File

@ -115,7 +115,7 @@ func (w *Watcher) ccqBackfillInit(ctx context.Context) error {
} }
if len(newBlocks) == 0 { if len(newBlocks) == 0 {
w.ccqLogger.Error("failed to read any more blocks, giving up on the backfill") w.ccqLogger.Warn("failed to read any more blocks, giving up on the backfill")
break break
} }

View File

@ -93,6 +93,7 @@ func (w *SolanaWatcher) ccqBaseHandleSolanaAccountQueryRequest(
requestId string, requestId string,
isRetry bool, isRetry bool,
publisher ccqCustomPublisher, publisher ccqCustomPublisher,
numFastRetries int,
) { ) {
rCtx, cancel := context.WithTimeout(ctx, rpcTimeout) rCtx, cancel := context.WithTimeout(ctx, rpcTimeout)
defer cancel() defer cancel()
@ -123,7 +124,7 @@ func (w *SolanaWatcher) ccqBaseHandleSolanaAccountQueryRequest(
// Read the accounts. // Read the accounts.
info, err := w.getMultipleAccountsWithOpts(rCtx, accounts, &params) info, err := w.getMultipleAccountsWithOpts(rCtx, accounts, &params)
if err != nil { if err != nil {
if w.ccqCheckForMinSlotContext(ctx, queryRequest, req, requestId, err, giveUpTime, !isRetry, tag, publisher) { if w.ccqCheckForMinSlotContext(ctx, queryRequest, req, requestId, err, giveUpTime, !isRetry, tag, publisher, numFastRetries) {
// Return without posting a response because a goroutine was created to handle it. // Return without posting a response because a goroutine was created to handle it.
return return
} }
@ -216,6 +217,7 @@ func (w *SolanaWatcher) ccqBaseHandleSolanaAccountQueryRequest(
zap.Uint64("blockTime", uint64(*block.BlockTime)), zap.Uint64("blockTime", uint64(*block.BlockTime)),
zap.String("blockHash", hex.EncodeToString(block.Blockhash[:])), zap.String("blockHash", hex.EncodeToString(block.Blockhash[:])),
zap.Uint64("blockHeight", *block.BlockHeight), zap.Uint64("blockHeight", *block.BlockHeight),
zap.Int("numFastRetries", numFastRetries),
) )
// Publish the response using the custom publisher. // Publish the response using the custom publisher.
@ -236,6 +238,7 @@ func (w *SolanaWatcher) ccqCheckForMinSlotContext(
log bool, log bool,
tag string, tag string,
publisher ccqCustomPublisher, publisher ccqCustomPublisher,
numFastRetries int,
) bool { ) bool {
if req.MinContextSlot == 0 { if req.MinContextSlot == 0 {
return false return false
@ -274,7 +277,7 @@ func (w *SolanaWatcher) ccqCheckForMinSlotContext(
} }
// Kick off the retry after a short delay. // Kick off the retry after a short delay.
go w.ccqSleepAndRetryAccountQuery(ctx, queryRequest, req, requestId, currentSlot, currentSlotFromError, giveUpTime, log, tag, publisher) go w.ccqSleepAndRetryAccountQuery(ctx, queryRequest, req, requestId, currentSlot, currentSlotFromError, giveUpTime, log, tag, publisher, numFastRetries)
return true return true
} }
@ -290,6 +293,7 @@ func (w *SolanaWatcher) ccqSleepAndRetryAccountQuery(
log bool, log bool,
tag string, tag string,
publisher ccqCustomPublisher, publisher ccqCustomPublisher,
numFastRetries int,
) { ) {
if log { if log {
w.ccqLogger.Info("minimum context slot has not been reached, will retry shortly", w.ccqLogger.Info("minimum context slot has not been reached, will retry shortly",
@ -307,7 +311,7 @@ func (w *SolanaWatcher) ccqSleepAndRetryAccountQuery(
w.ccqLogger.Info("initiating fast retry", zap.String("requestId", requestId)) w.ccqLogger.Info("initiating fast retry", zap.String("requestId", requestId))
} }
w.ccqBaseHandleSolanaAccountQueryRequest(ctx, queryRequest, req, giveUpTime, tag, requestId, true, publisher) w.ccqBaseHandleSolanaAccountQueryRequest(ctx, queryRequest, req, giveUpTime, tag, requestId, true, publisher, numFastRetries+1)
} }
// ccqIsMinContextSlotError parses an error to see if it is "Minimum context slot has not been reached". If it is, it returns the slot number // ccqIsMinContextSlotError parses an error to see if it is "Minimum context slot has not been reached". If it is, it returns the slot number
@ -365,7 +369,7 @@ func (w *SolanaWatcher) ccqHandleSolanaAccountQueryRequest(ctx context.Context,
) )
publisher := ccqSolanaAccountPublisher{w} publisher := ccqSolanaAccountPublisher{w}
w.ccqBaseHandleSolanaAccountQueryRequest(ctx, queryRequest, req, giveUpTime, "sol_account", requestId, false, publisher) w.ccqBaseHandleSolanaAccountQueryRequest(ctx, queryRequest, req, giveUpTime, "sol_account", requestId, false, publisher, 0)
} }
// ccqSolanaAccountPublisher is the publisher for the sol_account query. All it has to do is forward the response passed in to the watcher, as is. // ccqSolanaAccountPublisher is the publisher for the sol_account query. All it has to do is forward the response passed in to the watcher, as is.
@ -427,7 +431,7 @@ func (w *SolanaWatcher) ccqHandleSolanaPdaQueryRequest(ctx context.Context, quer
} }
// Execute the standard sol_account query passing in the publisher to publish a sol_pda response. // Execute the standard sol_account query passing in the publisher to publish a sol_pda response.
w.ccqBaseHandleSolanaAccountQueryRequest(ctx, queryRequest, acctReq, giveUpTime, "sol_pda", requestId, false, publisher) w.ccqBaseHandleSolanaAccountQueryRequest(ctx, queryRequest, acctReq, giveUpTime, "sol_pda", requestId, false, publisher, 0)
} }
// ccqPdaPublisher is a custom publisher that publishes a sol_pda response. // ccqPdaPublisher is a custom publisher that publishes a sol_pda response.