tendermint/node/node.go

615 lines
19 KiB
Go
Raw Normal View History

package node
2014-07-08 00:02:04 -07:00
import (
2015-06-14 15:18:17 -07:00
"bytes"
"encoding/json"
2017-01-28 08:27:13 -08:00
"errors"
"fmt"
"net"
2015-04-10 02:12:17 -07:00
"net/http"
"strings"
2014-07-08 15:33:26 -07:00
2017-01-28 08:27:13 -08:00
abci "github.com/tendermint/abci/types"
crypto "github.com/tendermint/go-crypto"
wire "github.com/tendermint/go-wire"
cmn "github.com/tendermint/tmlibs/common"
dbm "github.com/tendermint/tmlibs/db"
"github.com/tendermint/tmlibs/log"
2015-11-01 11:34:08 -08:00
bc "github.com/tendermint/tendermint/blockchain"
cfg "github.com/tendermint/tendermint/config"
2015-11-01 11:34:08 -08:00
"github.com/tendermint/tendermint/consensus"
2017-11-19 20:22:25 -08:00
"github.com/tendermint/tendermint/evidence"
2015-04-01 17:30:16 -07:00
mempl "github.com/tendermint/tendermint/mempool"
"github.com/tendermint/tendermint/p2p"
"github.com/tendermint/tendermint/p2p/trust"
2015-12-01 20:12:01 -08:00
"github.com/tendermint/tendermint/proxy"
rpccore "github.com/tendermint/tendermint/rpc/core"
grpccore "github.com/tendermint/tendermint/rpc/grpc"
rpc "github.com/tendermint/tendermint/rpc/lib"
rpcserver "github.com/tendermint/tendermint/rpc/lib/server"
2015-04-01 17:30:16 -07:00
sm "github.com/tendermint/tendermint/state"
"github.com/tendermint/tendermint/state/txindex"
"github.com/tendermint/tendermint/state/txindex/kv"
"github.com/tendermint/tendermint/state/txindex/null"
"github.com/tendermint/tendermint/types"
"github.com/tendermint/tendermint/version"
2014-07-08 00:02:04 -07:00
2017-01-28 08:27:13 -08:00
_ "net/http/pprof"
)
2015-04-27 07:25:11 -07:00
//------------------------------------------------------------------------------
// DBContext specifies config information for loading a new DB.
type DBContext struct {
ID string
Config *cfg.Config
}
// DBProvider takes a DBContext and returns an instantiated DB.
type DBProvider func(*DBContext) (dbm.DB, error)
// DefaultDBProvider returns a database using the DBBackend and DBDir
// specified in the ctx.Config.
func DefaultDBProvider(ctx *DBContext) (dbm.DB, error) {
return dbm.NewDB(ctx.ID, ctx.Config.DBBackend, ctx.Config.DBDir()), nil
}
// GenesisDocProvider returns a GenesisDoc.
// It allows the GenesisDoc to be pulled from sources other than the
// filesystem, for instance from a distributed key-value store cluster.
type GenesisDocProvider func() (*types.GenesisDoc, error)
// DefaultGenesisDocProviderFunc returns a GenesisDocProvider that loads
// the GenesisDoc from the config.GenesisFile() on the filesystem.
func DefaultGenesisDocProviderFunc(config *cfg.Config) GenesisDocProvider {
return func() (*types.GenesisDoc, error) {
return types.GenesisDocFromFile(config.GenesisFile())
}
}
2017-09-21 14:08:17 -07:00
// NodeProvider takes a config and a logger and returns a ready to go Node.
type NodeProvider func(*cfg.Config, log.Logger) (*Node, error)
// DefaultNewNode returns a Tendermint node with default settings for the
// PrivValidator, ClientCreator, GenesisDoc, and DBProvider.
// It implements NodeProvider.
func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) {
return NewNode(config,
types.LoadOrGenPrivValidatorFS(config.PrivValidatorFile()),
proxy.DefaultClientCreator(config.ProxyApp, config.ABCI, config.DBDir()),
DefaultGenesisDocProviderFunc(config),
DefaultDBProvider,
logger)
}
//------------------------------------------------------------------------------
// Node is the highest level interface to a full Tendermint node.
// It includes all configuration information and running services.
2014-07-09 18:33:44 -07:00
type Node struct {
cmn.BaseService
// config
config *cfg.Config
2017-09-18 15:12:31 -07:00
genesisDoc *types.GenesisDoc // initial validator set
privValidator types.PrivValidator // local node's validator key
// network
sw *p2p.Switch // p2p connections
addrBook *p2p.AddrBook // known peers
trustMetricStore *trust.TrustMetricStore // trust metrics for all peers
// services
2017-12-27 19:09:48 -08:00
eventBus *types.EventBus // pub/sub for services
stateDB dbm.DB
blockStore *bc.BlockStore // store the blockchain to disk
bcReactor *bc.BlockchainReactor // for fast-syncing
mempoolReactor *mempl.MempoolReactor // for gossipping transactions
consensusState *consensus.ConsensusState // latest consensus state
consensusReactor *consensus.ConsensusReactor // for participating in the consensus
evidencePool *evidence.EvidencePool // tracking evidence
proxyApp proxy.AppConns // connection to the application
rpcListeners []net.Listener // rpc servers
txIndexer txindex.TxIndexer
indexerService *txindex.IndexerService
2014-07-09 18:33:44 -07:00
}
2014-07-08 00:02:04 -07:00
// NewNode returns a new, ready to go, Tendermint Node.
func NewNode(config *cfg.Config,
privValidator types.PrivValidator,
clientCreator proxy.ClientCreator,
2017-09-21 18:44:36 -07:00
genesisDocProvider GenesisDocProvider,
dbProvider DBProvider,
logger log.Logger) (*Node, error) {
2014-10-22 17:20:44 -07:00
// Get BlockStore
blockStoreDB, err := dbProvider(&DBContext{"blockstore", config})
if err != nil {
return nil, err
}
blockStore := bc.NewBlockStore(blockStoreDB)
2014-10-22 17:20:44 -07:00
// Get State
stateDB, err := dbProvider(&DBContext{"state", config})
if err != nil {
return nil, err
}
// Get genesis doc
2017-12-28 15:26:13 -08:00
// TODO: move to state package?
genDoc, err := loadGenesisDoc(stateDB)
if err != nil {
genDoc, err = genesisDocProvider()
if err != nil {
return nil, err
}
// save genesis doc to prevent a certain class of user errors (e.g. when it
// was changed, accidentally or not). Also good for audit trail.
saveGenesisDoc(stateDB, genDoc)
}
2017-12-28 15:26:13 -08:00
state, err := sm.LoadStateFromDBOrGenesisDoc(stateDB, genDoc)
if err != nil {
return nil, err
}
2017-05-12 14:07:53 -07:00
2016-08-23 18:44:07 -07:00
// Create the proxyApp, which manages connections (consensus, mempool, query)
2017-12-27 19:09:48 -08:00
// and sync tendermint and the app by performing a handshake
// and replaying any necessary blocks
consensusLogger := logger.With("module", "consensus")
2017-12-27 19:09:48 -08:00
handshaker := consensus.NewHandshaker(stateDB, state, blockStore)
2017-05-12 14:07:53 -07:00
handshaker.SetLogger(consensusLogger)
proxyApp := proxy.NewAppConns(clientCreator, handshaker)
proxyApp.SetLogger(logger.With("module", "proxy"))
if err := proxyApp.Start(); err != nil {
return nil, fmt.Errorf("Error starting proxy app connections: %v", err)
}
2015-12-01 20:12:01 -08:00
// reload the state (it may have been updated by the handshake)
state = sm.LoadState(stateDB)
// Decide whether to fast-sync or not
// We don't fast-sync when the only validator is us.
fastSync := config.FastSync
if state.Validators.Size() == 1 {
addr, _ := state.Validators.GetByIndex(0)
if bytes.Equal(privValidator.GetAddress(), addr) {
fastSync = false
}
}
// Log whether this node is a validator or an observer
if state.Validators.HasAddress(privValidator.GetAddress()) {
consensusLogger.Info("This node is a validator", "addr", privValidator.GetAddress(), "pubKey", privValidator.GetPubKey())
} else {
consensusLogger.Info("This node is not a validator", "addr", privValidator.GetAddress(), "pubKey", privValidator.GetPubKey())
}
// Make MempoolReactor
2017-05-15 10:33:45 -07:00
mempoolLogger := logger.With("module", "mempool")
mempool := mempl.NewMempool(config.Mempool, proxyApp.Mempool(), state.LastBlockHeight)
mempool.InitWAL() // no need to have the mempool wal during tests
2017-05-12 14:07:53 -07:00
mempool.SetLogger(mempoolLogger)
mempoolReactor := mempl.NewMempoolReactor(config.Mempool, mempool)
2017-05-12 14:07:53 -07:00
mempoolReactor.SetLogger(mempoolLogger)
2014-10-22 17:20:44 -07:00
if config.Consensus.WaitForTxs() {
mempool.EnableTxsAvailable()
2017-07-13 10:19:44 -07:00
}
// Make Evidence Reactor
evidenceDB, err := dbProvider(&DBContext{"evidence", config})
if err != nil {
return nil, err
}
evidenceLogger := logger.With("module", "evidence")
evidenceStore := evidence.NewEvidenceStore(evidenceDB)
2017-12-28 18:08:39 -08:00
evidencePool := evidence.NewEvidencePool(stateDB, evidenceStore)
evidencePool.SetLogger(evidenceLogger)
2017-12-26 17:34:57 -08:00
evidenceReactor := evidence.NewEvidenceReactor(evidencePool)
evidenceReactor.SetLogger(evidenceLogger)
2017-12-27 19:09:48 -08:00
blockExecLogger := logger.With("module", "state")
// make block executor for consensus and blockchain reactors to execute blocks
2017-12-28 15:26:13 -08:00
blockExec := sm.NewBlockExecutor(stateDB, blockExecLogger, proxyApp.Consensus(), mempool, evidencePool)
2017-12-27 19:09:48 -08:00
// Make BlockchainReactor
bcReactor := bc.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync)
bcReactor.SetLogger(logger.With("module", "blockchain"))
// Make ConsensusReactor
consensusState := consensus.NewConsensusState(config.Consensus, state.Copy(),
2017-12-27 19:09:48 -08:00
blockExec, blockStore, mempool, evidencePool)
2017-05-12 14:07:53 -07:00
consensusState.SetLogger(consensusLogger)
if privValidator != nil {
2016-11-16 17:52:08 -08:00
consensusState.SetPrivValidator(privValidator)
}
2016-11-16 17:52:08 -08:00
consensusReactor := consensus.NewConsensusReactor(consensusState, fastSync)
2017-05-12 14:07:53 -07:00
consensusReactor.SetLogger(consensusLogger)
p2pLogger := logger.With("module", "p2p")
2014-10-22 17:20:44 -07:00
2017-05-01 19:14:37 -07:00
sw := p2p.NewSwitch(config.P2P)
2017-05-12 14:07:53 -07:00
sw.SetLogger(p2pLogger)
sw.AddReactor("MEMPOOL", mempoolReactor)
sw.AddReactor("BLOCKCHAIN", bcReactor)
sw.AddReactor("CONSENSUS", consensusReactor)
sw.AddReactor("EVIDENCE", evidenceReactor)
2014-07-08 00:02:04 -07:00
2016-11-30 19:58:47 -08:00
// Optionally, start the pex reactor
var addrBook *p2p.AddrBook
var trustMetricStore *trust.TrustMetricStore
if config.P2P.PexReactor {
addrBook = p2p.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict)
2017-05-12 14:07:53 -07:00
addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile()))
// Get the trust metric history data
trustHistoryDB, err := dbProvider(&DBContext{"trusthistory", config})
if err != nil {
return nil, err
}
trustMetricStore = trust.NewTrustMetricStore(trustHistoryDB, trust.DefaultConfig())
trustMetricStore.SetLogger(p2pLogger)
2018-01-14 10:24:43 -08:00
var seeds []string
if config.P2P.Seeds != "" {
seeds = strings.Split(config.P2P.Seeds, ",")
}
pexReactor := p2p.NewPEXReactor(addrBook,
2018-01-14 10:24:43 -08:00
&p2p.PEXReactorConfig{Seeds: seeds})
2017-05-12 14:07:53 -07:00
pexReactor.SetLogger(p2pLogger)
2016-11-30 19:58:47 -08:00
sw.AddReactor("PEX", pexReactor)
}
2017-01-28 08:27:13 -08:00
// Filter peers by addr or pubkey with an ABCI query.
// If the query return code is OK, add peer.
// XXX: Query format subject to change
if config.FilterPeers {
2016-09-10 12:16:23 -07:00
// NOTE: addr is ip:port
2016-09-08 15:56:02 -07:00
sw.SetAddrFilter(func(addr net.Addr) error {
2017-01-28 08:27:13 -08:00
resQuery, err := proxyApp.Query().QuerySync(abci.RequestQuery{Path: cmn.Fmt("/p2p/filter/addr/%s", addr.String())})
if err != nil {
return err
}
2017-12-01 22:47:55 -08:00
if resQuery.IsErr() {
return resQuery
2016-09-08 15:56:02 -07:00
}
2017-12-01 22:47:55 -08:00
return nil
2016-09-08 15:56:02 -07:00
})
2017-12-28 22:53:41 -08:00
sw.SetPubKeyFilter(func(pubkey crypto.PubKey) error {
2017-01-28 08:27:13 -08:00
resQuery, err := proxyApp.Query().QuerySync(abci.RequestQuery{Path: cmn.Fmt("/p2p/filter/pubkey/%X", pubkey.Bytes())})
if err != nil {
return err
}
2017-12-01 22:47:55 -08:00
if resQuery.IsErr() {
return resQuery
2016-09-08 15:56:02 -07:00
}
2017-12-01 22:47:55 -08:00
return nil
2016-09-08 15:56:02 -07:00
})
}
new pubsub package comment out failing consensus tests for now rewrite rpc httpclient to use new pubsub package import pubsub as tmpubsub, query as tmquery make event IDs constants EventKey -> EventTypeKey rename EventsPubsub to PubSub mempool does not use pubsub rename eventsSub to pubsub new subscribe API fix channel size issues and consensus tests bugs refactor rpc client add missing discardFromChan method add mutex rename pubsub to eventBus remove IsRunning from WSRPCConnection interface (not needed) add a comment in broadcastNewRoundStepsAndVotes rename registerEventCallbacks to broadcastNewRoundStepsAndVotes See https://dave.cheney.net/2014/03/19/channel-axioms stop eventBuses after reactor tests remove unnecessary Unsubscribe return subscribe helper function move discardFromChan to where it is used subscribe now returns an err this gives us ability to refuse to subscribe if pubsub is at its max capacity. use context for control overflow cache queries handle err when subscribing in replay_test rename testClientID to testSubscriber extract var set channel buffer capacity to 1 in replay_file fix byzantine_test unsubscribe from single event, not all events refactor httpclient to return events to appropriate channels return failing testReplayCrashBeforeWriteVote test fix TestValidatorSetChanges refactor code a bit fix testReplayCrashBeforeWriteVote add comment fix TestValidatorSetChanges fixes from Bucky's review update comment [ci skip] test TxEventBuffer update changelog fix TestValidatorSetChanges (2nd attempt) only do wg.Done when no errors benchmark event bus create pubsub server inside NewEventBus only expose config params (later if needed) set buffer capacity to 0 so we are not testing cache new tx event format: key = "Tx" plus a tag {"tx.hash": XYZ} This should allow to subscribe to all transactions! or a specific one using a query: "tm.events.type = Tx and tx.hash = '013ABF99434...'" use TimeoutCommit instead of afterPublishEventNewBlockTimeout TimeoutCommit is the time a node waits after committing a block, before it goes into the next height. So it will finish everything from the last block, but then wait a bit. The idea is this gives it time to hear more votes from other validators, to strengthen the commit it includes in the next block. But it also gives it time to hear about new transactions. waitForBlockWithUpdatedVals rewrite WAL crash tests Task: test that we can recover from any WAL crash. Solution: the old tests were relying on event hub being run in the same thread (we were injecting the private validator's last signature). when considering a rewrite, we considered two possible solutions: write a "fuzzy" testing system where WAL is crashing upon receiving a new message, or inject failures and trigger them in tests using something like https://github.com/coreos/gofail. remove sleep no cs.Lock around wal.Save test different cases (empty block, non-empty block, ...) comments add comments test 4 cases: empty block, non-empty block, non-empty block with smaller part size, many blocks fixes as per Bucky's last review reset subscriptions on UnsubscribeAll use a simple counter to track message for which we panicked also, set a smaller part size for all test cases
2017-06-26 08:00:30 -07:00
eventBus := types.NewEventBus()
eventBus.SetLogger(logger.With("module", "events"))
// services which will be publishing and/or subscribing for messages (events)
2017-12-28 16:35:56 -08:00
// consensusReactor will set it on consensusState and blockExecutor
new pubsub package comment out failing consensus tests for now rewrite rpc httpclient to use new pubsub package import pubsub as tmpubsub, query as tmquery make event IDs constants EventKey -> EventTypeKey rename EventsPubsub to PubSub mempool does not use pubsub rename eventsSub to pubsub new subscribe API fix channel size issues and consensus tests bugs refactor rpc client add missing discardFromChan method add mutex rename pubsub to eventBus remove IsRunning from WSRPCConnection interface (not needed) add a comment in broadcastNewRoundStepsAndVotes rename registerEventCallbacks to broadcastNewRoundStepsAndVotes See https://dave.cheney.net/2014/03/19/channel-axioms stop eventBuses after reactor tests remove unnecessary Unsubscribe return subscribe helper function move discardFromChan to where it is used subscribe now returns an err this gives us ability to refuse to subscribe if pubsub is at its max capacity. use context for control overflow cache queries handle err when subscribing in replay_test rename testClientID to testSubscriber extract var set channel buffer capacity to 1 in replay_file fix byzantine_test unsubscribe from single event, not all events refactor httpclient to return events to appropriate channels return failing testReplayCrashBeforeWriteVote test fix TestValidatorSetChanges refactor code a bit fix testReplayCrashBeforeWriteVote add comment fix TestValidatorSetChanges fixes from Bucky's review update comment [ci skip] test TxEventBuffer update changelog fix TestValidatorSetChanges (2nd attempt) only do wg.Done when no errors benchmark event bus create pubsub server inside NewEventBus only expose config params (later if needed) set buffer capacity to 0 so we are not testing cache new tx event format: key = "Tx" plus a tag {"tx.hash": XYZ} This should allow to subscribe to all transactions! or a specific one using a query: "tm.events.type = Tx and tx.hash = '013ABF99434...'" use TimeoutCommit instead of afterPublishEventNewBlockTimeout TimeoutCommit is the time a node waits after committing a block, before it goes into the next height. So it will finish everything from the last block, but then wait a bit. The idea is this gives it time to hear more votes from other validators, to strengthen the commit it includes in the next block. But it also gives it time to hear about new transactions. waitForBlockWithUpdatedVals rewrite WAL crash tests Task: test that we can recover from any WAL crash. Solution: the old tests were relying on event hub being run in the same thread (we were injecting the private validator's last signature). when considering a rewrite, we considered two possible solutions: write a "fuzzy" testing system where WAL is crashing upon receiving a new message, or inject failures and trigger them in tests using something like https://github.com/coreos/gofail. remove sleep no cs.Lock around wal.Save test different cases (empty block, non-empty block, ...) comments add comments test 4 cases: empty block, non-empty block, non-empty block with smaller part size, many blocks fixes as per Bucky's last review reset subscriptions on UnsubscribeAll use a simple counter to track message for which we panicked also, set a smaller part size for all test cases
2017-06-26 08:00:30 -07:00
consensusReactor.SetEventBus(eventBus)
// Transaction indexing
var txIndexer txindex.TxIndexer
switch config.TxIndex.Indexer {
case "kv":
store, err := dbProvider(&DBContext{"tx_index", config})
if err != nil {
return nil, err
}
2017-11-30 18:02:39 -08:00
if config.TxIndex.IndexTags != "" {
txIndexer = kv.NewTxIndex(store, kv.IndexTags(strings.Split(config.TxIndex.IndexTags, ",")))
} else if config.TxIndex.IndexAllTags {
txIndexer = kv.NewTxIndex(store, kv.IndexAllTags())
} else {
txIndexer = kv.NewTxIndex(store)
}
default:
txIndexer = &null.TxIndex{}
}
indexerService := txindex.NewIndexerService(txIndexer, eventBus)
2015-09-24 12:48:44 -07:00
// run the profile server
profileHost := config.ProfListenAddress
2015-09-24 12:48:44 -07:00
if profileHost != "" {
go func() {
logger.Error("Profile server", "err", http.ListenAndServe(profileHost, nil))
2015-09-24 12:48:44 -07:00
}()
}
node := &Node{
config: config,
genesisDoc: genDoc,
privValidator: privValidator,
sw: sw,
addrBook: addrBook,
trustMetricStore: trustMetricStore,
2017-12-27 19:09:48 -08:00
stateDB: stateDB,
2015-01-06 15:51:41 -08:00
blockStore: blockStore,
bcReactor: bcReactor,
2014-10-22 17:20:44 -07:00
mempoolReactor: mempoolReactor,
2015-01-11 14:27:46 -08:00
consensusState: consensusState,
2014-10-22 17:20:44 -07:00
consensusReactor: consensusReactor,
evidencePool: evidencePool,
2016-08-22 13:00:08 -07:00
proxyApp: proxyApp,
txIndexer: txIndexer,
indexerService: indexerService,
new pubsub package comment out failing consensus tests for now rewrite rpc httpclient to use new pubsub package import pubsub as tmpubsub, query as tmquery make event IDs constants EventKey -> EventTypeKey rename EventsPubsub to PubSub mempool does not use pubsub rename eventsSub to pubsub new subscribe API fix channel size issues and consensus tests bugs refactor rpc client add missing discardFromChan method add mutex rename pubsub to eventBus remove IsRunning from WSRPCConnection interface (not needed) add a comment in broadcastNewRoundStepsAndVotes rename registerEventCallbacks to broadcastNewRoundStepsAndVotes See https://dave.cheney.net/2014/03/19/channel-axioms stop eventBuses after reactor tests remove unnecessary Unsubscribe return subscribe helper function move discardFromChan to where it is used subscribe now returns an err this gives us ability to refuse to subscribe if pubsub is at its max capacity. use context for control overflow cache queries handle err when subscribing in replay_test rename testClientID to testSubscriber extract var set channel buffer capacity to 1 in replay_file fix byzantine_test unsubscribe from single event, not all events refactor httpclient to return events to appropriate channels return failing testReplayCrashBeforeWriteVote test fix TestValidatorSetChanges refactor code a bit fix testReplayCrashBeforeWriteVote add comment fix TestValidatorSetChanges fixes from Bucky's review update comment [ci skip] test TxEventBuffer update changelog fix TestValidatorSetChanges (2nd attempt) only do wg.Done when no errors benchmark event bus create pubsub server inside NewEventBus only expose config params (later if needed) set buffer capacity to 0 so we are not testing cache new tx event format: key = "Tx" plus a tag {"tx.hash": XYZ} This should allow to subscribe to all transactions! or a specific one using a query: "tm.events.type = Tx and tx.hash = '013ABF99434...'" use TimeoutCommit instead of afterPublishEventNewBlockTimeout TimeoutCommit is the time a node waits after committing a block, before it goes into the next height. So it will finish everything from the last block, but then wait a bit. The idea is this gives it time to hear more votes from other validators, to strengthen the commit it includes in the next block. But it also gives it time to hear about new transactions. waitForBlockWithUpdatedVals rewrite WAL crash tests Task: test that we can recover from any WAL crash. Solution: the old tests were relying on event hub being run in the same thread (we were injecting the private validator's last signature). when considering a rewrite, we considered two possible solutions: write a "fuzzy" testing system where WAL is crashing upon receiving a new message, or inject failures and trigger them in tests using something like https://github.com/coreos/gofail. remove sleep no cs.Lock around wal.Save test different cases (empty block, non-empty block, ...) comments add comments test 4 cases: empty block, non-empty block, non-empty block with smaller part size, many blocks fixes as per Bucky's last review reset subscriptions on UnsubscribeAll use a simple counter to track message for which we panicked also, set a smaller part size for all test cases
2017-06-26 08:00:30 -07:00
eventBus: eventBus,
2014-07-09 18:33:44 -07:00
}
2017-05-02 00:53:32 -07:00
node.BaseService = *cmn.NewBaseService(logger, "Node", node)
return node, nil
2014-07-09 18:33:44 -07:00
}
// OnStart starts the Node. It implements cmn.Service.
func (n *Node) OnStart() error {
err := n.eventBus.Start()
new pubsub package comment out failing consensus tests for now rewrite rpc httpclient to use new pubsub package import pubsub as tmpubsub, query as tmquery make event IDs constants EventKey -> EventTypeKey rename EventsPubsub to PubSub mempool does not use pubsub rename eventsSub to pubsub new subscribe API fix channel size issues and consensus tests bugs refactor rpc client add missing discardFromChan method add mutex rename pubsub to eventBus remove IsRunning from WSRPCConnection interface (not needed) add a comment in broadcastNewRoundStepsAndVotes rename registerEventCallbacks to broadcastNewRoundStepsAndVotes See https://dave.cheney.net/2014/03/19/channel-axioms stop eventBuses after reactor tests remove unnecessary Unsubscribe return subscribe helper function move discardFromChan to where it is used subscribe now returns an err this gives us ability to refuse to subscribe if pubsub is at its max capacity. use context for control overflow cache queries handle err when subscribing in replay_test rename testClientID to testSubscriber extract var set channel buffer capacity to 1 in replay_file fix byzantine_test unsubscribe from single event, not all events refactor httpclient to return events to appropriate channels return failing testReplayCrashBeforeWriteVote test fix TestValidatorSetChanges refactor code a bit fix testReplayCrashBeforeWriteVote add comment fix TestValidatorSetChanges fixes from Bucky's review update comment [ci skip] test TxEventBuffer update changelog fix TestValidatorSetChanges (2nd attempt) only do wg.Done when no errors benchmark event bus create pubsub server inside NewEventBus only expose config params (later if needed) set buffer capacity to 0 so we are not testing cache new tx event format: key = "Tx" plus a tag {"tx.hash": XYZ} This should allow to subscribe to all transactions! or a specific one using a query: "tm.events.type = Tx and tx.hash = '013ABF99434...'" use TimeoutCommit instead of afterPublishEventNewBlockTimeout TimeoutCommit is the time a node waits after committing a block, before it goes into the next height. So it will finish everything from the last block, but then wait a bit. The idea is this gives it time to hear more votes from other validators, to strengthen the commit it includes in the next block. But it also gives it time to hear about new transactions. waitForBlockWithUpdatedVals rewrite WAL crash tests Task: test that we can recover from any WAL crash. Solution: the old tests were relying on event hub being run in the same thread (we were injecting the private validator's last signature). when considering a rewrite, we considered two possible solutions: write a "fuzzy" testing system where WAL is crashing upon receiving a new message, or inject failures and trigger them in tests using something like https://github.com/coreos/gofail. remove sleep no cs.Lock around wal.Save test different cases (empty block, non-empty block, ...) comments add comments test 4 cases: empty block, non-empty block, non-empty block with smaller part size, many blocks fixes as per Bucky's last review reset subscriptions on UnsubscribeAll use a simple counter to track message for which we panicked also, set a smaller part size for all test cases
2017-06-26 08:00:30 -07:00
if err != nil {
return err
}
2017-10-23 07:03:54 -07:00
// Run the RPC server first
// so we can eg. receive txs for the first block
if n.config.RPC.ListenAddress != "" {
listeners, err := n.startRPC()
if err != nil {
return err
}
n.rpcListeners = listeners
}
// Create & add listener
protocol, address := cmn.ProtocolAndAddress(n.config.P2P.ListenAddress)
2017-05-02 00:53:32 -07:00
l := p2p.NewDefaultListener(protocol, address, n.config.P2P.SkipUPNP, n.Logger.With("module", "p2p"))
n.sw.AddListener(l)
2017-12-28 22:53:41 -08:00
// Generate node PrivKey
2018-01-13 13:14:28 -08:00
// TODO: pass in like priv_val
2018-01-13 12:38:40 -08:00
nodeKey, err := p2p.LoadOrGenNodeKey(n.config.NodeKeyFile())
2018-01-01 17:21:42 -08:00
if err != nil {
return err
}
n.Logger.Info("P2P Node ID", "ID", nodeKey.ID(), "file", n.config.NodeKeyFile())
2017-12-28 22:53:41 -08:00
// Start the switch
2018-01-01 17:21:42 -08:00
n.sw.SetNodeInfo(n.makeNodeInfo(nodeKey.PubKey()))
n.sw.SetNodeKey(nodeKey)
err = n.sw.Start()
if err != nil {
return err
}
// Always connect to persistent peers
if n.config.P2P.PersistentPeers != "" {
err = n.sw.DialPeersAsync(n.addrBook, strings.Split(n.config.P2P.PersistentPeers, ","), true)
if err != nil {
return err
}
}
// start tx indexer
return n.indexerService.Start()
2014-07-09 18:33:44 -07:00
}
// OnStop stops the Node. It implements cmn.Service.
func (n *Node) OnStop() {
n.BaseService.OnStop()
2017-05-02 00:53:32 -07:00
n.Logger.Info("Stopping Node")
// TODO: gracefully disconnect from peers.
n.sw.Stop()
2017-03-03 16:28:17 -08:00
for _, l := range n.rpcListeners {
2017-05-02 00:53:32 -07:00
n.Logger.Info("Closing rpc listener", "listener", l)
2017-03-03 16:28:17 -08:00
if err := l.Close(); err != nil {
n.Logger.Error("Error closing listener", "listener", l, "err", err)
2017-03-03 16:28:17 -08:00
}
}
new pubsub package comment out failing consensus tests for now rewrite rpc httpclient to use new pubsub package import pubsub as tmpubsub, query as tmquery make event IDs constants EventKey -> EventTypeKey rename EventsPubsub to PubSub mempool does not use pubsub rename eventsSub to pubsub new subscribe API fix channel size issues and consensus tests bugs refactor rpc client add missing discardFromChan method add mutex rename pubsub to eventBus remove IsRunning from WSRPCConnection interface (not needed) add a comment in broadcastNewRoundStepsAndVotes rename registerEventCallbacks to broadcastNewRoundStepsAndVotes See https://dave.cheney.net/2014/03/19/channel-axioms stop eventBuses after reactor tests remove unnecessary Unsubscribe return subscribe helper function move discardFromChan to where it is used subscribe now returns an err this gives us ability to refuse to subscribe if pubsub is at its max capacity. use context for control overflow cache queries handle err when subscribing in replay_test rename testClientID to testSubscriber extract var set channel buffer capacity to 1 in replay_file fix byzantine_test unsubscribe from single event, not all events refactor httpclient to return events to appropriate channels return failing testReplayCrashBeforeWriteVote test fix TestValidatorSetChanges refactor code a bit fix testReplayCrashBeforeWriteVote add comment fix TestValidatorSetChanges fixes from Bucky's review update comment [ci skip] test TxEventBuffer update changelog fix TestValidatorSetChanges (2nd attempt) only do wg.Done when no errors benchmark event bus create pubsub server inside NewEventBus only expose config params (later if needed) set buffer capacity to 0 so we are not testing cache new tx event format: key = "Tx" plus a tag {"tx.hash": XYZ} This should allow to subscribe to all transactions! or a specific one using a query: "tm.events.type = Tx and tx.hash = '013ABF99434...'" use TimeoutCommit instead of afterPublishEventNewBlockTimeout TimeoutCommit is the time a node waits after committing a block, before it goes into the next height. So it will finish everything from the last block, but then wait a bit. The idea is this gives it time to hear more votes from other validators, to strengthen the commit it includes in the next block. But it also gives it time to hear about new transactions. waitForBlockWithUpdatedVals rewrite WAL crash tests Task: test that we can recover from any WAL crash. Solution: the old tests were relying on event hub being run in the same thread (we were injecting the private validator's last signature). when considering a rewrite, we considered two possible solutions: write a "fuzzy" testing system where WAL is crashing upon receiving a new message, or inject failures and trigger them in tests using something like https://github.com/coreos/gofail. remove sleep no cs.Lock around wal.Save test different cases (empty block, non-empty block, ...) comments add comments test 4 cases: empty block, non-empty block, non-empty block with smaller part size, many blocks fixes as per Bucky's last review reset subscriptions on UnsubscribeAll use a simple counter to track message for which we panicked also, set a smaller part size for all test cases
2017-06-26 08:00:30 -07:00
n.eventBus.Stop()
n.indexerService.Stop()
}
2017-10-03 16:11:55 -07:00
// RunForever waits for an interrupt signal and stops the node.
2017-03-05 20:13:34 -08:00
func (n *Node) RunForever() {
// Sleep forever and then...
cmn.TrapSignal(func() {
n.Stop()
})
}
// AddListener adds a listener to accept inbound peer connections.
// It should be called before starting the Node.
// The first listener is the primary listener (in NodeInfo)
2014-07-10 22:14:23 -07:00
func (n *Node) AddListener(l p2p.Listener) {
2015-04-01 14:52:25 -07:00
n.sw.AddListener(l)
2014-07-09 18:33:44 -07:00
}
2014-07-08 00:02:04 -07:00
2017-02-22 05:41:10 -08:00
// ConfigureRPC sets all variables in rpccore so they will serve
// rpc calls from this node
func (n *Node) ConfigureRPC() {
2017-12-27 19:09:48 -08:00
rpccore.SetStateDB(n.stateDB)
rpccore.SetBlockStore(n.blockStore)
rpccore.SetConsensusState(n.consensusState)
2016-10-14 18:36:42 -07:00
rpccore.SetMempool(n.mempoolReactor.Mempool)
rpccore.SetEvidencePool(n.evidencePool)
rpccore.SetSwitch(n.sw)
rpccore.SetPubKey(n.privValidator.GetPubKey())
rpccore.SetGenesisDoc(n.genesisDoc)
2017-03-05 20:13:34 -08:00
rpccore.SetAddrBook(n.addrBook)
2016-08-22 13:00:48 -07:00
rpccore.SetProxyAppQuery(n.proxyApp.Query())
rpccore.SetTxIndexer(n.txIndexer)
2017-07-16 23:44:23 -07:00
rpccore.SetConsensusReactor(n.consensusReactor)
new pubsub package comment out failing consensus tests for now rewrite rpc httpclient to use new pubsub package import pubsub as tmpubsub, query as tmquery make event IDs constants EventKey -> EventTypeKey rename EventsPubsub to PubSub mempool does not use pubsub rename eventsSub to pubsub new subscribe API fix channel size issues and consensus tests bugs refactor rpc client add missing discardFromChan method add mutex rename pubsub to eventBus remove IsRunning from WSRPCConnection interface (not needed) add a comment in broadcastNewRoundStepsAndVotes rename registerEventCallbacks to broadcastNewRoundStepsAndVotes See https://dave.cheney.net/2014/03/19/channel-axioms stop eventBuses after reactor tests remove unnecessary Unsubscribe return subscribe helper function move discardFromChan to where it is used subscribe now returns an err this gives us ability to refuse to subscribe if pubsub is at its max capacity. use context for control overflow cache queries handle err when subscribing in replay_test rename testClientID to testSubscriber extract var set channel buffer capacity to 1 in replay_file fix byzantine_test unsubscribe from single event, not all events refactor httpclient to return events to appropriate channels return failing testReplayCrashBeforeWriteVote test fix TestValidatorSetChanges refactor code a bit fix testReplayCrashBeforeWriteVote add comment fix TestValidatorSetChanges fixes from Bucky's review update comment [ci skip] test TxEventBuffer update changelog fix TestValidatorSetChanges (2nd attempt) only do wg.Done when no errors benchmark event bus create pubsub server inside NewEventBus only expose config params (later if needed) set buffer capacity to 0 so we are not testing cache new tx event format: key = "Tx" plus a tag {"tx.hash": XYZ} This should allow to subscribe to all transactions! or a specific one using a query: "tm.events.type = Tx and tx.hash = '013ABF99434...'" use TimeoutCommit instead of afterPublishEventNewBlockTimeout TimeoutCommit is the time a node waits after committing a block, before it goes into the next height. So it will finish everything from the last block, but then wait a bit. The idea is this gives it time to hear more votes from other validators, to strengthen the commit it includes in the next block. But it also gives it time to hear about new transactions. waitForBlockWithUpdatedVals rewrite WAL crash tests Task: test that we can recover from any WAL crash. Solution: the old tests were relying on event hub being run in the same thread (we were injecting the private validator's last signature). when considering a rewrite, we considered two possible solutions: write a "fuzzy" testing system where WAL is crashing upon receiving a new message, or inject failures and trigger them in tests using something like https://github.com/coreos/gofail. remove sleep no cs.Lock around wal.Save test different cases (empty block, non-empty block, ...) comments add comments test 4 cases: empty block, non-empty block, non-empty block with smaller part size, many blocks fixes as per Bucky's last review reset subscriptions on UnsubscribeAll use a simple counter to track message for which we panicked also, set a smaller part size for all test cases
2017-06-26 08:00:30 -07:00
rpccore.SetEventBus(n.eventBus)
2017-05-02 00:53:32 -07:00
rpccore.SetLogger(n.Logger.With("module", "rpc"))
2017-02-22 05:41:10 -08:00
}
2015-04-10 02:12:17 -07:00
2017-02-22 05:41:10 -08:00
func (n *Node) startRPC() ([]net.Listener, error) {
n.ConfigureRPC()
2017-05-24 10:56:12 -07:00
listenAddrs := strings.Split(n.config.RPC.ListenAddress, ",")
2016-02-18 16:21:02 -08:00
if n.config.RPC.Unsafe {
rpccore.AddUnsafeRoutes()
}
2016-02-18 16:21:02 -08:00
// we may expose the rpc over both a unix and tcp socket
listeners := make([]net.Listener, len(listenAddrs))
for i, listenAddr := range listenAddrs {
mux := http.NewServeMux()
2017-05-15 10:33:45 -07:00
rpcLogger := n.Logger.With("module", "rpc-server")
wm := rpcserver.NewWebsocketManager(rpccore.Routes, rpcserver.EventSubscriber(n.eventBus))
wm.SetLogger(rpcLogger.With("protocol", "websocket"))
2016-02-18 16:21:02 -08:00
mux.HandleFunc("/websocket", wm.WebsocketHandler)
2017-05-02 00:53:32 -07:00
rpcserver.RegisterRPCFuncs(mux, rpccore.Routes, rpcLogger)
listener, err := rpcserver.StartHTTPServer(listenAddr, mux, rpcLogger)
2016-02-18 16:21:02 -08:00
if err != nil {
return nil, err
}
listeners[i] = listener
}
2016-06-21 10:19:49 -07:00
// we expose a simplified api over grpc for convenience to app devs
2017-05-24 10:56:12 -07:00
grpcListenAddr := n.config.RPC.GRPCListenAddress
2016-06-21 10:19:49 -07:00
if grpcListenAddr != "" {
listener, err := grpccore.StartGRPCServer(grpcListenAddr)
if err != nil {
return nil, err
}
listeners = append(listeners, listener)
}
2016-02-18 16:21:02 -08:00
return listeners, nil
}
// Switch returns the Node's Switch.
func (n *Node) Switch() *p2p.Switch {
return n.sw
}
// BlockStore returns the Node's BlockStore.
2015-05-15 07:27:22 -07:00
func (n *Node) BlockStore() *bc.BlockStore {
return n.blockStore
}
// ConsensusState returns the Node's ConsensusState.
func (n *Node) ConsensusState() *consensus.ConsensusState {
return n.consensusState
}
// ConsensusReactor returns the Node's ConsensusReactor.
2016-05-11 11:45:20 -07:00
func (n *Node) ConsensusReactor() *consensus.ConsensusReactor {
return n.consensusReactor
}
// MempoolReactor returns the Node's MempoolReactor.
func (n *Node) MempoolReactor() *mempl.MempoolReactor {
return n.mempoolReactor
}
// EvidencePool returns the Node's EvidencePool.
func (n *Node) EvidencePool() *evidence.EvidencePool {
return n.evidencePool
}
new pubsub package comment out failing consensus tests for now rewrite rpc httpclient to use new pubsub package import pubsub as tmpubsub, query as tmquery make event IDs constants EventKey -> EventTypeKey rename EventsPubsub to PubSub mempool does not use pubsub rename eventsSub to pubsub new subscribe API fix channel size issues and consensus tests bugs refactor rpc client add missing discardFromChan method add mutex rename pubsub to eventBus remove IsRunning from WSRPCConnection interface (not needed) add a comment in broadcastNewRoundStepsAndVotes rename registerEventCallbacks to broadcastNewRoundStepsAndVotes See https://dave.cheney.net/2014/03/19/channel-axioms stop eventBuses after reactor tests remove unnecessary Unsubscribe return subscribe helper function move discardFromChan to where it is used subscribe now returns an err this gives us ability to refuse to subscribe if pubsub is at its max capacity. use context for control overflow cache queries handle err when subscribing in replay_test rename testClientID to testSubscriber extract var set channel buffer capacity to 1 in replay_file fix byzantine_test unsubscribe from single event, not all events refactor httpclient to return events to appropriate channels return failing testReplayCrashBeforeWriteVote test fix TestValidatorSetChanges refactor code a bit fix testReplayCrashBeforeWriteVote add comment fix TestValidatorSetChanges fixes from Bucky's review update comment [ci skip] test TxEventBuffer update changelog fix TestValidatorSetChanges (2nd attempt) only do wg.Done when no errors benchmark event bus create pubsub server inside NewEventBus only expose config params (later if needed) set buffer capacity to 0 so we are not testing cache new tx event format: key = "Tx" plus a tag {"tx.hash": XYZ} This should allow to subscribe to all transactions! or a specific one using a query: "tm.events.type = Tx and tx.hash = '013ABF99434...'" use TimeoutCommit instead of afterPublishEventNewBlockTimeout TimeoutCommit is the time a node waits after committing a block, before it goes into the next height. So it will finish everything from the last block, but then wait a bit. The idea is this gives it time to hear more votes from other validators, to strengthen the commit it includes in the next block. But it also gives it time to hear about new transactions. waitForBlockWithUpdatedVals rewrite WAL crash tests Task: test that we can recover from any WAL crash. Solution: the old tests were relying on event hub being run in the same thread (we were injecting the private validator's last signature). when considering a rewrite, we considered two possible solutions: write a "fuzzy" testing system where WAL is crashing upon receiving a new message, or inject failures and trigger them in tests using something like https://github.com/coreos/gofail. remove sleep no cs.Lock around wal.Save test different cases (empty block, non-empty block, ...) comments add comments test 4 cases: empty block, non-empty block, non-empty block with smaller part size, many blocks fixes as per Bucky's last review reset subscriptions on UnsubscribeAll use a simple counter to track message for which we panicked also, set a smaller part size for all test cases
2017-06-26 08:00:30 -07:00
// EventBus returns the Node's EventBus.
func (n *Node) EventBus() *types.EventBus {
return n.eventBus
2015-04-17 13:18:50 -07:00
}
// PrivValidator returns the Node's PrivValidator.
// XXX: for convenience only!
2017-09-18 15:12:31 -07:00
func (n *Node) PrivValidator() types.PrivValidator {
2016-05-11 11:45:20 -07:00
return n.privValidator
}
// GenesisDoc returns the Node's GenesisDoc.
2016-10-14 18:36:42 -07:00
func (n *Node) GenesisDoc() *types.GenesisDoc {
return n.genesisDoc
}
// ProxyApp returns the Node's AppConns, representing its connections to the ABCI application.
2016-10-14 18:36:42 -07:00
func (n *Node) ProxyApp() proxy.AppConns {
return n.proxyApp
}
2018-01-13 21:10:29 -08:00
func (n *Node) makeNodeInfo(pubKey crypto.PubKey) p2p.NodeInfo {
2017-04-18 17:10:02 -07:00
txIndexerStatus := "on"
if _, ok := n.txIndexer.(*null.TxIndex); ok {
txIndexerStatus = "off"
}
2018-01-13 21:10:29 -08:00
nodeInfo := p2p.NodeInfo{
2017-12-28 22:53:41 -08:00
PubKey: pubKey,
Network: n.genesisDoc.ChainID,
Version: version.Version,
2018-01-13 20:48:41 -08:00
Moniker: n.config.Moniker,
2015-11-01 11:34:08 -08:00
Other: []string{
cmn.Fmt("wire_version=%v", wire.Version),
cmn.Fmt("p2p_version=%v", p2p.Version),
cmn.Fmt("consensus_version=%v", consensus.Version),
cmn.Fmt("rpc_version=%v/%v", rpc.Version, rpccore.Version),
2017-04-18 17:55:40 -07:00
cmn.Fmt("tx_index=%v", txIndexerStatus),
2015-09-15 16:11:45 -07:00
},
}
2015-07-10 08:50:58 -07:00
2017-11-16 17:20:15 -08:00
rpcListenAddr := n.config.RPC.ListenAddress
nodeInfo.Other = append(nodeInfo.Other, cmn.Fmt("rpc_addr=%v", rpcListenAddr))
2015-07-10 08:50:58 -07:00
2017-04-18 17:10:02 -07:00
if !n.sw.IsListening() {
return nodeInfo
}
2015-07-10 08:50:58 -07:00
2017-04-18 17:10:02 -07:00
p2pListener := n.sw.Listeners()[0]
p2pHost := p2pListener.ExternalAddress().IP.String()
p2pPort := p2pListener.ExternalAddress().Port
nodeInfo.ListenAddr = cmn.Fmt("%v:%v", p2pHost, p2pPort)
2017-11-16 17:20:15 -08:00
return nodeInfo
}
2017-01-13 12:47:13 -08:00
//------------------------------------------------------------------------------
// NodeInfo returns the Node's Info from the Switch.
2018-01-13 21:10:29 -08:00
func (n *Node) NodeInfo() p2p.NodeInfo {
2016-04-13 15:23:25 -07:00
return n.sw.NodeInfo()
}
2017-04-28 20:59:02 -07:00
//------------------------------------------------------------------------------
var (
genesisDocKey = []byte("genesisDoc")
)
// panics if failed to unmarshal bytes
func loadGenesisDoc(db dbm.DB) (*types.GenesisDoc, error) {
bytes := db.Get(genesisDocKey)
if len(bytes) == 0 {
return nil, errors.New("Genesis doc not found")
} else {
var genDoc *types.GenesisDoc
err := json.Unmarshal(bytes, &genDoc)
if err != nil {
cmn.PanicCrisis(fmt.Sprintf("Failed to load genesis doc due to unmarshaling error: %v (bytes: %X)", err, bytes))
}
return genDoc, nil
}
}
// panics if failed to marshal the given genesis document
func saveGenesisDoc(db dbm.DB, genDoc *types.GenesisDoc) {
bytes, err := json.Marshal(genDoc)
if err != nil {
cmn.PanicCrisis(fmt.Sprintf("Failed to save genesis doc due to marshaling error: %v", err))
}
db.SetSync(genesisDocKey, bytes)
}