From a60a18b080197cad836f18f9d093bba3bcb6cef8 Mon Sep 17 00:00:00 2001 From: zelig Date: Tue, 3 Mar 2015 00:43:12 +0700 Subject: [PATCH 1/3] - fix peer disconnect by adding severity function to errs - improve logging - suicide -> removeChain - improved status BlocksInPool calculation --- blockpool/blockpool.go | 45 +++++++++++++++++++++++++++++++++--------- blockpool/peers.go | 2 +- blockpool/section.go | 45 +++++++++++++++++------------------------- blockpool/status.go | 1 - 4 files changed, 55 insertions(+), 38 deletions(-) diff --git a/blockpool/blockpool.go b/blockpool/blockpool.go index 0126d734c..5da671ec2 100644 --- a/blockpool/blockpool.go +++ b/blockpool/blockpool.go @@ -149,6 +149,15 @@ func New( } } +func severity(code int) ethlogger.LogLevel { + switch code { + case ErrUnrequestedBlock: + return ethlogger.WarnLevel + default: + return ethlogger.ErrorLevel + } +} + // allows restart func (self *BlockPool) Start() { self.lock.Lock() @@ -169,6 +178,7 @@ func (self *BlockPool) Start() { errors: &errs.Errors{ Package: "Blockpool", Errors: errorToString, + Level: severity, }, peers: make(map[string]*peer), status: self.status, @@ -363,6 +373,8 @@ LOOP: // check if known block connecting the downloaded chain to our blockchain plog.DebugDetailf("AddBlockHashes: peer <%s> (head: %s) found block %s in the blockchain", peerId, hex(bestpeer.currentBlockHash), hex(hash)) if len(nodes) == 1 { + plog.DebugDetailf("AddBlockHashes: singleton section pushed to blockchain peer <%s> (head: %s) found block %s in the blockchain", peerId, hex(bestpeer.currentBlockHash), hex(hash)) + // create new section if needed and push it to the blockchain sec = self.newSection(nodes) sec.addSectionToBlockChain(bestpeer) @@ -379,6 +391,8 @@ LOOP: and td together with blockBy are recorded on the node */ if len(nodes) == 0 && child != nil { + plog.DebugDetailf("AddBlockHashes: child section [%s] pushed to blockchain peer <%s> (head: %s) found block %s in the blockchain", sectionhex(child), peerId, hex(bestpeer.currentBlockHash), hex(hash)) + child.addSectionToBlockChain(bestpeer) } } @@ -446,10 +460,12 @@ LOOP: */ sec = self.linkSections(nodes, parent, child) - self.status.lock.Lock() - self.status.values.BlockHashes += len(nodes) - self.status.lock.Unlock() - plog.DebugDetailf("AddBlockHashes: peer <%s> (head: %s): section [%s] created", peerId, hex(bestpeer.currentBlockHash), sectionhex(sec)) + if sec != nil { + self.status.lock.Lock() + self.status.values.BlockHashes += len(nodes) + self.status.lock.Unlock() + plog.DebugDetailf("AddBlockHashes: peer <%s> (head: %s): section [%s] created", peerId, hex(bestpeer.currentBlockHash), sectionhex(sec)) + } self.chainLock.Unlock() @@ -549,6 +565,7 @@ func (self *BlockPool) AddBlock(block *types.Block, peerId string) { self.status.lock.Unlock() } else { plog.DebugDetailf("AddBlock: head block %s for peer <%s> (head: %s) already known", hex(hash), peerId, hex(sender.currentBlockHash)) + sender.currentBlockC <- block } } else { @@ -644,11 +661,15 @@ LOOP: we need to relink both complete and incomplete sections the latter could have been blockHashesRequestsComplete before being delinked from its parent */ - if parent == nil && sec.bottom.block != nil { - if entry := self.get(sec.bottom.block.ParentHash()); entry != nil { - parent = entry.section - plog.DebugDetailf("activateChain: [%s]-[%s] relink", sectionhex(parent), sectionhex(sec)) - link(parent, sec) + if parent == nil { + if sec.bottom.block != nil { + if entry := self.get(sec.bottom.block.ParentHash()); entry != nil { + parent = entry.section + plog.DebugDetailf("activateChain: [%s]-[%s] link", sectionhex(parent), sectionhex(sec)) + link(parent, sec) + } + } else { + plog.DebugDetailf("activateChain: section [%s] activated by peer <%s> has missing root block", sectionhex(sec), p.id) } } sec = parent @@ -704,9 +725,15 @@ func (self *BlockPool) remove(sec *section) { // delete node entries from pool index under pool lock self.lock.Lock() defer self.lock.Unlock() + for _, node := range sec.nodes { delete(self.pool, string(node.hash)) } + if sec.initialised && sec.poolRootIndex != 0 { + self.status.lock.Lock() + self.status.values.BlocksInPool -= len(sec.nodes) - sec.missing + self.status.lock.Unlock() + } } func (self *BlockPool) getHashSlice() (s [][]byte) { diff --git a/blockpool/peers.go b/blockpool/peers.go index 5f1b2017c..576d6e41d 100644 --- a/blockpool/peers.go +++ b/blockpool/peers.go @@ -503,7 +503,7 @@ LOOP: // quitting on timeout case <-self.suicide: - self.peerError(self.bp.peers.errors.New(ErrInsufficientChainInfo, "timed out without providing block hashes or head block", currentBlockHash)) + self.peerError(self.bp.peers.errors.New(ErrInsufficientChainInfo, "timed out without providing block hashes or head block %x", currentBlockHash)) self.bp.status.lock.Lock() self.bp.status.badPeers[self.id]++ diff --git a/blockpool/section.go b/blockpool/section.go index 48ea15d31..03c4f5cc6 100644 --- a/blockpool/section.go +++ b/blockpool/section.go @@ -138,7 +138,7 @@ func (self *section) addSectionToBlockChain(p *peer) { plog.Warnf("penalise peers %v (hash), %v (block)", node.hashBy, node.blockBy) // or invalid block and the entire chain needs to be removed - self.removeInvalidChain() + self.removeChain() } else { // if all blocks inserted in this section // then need to try to insert blocks in child section @@ -235,16 +235,14 @@ LOOP: // timebomb - if section is not complete in time, nuke the entire chain case <-self.suicideTimer: - self.suicide() + self.removeChain() plog.Debugf("[%s] timeout. (%v total attempts): missing %v/%v/%v...suicide", sectionhex(self), self.blocksRequests, self.missing, self.lastMissing, self.depth) self.suicideTimer = nil + break LOOP // closing suicideC triggers section suicide: removes section nodes from pool and terminates section process case <-self.suicideC: - plog.DebugDetailf("[%s] suicide", sectionhex(self)) - self.unlink() - self.bp.remove(self) - plog.DebugDetailf("[%s] done", sectionhex(self)) + plog.DebugDetailf("[%s] quit", sectionhex(self)) break LOOP // alarm for checking blocks in the section @@ -283,7 +281,7 @@ LOOP: checking = false break } - plog.DebugDetailf("[%s] section proc step %v: missing %v/%v/%v", sectionhex(self), self.step, self.missing, self.lastMissing, self.depth) + // plog.DebugDetailf("[%s] section proc step %v: missing %v/%v/%v", sectionhex(self), self.step, self.missing, self.lastMissing, self.depth) if !checking { self.step = 0 self.missing = 0 @@ -522,7 +520,7 @@ func (self *section) checkRound() { // too many idle rounds if self.idle >= self.bp.Config.BlocksRequestMaxIdleRounds { plog.DebugDetailf("[%s] block requests had %v idle rounds (%v total attempts): missing %v/%v/%v\ngiving up...", sectionhex(self), self.idle, self.blocksRequests, self.missing, self.lastMissing, self.depth) - self.suicide() + self.removeChain() } } else { self.idle = 0 @@ -602,10 +600,12 @@ func (self *BlockPool) linkSections(nodes []*node, parent, child *section) (sec link(parent, sec) link(sec, child) } else { - // now this can only happen if we allow response to hash request to include hash - // in this case we just link parent and child (without needing root block of child section) - plog.Debugf("[%s]->[%s] connecting known sections", sectionhex(parent), sectionhex(child)) - link(parent, child) + if parent != nil && child != nil { + // now this can only happen if we allow response to hash request to include hash + // in this case we just link parent and child (without needing root block of child section) + plog.Debugf("[%s]->[%s] connecting known sections", sectionhex(parent), sectionhex(child)) + link(parent, child) + } } return } @@ -614,6 +614,7 @@ func (self *section) activate(p *peer) { self.bp.wg.Add(1) select { case <-self.offC: + plog.DebugDetailf("[%s] completed section process. cannot activate for peer <%s>", sectionhex(self), p.id) self.bp.wg.Done() case self.controlC <- p: plog.DebugDetailf("[%s] activate section process for peer <%s>", sectionhex(self), p.id) @@ -625,22 +626,10 @@ func (self *section) deactivate() { self.controlC <- nil } -func (self *section) suicide() { - select { - case <-self.suicideC: - return - default: - } - close(self.suicideC) -} - // removes this section exacly func (self *section) remove() { select { case <-self.offC: - // section is complete, no process - self.unlink() - self.bp.remove(self) close(self.suicideC) plog.DebugDetailf("[%s] remove: suicide", sectionhex(self)) case <-self.suicideC: @@ -649,21 +638,23 @@ func (self *section) remove() { plog.DebugDetailf("[%s] remove: suicide", sectionhex(self)) close(self.suicideC) } + self.unlink() + self.bp.remove(self) plog.DebugDetailf("[%s] removed section.", sectionhex(self)) } // remove a section and all its descendents from the pool -func (self *section) removeInvalidChain() { +func (self *section) removeChain() { // need to get the child before removeSection delinks the section self.bp.chainLock.RLock() child := self.child self.bp.chainLock.RUnlock() - plog.DebugDetailf("[%s] remove invalid chain", sectionhex(self)) + plog.DebugDetailf("[%s] remove chain", sectionhex(self)) self.remove() if child != nil { - child.removeInvalidChain() + child.removeChain() } } diff --git a/blockpool/status.go b/blockpool/status.go index 0dd874232..4529c77fe 100644 --- a/blockpool/status.go +++ b/blockpool/status.go @@ -51,7 +51,6 @@ type Status struct { func (self *BlockPool) Status() *Status { self.status.lock.Lock() defer self.status.lock.Unlock() - self.status.values.BlockHashesInPool = len(self.pool) self.status.values.ActivePeers = len(self.status.activePeers) self.status.values.BestPeers = len(self.status.bestPeers) self.status.values.BadPeers = len(self.status.badPeers) From 47278a6e4eb4b8dc02ff322ba24c9c225267ffe5 Mon Sep 17 00:00:00 2001 From: zelig Date: Tue, 3 Mar 2015 03:39:21 +0700 Subject: [PATCH 2/3] log when added peer is behind (hash found in blockchain) --- blockpool/peers.go | 1 + 1 file changed, 1 insertion(+) diff --git a/blockpool/peers.go b/blockpool/peers.go index 576d6e41d..9c4ffc5c6 100644 --- a/blockpool/peers.go +++ b/blockpool/peers.go @@ -214,6 +214,7 @@ func (self *peers) addPeer( // check peer current head if self.bp.hasBlock(currentBlockHash) { // peer not ahead + plog.Debugf("addPeer: peer <%v> with td %v and current block %s is behind", id, td, hex(currentBlockHash)) return false } From 2c616bd2795974877b0d3e23e99f6eff9e775218 Mon Sep 17 00:00:00 2001 From: zelig Date: Wed, 4 Mar 2015 02:06:15 +0700 Subject: [PATCH 3/3] partial fix to idle best peer issue - best peer cannot be idle for more than idleBestPeerTimeout - introduce ErrIdleTooLong fatal error - modify default values --- blockpool/blockpool.go | 14 +++++++++++--- blockpool/config_test.go | 4 +++- blockpool/peers.go | 28 +++++++++++++++++++--------- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/blockpool/blockpool.go b/blockpool/blockpool.go index 5da671ec2..eb3b415dc 100644 --- a/blockpool/blockpool.go +++ b/blockpool/blockpool.go @@ -17,7 +17,7 @@ var plog = ethlogger.NewLogger("Blockpool") var ( // max number of block hashes sent in one request - blockHashesBatchSize = 512 + blockHashesBatchSize = 256 // max number of blocks sent in one request blockBatchSize = 64 // interval between two consecutive block checks (and requests) @@ -27,11 +27,13 @@ var ( // interval between two consecutive block hash checks (and requests) blockHashesRequestInterval = 3 * time.Second // max number of idle iterations, ie., check through a section without new blocks coming in - blocksRequestMaxIdleRounds = 100 + blocksRequestMaxIdleRounds = 20 // timeout interval: max time allowed for peer without sending a block hash blockHashesTimeout = 60 * time.Second // timeout interval: max time allowed for peer without sending a block - blocksTimeout = 120 * time.Second + blocksTimeout = 60 * time.Second + // + idleBestPeerTimeout = 60 * time.Second ) // config embedded in components, by default fall back to constants @@ -45,6 +47,7 @@ type Config struct { BlocksRequestInterval time.Duration BlockHashesTimeout time.Duration BlocksTimeout time.Duration + IdleBestPeerTimeout time.Duration } // blockpool errors @@ -53,6 +56,7 @@ const ( ErrInvalidPoW ErrUnrequestedBlock ErrInsufficientChainInfo + ErrIdleTooLong ) var errorToString = map[int]string{ @@ -60,6 +64,7 @@ var errorToString = map[int]string{ ErrInvalidPoW: "Invalid PoW", ErrUnrequestedBlock: "Unrequested block", ErrInsufficientChainInfo: "Insufficient chain info", + ErrIdleTooLong: "Idle too long", } // init initialises all your laundry @@ -88,6 +93,9 @@ func (self *Config) init() { if self.BlocksTimeout == 0 { self.BlocksTimeout = blocksTimeout } + if self.IdleBestPeerTimeout == 0 { + self.IdleBestPeerTimeout = idleBestPeerTimeout + } } // node is the basic unit of the internal model of block chain/tree in the blockpool diff --git a/blockpool/config_test.go b/blockpool/config_test.go index c06649024..d5540c864 100644 --- a/blockpool/config_test.go +++ b/blockpool/config_test.go @@ -20,12 +20,13 @@ func TestBlockPoolConfig(t *testing.T) { test.CheckDuration("BlocksRequestInterval", c.BlocksRequestInterval, blocksRequestInterval, t) test.CheckDuration("BlockHashesTimeout", c.BlockHashesTimeout, blockHashesTimeout, t) test.CheckDuration("BlocksTimeout", c.BlocksTimeout, blocksTimeout, t) + test.CheckDuration("IdleBestPeerTimeout", c.IdleBestPeerTimeout, idleBestPeerTimeout, t) } func TestBlockPoolOverrideConfig(t *testing.T) { test.LogInit() blockPool := &BlockPool{Config: &Config{}} - c := &Config{128, 32, 1, 0, 300 * time.Millisecond, 100 * time.Millisecond, 90 * time.Second, 0} + c := &Config{128, 32, 1, 0, 300 * time.Millisecond, 100 * time.Millisecond, 90 * time.Second, 0, 30 * time.Second} blockPool.Config = c blockPool.Start() @@ -37,4 +38,5 @@ func TestBlockPoolOverrideConfig(t *testing.T) { test.CheckDuration("BlocksRequestInterval", c.BlocksRequestInterval, 100*time.Millisecond, t) test.CheckDuration("BlockHashesTimeout", c.BlockHashesTimeout, 90*time.Second, t) test.CheckDuration("BlocksTimeout", c.BlocksTimeout, blocksTimeout, t) + test.CheckDuration("IdleBestPeerTimeout", c.IdleBestPeerTimeout, 30*time.Second, t) } diff --git a/blockpool/peers.go b/blockpool/peers.go index 9c4ffc5c6..b4aa48f49 100644 --- a/blockpool/peers.go +++ b/blockpool/peers.go @@ -37,7 +37,7 @@ type peer struct { currentBlockC chan *types.Block headSectionC chan *section - // channels to signal peers witch and peer quit + // channels to signal peer switch and peer quit to section processes idleC chan bool switchC chan bool @@ -47,7 +47,7 @@ type peer struct { // timers for head section process blockHashesRequestTimer <-chan time.Time blocksRequestTimer <-chan time.Time - suicide <-chan time.Time + suicideC <-chan time.Time idle bool } @@ -286,8 +286,7 @@ func (self *peers) removePeer(id string) { } } -// switchPeer launches section processes based on information about -// shared interest and legacy of peers +// switchPeer launches section processes func (self *BlockPool) switchPeer(oldp, newp *peer) { // first quit AddBlockHashes, requestHeadSection and activateChain @@ -372,16 +371,16 @@ func (self *peer) handleSection(sec *section) { self.bp.syncing() } - self.suicide = time.After(self.bp.Config.BlockHashesTimeout) + self.suicideC = time.After(self.bp.Config.BlockHashesTimeout) plog.DebugDetailf("HeadSection: <%s> head block hash changed (mined block received). New head %s", self.id, hex(self.currentBlockHash)) } else { if !self.idle { self.idle = true - self.suicide = nil self.bp.wg.Done() } plog.DebugDetailf("HeadSection: <%s> head section [%s] created", self.id, sectionhex(sec)) + self.suicideC = time.After(self.bp.Config.IdleBestPeerTimeout) } } @@ -451,7 +450,7 @@ func (self *peer) getBlockHashes() { self.blockHashesRequestTimer = nil if !self.idle { self.idle = true - self.suicide = nil + self.suicideC = time.After(self.bp.Config.IdleBestPeerTimeout) self.bp.wg.Done() } } @@ -467,7 +466,7 @@ func (self *peer) run() { self.blockHashesRequestTimer = nil self.blocksRequestTimer = time.After(0) - self.suicide = time.After(self.bp.Config.BlockHashesTimeout) + self.suicideC = time.After(self.bp.Config.BlockHashesTimeout) var quit chan bool @@ -476,9 +475,20 @@ func (self *peer) run() { LOOP: for { select { + // to minitor section process behaviou case <-ping.C: plog.Debugf("HeadSection: <%s> section with head %s, idle: %v", self.id, hex(self.currentBlockHash), self.idle) + // idle timer started when process goes idle + case <-self.idleC: + if self.idle { + self.peerError(self.bp.peers.errors.New(ErrIdleTooLong, "timed out without providing new blocks...quitting", currentBlockHash)) + + self.bp.status.lock.Lock() + self.bp.status.badPeers[self.id]++ + self.bp.status.lock.Unlock() + } + // signal from AddBlockHashes that head section for current best peer is created // if sec == nil, it signals that chain info has updated (new block message) case sec := <-self.headSectionC: @@ -503,7 +513,7 @@ LOOP: self.getCurrentBlock(nil) // quitting on timeout - case <-self.suicide: + case <-self.suicideC: self.peerError(self.bp.peers.errors.New(ErrInsufficientChainInfo, "timed out without providing block hashes or head block %x", currentBlockHash)) self.bp.status.lock.Lock()