Merge pull request #1607 from tendermint/1600-wal-bug

[wal] small fixes in SearchEndHeight & replay logic
This commit is contained in:
Anton Kaliaev 2018-05-25 15:41:57 +04:00 committed by GitHub
commit eeabb4c06b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 70 additions and 55 deletions

View File

@ -8,6 +8,7 @@ IMPROVEMENTS:
- [consensus] consensus reactor now receives events from a separate event bus,
which is not dependant on external RPC load
- [consensus/wal] do not look for height in older files if we've seen height - 1
## 0.19.5

View File

@ -111,7 +111,7 @@ func (wal *baseWAL) OnStop() {
}
// Write is called in newStep and for each receive on the
// peerMsgQueue and the timoutTicker.
// peerMsgQueue and the timeoutTicker.
// NOTE: does not call fsync()
func (wal *baseWAL) Write(msg WALMessage) {
if wal == nil {
@ -144,13 +144,14 @@ type WALSearchOptions struct {
IgnoreDataCorruptionErrors bool
}
// SearchForEndHeight searches for the EndHeightMessage with the height and
// returns an auto.GroupReader, whenever it was found or not and an error.
// SearchForEndHeight searches for the EndHeightMessage with the given height
// and returns an auto.GroupReader, whenever it was found or not and an error.
// Group reader will be nil if found equals false.
//
// CONTRACT: caller must close group reader.
func (wal *baseWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) {
var msg *TimedWALMessage
lastHeightFound := int64(-1)
// NOTE: starting from the last file in the group because we're usually
// searching for the last height. See replay.go
@ -166,17 +167,25 @@ func (wal *baseWAL) SearchForEndHeight(height int64, options *WALSearchOptions)
for {
msg, err = dec.Decode()
if err == io.EOF {
// OPTIMISATION: no need to look for height in older files if we've seen h < height
if lastHeightFound > 0 && lastHeightFound < height {
gr.Close()
return nil, false, nil
}
// check next file
break
}
if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) {
wal.Logger.Debug("Corrupted entry. Skipping...", "err", err)
// do nothing
continue
} else if err != nil {
gr.Close()
return nil, false, err
}
if m, ok := msg.Msg.(EndHeightMessage); ok {
lastHeightFound = m.Height
if m.Height == height { // found
wal.Logger.Debug("Found", "height", height, "index", index)
return gr, true, nil
@ -271,23 +280,17 @@ func (dec *WALDecoder) Decode() (*TimedWALMessage, error) {
b = make([]byte, 4)
_, err = dec.rd.Read(b)
if err == io.EOF {
return nil, err
}
if err != nil {
return nil, fmt.Errorf("failed to read length: %v", err)
}
length := binary.BigEndian.Uint32(b)
if length > maxMsgSizeBytes {
return nil, DataCorruptionError{fmt.Errorf("length %d exceeded maximum possible value of %d bytes", length, maxMsgSizeBytes)}
return nil, fmt.Errorf("length %d exceeded maximum possible value of %d bytes", length, maxMsgSizeBytes)
}
data := make([]byte, length)
_, err = dec.rd.Read(data)
if err == io.EOF {
return nil, err
}
if err != nil {
return nil, fmt.Errorf("failed to read data: %v", err)
}

View File

@ -6,7 +6,7 @@ import (
"sync/atomic"
"time"
"github.com/tendermint/go-crypto"
crypto "github.com/tendermint/go-crypto"
cmn "github.com/tendermint/tmlibs/common"
"github.com/tendermint/tmlibs/log"

View File

@ -7,7 +7,7 @@ import (
"sync"
"time"
"github.com/tendermint/go-amino"
amino "github.com/tendermint/go-amino"
cmn "github.com/tendermint/tmlibs/common"
"github.com/tendermint/tendermint/p2p"
@ -281,6 +281,7 @@ func (r *PEXReactor) receiveRequest(src Peer) error {
// RequestAddrs asks peer for more addresses if we do not already
// have a request out for this peer.
func (r *PEXReactor) RequestAddrs(p Peer) {
r.Logger.Debug("Request addrs", "from", p)
id := string(p.ID())
if r.requestsSent.Has(id) {
return

View File

@ -59,59 +59,69 @@ func TestPEXReactorAddRemovePeer(t *testing.T) {
assert.Equal(t, size+1, book.Size())
}
func TestPEXReactorRunning(t *testing.T) {
N := 3
switches := make([]*p2p.Switch, N)
// --- FAIL: TestPEXReactorRunning (11.10s)
// pex_reactor_test.go:411: expected all switches to be connected to at
// least one peer (switches: 0 => {outbound: 1, inbound: 0}, 1 =>
// {outbound: 0, inbound: 1}, 2 => {outbound: 0, inbound: 0}, )
//
// EXPLANATION: peers are getting rejected because in switch#addPeer we check
// if any peer (who we already connected to) has the same IP. Even though local
// peers have different IP addresses, they all have the same underlying remote
// IP: 127.0.0.1.
//
// func TestPEXReactorRunning(t *testing.T) {
// N := 3
// switches := make([]*p2p.Switch, N)
// directory to store address books
dir, err := ioutil.TempDir("", "pex_reactor")
require.Nil(t, err)
defer os.RemoveAll(dir) // nolint: errcheck
// // directory to store address books
// dir, err := ioutil.TempDir("", "pex_reactor")
// require.Nil(t, err)
// defer os.RemoveAll(dir) // nolint: errcheck
books := make([]*addrBook, N)
logger := log.TestingLogger()
// books := make([]*addrBook, N)
// logger := log.TestingLogger()
// create switches
for i := 0; i < N; i++ {
switches[i] = p2p.MakeSwitch(config, i, "testing", "123.123.123", func(i int, sw *p2p.Switch) *p2p.Switch {
books[i] = NewAddrBook(filepath.Join(dir, fmt.Sprintf("addrbook%d.json", i)), false)
books[i].SetLogger(logger.With("pex", i))
sw.SetAddrBook(books[i])
// // create switches
// for i := 0; i < N; i++ {
// switches[i] = p2p.MakeSwitch(config, i, "testing", "123.123.123", func(i int, sw *p2p.Switch) *p2p.Switch {
// books[i] = NewAddrBook(filepath.Join(dir, fmt.Sprintf("addrbook%d.json", i)), false)
// books[i].SetLogger(logger.With("pex", i))
// sw.SetAddrBook(books[i])
sw.SetLogger(logger.With("pex", i))
// sw.SetLogger(logger.With("pex", i))
r := NewPEXReactor(books[i], &PEXReactorConfig{})
r.SetLogger(logger.With("pex", i))
r.SetEnsurePeersPeriod(250 * time.Millisecond)
sw.AddReactor("pex", r)
// r := NewPEXReactor(books[i], &PEXReactorConfig{})
// r.SetLogger(logger.With("pex", i))
// r.SetEnsurePeersPeriod(250 * time.Millisecond)
// sw.AddReactor("pex", r)
return sw
})
}
// return sw
// })
// }
addOtherNodeAddrToAddrBook := func(switchIndex, otherSwitchIndex int) {
addr := switches[otherSwitchIndex].NodeInfo().NetAddress()
books[switchIndex].AddAddress(addr, addr)
}
// addOtherNodeAddrToAddrBook := func(switchIndex, otherSwitchIndex int) {
// addr := switches[otherSwitchIndex].NodeInfo().NetAddress()
// books[switchIndex].AddAddress(addr, addr)
// }
addOtherNodeAddrToAddrBook(0, 1)
addOtherNodeAddrToAddrBook(1, 0)
addOtherNodeAddrToAddrBook(2, 1)
// addOtherNodeAddrToAddrBook(0, 1)
// addOtherNodeAddrToAddrBook(1, 0)
// addOtherNodeAddrToAddrBook(2, 1)
for i, sw := range switches {
sw.AddListener(p2p.NewDefaultListener("tcp", sw.NodeInfo().ListenAddr, true, logger.With("pex", i)))
// for i, sw := range switches {
// sw.AddListener(p2p.NewDefaultListener("tcp", sw.NodeInfo().ListenAddr, true, logger.With("pex", i)))
err := sw.Start() // start switch and reactors
require.Nil(t, err)
}
// err := sw.Start() // start switch and reactors
// require.Nil(t, err)
// }
assertPeersWithTimeout(t, switches, 10*time.Millisecond, 10*time.Second, N-1)
// assertPeersWithTimeout(t, switches, 10*time.Millisecond, 10*time.Second, N-1)
// stop them
for _, s := range switches {
s.Stop()
}
}
// // stop them
// for _, s := range switches {
// s.Stop()
// }
// }
func TestPEXReactorReceive(t *testing.T) {
r, book := createReactor(&PEXReactorConfig{})

View File

@ -568,7 +568,7 @@ func (sw *Switch) addPeer(pc peerConn) error {
// and add to our addresses to avoid dialing again
sw.addrBook.RemoveAddress(addr)
sw.addrBook.AddOurAddress(addr)
return ErrSwitchConnectToSelf{}
return ErrSwitchConnectToSelf{addr}
}
// Avoid duplicate

View File

@ -193,7 +193,7 @@ func TestSwitchFiltersOutItself(t *testing.T) {
// addr should be rejected in addPeer based on the same ID
err := s1.DialPeerWithAddress(rp.Addr(), false)
if assert.Error(t, err) {
assert.EqualValues(t, ErrSwitchConnectToSelf{}, err)
assert.Equal(t, ErrSwitchConnectToSelf{rp.Addr()}.Error(), err.Error())
}
assert.True(t, s1.addrBook.OurAddress(rp.Addr()))