fail tests and fix

This commit is contained in:
Ethan Buchman 2016-09-11 15:32:33 -04:00
parent 8ec1839f5d
commit 3f90fcae48
4 changed files with 176 additions and 14 deletions

View File

@ -8,6 +8,8 @@ import (
"sync"
"time"
"github.com/ebuchman/fail-test"
. "github.com/tendermint/go-common"
cfg "github.com/tendermint/go-config"
"github.com/tendermint/go-wire"
@ -1255,16 +1257,19 @@ func (cs *ConsensusState) finalizeCommit(height int) {
"height", block.Height, "hash", block.Hash(), "root", block.AppHash)
log.Info(Fmt("%v", block))
fail.Fail() // XXX
// Save to blockStore.
if cs.blockStore.Height() < block.Height {
precommits := cs.Votes.Precommits(cs.CommitRound)
seenCommit := precommits.MakeCommit()
log.Notice("save block", "height", block.Height)
cs.blockStore.SaveBlock(block, blockParts, seenCommit)
} else {
log.Warn("Why are we finalizeCommitting a block height we already have?", "height", block.Height)
}
fail.Fail() // XXX
// Create a copy of the state for staging
// and an event cache for txs
stateCopy := cs.state.Copy()
@ -1277,6 +1282,8 @@ func (cs *ConsensusState) finalizeCommit(height int) {
// NOTE: the block.AppHash wont reflect these txs until the next block
stateCopy.ApplyBlock(eventCache, cs.proxyAppConn, block, blockParts.Header(), cs.mempool)
fail.Fail() // XXX
// Fire off event for new block.
// TODO: Handle app failure. See #177
types.FireEventNewBlock(cs.evsw, types.EventDataNewBlock{block})
@ -1284,9 +1291,10 @@ func (cs *ConsensusState) finalizeCommit(height int) {
eventCache.Flush()
// Save the state.
log.Notice("save state", "height", stateCopy.LastBlockHeight, "hash", stateCopy.AppHash)
stateCopy.Save()
fail.Fail() // XXX
// NewHeightStep!
cs.updateToState(stateCopy)

View File

@ -1,8 +1,11 @@
package state
import (
"bytes"
"errors"
"github.com/ebuchman/fail-test"
. "github.com/tendermint/go-common"
"github.com/tendermint/tendermint/proxy"
"github.com/tendermint/tendermint/types"
@ -98,20 +101,28 @@ func (s *State) execBlockOnProxyApp(eventCache types.Fireable, proxyAppConn prox
return err
}
fail.Fail() // XXX
// Run txs of block
for _, tx := range block.Txs {
fail.FailRand(len(block.Txs)) // XXX
proxyAppConn.AppendTxAsync(tx)
if err := proxyAppConn.Error(); err != nil {
return err
}
}
fail.Fail() // XXX
// End block
changedValidators, err := proxyAppConn.EndBlockSync(uint64(block.Height))
if err != nil {
log.Warn("Error in proxyAppConn.EndBlock", "error", err)
return err
}
fail.Fail() // XXX
// TODO: Do something with changedValidators
log.Debug("TODO: Do something with changedValidators", "changedValidators", changedValidators)
@ -248,6 +259,8 @@ func (m mockMempool) Update(height int, txs []types.Tx) {}
//----------------------------------------------------------------
// Replay blocks to sync app to latest state of core
// ErrReplay is a named error type declared in the block-replay section.
// NOTE(review): no use of it is visible in this view — confirm where it is returned.
type ErrReplay error
type ErrAppBlockHeightTooHigh struct {
coreHeight int
appHeight int
@ -257,6 +270,16 @@ func (e ErrAppBlockHeightTooHigh) Error() string {
return Fmt("App block height (%d) is higher than core (%d)", e.appHeight, e.coreHeight)
}
// ErrLastStateMismatch reports that the app hash recorded on the core
// (tendermint) side for a block differs from the app hash held by the app.
// Field order matters: the type is built with a positional literal.
type ErrLastStateMismatch struct {
	height int    // block height the comparison was made at
	core   []byte // app hash as seen by core
	app    []byte // app hash as reported by the app
}

// Error formats the height and both hashes (upper-case hex) into one message.
func (e ErrLastStateMismatch) Error() string {
	const layout = "Latest tendermint block (%d) LastAppHash (%X) does not match app's AppHash (%X)"
	msg := Fmt(layout, e.height, e.core, e.app)
	return msg
}
type ErrStateMismatch struct {
got *State
expected *State
@ -289,29 +312,47 @@ func (s *State) ReplayBlocks(appHash []byte, header *types.Header, partsHeader t
appBlockHeight := stateCopy.LastBlockHeight
coreBlockHeight := blockStore.Height()
if coreBlockHeight < appBlockHeight {
// if the app is ahead, there's nothing we can do
return ErrAppBlockHeightTooHigh{coreBlockHeight, appBlockHeight}
} else if coreBlockHeight == appBlockHeight {
// if we crashed between Commit and SaveState,
// the state's app hash is stale
// the state's app hash is stale.
// otherwise we're synced
if s.Stale {
s.Stale = false
s.AppHash = appHash
}
return checkState(s, stateCopy)
} else if s.LastBlockHeight == appBlockHeight {
// core is ahead of app but core's state height is at apps height
// this happens if we crashed after saving the block,
// but before committing it. We should be 1 ahead
if coreBlockHeight != appBlockHeight+1 {
PanicSanity(Fmt("core.state.height == app.height but core.height (%d) > app.height+1 (%d)", coreBlockHeight, appBlockHeight+1))
}
// check that the blocks last apphash is the states apphash
blockMeta := blockStore.LoadBlockMeta(coreBlockHeight)
if !bytes.Equal(blockMeta.Header.AppHash, appHash) {
return ErrLastStateMismatch{coreBlockHeight, blockMeta.Header.AppHash, appHash}
}
// replay the block against the actual tendermint state (not the copy)
return loadApplyBlock(coreBlockHeight, s, blockStore, appConnConsensus)
} else {
// the app is behind.
// either we're caught up or there's blocks to replay
// replay all blocks starting with appBlockHeight+1
for i := appBlockHeight + 1; i <= coreBlockHeight; i++ {
blockMeta := blockStore.LoadBlockMeta(i)
block := blockStore.LoadBlock(i)
panicOnNilBlock(i, coreBlockHeight, block, blockMeta) // XXX
var eventCache events.Fireable // nil
stateCopy.ApplyBlock(eventCache, appConnConsensus, block, blockMeta.PartsHeader, mockMempool{})
loadApplyBlock(i, stateCopy, blockStore, appConnConsensus)
}
return checkState(s, stateCopy)
}
}
func checkState(s, stateCopy *State) error {
// The computed state and the previously set state should be identical
if !s.Equals(stateCopy) {
return ErrStateMismatch{stateCopy, s}
@ -319,6 +360,15 @@ func (s *State) ReplayBlocks(appHash []byte, header *types.Header, partsHeader t
return nil
}
// loadApplyBlock loads the block and block meta stored at blockIndex and
// applies that block to s over appConnConsensus. It passes a nil event cache
// and a mockMempool to ApplyBlock and returns whatever ApplyBlock returns.
func loadApplyBlock(blockIndex int, s *State, blockStore proxy.BlockStore, appConnConsensus proxy.AppConnConsensus) error {
	// Meta is loaded first, then the block, matching the store access order
	// used elsewhere in the replay code.
	var (
		meta = blockStore.LoadBlockMeta(blockIndex)
		blk  = blockStore.LoadBlock(blockIndex)
	)

	// Sanity check on the loaded values before they are dereferenced. // XXX
	panicOnNilBlock(blockIndex, blockStore.Height(), blk, meta)

	// Declared but never assigned, so ApplyBlock receives a nil event cache.
	var noEvents events.Fireable
	err := s.ApplyBlock(noEvents, appConnConsensus, blk, meta.PartsHeader, mockMempool{})
	return err
}
func panicOnNilBlock(height, bsHeight int, block *types.Block, blockMeta *types.BlockMeta) {
if block == nil || blockMeta == nil {
// Sanity?

View File

@ -24,13 +24,13 @@ function kill_procs(){
function send_txs(){
# send a bunch of txs over a few blocks
echo "Sending txs"
# for i in `seq 1 5`; do
# for j in `seq 1 100`; do
for i in `seq 1 5`; do
for j in `seq 1 100`; do
tx=`head -c 8 /dev/urandom | hexdump -ve '1/1 "%.2X"'`
curl -s 127.0.0.1:46657/broadcast_tx_async?tx=\"$tx\" &> /dev/null
# done
done
sleep 1
# done
done
}

104
test/persist/test2.sh Normal file
View File

@ -0,0 +1,104 @@
#! /bin/bash

# Root directory used by this test; it is wiped and re-initialised on every run.
# Quoted throughout so a $HOME containing whitespace cannot make `rm -rf`
# operate on unintended, word-split paths.
export TMROOT="$HOME/.tendermint_persist"

rm -rf "$TMROOT"
tendermint init
# start_procs NAME [FAIL_INDEX]
#
# Starts the persistent dummy app in the background and then tendermint.
#   NAME        suffix for the log files (dummy_NAME.log, tendermint_NAME.log)
#   FAIL_INDEX  optional; when given, tendermint runs in the FOREGROUND with
#               FAIL_TEST_INDEX set, so this function only returns once
#               tendermint has exited.
#
# Sets the globals PID_DUMMY and PID_TENDERMINT. PID_TENDERMINT is empty when
# tendermint was run in the foreground, since it is no longer running then.
function start_procs(){
	name=$1
	indexToFail=$2
	echo "Starting persistent dummy and tendermint"
	dummy --persist "$TMROOT/dummy" &> "dummy_${name}.log" &
	PID_DUMMY=$!
	if [[ "$indexToFail" == "" ]]; then
		# run in background, dont fail
		tendermint node &> "tendermint_${name}.log" &
		PID_TENDERMINT=$!
	else
		# run in foreground, fail
		FAIL_TEST_INDEX=$indexToFail tendermint node &> "tendermint_${name}.log"
		# $! only tracks background jobs, so here it would still hold the
		# dummy's PID. tendermint has already exited; record that there is
		# no tendermint process instead of aliasing PID_DUMMY.
		PID_TENDERMINT=""
	fi
}
# kill_procs: force-stop the processes started by start_procs and reap them.
# Reads the globals PID_DUMMY and PID_TENDERMINT.
function kill_procs(){
# SIGKILL both processes in a single call
kill -9 $PID_DUMMY $PID_TENDERMINT
# wait on each PID so the shell reaps it before the caller continues
wait $PID_DUMMY
wait $PID_TENDERMINT
}
# wait till node is up, send txs
#
# Polls the RPC status endpoint once per second until curl succeeds, then
# broadcasts 5 batches of 100 random 8-byte txs (hex encoded), sleeping one
# second between batches so the txs are spread over a few blocks.
# Sets the globals addr and ERR.
function send_txs(){
	addr="127.0.0.1:46657"
	curl -s "$addr/status" > /dev/null
	ERR=$?
	while [ "$ERR" != 0 ]; do
		sleep 1
		curl -s "$addr/status" > /dev/null
		ERR=$?
	done

	# send a bunch of txs over a few blocks
	echo "Node is up, sending txs"
	for i in $(seq 1 5); do
		for j in $(seq 1 100); do
			tx=$(head -c 8 /dev/urandom | hexdump -ve '1/1 "%.2X"')
			# The URL is quoted as a whole: it contains '?', which the shell
			# would otherwise treat as a pathname-expansion pattern.
			curl -s "$addr/broadcast_tx_async?tx=\"$tx\"" &> /dev/null
		done
		sleep 1
	done
}
# Crash tendermint once at every fail point, then restart it against a fresh
# dummy process and require that it comes back up and commits a new block.
failsStart=0
# One test iteration per occurrence of "fail.Fail" in the Go sources under ".".
# NOTE(review): this pattern also matches fail.FailRand(...) and any copies of
# the library under "."; confirm the count equals the number of indexed fail
# points, otherwise the foreground tendermint run for the last index may never exit.
fails=$(grep -r "fail.Fail" --include \*.go . | wc -l)
failsEnd=$(($fails-1))

for failIndex in $(seq $failsStart $failsEnd); do
	echo ""
	echo "* Test FailIndex $failIndex"

	# test failure at failIndex
	send_txs &
	start_procs 1 $failIndex

	# tendermint should fail when it hits the fail index
	kill -9 $PID_DUMMY
	wait $PID_DUMMY

	start_procs 2

	# wait for node to handshake and make a new block
	addr="localhost:46657"
	curl -s "$addr/status" > /dev/null
	ERR=$?
	i=0
	while [ "$ERR" != 0 ]; do
		sleep 1
		curl -s "$addr/status" > /dev/null
		ERR=$?
		i=$(($i + 1))
		# Only time out if the last attempt actually failed; a success on the
		# final retry must not be reported as a timeout.
		if [[ "$ERR" != 0 && $i == 10 ]]; then
			echo "Timed out waiting for tendermint to start"
			# don't leave the dummy and tendermint running behind us
			kill_procs
			exit 1
		fi
	done

	# wait for a new block
	# The jq filter is quoted so the shell cannot glob-expand "[1]".
	h1=$(curl -s "$addr/status" | jq '.result[1].latest_block_height')
	h2=$h1
	i=0
	# An empty h2 means the status query failed (e.g. the node died); that must
	# not be mistaken for "height changed".
	while [[ -z "$h2" || "$h2" == "$h1" ]]; do
		sleep 1
		h2=$(curl -s "$addr/status" | jq '.result[1].latest_block_height')
		i=$(($i + 1))
		if [[ $i == 60 ]]; then
			echo "Timed out waiting for a new block"
			kill_procs
			exit 1
		fi
	done

	kill_procs

	echo "* Passed Test for FailIndex $failIndex"
	echo ""
done

echo "Passed Test: Persistence"