tendermint/consensus/replay_test.go

package consensus

import (
	"fmt"
	"io/ioutil"
	"os"
	"path"
	"strings"
	"testing"
	"time"

	"github.com/tendermint/tendermint/config/tendermint_test"

	. "github.com/tendermint/go-common"
	"github.com/tendermint/go-wire"
	"github.com/tendermint/tendermint/types"
)

// init assigns the package-level config from tendermint_test.ResetConfig,
// using the "consensus_replay_test" name, so the tests in this file run
// against that configuration.
func init() {
	config = tendermint_test.ResetConfig("consensus_replay_test")
}

// TODO: these tests ensure we can always recover from any state of the wal,
// assuming it comes with a correct related state for the priv_validator.json.
// It would be better to verify explicitly which states we can recover from without the wal
// and which ones we need the wal for - then we'd also be able to only flush the
// wal writer when we need to, instead of with every message.

// data_dir is the directory that holds the <name>.cswal files read by
// newTestCase. It is built from GoPath and the package's source path.
var data_dir = path.Join(GoPath, "src/github.com/tendermint/tendermint/consensus", "test_data")

// the priv validator changes step at these lines for a block with 1 val and 1 part
var baseStepChanges = []int{3, 6, 8}

// test recovery from each line in each testCase
// (each entry names a .cswal file in data_dir and the three WAL lines at which
// the propose, prevote and precommit step changes occur)
var testCases = []*testCase{
	newTestCase("empty_block", baseStepChanges),   // empty block (has 1 block part)
	newTestCase("small_block1", baseStepChanges),  // small block with txs in 1 block part
	newTestCase("small_block2", []int{3, 10, 12}), // small block with txs across 5 smaller block parts
}

// testCase describes one consensus WAL loaded from disk together with the
// WAL lines at which the priv validator changes step.
type testCase struct {
	// name is the base name of the .cswal file under data_dir; it is also
	// used in log and panic messages.
	name    string
	log     string       // full cs wal
	stepMap map[int]int8 // map lines of log to privval step

	// WAL line numbers of the three step changes, taken in order from the
	// stepChanges slice given to newTestCase.
	proposeLine   int
	prevoteLine   int
	precommitLine int
}

// newTestCase builds the testCase called name. It reads <name>.cswal from
// data_dir, derives the line-to-step map from stepChanges, and records the
// three entries of stepChanges as the propose, prevote and precommit lines
// (in that order). It panics unless stepChanges has exactly three entries.
func newTestCase(name string, stepChanges []int) *testCase {
	const wantChanges = 3
	if got := len(stepChanges); got != wantChanges {
		panic(Fmt("a full wal has 3 step changes! Got array %v", stepChanges))
	}

	walPath := path.Join(data_dir, name+".cswal")

	tc := new(testCase)
	tc.name = name
	tc.log = readWAL(walPath)
	tc.stepMap = newMapFromChanges(stepChanges)
	tc.proposeLine = stepChanges[0]
	tc.prevoteLine = stepChanges[1]
	tc.precommitLine = stepChanges[2]
	return tc
}

// newMapFromChanges expands a list of step-change line numbers into a lookup
// table from WAL line number to step index.
//
// changes must be in ascending order. Lines below changes[0] map to step 0,
// lines in [changes[0], changes[1]) map to step 1, and so on; the line equal
// to the final entry maps to len(changes). For {3, 6, 8} that yields
// 0-2 -> 0, 3-5 -> 1, 6-7 -> 2, 8 -> 3.
//
// The caller's slice is never modified. An empty changes yields an empty map.
func newMapFromChanges(changes []int) map[int]int8 {
	m := make(map[int]int8)
	if len(changes) == 0 {
		return m
	}

	// Work on a private copy: appending directly to the caller's slice would
	// overwrite the element just past its length whenever the slice has spare
	// capacity (for example when it is a sub-slice of a longer one).
	bounds := make([]int, 0, len(changes)+1)
	bounds = append(bounds, changes...)
	// One extra bound just past the last change, so the line of the last step
	// change itself gets an entry in the map.
	bounds = append(bounds, changes[len(changes)-1]+1)

	line := 0
	for step, bound := range bounds {
		for ; line < bound; line++ {
			m[line] = int8(step)
		}
	}
	return m
}

// readWAL returns the entire contents of the file at p as a string.
// It panics with the underlying error if the file cannot be read.
func readWAL(p string) string {
	contents, readErr := ioutil.ReadFile(p)
	if readErr == nil {
		return string(contents)
	}
	panic(readErr)
}

// writeWAL creates a randomly named directory under the system temp dir,
// writes walMsgs to a file called "wal" inside it, and returns the directory
// path. It panics if the directory or the file cannot be created.
func writeWAL(walMsgs string) string {
	// Fresh WAL directory with a random suffix.
	walDir := os.TempDir() + "/wal" + RandStr(12)
	if err := EnsureDir(walDir, 0700); err != nil {
		panic(err)
	}

	// Write the needed WAL to file.
	if err := WriteFile(walDir+"/wal", []byte(walMsgs), 0600); err != nil {
		panic(err)
	}
	return walDir
}

// waitForBlock blocks until a value arrives on newBlockCh. If nothing arrives
// within 10 seconds it panics with a message naming the test case and line i.
func waitForBlock(newBlockCh chan interface{}, thisCase *testCase, i int) {
	const limit = 10 * time.Second
	select {
	case <-time.After(limit):
		panic(Fmt("Timed out waiting for new block for case '%s' line %d", thisCase.name, i))
	case <-newBlockCh:
		// got a block; nothing more to do
	}
}

// runReplayTest sets cs's "cs_wal_dir" config entry to walDir, starts cs,
// waits (via waitForBlock) for a value on newBlockCh, and then calls
// cs.Stop() followed by cs.Wait().
//
// thisCase and i are only forwarded to waitForBlock, which uses them in its
// timeout message. t is not used in the body.
// NOTE(review): any result returned by cs.Start() is not inspected here.
func runReplayTest(t *testing.T, cs *ConsensusState, walDir string, newBlockCh chan interface{},
	thisCase *testCase, i int) {

	cs.config.Set("cs_wal_dir", walDir)
	cs.Start()
	// Wait to make a new block.
	// This is just a signal that we haven't halted; it's not something contained in the WAL itself.
	// Assuming the consensus state is running, replay of any WAL, including the empty one,
	// should eventually be followed by a new block, or else something is wrong
	waitForBlock(newBlockCh, thisCase, i)
	cs.Stop()
	cs.Wait()
}

// toPV unwraps pv to the concrete *types.PrivValidator it holds.
// It panics if pv holds any other type.
func toPV(pv PrivValidator) *types.PrivValidator {
	concrete := pv.(*types.PrivValidator)
	return concrete
}

// setupReplayTest prepares a consensus state for replaying the first nLines
// lines of thisCase's WAL.
//
// It writes those lines to a fresh WAL directory, creates a consensus state
// with fixedConsensusStateDummy, sets the priv validator's LastHeight to 1
// and its LastStep from thisCase.stepMap (looked up at nLines, or at nLines-1
// when crashAfter is true), and subscribes to new-block events.
//
// It returns the consensus state, the new-block channel, the WAL line that
// immediately follows the written ones, and the WAL directory.
func setupReplayTest(thisCase *testCase, nLines int, crashAfter bool) (*ConsensusState, chan interface{}, string, string) {
	fmt.Println("-------------------------------------")
	log.Notice(Fmt("Starting replay test %v (of %d lines of WAL). Crash after = %v", thisCase.name, nLines, crashAfter))

	// line used to look up the priv validator's step
	stepLine := nLines
	if crashAfter {
		stepLine--
	}

	walLines := strings.Split(thisCase.log, "\n")
	pendingMsg := walLines[nLines]

	// we write those lines up to (not including) one with the signature
	walContents := strings.Join(walLines[:nLines], "\n") + "\n"
	walDir := writeWAL(walContents)

	cs := fixedConsensusStateDummy()

	// set the last step according to when we crashed vs the wal
	privVal := toPV(cs.privValidator)
	privVal.LastHeight = 1 // first block
	privVal.LastStep = thisCase.stepMap[stepLine]

	log.Warn("setupReplayTest", "LastStep", privVal.LastStep)

	newBlockCh := subscribeToEvent(cs.evsw, "tester", types.EventStringNewBlock(), 1)

	return cs, newBlockCh, pendingMsg, walDir
}

// readTimedWALMessage decodes walMsg, one JSON-encoded WAL line, into a
// TimedWALMessage using wire.ReadJSON. If decoding reports an error the test
// is failed via t.Fatalf.
func readTimedWALMessage(t *testing.T, walMsg string) TimedWALMessage {
	var (
		decoded TimedWALMessage
		readErr error
	)
	wire.ReadJSON(&decoded, []byte(walMsg), &readErr)
	if readErr == nil {
		return decoded
	}
	t.Fatalf("Error reading json data: %v", readErr)
	return decoded
}

//-----------------------------------------------
// Test the log at every iteration, and set the privVal last step
// as if the log was written after signing, before the crash

// TestReplayCrashAfterWrite runs a replay for every WAL prefix of every test
// case, from 1 line up to the number of newline-separated pieces minus one,
// with crashAfter set to true.
func TestReplayCrashAfterWrite(t *testing.T) {
	for _, thisCase := range testCases {
		lastPrefix := len(strings.Split(thisCase.log, "\n")) - 1
		for nLines := 1; nLines <= lastPrefix; nLines++ {
			cs, newBlockCh, _, walDir := setupReplayTest(thisCase, nLines, true)
			runReplayTest(t, cs, walDir, newBlockCh, thisCase, nLines)
		}
	}
}

//-----------------------------------------------
// Test the log as if we crashed after signing but before writing.
// This relies on privValidator.LastSignature being set

// TestReplayCrashBeforeWritePropose replays each test case up to (not
// including) its proposal line. Before starting, it sets the priv validator's
// LastSignBytes and LastSignature from the proposal found on that line.
func TestReplayCrashBeforeWritePropose(t *testing.T) {
	for _, thisCase := range testCases {
		proposeLine := thisCase.proposeLine

		// setup replay test where last message is a proposal
		cs, newBlockCh, rawProposal, walDir := setupReplayTest(thisCase, proposeLine, false)
		walMsg := readTimedWALMessage(t, rawProposal)
		proposalMsg := walMsg.Msg.(msgInfo).Msg.(*ProposalMessage)

		// Set LastSig
		privVal := toPV(cs.privValidator)
		privVal.LastSignBytes = types.SignBytes(cs.state.ChainID, proposalMsg.Proposal)
		privVal.LastSignature = proposalMsg.Proposal.Signature

		runReplayTest(t, cs, walDir, newBlockCh, thisCase, proposeLine)
	}
}

// TestReplayCrashBeforeWritePrevote runs the crash-before-write vote scenario
// at each test case's prevote line, keyed on the complete-proposal event.
func TestReplayCrashBeforeWritePrevote(t *testing.T) {
	for idx := range testCases {
		tc := testCases[idx]
		testReplayCrashBeforeWriteVote(t, tc, tc.prevoteLine, types.EventStringCompleteProposal())
	}
}

// TestReplayCrashBeforeWritePrecommit runs the crash-before-write vote
// scenario at each test case's precommit line, keyed on the polka event.
func TestReplayCrashBeforeWritePrecommit(t *testing.T) {
	for idx := range testCases {
		tc := testCases[idx]
		testReplayCrashBeforeWriteVote(t, tc, tc.precommitLine, types.EventStringPolka())
	}
}

// testReplayCrashBeforeWriteVote replays thisCase up to (not including)
// lineNum, the WAL line holding a vote. When the event named by eventString
// fires, it sets the priv validator's LastSignBytes and LastSignature from
// that vote.
//
// The vote line is decoded once, before the listener is registered. That
// keeps t.Fatalf (inside readTimedWALMessage) and the type assertions on the
// goroutine running the test, as the testing package requires for FailNow,
// and avoids re-decoding the same line each time the event fires. The
// listener itself only copies the signature data.
func testReplayCrashBeforeWriteVote(t *testing.T, thisCase *testCase, lineNum int, eventString string) {
	// setup replay test where last message is a vote
	cs, newBlockCh, voteMsg, walDir := setupReplayTest(thisCase, lineNum, false)

	msg := readTimedWALMessage(t, voteMsg)
	vote := msg.Msg.(msgInfo).Msg.(*VoteMessage)

	types.AddListenerForEvent(cs.evsw, "tester", eventString, func(data types.TMEventData) {
		// Set LastSig
		toPV(cs.privValidator).LastSignBytes = types.SignBytes(cs.state.ChainID, vote.Vote)
		toPV(cs.privValidator).LastSignature = vote.Vote.Signature
	})
	runReplayTest(t, cs, walDir, newBlockCh, thisCase, lineNum)
}
replayCatchup test 2016-01-18 12:57:57 -08:00			`package consensus`

			`import (`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`"fmt"`
replayCatchup test 2016-01-18 12:57:57 -08:00			`"io/ioutil"`
			`"os"`
replay test data 2016-10-11 08:44:07 -07:00			`"path"`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`"strings"`
replayCatchup test 2016-01-18 12:57:57 -08:00			`"testing"`
			`"time"`

fix tests 2017-01-12 07:58:44 -08:00			`"github.com/tendermint/tendermint/config/tendermint_test"`

consensus: t.Fatal -> panic 2016-07-11 18:10:05 -07:00			`. "github.com/tendermint/go-common"`
types: privVal.LastSignature. closes #247 2016-08-09 14:18:29 -07:00			`"github.com/tendermint/go-wire"`
replayCatchup test 2016-01-18 12:57:57 -08:00			`"github.com/tendermint/tendermint/types"`
			`)`

fix tests 2017-01-12 07:58:44 -08:00			`func init() {`
			`config = tendermint_test.ResetConfig("consensus_replay_test")`
			`}`

consensus: wal.Flush() and cleanup replay tests 2016-12-17 20:43:17 -08:00			`// TODO: these tests ensure we can always recover from any state of the wal,`
fixes from review 2016-12-22 18:51:58 -08:00			`// assuming it comes with a correct related state for the priv_validator.json.`
			`// It would be better to verify explicitly which states we can recover from without the wal`
			`// and which ones we need the wal for - then we'd also be able to only flush the`
			`// wal writer when we need to, instead of with every message.`
consensus: wal.Flush() and cleanup replay tests 2016-12-17 20:43:17 -08:00
replay: more tests 2016-10-11 09:55:04 -07:00			`var data_dir = path.Join(GoPath, "src/github.com/tendermint/tendermint/consensus", "test_data")`

			`// the priv validator changes step at these lines for a block with 1 val and 1 part`
Consensus WAL uses AutoFile/Group 2016-10-28 15:01:14 -07:00			`var baseStepChanges = []int{3, 6, 8}`
replay: more tests 2016-10-11 09:55:04 -07:00
			`// test recovery from each line in each testCase`
			`var testCases = []*testCase{`
MakePartSet takes partSize from config. fix replay test 2016-09-16 09:20:07 -07:00			`newTestCase("empty_block", baseStepChanges), // empty block (has 1 block part)`
			`newTestCase("small_block1", baseStepChanges), // small block with txs in 1 block part`
			`newTestCase("small_block2", []int{3, 10, 12}), // small block with txs across 5 smaller block parts`
replay: more tests 2016-10-11 09:55:04 -07:00			`}`
Remove fees 2016-04-03 04:51:44 -07:00
replay test data 2016-10-11 08:44:07 -07:00			`type testCase struct {`
			`name string`
cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`log string //full cs wal`
replay test data 2016-10-11 08:44:07 -07:00			`stepMap map[int]int8 // map lines of log to privval step`
s/Validation/Commit/g 2016-04-02 09:10:16 -07:00
replay: more tests 2016-10-11 09:55:04 -07:00			`proposeLine int`
			`prevoteLine int`
			`precommitLine int`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`}`

replay: more tests 2016-10-11 09:55:04 -07:00			`func newTestCase(name string, stepChanges []int) *testCase {`
			`if len(stepChanges) != 3 {`
			`panic(Fmt("a full wal has 3 step changes! Got array %v", stepChanges))`
			`}`
			`return &testCase{`
			`name: name,`
			`log: readWAL(path.Join(data_dir, name+".cswal")),`
			`stepMap: newMapFromChanges(stepChanges),`

			`proposeLine: stepChanges[0],`
			`prevoteLine: stepChanges[1],`
			`precommitLine: stepChanges[2],`
			`}`
replay test data 2016-10-11 08:44:07 -07:00			`}`

replay: more tests 2016-10-11 09:55:04 -07:00			`func newMapFromChanges(changes []int) map[int]int8 {`
			`changes = append(changes, changes[2]+1) // so we add the last step change to the map`
			`m := make(map[int]int8)`
			`var count int`
			`for changeNum, nextChange := range changes {`
			`for ; count < nextChange; count++ {`
			`m[count] = int8(changeNum)`
			`}`
replay test data 2016-10-11 08:44:07 -07:00			`}`
replay: more tests 2016-10-11 09:55:04 -07:00			`return m`
replay test data 2016-10-11 08:44:07 -07:00			`}`

			`func readWAL(p string) string {`
			`b, err := ioutil.ReadFile(p)`
			`if err != nil {`
			`panic(err)`
			`}`
			`return string(b)`
			`}`

cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`func writeWAL(walMsgs string) string {`
			`tempDir := os.TempDir()`
			`walDir := tempDir + "/wal" + RandStr(12)`
			`// Create WAL directory`
			`err := EnsureDir(walDir, 0700)`
types: privVal.LastSignature. closes #247 2016-08-09 14:18:29 -07:00			`if err != nil {`
			`panic(err)`
			`}`
cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`// Write the needed WAL to file`
			`err = WriteFile(walDir+"/wal", []byte(walMsgs), 0600)`
types: privVal.LastSignature. closes #247 2016-08-09 14:18:29 -07:00			`if err != nil {`
			`panic(err)`
			`}`
cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`return walDir`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`}`
types: privVal.LastSignature. closes #247 2016-08-09 14:18:29 -07:00
replay test data 2016-10-11 08:44:07 -07:00			`func waitForBlock(newBlockCh chan interface{}, thisCase *testCase, i int) {`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`after := time.After(time.Second * 10)`
types: privVal.LastSignature. closes #247 2016-08-09 14:18:29 -07:00			`select {`
			`case <-newBlockCh:`
			`case <-after:`
replay test data 2016-10-11 08:44:07 -07:00			`panic(Fmt("Timed out waiting for new block for case '%s' line %d", thisCase.name, i))`
types: privVal.LastSignature. closes #247 2016-08-09 14:18:29 -07:00			`}`
			`}`

cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`func runReplayTest(t testing.T, cs ConsensusState, walDir string, newBlockCh chan interface{},`
replay test data 2016-10-11 08:44:07 -07:00			`thisCase *testCase, i int) {`

cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`cs.config.Set("cs_wal_dir", walDir)`
fixes from review 2016-08-17 20:08:43 -07:00			`cs.Start()`
consensus: add note about replay test 2016-08-23 08:33:18 -07:00			`// Wait to make a new block.`
			`// This is just a signal that we haven't halted; its not something contained in the WAL itself.`
			`// Assuming the consensus state is running, replay of any WAL, including the empty one,`
			`// should eventually be followed by a new block, or else something is wrong`
replay test data 2016-10-11 08:44:07 -07:00			`waitForBlock(newBlockCh, thisCase, i)`
fixes from review 2016-08-17 20:08:43 -07:00			`cs.Stop()`
Consensus WAL uses AutoFile/Group 2016-10-28 15:01:14 -07:00			`cs.Wait()`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`}`

make byzantine logic testable 2016-06-26 12:33:11 -07:00			`func toPV(pv PrivValidator) *types.PrivValidator {`
			`return pv.(*types.PrivValidator)`
			`}`

replay test data 2016-10-11 08:44:07 -07:00			`func setupReplayTest(thisCase testCase, nLines int, crashAfter bool) (ConsensusState, chan interface{}, string, string) {`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`fmt.Println("-------------------------------------")`
Fix Merge pull request #319 2016-12-06 01:16:13 -08:00			`log.Notice(Fmt("Starting replay test %v (of %d lines of WAL). Crash after = %v", thisCase.name, nLines, crashAfter))`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00
			`lineStep := nLines`
			`if crashAfter {`
			`lineStep -= 1`
replayCatchup test 2016-01-18 12:57:57 -08:00			`}`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00
replay test data 2016-10-11 08:44:07 -07:00			`split := strings.Split(thisCase.log, "\n")`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`lastMsg := split[nLines]`

			`// we write those lines up to (not including) one with the signature`
cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`walDir := writeWAL(strings.Join(split[:nLines], "\n") + "\n")`
replayCatchup test 2016-01-18 12:57:57 -08:00
replay: larger read buffer 2016-10-11 09:51:48 -07:00			`cs := fixedConsensusStateDummy()`
replayCatchup test 2016-01-18 12:57:57 -08:00
fixes from review 2016-08-17 20:08:43 -07:00			`// set the last step according to when we crashed vs the wal`
make byzantine logic testable 2016-06-26 12:33:11 -07:00			`toPV(cs.privValidator).LastHeight = 1 // first block`
MakePartSet takes partSize from config. fix replay test 2016-09-16 09:20:07 -07:00			`toPV(cs.privValidator).LastStep = thisCase.stepMap[lineStep]`
replayCatchup test 2016-01-18 12:57:57 -08:00
make byzantine logic testable 2016-06-26 12:33:11 -07:00			`log.Warn("setupReplayTest", "LastStep", toPV(cs.privValidator).LastStep)`
replayCatchup test 2016-01-18 12:57:57 -08:00
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`newBlockCh := subscribeToEvent(cs.evsw, "tester", types.EventStringNewBlock(), 1)`
replayCatchup test 2016-01-18 12:57:57 -08:00
cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`return cs, newBlockCh, lastMsg, walDir`
replayCatchup test 2016-01-18 12:57:57 -08:00			`}`

fixes from review 2016-12-22 18:51:58 -08:00			`func readTimedWALMessage(t *testing.T, walMsg string) TimedWALMessage {`
consensus: wal.Flush() and cleanup replay tests 2016-12-17 20:43:17 -08:00			`var err error`
			`var msg TimedWALMessage`
			`wire.ReadJSON(&msg, []byte(walMsg), &err)`
			`if err != nil {`
			`t.Fatalf("Error reading json data: %v", err)`
			`}`
			`return msg`
			`}`

privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`//-----------------------------------------------`
			`// Test the log at every iteration, and set the privVal last step`
			`// as if the log was written after signing, before the crash`

			`func TestReplayCrashAfterWrite(t *testing.T) {`
replay test data 2016-10-11 08:44:07 -07:00			`for _, thisCase := range testCases {`
			`split := strings.Split(thisCase.log, "\n")`
			`for i := 0; i < len(split)-1; i++ {`
cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`cs, newBlockCh, _, walDir := setupReplayTest(thisCase, i+1, true)`
			`runReplayTest(t, cs, walDir, newBlockCh, thisCase, i+1)`
replay test data 2016-10-11 08:44:07 -07:00			`}`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`}`
			`}`

			`//-----------------------------------------------`
			`// Test the log as if we crashed after signing but before writing.`
			`// This relies on privValidator.LastSignature being set`

			`func TestReplayCrashBeforeWritePropose(t *testing.T) {`
replay test data 2016-10-11 08:44:07 -07:00			`for _, thisCase := range testCases {`
replay: more tests 2016-10-11 09:55:04 -07:00			`lineNum := thisCase.proposeLine`
fixes from review 2016-12-22 18:51:58 -08:00			`// setup replay test where last message is a proposal`
			`cs, newBlockCh, proposalMsg, walDir := setupReplayTest(thisCase, lineNum, false)`
			`msg := readTimedWALMessage(t, proposalMsg)`
replay test data 2016-10-11 08:44:07 -07:00			`proposal := msg.Msg.(msgInfo).Msg.(*ProposalMessage)`
consensus: wal.Flush() and cleanup replay tests 2016-12-17 20:43:17 -08:00			`// Set LastSig`
make byzantine logic testable 2016-06-26 12:33:11 -07:00			`toPV(cs.privValidator).LastSignBytes = types.SignBytes(cs.state.ChainID, proposal.Proposal)`
			`toPV(cs.privValidator).LastSignature = proposal.Proposal.Signature`
cswal -> cs_wal_dir 2016-10-30 03:55:27 -07:00			`runReplayTest(t, cs, walDir, newBlockCh, thisCase, lineNum)`
replay test data 2016-10-11 08:44:07 -07:00			`}`
			`}`

			`func TestReplayCrashBeforeWritePrevote(t *testing.T) {`
			`for _, thisCase := range testCases {`
consensus: wal.Flush() and cleanup replay tests 2016-12-17 20:43:17 -08:00			`testReplayCrashBeforeWriteVote(t, thisCase, thisCase.prevoteLine, types.EventStringCompleteProposal())`
replay test data 2016-10-11 08:44:07 -07:00			`}`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`}`

			`func TestReplayCrashBeforeWritePrecommit(t *testing.T) {`
replay test data 2016-10-11 08:44:07 -07:00			`for _, thisCase := range testCases {`
consensus: wal.Flush() and cleanup replay tests 2016-12-17 20:43:17 -08:00			`testReplayCrashBeforeWriteVote(t, thisCase, thisCase.precommitLine, types.EventStringPolka())`
replay test data 2016-10-11 08:44:07 -07:00			`}`
privVal.LastSignBytes and more replay tests 2016-08-14 09:31:24 -07:00			`}`
consensus: wal.Flush() and cleanup replay tests 2016-12-17 20:43:17 -08:00
			`func testReplayCrashBeforeWriteVote(t testing.T, thisCase testCase, lineNum int, eventString string) {`
fixes from review 2016-12-22 18:51:58 -08:00			`// setup replay test where last message is a vote`
			`cs, newBlockCh, voteMsg, walDir := setupReplayTest(thisCase, lineNum, false)`
consensus: wal.Flush() and cleanup replay tests 2016-12-17 20:43:17 -08:00			`types.AddListenerForEvent(cs.evsw, "tester", eventString, func(data types.TMEventData) {`
fixes from review 2016-12-22 18:51:58 -08:00			`msg := readTimedWALMessage(t, voteMsg)`
consensus: wal.Flush() and cleanup replay tests 2016-12-17 20:43:17 -08:00			`vote := msg.Msg.(msgInfo).Msg.(*VoteMessage)`
			`// Set LastSig`
			`toPV(cs.privValidator).LastSignBytes = types.SignBytes(cs.state.ChainID, vote.Vote)`
			`toPV(cs.privValidator).LastSignature = vote.Vote.Signature`
			`})`
			`runReplayTest(t, cs, walDir, newBlockCh, thisCase, lineNum)`
			`}`