From 0e7694ca94374922e3cc5e135a35a31c351c4501 Mon Sep 17 00:00:00 2001 From: Ethan Buchman Date: Thu, 22 Dec 2016 15:01:22 -0500 Subject: [PATCH] state: AppHashIsStale -> IntermediateState --- state/execution.go | 68 +++++++++++++++++++--------- state/state.go | 67 ++++++++++++++++++++------- test/persist/test_failure_indices.sh | 2 +- 3 files changed, 99 insertions(+), 38 deletions(-) diff --git a/state/execution.go b/state/execution.go index cec4849a..e1cea605 100644 --- a/state/execution.go +++ b/state/execution.go @@ -56,7 +56,9 @@ func (s *State) ExecBlock(eventCache types.Fireable, proxyAppConn proxy.AppConnC // save state with updated height/blockhash/validators // but stale apphash, in case we fail between Commit and Save - s.Save() + s.SaveIntermediate() + + fail.Fail() // XXX return nil } @@ -264,7 +266,6 @@ func (s *State) CommitStateUpdateMempool(proxyAppConn proxy.AppConnConsensus, bl // Set the state's new AppHash s.AppHash = res.Data - s.AppHashIsStale = false // Update mempool. 
mempool.Update(block.Height, block.Txs) @@ -322,7 +323,7 @@ func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error { return nil } - log.Notice("TMSP Handshake", "height", blockInfo.BlockHeight, "app_hash", blockInfo.AppHash) + log.Notice("TMSP Handshake", "appHeight", blockInfo.BlockHeight, "appHash", blockInfo.AppHash) blockHeight := int(blockInfo.BlockHeight) // XXX: beware overflow appHash := blockInfo.AppHash @@ -352,29 +353,46 @@ func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error { func (h *Handshaker) ReplayBlocks(appHash []byte, appBlockHeight int, appConnConsensus proxy.AppConnConsensus) error { storeBlockHeight := h.store.Height() - if storeBlockHeight < appBlockHeight { + stateBlockHeight := h.state.LastBlockHeight + log.Notice("TMSP Replay Blocks", "appHeight", appBlockHeight, "storeHeight", storeBlockHeight, "stateHeight", stateBlockHeight) + + if storeBlockHeight == 0 { + return nil + } else if storeBlockHeight < appBlockHeight { // if the app is ahead, there's nothing we can do return ErrAppBlockHeightTooHigh{storeBlockHeight, appBlockHeight} } else if storeBlockHeight == appBlockHeight { - // if we crashed between Commit and SaveState, - // the state's app hash is stale - // otherwise we're synced - if h.state.AppHashIsStale { - h.state.AppHashIsStale = false + // We ran Commit, but if we crashed before state.Save(), + // load the intermediate state and update the state.AppHash. 
+ // NOTE: If TMSP allowed rollbacks, we could just replay the + // block even though it's been committed + stateAppHash := h.state.AppHash + lastBlockAppHash := h.store.LoadBlock(storeBlockHeight).AppHash + + if bytes.Equal(stateAppHash, appHash) { + // we're all synced up + log.Debug("TMSP RelpayBlocks: Already synced") + } else if bytes.Equal(stateAppHash, lastBlockAppHash) { + // we crashed after commit and before saving state, + // so load the intermediate state and update the hash + h.state.LoadIntermediate() h.state.AppHash = appHash + h.state.Save() + log.Debug("TMSP RelpayBlocks: Loaded intermediate state and updated state.AppHash") + } else { + PanicSanity(Fmt("Unexpected state.AppHash: state.AppHash %X; app.AppHash %X, lastBlock.AppHash %X", stateAppHash, appHash, lastBlockAppHash)) + } return nil - } else if h.state.LastBlockHeight == appBlockHeight { - // store is ahead of app but core's state height is at apps height - // this happens if we crashed after saving the block, - // but before committing it. 
We should be 1 ahead - if storeBlockHeight != appBlockHeight+1 { - PanicSanity(Fmt("core.state.height == app.height but store.height (%d) > app.height+1 (%d)", storeBlockHeight, appBlockHeight+1)) - } + } else if storeBlockHeight == appBlockHeight+1 && + storeBlockHeight == stateBlockHeight+1 { + // We crashed after saving the block + // but before Commit (both the state and app are behind), + // so just replay the block - // check that the blocks last apphash is the states apphash + // check that the lastBlock.AppHash matches the state apphash block := h.store.LoadBlock(storeBlockHeight) if !bytes.Equal(block.Header.AppHash, appHash) { return ErrLastStateMismatch{storeBlockHeight, block.Header.AppHash, appHash} @@ -385,13 +403,19 @@ func (h *Handshaker) ReplayBlocks(appHash []byte, appBlockHeight int, appConnCon h.nBlocks += 1 var eventCache types.Fireable // nil - // replay the block against the actual tendermint state + // replay the latest block return h.state.ApplyBlock(eventCache, appConnConsensus, block, blockMeta.PartsHeader, MockMempool{}) - + } else if storeBlockHeight != stateBlockHeight { + // unless we failed before committing or saving state (previous 2 cases), + // the store and state should be at the same height! 
+ PanicSanity(Fmt("Expected storeHeight (%d) and stateHeight (%d) to match.", storeBlockHeight, stateBlockHeight)) } else { - // either we're caught up or there's blocks to replay + // store is more than one ahead, + // so app wants to replay many blocks + // replay all blocks starting with appBlockHeight+1 var eventCache types.Fireable // nil + var appHash []byte for i := appBlockHeight + 1; i <= storeBlockHeight; i++ { h.nBlocks += 1 @@ -413,8 +437,10 @@ func (h *Handshaker) ReplayBlocks(appHash []byte, appBlockHeight int, appConnCon appHash = res.Data } if !bytes.Equal(h.state.AppHash, appHash) { - return errors.New(Fmt("Tendermint state.AppHash does not match AppHash after replay", "expected", h.state.AppHash, "got", appHash)) + return errors.New(Fmt("Tendermint state.AppHash does not match AppHash after replay. Got %X, expected %X", appHash, h.state.AppHash)) } return nil } + + return nil } diff --git a/state/state.go b/state/state.go index af2f69ca..455ba409 100644 --- a/state/state.go +++ b/state/state.go @@ -14,7 +14,8 @@ import ( ) var ( - stateKey = []byte("stateKey") + stateKey = []byte("stateKey") + stateIntermediateKey = []byte("stateIntermediateKey") ) //----------------------------------------------------------------------------- @@ -36,15 +37,17 @@ type State struct { Validators *types.ValidatorSet LastValidators *types.ValidatorSet // block.LastCommit validated against this - // AppHash is updated after Commit; - // it's stale after ExecBlock and before Commit - AppHashIsStale bool - AppHash []byte + // AppHash is updated after Commit + AppHash []byte } func LoadState(db dbm.DB) *State { + return loadState(db, stateKey) +} + +func loadState(db dbm.DB, key []byte) *State { s := &State{db: db} - buf := db.Get(stateKey) + buf := db.Get(key) if len(buf) == 0 { return nil } else { @@ -60,9 +63,6 @@ func LoadState(db dbm.DB) *State { } func (s *State) Copy() *State { - if s.AppHashIsStale { - PanicSanity(Fmt("App hash is stale: %v", s)) - } return 
&State{ db: s.db, GenesisDoc: s.GenesisDoc, @@ -72,7 +72,6 @@ func (s *State) Copy() *State { LastBlockTime: s.LastBlockTime, Validators: s.Validators.Copy(), LastValidators: s.LastValidators.Copy(), - AppHashIsStale: false, AppHash: s.AppHash, } } @@ -83,6 +82,35 @@ func (s *State) Save() { s.db.SetSync(stateKey, s.Bytes()) } +func (s *State) SaveIntermediate() { + s.mtx.Lock() + defer s.mtx.Unlock() + s.db.SetSync(stateIntermediateKey, s.Bytes()) +} + +// Load the intermediate state into the current state +// and do some sanity checks +func (s *State) LoadIntermediate() { + s2 := loadState(s.db, stateIntermediateKey) + if s.ChainID != s2.ChainID { + PanicSanity(Fmt("State mismatch for ChainID. Got %v, Expected %v", s2.ChainID, s.ChainID)) + } + + if s.LastBlockHeight+1 != s2.LastBlockHeight { + PanicSanity(Fmt("State mismatch for LastBlockHeight. Got %v, Expected %v", s2.LastBlockHeight, s.LastBlockHeight+1)) + } + + if !bytes.Equal(s.Validators.Hash(), s2.LastValidators.Hash()) { + PanicSanity(Fmt("State mismatch for LastValidators. Got %X, Expected %X", s2.LastValidators.Hash(), s.Validators.Hash())) + } + + if !bytes.Equal(s.AppHash, s2.AppHash) { + PanicSanity(Fmt("State mismatch for AppHash. 
Got %X, Expected %X", s2.AppHash, s.AppHash)) + } + + s.setBlockAndValidators(s2.LastBlockHeight, s2.LastBlockID, s2.LastBlockTime, s2.Validators.Copy(), s2.LastValidators.Copy()) +} + func (s *State) Equals(s2 *State) bool { return bytes.Equal(s.Bytes(), s2.Bytes()) } @@ -97,15 +125,22 @@ func (s *State) Bytes() []byte { } // Mutate state variables to match block and validators -// Since we don't have the new AppHash yet, we set s.AppHashIsStale=true +// after running EndBlock func (s *State) SetBlockAndValidators(header *types.Header, blockPartsHeader types.PartSetHeader, prevValSet, nextValSet *types.ValidatorSet) { - s.LastBlockHeight = header.Height - s.LastBlockID = types.BlockID{header.Hash(), blockPartsHeader} - s.LastBlockTime = header.Time + s.setBlockAndValidators(header.Height, + types.BlockID{header.Hash(), blockPartsHeader}, header.Time, + prevValSet, nextValSet) +} + +func (s *State) setBlockAndValidators( + height int, blockID types.BlockID, blockTime time.Time, + prevValSet, nextValSet *types.ValidatorSet) { + + s.LastBlockHeight = height + s.LastBlockID = blockID + s.LastBlockTime = blockTime s.Validators = nextValSet s.LastValidators = prevValSet - - s.AppHashIsStale = true } func (s *State) GetValidators() (*types.ValidatorSet, *types.ValidatorSet) { diff --git a/test/persist/test_failure_indices.sh b/test/persist/test_failure_indices.sh index 509deee7..7302ccac 100644 --- a/test/persist/test_failure_indices.sh +++ b/test/persist/test_failure_indices.sh @@ -14,7 +14,7 @@ function start_procs(){ PID_DUMMY=$! if [[ "$indexToFail" == "" ]]; then # run in background, dont fail - tendermint node &> tendermint_${name}.log & + tendermint node --log_level=debug &> tendermint_${name}.log & PID_TENDERMINT=$! else # run in foreground, fail