Auto merge of #4447 - nuttycom:bug/4119-rewind_detection, r=str4d

Reproduce and fix off-by-one error in reorg logic (#4119)

This attempts to reproduce #4119 using a simple chain split. It currently seems to trigger a different issue: an assertion failure in `CheckBlockIndex` when restarting.
This commit is contained in:
Homu 2020-05-06 21:50:03 +00:00
commit eb06b0a8d4
7 changed files with 153 additions and 22 deletions

View File

@ -83,6 +83,7 @@ testScripts=(
'turnstile.py'
'mining_shielded_coinbase.py'
'framework.py'
'sapling_rewind_check.py'
'feature_zip221.py'
);
testScriptsExt=(

View File

@ -5,9 +5,10 @@
from test_framework.flyclient import (ZcashMMRNode, append, delete, make_root_commitment)
from test_framework.mininode import (HEARTWOOD_BRANCH_ID, CBlockHeader)
from test_framework.mininode import (CBlockHeader)
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (
HEARTWOOD_BRANCH_ID,
assert_equal,
bytes_to_hex_str,
hex_str_to_bytes,

View File

@ -5,12 +5,13 @@
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import assert_equal, initialize_chain_clean, \
start_nodes, start_node, connect_nodes_bi, bitcoind_processes
start_nodes, start_node, connect_nodes_bi, bitcoind_processes, \
nuparams, OVERWINTER_BRANCH_ID, SAPLING_BRANCH_ID
import time
FAKE_SPROUT = ['-nuparams=5ba81b19:210', '-nuparams=76b809bb:220']
FAKE_OVERWINTER = ['-nuparams=5ba81b19:10', '-nuparams=76b809bb:220']
FAKE_SPROUT = [nuparams(OVERWINTER_BRANCH_ID, 210), nuparams(SAPLING_BRANCH_ID, 220)]
FAKE_OVERWINTER = [nuparams(OVERWINTER_BRANCH_ID, 10), nuparams(SAPLING_BRANCH_ID, 220)]
class RewindBlockIndexTest (BitcoinTestFramework):

View File

@ -0,0 +1,125 @@
#!/usr/bin/env python3
# Copyright (c) 2014 The Bitcoin Core developers
# Copyright (c) 2020 The Zcash developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or https://www.opensource.org/licenses/mit-license.php .
"""
Exercise the chain rewind code at the Sapling boundary.
Test case is:
3 nodes are initialized, two of which are aware of the Sapling network upgrade,
and one of which is not. On each node, the chain is advanced to just prior to
the Sapling activation height; then, the network is split and each branch of
the network produces blocks into the range of the upgraded protocol.
The node that is not aware of Sapling activation is advanced beyond the maximum
reorg length of 99 blocks, then that node is shut down. When the node is
restarted with knowledge of the network activation height the checks on startup
identify a need to reorg to come into agreement with the rest of the network.
However, since the rollback required is greater than the maximum reorg length,
the node shuts down with an error as a precautionary measure. It was noticed in
#4119 that the error message indicated an incorrect computation of the rollback
length. This test reproduces that error, and the associated change in rollback
length computation (40b5d5e3ea4b602c34c4efaba0b9f6171dddfef5) corrects the issue.
"""
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (assert_equal, assert_true,
initialize_chain_clean, start_nodes, start_node, connect_nodes_bi,
bitcoind_processes,
nuparams, OVERWINTER_BRANCH_ID, SAPLING_BRANCH_ID)
import re
import logging
HAS_SAPLING = [nuparams(OVERWINTER_BRANCH_ID, 10), nuparams(SAPLING_BRANCH_ID, 15)]
NO_SAPLING = [nuparams(OVERWINTER_BRANCH_ID, 10), nuparams(SAPLING_BRANCH_ID, 150)]
class SaplingRewindTest(BitcoinTestFramework):
    """Regression test for the rollback-length computation fixed in #4119.

    Nodes 0 and 1 activate Sapling at height 15; node 2 believes Sapling
    activates at height 150.  The branches are made to diverge past the
    maximum reorg length, node 2 is restarted with the correct activation
    height, and the test asserts that the node refuses to start and that
    its debug log reports the exact expected rollback length (120 blocks).
    """

    def setup_chain(self):
        # Create fresh, empty regtest data directories.
        logging.info("Initializing test directory "+self.options.tmpdir)
        # NOTE(review): 4 directories are initialized although only 3 nodes
        # are started below -- mirrors the original bash test; confirm intended.
        initialize_chain_clean(self.options.tmpdir, 4)

    # This mirrors how the network was setup in the bash test
    def setup_network(self, split=False):
        # Start three fully-connected nodes: two with the correct Sapling
        # activation height, one that believes Sapling activates much later.
        logging.info("Initializing the network in "+self.options.tmpdir)
        self.nodes = start_nodes(3, self.options.tmpdir, extra_args=[
            HAS_SAPLING, # The first two nodes have a correct view of the network,
            HAS_SAPLING, # the third will rewind after upgrading.
            NO_SAPLING
        ])
        connect_nodes_bi(self.nodes,0,1)
        connect_nodes_bi(self.nodes,1,2)
        connect_nodes_bi(self.nodes,0,2)
        self.is_network_split=False
        self.sync_all()

    def run_test(self):
        # Generate shared state up to the network split
        logging.info("Generating initial blocks.")
        self.nodes[0].generate(13)
        block14 = self.nodes[0].generate(1)[0]

        logging.info("Syncing network after initial generation...")
        self.sync_all() # Everyone is still on overwinter

        logging.info("Checking overwinter block propagation.")
        # All three nodes agree on block 14 (pre-Sapling on every node's view).
        assert_equal(self.nodes[0].getbestblockhash(), block14)
        assert_equal(self.nodes[1].getbestblockhash(), block14)
        assert_equal(self.nodes[2].getbestblockhash(), block14)
        logging.info("All nodes are on overwinter.")

        logging.info("Generating network split...")
        # NOTE(review): only the flag is set here; the peer connections made
        # in setup_network are not torn down.  The branches presumably still
        # diverge because each side rejects the other's post-activation
        # blocks -- confirm against the framework's sync_all() semantics.
        self.is_network_split=True

        # generate past the boundary into sapling; this will become the "canonical" branch
        self.nodes[0].generate(50)
        expected = self.nodes[0].getbestblockhash()

        # generate blocks into sapling beyond the maximum rewind length (99 blocks)
        self.nodes[2].generate(120)
        self.sync_all()

        assert_true(expected != self.nodes[2].getbestblockhash(), "Split chains have not diverged!")

        # Stop the overwinter node to ensure state is flushed to disk.
        logging.info("Shutting down lagging node...")
        self.nodes[2].stop()
        bitcoind_processes[2].wait()

        # Restart the nodes, reconnect, and sync the network. This succeeds if "-reindex" is passed.
        logging.info("Reconnecting the network...")
        try:
            # expect an exception; the node will refuse to fully start because its last point of
            # agreement with the rest of the network was prior to the network upgrade activation
            self.nodes[2] = start_node(2, self.options.tmpdir, extra_args=HAS_SAPLING) # + ["-reindex"])
        except Exception:
            # FIX(review): was a bare `except:`, which also swallows
            # KeyboardInterrupt/SystemExit; only a startup failure is expected.
            logpath = self.options.tmpdir + "/node2/regtest/debug.log"
            found = False
            with open(logpath, 'r') as f:
                for line in f:
                    # Search for the rollback message in the debug log, and ensure that it has the
                    # correct expected rollback length.
                    m = re.search(r'roll back ([0-9]+)', line)
                    if m is None:
                        continue
                    elif m.group(1) == "120":
                        found = True
                        break
                    else:
                        raise AssertionError("Incorrect rollback length %s found, expected 120." %(m.group(1)))
            if not found:
                raise AssertionError("Expected rollback message not found in log file.")

            # restart the node with -reindex to allow the test to complete gracefully,
            # otherwise the node shutdown call in test cleanup will throw an error since
            # it can't connect
            self.nodes[2] = start_node(2, self.options.tmpdir, extra_args=NO_SAPLING + ["-reindex"])
        else:
            raise AssertionError("Expected node to halt due to excessive rewind length.")
# Entry point: delegate to the shared test-framework driver.
if __name__ == '__main__':
    SaplingRewindTest().main()

View File

@ -53,16 +53,8 @@ OVERWINTER_VERSION_GROUP_ID = 0x03C48270
SAPLING_VERSION_GROUP_ID = 0x892F2085
# No transaction format change in Blossom.
SPROUT_BRANCH_ID = 0x00000000
OVERWINTER_BRANCH_ID = 0x5BA81B19
SAPLING_BRANCH_ID = 0x76B809BB
BLOSSOM_BRANCH_ID = 0x2BB40E60
HEARTWOOD_BRANCH_ID = 0xF5B9230B
NU4_BRANCH_ID = 0xE9FF75A6
MAX_INV_SZ = 50000
COIN = 100000000 # 1 zec in zatoshis
# Keep our own socket map for asyncore, so that we can track disconnects

View File

@ -30,6 +30,12 @@ COVERAGE_DIR = None
PRE_BLOSSOM_BLOCK_TARGET_SPACING = 150
POST_BLOSSOM_BLOCK_TARGET_SPACING = 75
SPROUT_BRANCH_ID = 0x00000000
OVERWINTER_BRANCH_ID = 0x5BA81B19
SAPLING_BRANCH_ID = 0x76B809BB
BLOSSOM_BRANCH_ID = 0x2BB40E60
HEARTWOOD_BRANCH_ID = 0xF5B9230B
NU4_BRANCH_ID = 0xE9FF75A6
def enable_coverage(dirname):
"""Maintain a log of which RPC calls are made during testing."""
@ -591,3 +597,6 @@ def check_node_log(self, node_number, line_to_check, stop_node = True):
if line_to_check in logline:
return n
raise AssertionError(repr(line_to_check) + " not found")
def nuparams(branch_id, height):
    """Build the ``-nuparams`` CLI flag that activates the network upgrade
    identified by ``branch_id`` (rendered as lower-case hex) at ``height``."""
    flag = '-nuparams={:x}:{:d}'.format(branch_id, height)
    return flag

View File

@ -4752,20 +4752,23 @@ bool RewindBlockIndex(const CChainParams& chainparams, bool& clearWitnessCaches)
*pindex->nCachedBranchId == CurrentEpochBranchId(pindex->nHeight, consensus);
};
int nHeight = 1;
while (nHeight <= chainActive.Height()) {
if (!sufficientlyValidated(chainActive[nHeight])) {
int lastValidHeight = 0;
while (lastValidHeight < chainActive.Height()) {
if (sufficientlyValidated(chainActive[lastValidHeight + 1])) {
lastValidHeight++;
} else {
break;
}
nHeight++;
}
// nHeight is now the height of the first insufficiently-validated block, or tipheight + 1
auto rewindLength = chainActive.Height() - nHeight;
// lastValidHeight is now the height of the last valid block below the active chain height
auto rewindLength = chainActive.Height() - lastValidHeight;
clearWitnessCaches = false;
if (rewindLength > 0) {
LogPrintf("*** First insufficiently validated block at height %d, rewind length %d\n", nHeight, rewindLength);
LogPrintf("*** Last validated block at height %d, active height is %d; rewind length %d\n", lastValidHeight, chainActive.Height(), rewindLength);
auto nHeight = lastValidHeight + 1;
const uint256 *phashFirstInsufValidated = chainActive[nHeight]->phashBlock;
auto networkID = chainparams.NetworkIDString();
@ -4777,7 +4780,6 @@ bool RewindBlockIndex(const CChainParams& chainparams, bool& clearWitnessCaches)
uint256S("002e1d6daf4ab7b296e7df839dc1bee9d615583bb4bc34b1926ce78307532852"));
clearWitnessCaches = (rewindLength > MAX_REORG_LENGTH && intendedRewind);
if (clearWitnessCaches) {
auto msg = strprintf(_(
"An intended block chain rewind has been detected: network %s, hash %s, height %d"
@ -4787,7 +4789,7 @@ bool RewindBlockIndex(const CChainParams& chainparams, bool& clearWitnessCaches)
if (rewindLength > MAX_REORG_LENGTH && !intendedRewind) {
auto pindexOldTip = chainActive.Tip();
auto pindexRewind = chainActive[nHeight - 1];
auto pindexRewind = chainActive[lastValidHeight];
auto msg = strprintf(_(
"A block chain rewind has been detected that would roll back %d blocks! "
"This is larger than the maximum of %d blocks, and so the node is shutting down for your safety."
@ -4807,7 +4809,7 @@ bool RewindBlockIndex(const CChainParams& chainparams, bool& clearWitnessCaches)
CValidationState state;
CBlockIndex* pindex = chainActive.Tip();
while (chainActive.Height() >= nHeight) {
while (chainActive.Height() > lastValidHeight) {
if (fPruneMode && !(chainActive.Tip()->nStatus & BLOCK_HAVE_DATA)) {
// If pruning, don't try rewinding past the HAVE_DATA point;
// since older blocks can't be served anyway, there's