From 633594478481acde6df27c664a9f9fc95b0f4595 Mon Sep 17 00:00:00 2001 From: Larry Ruane Date: Wed, 24 Jun 2020 09:27:11 -0600 Subject: [PATCH 1/7] add python test to reproduce bug 4301 --- qa/pull-tester/rpc-tests.sh | 1 + qa/rpc-tests/wallet_db_flush.py | 87 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100755 qa/rpc-tests/wallet_db_flush.py diff --git a/qa/pull-tester/rpc-tests.sh b/qa/pull-tester/rpc-tests.sh index b6454a304..9c3666f5a 100755 --- a/qa/pull-tester/rpc-tests.sh +++ b/qa/pull-tester/rpc-tests.sh @@ -105,6 +105,7 @@ testScriptsExt=( 'invalidblockrequest.py' # 'forknotify.py' 'p2p-acceptblock.py' + 'wallet_db_flush.py' ); if [ "x$ENABLE_ZMQ" = "x1" ]; then diff --git a/qa/rpc-tests/wallet_db_flush.py b/qa/rpc-tests/wallet_db_flush.py new file mode 100755 index 000000000..d66e4ffb7 --- /dev/null +++ b/qa/rpc-tests/wallet_db_flush.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 +# Copyright (c) 2020 The Zcash developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or https://www.opensource.org/licenses/mit-license.php . +# +# This test reproduces https://github.com/zcash/zcash/issues/4301 +# It takes an hour to run! + +from decimal import Decimal +from test_framework.authproxy import JSONRPCException +from test_framework.test_framework import BitcoinTestFramework +from test_framework.util import ( + assert_equal, + bitcoind_processes, + initialize_chain_clean, + start_node, +) +import time + +class WalletDBFlush (BitcoinTestFramework): + + def setup_chain(self): + print("Initializing test directory "+self.options.tmpdir) + initialize_chain_clean(self.options.tmpdir, 1) + + def start_node_with(self, index, extra_args=[]): + args = [ + "-nuparams=2bb40e60:1", # Blossom + "-nuparams=f5b9230b:2", # Heartwood + "-nurejectoldversions=false", + ] + return start_node(index, self.options.tmpdir, args + extra_args) + + def setup_network(self, split=False): + self.nodes = [] + self.nodes.append(self.start_node_with(0)) + self.is_network_split=False + self.sync_all() + + def run_test (self): + print("PLEASE NOTE: This test takes an hour to run!") + + # This test requires shielded funds in the local wallet so + # there is witness data, and the easiest way to get shielded + # funds is to mine (since Heartwood, mining reward can go to + # a zaddr), so first create a Sapling address to mine to. + zaddr = self.nodes[0].z_getnewaddress('sapling') + self.nodes[0].generate(2) + + self.nodes[0].stop() + bitcoind_processes[0].wait() + + print("Start mining to address ", zaddr) + self.nodes[0] = self.start_node_with(0, [ + "-mineraddress=%s" % zaddr, + ]) + self.nodes[0].generate(1) + time.sleep(4) + self.nodes[0].stop() + bitcoind_processes[0].wait() + + # If you replace main.cpp:3129 DATABASE_WRITE_INTERVAL with + # 60 (seconds), then sleeptime here can be 80, and this test + # will fail (pre-PR) much faster. + sleeptime = 3620 # just over one hour (DATABASE_WRITE_INTERVAL) + + print("Restart, sleep {}, mine (pre-PR will flush bad wallet state)".format(sleeptime)) + self.nodes[0] = self.start_node_with(0, [ + "-mineraddress=%s" % zaddr, + ]) + assert_equal(self.nodes[0].z_getbalance(zaddr, 0), 5) + time.sleep(sleeptime) + self.nodes[0].generate(1) + time.sleep(4) + self.nodes[0].stop() + bitcoind_processes[0].wait() + + print("Restart, generate, expect assert in CopyPreviousWitnesses") + self.nodes[0] = self.start_node_with(0, [ + "-mineraddress=%s" % zaddr, + ]) + self.nodes[0].generate(1) + time.sleep(4) + self.nodes[0].stop() + +if __name__ == '__main__': + WalletDBFlush().main() From 0219a971392f58ab7e30a725aec2230393a2125c Mon Sep 17 00:00:00 2001 From: Larry Ruane Date: Wed, 24 Jun 2020 16:49:59 -0600 Subject: [PATCH 2/7] flush witness cache correctly Rather than flushing the witness cache from FlushStateToDisk(), called by ActivateBestChain() called by ProcessNewBlock(), do so from ThreadNotifyWallets() after the wallet has updated the in-memory witness data according to the new block, so it's always consistent on disk. --- src/init.cpp | 3 +++ src/main.cpp | 10 +--------- src/wallet/wallet.cpp | 15 ++++++++++++++- src/wallet/wallet.h | 2 ++ 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/init.cpp b/src/init.cpp index 1671dba90..631ab3836 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -229,6 +229,9 @@ void Shutdown() if (pcoinsTip != NULL) { FlushStateToDisk(); } + // Flush the wallet witness cache to disk (no longer done by FlushStateToDisk()) + GetMainSignals().SetBestChain(chainActive.GetLocator()); + delete pcoinsTip; pcoinsTip = NULL; delete pcoinscatcher; diff --git a/src/main.cpp b/src/main.cpp index d11ca5bae..cb3b6c7bc 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3045,7 +3045,6 @@ bool static FlushStateToDisk(CValidationState &state, FlushStateMode mode) { LOCK2(cs_main, cs_LastBlockFile); static int64_t nLastWrite = 0; static int64_t nLastFlush = 0; - static int64_t nLastSetChain = 0; std::set setFilesToPrune; bool fFlushForPrune = false; try { @@ -3068,9 +3067,6 @@ bool static FlushStateToDisk(CValidationState &state, FlushStateMode mode) { if (nLastFlush == 0) { nLastFlush = nNow; } - if (nLastSetChain == 0) { - nLastSetChain = nNow; - } size_t cacheSize = pcoinsTip->DynamicMemoryUsage(); // The cache is large and close to the limit, but we have time now (not in the middle of a block processing). bool fCacheLarge = mode == FLUSH_STATE_PERIODIC && cacheSize * (10.0/9) > nCoinCacheUsage; @@ -3126,11 +3122,7 @@ bool static FlushStateToDisk(CValidationState &state, FlushStateMode mode) { return AbortNode(state, "Failed to write to coin database"); nLastFlush = nNow; } - if ((mode == FLUSH_STATE_ALWAYS || mode == FLUSH_STATE_PERIODIC) && nNow > nLastSetChain + (int64_t)DATABASE_WRITE_INTERVAL * 1000000) { - // Update best block in wallet (so we can detect restored wallets). - GetMainSignals().SetBestChain(chainActive.GetLocator()); - nLastSetChain = nNow; - } + // Don't flush the wallet witness cache (SetBestChain()) here, see #4301 } catch (const std::runtime_error& e) { return AbortNode(state, std::string("System error while flushing: ") + e.what()); } diff --git a/src/wallet/wallet.cpp b/src/wallet/wallet.cpp index a1c7bb5c0..5f6ea468a 100644 --- a/src/wallet/wallet.cpp +++ b/src/wallet/wallet.cpp @@ -585,6 +585,19 @@ void CWallet::ChainTipAdded(const CBlockIndex *pindex, { IncrementNoteWitnesses(pindex, pblock, sproutTree, saplingTree); UpdateSaplingNullifierNoteMapForBlock(pblock); + + // SetBestChain() can be expensive for large wallets, so do this + // at most once per hour; the wallet state will be brought up to + // date during rescanning on startup. + int64_t nNow = GetTimeMicros(); + if (nLastSetChain == 0) { + // Don't flush during startup. + nLastSetChain = nNow; + } + if (nLastSetChain + (int64_t)DATABASE_WRITE_INTERVAL * 1000000 < nNow) { + nLastSetChain = nNow; + SetBestChain(chainActive.GetLocator()); + } } void CWallet::ChainTip(const CBlockIndex *pindex, @@ -4641,7 +4654,7 @@ bool CWallet::InitLoadWallet(bool clearWitnessCaches) RegisterValidationInterface(walletInstance); - CBlockIndex *pindexRescan = chainActive.Tip(); + CBlockIndex *pindexRescan; if (clearWitnessCaches || GetBoolArg("-rescan", false)) { walletInstance->ClearNoteWitnessCache(); diff --git a/src/wallet/wallet.h b/src/wallet/wallet.h index c673dfcea..89737ccaa 100644 --- a/src/wallet/wallet.h +++ b/src/wallet/wallet.h @@ -760,6 +760,7 @@ private: int64_t nNextResend; int64_t nLastResend; + int64_t nLastSetChain; bool fBroadcastTransactions; template @@ -920,6 +921,7 @@ public: nOrderPosNext = 0; nNextResend = 0; nLastResend = 0; + nLastSetChain = 0; nTimeFirstKey = 0; fBroadcastTransactions = false; nWitnessCacheSize = 0; From 73fae6bf0a57dc627cc5196d0c7abb87b76c9d6e Mon Sep 17 00:00:00 2001 From: Larry Ruane Date: Thu, 25 Jun 2020 09:32:36 -0600 Subject: [PATCH 3/7] review, cleanup: eliminate uninitialized variable --- src/wallet/wallet.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/wallet/wallet.cpp b/src/wallet/wallet.cpp index 5f6ea468a..13b0adf56 100644 --- a/src/wallet/wallet.cpp +++ b/src/wallet/wallet.cpp @@ -4654,20 +4654,14 @@ bool CWallet::InitLoadWallet(bool clearWitnessCaches) RegisterValidationInterface(walletInstance); - CBlockIndex *pindexRescan; - if (clearWitnessCaches || GetBoolArg("-rescan", false)) - { + CBlockIndex *pindexRescan = chainActive.Genesis(); + if (clearWitnessCaches || GetBoolArg("-rescan", false)) { walletInstance->ClearNoteWitnessCache(); - pindexRescan = chainActive.Genesis(); - } - else - { + } else { CWalletDB walletdb(walletFile); CBlockLocator locator; if (walletdb.ReadBestBlock(locator)) pindexRescan = FindForkInGlobalIndex(chainActive, locator); - else - pindexRescan = chainActive.Genesis(); } if (chainActive.Tip() && chainActive.Tip() != pindexRescan) { From 5b7fed5bbffb247f474d99c8b171f8cec7795d7e Mon Sep 17 00:00:00 2001 From: Larry Ruane Date: Wed, 1 Jul 2020 10:28:15 -0600 Subject: [PATCH 4/7] self.sync_all(), not time.sleep(4) --- qa/rpc-tests/wallet_db_flush.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/qa/rpc-tests/wallet_db_flush.py b/qa/rpc-tests/wallet_db_flush.py index d66e4ffb7..4474cb344 100755 --- a/qa/rpc-tests/wallet_db_flush.py +++ b/qa/rpc-tests/wallet_db_flush.py @@ -55,7 +55,7 @@ class WalletDBFlush (BitcoinTestFramework): "-mineraddress=%s" % zaddr, ]) self.nodes[0].generate(1) - time.sleep(4) + self.sync_all() self.nodes[0].stop() bitcoind_processes[0].wait() @@ -71,7 +71,7 @@ class WalletDBFlush (BitcoinTestFramework): assert_equal(self.nodes[0].z_getbalance(zaddr, 0), 5) time.sleep(sleeptime) self.nodes[0].generate(1) - time.sleep(4) + self.sync_all() self.nodes[0].stop() bitcoind_processes[0].wait() @@ -80,7 +80,7 @@ class WalletDBFlush (BitcoinTestFramework): "-mineraddress=%s" % zaddr, ]) self.nodes[0].generate(1) - time.sleep(4) + self.sync_all() self.nodes[0].stop() if __name__ == '__main__': From bf568273a78c240c5bebf2a6ab000b37395b6a5e Mon Sep 17 00:00:00 2001 From: Larry Ruane Date: Thu, 2 Jul 2020 14:32:48 -0600 Subject: [PATCH 5/7] fix pyflakes CI errors --- qa/rpc-tests/wallet_db_flush.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/qa/rpc-tests/wallet_db_flush.py b/qa/rpc-tests/wallet_db_flush.py index 4474cb344..56fd83e9e 100755 --- a/qa/rpc-tests/wallet_db_flush.py +++ b/qa/rpc-tests/wallet_db_flush.py @@ -6,8 +6,6 @@ # This test reproduces https://github.com/zcash/zcash/issues/4301 # It takes an hour to run! -from decimal import Decimal -from test_framework.authproxy import JSONRPCException from test_framework.test_framework import BitcoinTestFramework from test_framework.util import ( assert_equal, From 872af58d592247146ad565cb7b05a220539fa7bb Mon Sep 17 00:00:00 2001 From: Larry Ruane Date: Wed, 8 Jul 2020 07:50:48 -0600 Subject: [PATCH 6/7] undo flushing witness data on shutdown This fixes wallet_anchorfork.py CI failure, but a separate PR will restore flushing witness data on shutdown while also fixing DecrementNoteWitnesses() to not assert when nd->witnessHeight < indexHeight, which can happen when the node reorgs upon restart (which this test causes to happen). --- src/init.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/init.cpp b/src/init.cpp index 631ab3836..1671dba90 100644 --- a/src/init.cpp +++ b/src/init.cpp @@ -229,9 +229,6 @@ void Shutdown() if (pcoinsTip != NULL) { FlushStateToDisk(); } - // Flush the wallet witness cache to disk (no longer done by FlushStateToDisk()) - GetMainSignals().SetBestChain(chainActive.GetLocator()); - delete pcoinsTip; pcoinsTip = NULL; delete pcoinscatcher; From 4811dcc426ef3e20a3ddac7bc392d64be583a704 Mon Sep 17 00:00:00 2001 From: Larry Ruane Date: Wed, 8 Jul 2020 20:48:50 -0600 Subject: [PATCH 7/7] sync before stopping nodes This makes the test more deterministic. When the nodes are split, and partition B (node 1) mines a joinsplit transaction, that block only sometimes appears on the blockchain after the nodes are all reconnected. --- qa/rpc-tests/wallet_anchorfork.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/qa/rpc-tests/wallet_anchorfork.py b/qa/rpc-tests/wallet_anchorfork.py index f02ab542a..86c56e72d 100755 --- a/qa/rpc-tests/wallet_anchorfork.py +++ b/qa/rpc-tests/wallet_anchorfork.py @@ -6,7 +6,9 @@ from test_framework.test_framework import BitcoinTestFramework from test_framework.util import assert_equal, initialize_chain_clean, \ start_nodes, stop_nodes, connect_nodes_bi, \ - wait_and_assert_operationid_status, wait_bitcoinds, get_coinbase_address + wait_and_assert_operationid_status, wait_bitcoinds, get_coinbase_address, \ + sync_blocks, sync_mempools + from decimal import Decimal class WalletAnchorForkTest (BitcoinTestFramework): @@ -83,6 +85,8 @@ class WalletAnchorForkTest (BitcoinTestFramework): txid2 = self.nodes[1].sendrawtransaction(rawhex) assert_equal(txid, txid2) self.nodes[1].generate(1) + sync_blocks(self.nodes[1:]) + sync_mempools(self.nodes[1:]) # Check that Partition B is one block ahead and that they have different tips assert_equal(self.nodes[0].getblockcount() + 1, self.nodes[1].getblockcount())