diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 61f98ca74..50ac76924 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -124,6 +124,9 @@ namespace { /** Number of peers from which we're downloading blocks. */ int nPeersWithValidatedDownloads = 0; + /** Number of outbound peers with m_chain_sync.m_protect. */ + int g_outbound_peers_with_protect_from_disconnect = 0; + /** Relay map, protected by cs_main. */ typedef std::map MapRelay; MapRelay mapRelay; @@ -201,6 +204,33 @@ struct CNodeState { */ bool fSupportsDesiredCmpctVersion; + /** State used to enforce CHAIN_SYNC_TIMEOUT + * Only in effect for outbound, non-manual connections, with + * m_protect == false + * Algorithm: if a peer's best known block has less work than our tip, + * set a timeout CHAIN_SYNC_TIMEOUT seconds in the future: + * - If at timeout their best known block now has more work than our tip + * when the timeout was set, then either reset the timeout or clear it + * (after comparing against our current tip's work) + * - If at timeout their best known block still has less work than our + * tip did when the timeout was set, then send a getheaders message, + * and set a shorter timeout, HEADERS_RESPONSE_TIME seconds in future. + * If their best known block is still behind when that new timeout is + * reached, disconnect. + */ + struct ChainSyncTimeoutState { + //! A timeout used for checking whether our peer has sufficiently synced + int64_t m_timeout; + //! A header with the work we require on our peer's chain + const CBlockIndex * m_work_header; + //! After timeout is reached, set to true after sending getheaders + bool m_sent_getheaders; + //! Whether this peer is protected from disconnection due to a bad/slow chain + bool m_protect; + }; + + ChainSyncTimeoutState m_chain_sync; + CNodeState(CAddress addrIn, std::string addrNameIn) : address(addrIn), name(addrNameIn) { fCurrentlyConnected = false; nMisbehavior = 0; @@ -223,6 +253,7 @@ struct CNodeState { fHaveWitness = false; fWantsCmpctWitness = false; fSupportsDesiredCmpctVersion = false; + m_chain_sync = { 0, nullptr, false, false }; } }; @@ -502,6 +533,13 @@ void FindNextBlocksToDownload(NodeId nodeid, unsigned int count, std::vectorfInbound || node->m_manual_connection || node->fFeeler || node->fOneShot); +} + void PeerLogicValidation::InitializeNode(CNode *pnode) { CAddress addr = pnode->addr; std::string addrName = pnode->GetAddrName(); @@ -534,6 +572,8 @@ void PeerLogicValidation::FinalizeNode(NodeId nodeid, bool& fUpdateConnectionTim nPreferredDownload -= state->fPreferredDownload; nPeersWithValidatedDownloads -= (state->nBlocksInFlightValidHeaders != 0); assert(nPeersWithValidatedDownloads >= 0); + g_outbound_peers_with_protect_from_disconnect -= state->m_chain_sync.m_protect; + assert(g_outbound_peers_with_protect_from_disconnect >= 0); mapNodeState.erase(nodeid); @@ -542,6 +582,7 @@ void PeerLogicValidation::FinalizeNode(NodeId nodeid, bool& fUpdateConnectionTim assert(mapBlocksInFlight.empty()); assert(nPreferredDownload == 0); assert(nPeersWithValidatedDownloads == 0); + assert(g_outbound_peers_with_protect_from_disconnect == 0); } LogPrint(BCLog::NET, "Cleared nodestate for peer=%d\n", nodeid); } @@ -2337,6 +2378,10 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr assert(pindexLast); UpdateBlockAvailability(pfrom->GetId(), pindexLast->GetBlockHash()); + // From here, pindexBestKnownBlock should be guaranteed to be non-null, + // because it is set in UpdateBlockAvailability. Some nullptr checks + // are still present, however, as belt-and-suspenders. + if (nCount == MAX_HEADERS_RESULTS) { // Headers message had its maximum size; the peer may have more headers. // TODO: optimize: if pindexLast is an ancestor of chainActive.Tip or pindexBestHeader, continue @@ -2396,6 +2441,35 @@ bool static ProcessMessage(CNode* pfrom, const std::string& strCommand, CDataStr } } } + // If we're in IBD, we want outbound peers that will serve us a useful + // chain. Disconnect peers that are on chains with insufficient work. + if (IsInitialBlockDownload() && nCount != MAX_HEADERS_RESULTS) { + // When nCount < MAX_HEADERS_RESULTS, we know we have no more + // headers to fetch from this peer. + if (nodestate->pindexBestKnownBlock && nodestate->pindexBestKnownBlock->nChainWork < nMinimumChainWork) { + // This peer has too little work on their headers chain to help + // us sync -- disconnect if using an outbound slot (unless + // whitelisted or addnode). + // Note: We compare their tip to nMinimumChainWork (rather than + // chainActive.Tip()) because we won't start block download + // until we have a headers chain that has at least + // nMinimumChainWork, even if a peer has a chain past our tip, + // as an anti-DoS measure. + if (IsOutboundDisconnectionCandidate(pfrom)) { + LogPrintf("Disconnecting outbound peer %d -- headers chain has insufficient work\n", pfrom->GetId()); + pfrom->fDisconnect = true; + } + } + } + + if (!pfrom->fDisconnect && IsOutboundDisconnectionCandidate(pfrom) && nodestate->pindexBestKnownBlock != nullptr) { + // If this is an outbound peer, check to see if we should protect + // it from the bad/lagging chain logic. + if (g_outbound_peers_with_protect_from_disconnect < MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT && nodestate->pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork && !nodestate->m_chain_sync.m_protect) { + nodestate->m_chain_sync.m_protect = true; + ++g_outbound_peers_with_protect_from_disconnect; + } + } } } @@ -2794,6 +2868,58 @@ bool PeerLogicValidation::ProcessMessages(CNode* pfrom, std::atomic& inter return fMoreWork; } +void PeerLogicValidation::ConsiderEviction(CNode *pto, int64_t time_in_seconds) +{ + AssertLockHeld(cs_main); + + CNodeState &state = *State(pto->GetId()); + const CNetMsgMaker msgMaker(pto->GetSendVersion()); + + if (!state.m_chain_sync.m_protect && IsOutboundDisconnectionCandidate(pto) && state.fSyncStarted) { + // This is an outbound peer subject to disconnection if they don't + // announce a block with as much work as the current tip within + // CHAIN_SYNC_TIMEOUT + HEADERS_RESPONSE_TIME seconds (note: if + // their chain has more work than ours, we should sync to it, + // unless it's invalid, in which case we should find that out and + // disconnect from them elsewhere). + if (state.pindexBestKnownBlock != nullptr && state.pindexBestKnownBlock->nChainWork >= chainActive.Tip()->nChainWork) { + if (state.m_chain_sync.m_timeout != 0) { + state.m_chain_sync.m_timeout = 0; + state.m_chain_sync.m_work_header = nullptr; + state.m_chain_sync.m_sent_getheaders = false; + } + } else if (state.m_chain_sync.m_timeout == 0 || (state.m_chain_sync.m_work_header != nullptr && state.pindexBestKnownBlock != nullptr && state.pindexBestKnownBlock->nChainWork >= state.m_chain_sync.m_work_header->nChainWork)) { + // Our best block known by this peer is behind our tip, and we're either noticing + // that for the first time, OR this peer was able to catch up to some earlier point + // where we checked against our tip. + // Either way, set a new timeout based on current tip. + state.m_chain_sync.m_timeout = time_in_seconds + CHAIN_SYNC_TIMEOUT; + state.m_chain_sync.m_work_header = chainActive.Tip(); + state.m_chain_sync.m_sent_getheaders = false; + } else if (state.m_chain_sync.m_timeout > 0 && time_in_seconds > state.m_chain_sync.m_timeout) { + // No evidence yet that our peer has synced to a chain with work equal to that + // of our tip, when we first detected it was behind. Send a single getheaders + // message to give the peer a chance to update us. + if (state.m_chain_sync.m_sent_getheaders) { + // They've run out of time to catch up! + LogPrintf("Disconnecting outbound peer %d for old chain, best known block = %s\n", pto->GetId(), state.pindexBestKnownBlock != nullptr ? state.pindexBestKnownBlock->GetBlockHash().ToString() : ""); + pto->fDisconnect = true; + } else { + LogPrint(BCLog::NET, "sending getheaders to outbound peer=%d to verify chain work (current best known block:%s, benchmark blockhash: %s)\n", pto->GetId(), state.pindexBestKnownBlock != nullptr ? state.pindexBestKnownBlock->GetBlockHash().ToString() : "", state.m_chain_sync.m_work_header->GetBlockHash().ToString()); + connman->PushMessage(pto, msgMaker.Make(NetMsgType::GETHEADERS, chainActive.GetLocator(state.m_chain_sync.m_work_header->pprev), uint256())); + state.m_chain_sync.m_sent_getheaders = true; + constexpr int64_t HEADERS_RESPONSE_TIME = 120; // 2 minutes + // Bump the timeout to allow a response, which could clear the timeout + // (if the response shows the peer has synced), reset the timeout (if + // the peer syncs to the required work but not to our tip), or result + // in disconnect (if we advance to the timeout and pindexBestKnownBlock + // has not sufficiently progressed) + state.m_chain_sync.m_timeout = time_in_seconds + HEADERS_RESPONSE_TIME; + } + } + } +} + class CompareInvMempoolOrder { CTxMemPool *mp; @@ -3260,6 +3386,9 @@ bool PeerLogicValidation::SendMessages(CNode* pto, std::atomic& interruptM } } + // Check that outbound peers have reasonable chains + // GetTime() is used by this anti-DoS logic so we can test this using mocktime + ConsiderEviction(pto, GetTime()); // // Message: getdata (blocks) diff --git a/src/net_processing.h b/src/net_processing.h index 79745cdd4..656324bba 100644 --- a/src/net_processing.h +++ b/src/net_processing.h @@ -21,6 +21,12 @@ static const unsigned int DEFAULT_BLOCK_RECONSTRUCTION_EXTRA_TXN = 100; * Timeout = base + per_header * (expected number of headers) */ static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_BASE = 15 * 60 * 1000000; // 15 minutes static constexpr int64_t HEADERS_DOWNLOAD_TIMEOUT_PER_HEADER = 1000; // 1ms/header +/** Protect at least this many outbound peers from disconnection due to slow/ + * behind headers chain. + */ +static constexpr int32_t MAX_OUTBOUND_PEERS_TO_PROTECT_FROM_DISCONNECT = 4; +/** Timeout for (unprotected) outbound peers to sync to our chainwork, in seconds */ +static constexpr int64_t CHAIN_SYNC_TIMEOUT = 20 * 60; // 20 minutes class PeerLogicValidation : public CValidationInterface, public NetEventsInterface { private: @@ -47,6 +53,8 @@ public: * @return True if there is more work to be done */ bool SendMessages(CNode* pto, std::atomic& interrupt) override; + + void ConsiderEviction(CNode *pto, int64_t time_in_seconds); }; struct CNodeStateStats { diff --git a/src/test/DoS_tests.cpp b/src/test/DoS_tests.cpp index b88ad5ed1..7bcf30483 100644 --- a/src/test/DoS_tests.cpp +++ b/src/test/DoS_tests.cpp @@ -42,6 +42,51 @@ static NodeId id = 0; BOOST_FIXTURE_TEST_SUITE(DoS_tests, TestingSetup) +// Test eviction of an outbound peer whose chain never advances +// Mock a node connection, and use mocktime to simulate a peer +// which never sends any headers messages. PeerLogic should +// decide to evict that outbound peer, after the appropriate timeouts. +// Note that we protect 4 outbound nodes from being subject to +// this logic; this test takes advantage of that protection only +// being applied to nodes which send headers with sufficient +// work. +BOOST_AUTO_TEST_CASE(outbound_slow_chain_eviction) +{ + std::atomic interruptDummy(false); + + // Mock an outbound peer + CAddress addr1(ip(0xa0b0c001), NODE_NONE); + CNode dummyNode1(id++, ServiceFlags(NODE_NETWORK|NODE_WITNESS), 0, INVALID_SOCKET, addr1, 0, 0, CAddress(), "", /*fInboundIn=*/ false); + dummyNode1.SetSendVersion(PROTOCOL_VERSION); + + peerLogic->InitializeNode(&dummyNode1); + dummyNode1.nVersion = 1; + dummyNode1.fSuccessfullyConnected = true; + + // This test requires that we have a chain with non-zero work. + BOOST_CHECK(chainActive.Tip() != nullptr); + BOOST_CHECK(chainActive.Tip()->nChainWork > 0); + + // Test starts here + peerLogic->SendMessages(&dummyNode1, interruptDummy); // should result in getheaders + BOOST_CHECK(dummyNode1.vSendMsg.size() > 0); + dummyNode1.vSendMsg.clear(); + + int64_t nStartTime = GetTime(); + // Wait 21 minutes + SetMockTime(nStartTime+21*60); + peerLogic->SendMessages(&dummyNode1, interruptDummy); // should result in getheaders + BOOST_CHECK(dummyNode1.vSendMsg.size() > 0); + // Wait 3 more minutes + SetMockTime(nStartTime+24*60); + peerLogic->SendMessages(&dummyNode1, interruptDummy); // should result in disconnect + BOOST_CHECK(dummyNode1.fDisconnect == true); + SetMockTime(0); + + bool dummy; + peerLogic->FinalizeNode(dummyNode1.GetId(), dummy); +} + BOOST_AUTO_TEST_CASE(DoS_banning) { std::atomic interruptDummy(false); @@ -71,6 +116,10 @@ BOOST_AUTO_TEST_CASE(DoS_banning) Misbehaving(dummyNode2.GetId(), 50); peerLogic->SendMessages(&dummyNode2, interruptDummy); BOOST_CHECK(connman->IsBanned(addr2)); + + bool dummy; + peerLogic->FinalizeNode(dummyNode1.GetId(), dummy); + peerLogic->FinalizeNode(dummyNode2.GetId(), dummy); } BOOST_AUTO_TEST_CASE(DoS_banscore) @@ -95,6 +144,9 @@ BOOST_AUTO_TEST_CASE(DoS_banscore) peerLogic->SendMessages(&dummyNode1, interruptDummy); BOOST_CHECK(connman->IsBanned(addr1)); gArgs.ForceSetArg("-banscore", std::to_string(DEFAULT_BANSCORE_THRESHOLD)); + + bool dummy; + peerLogic->FinalizeNode(dummyNode1.GetId(), dummy); } BOOST_AUTO_TEST_CASE(DoS_bantime) @@ -121,6 +173,9 @@ BOOST_AUTO_TEST_CASE(DoS_bantime) SetMockTime(nStartTime+60*60*24+1); BOOST_CHECK(!connman->IsBanned(addr)); + + bool dummy; + peerLogic->FinalizeNode(dummyNode.GetId(), dummy); } CTransactionRef RandomOrphan() diff --git a/test/functional/minchainwork.py b/test/functional/minchainwork.py index c7579d254..35cd7ad14 100755 --- a/test/functional/minchainwork.py +++ b/test/functional/minchainwork.py @@ -27,6 +27,7 @@ class MinimumChainWorkTest(BitcoinTestFramework): def set_test_params(self): self.setup_clean_chain = True self.num_nodes = 3 + self.extra_args = [[], ["-minimumchainwork=0x65"], ["-minimumchainwork=0x65"]] self.node_min_work = [0, 101, 101] @@ -74,6 +75,13 @@ class MinimumChainWorkTest(BitcoinTestFramework): self.nodes[0].generate(1) self.log.info("Verifying nodes are all synced") + + # Because nodes in regtest are all manual connections (eg using + # addnode), node1 should not have disconnected node0. If not for that, + # we'd expect node1 to have disconnected node0 for serving an + # insufficient work chain, in which case we'd need to reconnect them to + # continue the test. + self.sync_all() self.log.info("Blockcounts: %s", [n.getblockcount() for n in self.nodes])