Make CRollingBloomFilter set nTweak for you

While CBloomFilter is usually used with an explicitly set nTweak,
CRollingBloomFilter is only used internally. Requiring every caller to
set nTweak is error-prone and redundant; better to have the class handle
that for you with a high-quality randomness source.

Additionally when clearing the filter it makes sense to change nTweak as
well to recover from a bad setting, e.g. due to insufficient randomness
at initialization, so the clear() method is replaced by a reset() method
that sets a new, random, nTweak value.
This commit is contained in:
Peter Todd 2015-07-20 04:43:34 +09:00 committed by Pieter Wuille
parent a3d65fedaa
commit d2d7ee0e86
5 changed files with 29 additions and 12 deletions

View File

@ -8,6 +8,7 @@
#include "hash.h" #include "hash.h"
#include "script/script.h" #include "script/script.h"
#include "script/standard.h" #include "script/standard.h"
#include "random.h"
#include "streams.h" #include "streams.h"
#include <math.h> #include <math.h>
@ -121,6 +122,12 @@ void CBloomFilter::clear()
isEmpty = true; isEmpty = true;
} }
void CBloomFilter::reset(unsigned int nNewTweak)
{
clear();
nTweak = nNewTweak;
}
bool CBloomFilter::IsWithinSizeConstraints() const bool CBloomFilter::IsWithinSizeConstraints() const
{ {
return vData.size() <= MAX_BLOOM_FILTER_SIZE && nHashFuncs <= MAX_HASH_FUNCS; return vData.size() <= MAX_BLOOM_FILTER_SIZE && nHashFuncs <= MAX_HASH_FUNCS;
@ -217,7 +224,8 @@ CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements, double fpRate,
// inserted, so at least one always contains the last nElements // inserted, so at least one always contains the last nElements
// inserted. // inserted.
nBloomSize = nElements * 2; nBloomSize = nElements * 2;
nInsertions = 0;
reset(nTweak);
} }
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey) void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
@ -254,9 +262,12 @@ bool CRollingBloomFilter::contains(const uint256& hash) const
return contains(data); return contains(data);
} }
void CRollingBloomFilter::clear() void CRollingBloomFilter::reset(unsigned int nNewTweak)
{ {
b1.clear(); if (!nNewTweak)
b2.clear(); nNewTweak = GetRand(std::numeric_limits<unsigned int>::max());
b1.reset(nNewTweak);
b2.reset(nNewTweak);
nInsertions = 0; nInsertions = 0;
} }

View File

@ -89,6 +89,7 @@ public:
bool contains(const uint256& hash) const; bool contains(const uint256& hash) const;
void clear(); void clear();
void reset(unsigned int nNewTweak);
//! True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS //! True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS
//! (catch a filter which was just deserialized which was too big) //! (catch a filter which was just deserialized which was too big)
@ -103,7 +104,11 @@ public:
/** /**
* RollingBloomFilter is a probabilistic "keep track of most recently inserted" set. * RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
* Construct it with the number of items to keep track of, and a false-positive rate. * Construct it with the number of items to keep track of, and a false-positive
* rate. Unlike CBloomFilter, by default nTweak is set to a cryptographically
* secure random value for you. Similarly rather than clear() the method
* reset() is provided, which also changes nTweak to decrease the impact of
* false-positives.
* *
* contains(item) will always return true if item was one of the last N things * contains(item) will always return true if item was one of the last N things
* insert()'ed ... but may also return true for items that were not inserted. * insert()'ed ... but may also return true for items that were not inserted.
@ -111,14 +116,15 @@ public:
class CRollingBloomFilter class CRollingBloomFilter
{ {
public: public:
CRollingBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak); CRollingBloomFilter(unsigned int nElements, double nFPRate,
unsigned int nTweak = 0);
void insert(const std::vector<unsigned char>& vKey); void insert(const std::vector<unsigned char>& vKey);
void insert(const uint256& hash); void insert(const uint256& hash);
bool contains(const std::vector<unsigned char>& vKey) const; bool contains(const std::vector<unsigned char>& vKey) const;
bool contains(const uint256& hash) const; bool contains(const uint256& hash) const;
void clear(); void reset(unsigned int nNewTweak = 0);
private: private:
unsigned int nBloomSize; unsigned int nBloomSize;

View File

@ -4812,7 +4812,7 @@ bool SendMessages(CNode* pto, bool fSendTrickle)
{ {
// Periodically clear addrKnown to allow refresh broadcasts // Periodically clear addrKnown to allow refresh broadcasts
if (nLastRebroadcast) if (nLastRebroadcast)
pnode->addrKnown.clear(); pnode->addrKnown.reset();
// Rebroadcast our address // Rebroadcast our address
AdvertizeLocal(pnode); AdvertizeLocal(pnode);

View File

@ -2060,7 +2060,7 @@ unsigned int SendBufferSize() { return 1000*GetArg("-maxsendbuffer", 1*1000); }
CNode::CNode(SOCKET hSocketIn, const CAddress& addrIn, const std::string& addrNameIn, bool fInboundIn) : CNode::CNode(SOCKET hSocketIn, const CAddress& addrIn, const std::string& addrNameIn, bool fInboundIn) :
ssSend(SER_NETWORK, INIT_PROTO_VERSION), ssSend(SER_NETWORK, INIT_PROTO_VERSION),
addrKnown(5000, 0.001, insecure_rand()), addrKnown(5000, 0.001),
setInventoryKnown(SendBufferSize() / 1000) setInventoryKnown(SendBufferSize() / 1000)
{ {
nServices = 0; nServices = 0;

View File

@ -469,7 +469,7 @@ static std::vector<unsigned char> RandomData()
BOOST_AUTO_TEST_CASE(rolling_bloom) BOOST_AUTO_TEST_CASE(rolling_bloom)
{ {
// last-100-entry, 1% false positive: // last-100-entry, 1% false positive:
CRollingBloomFilter rb1(100, 0.01, 0); CRollingBloomFilter rb1(100, 0.01, 1);
// Overfill: // Overfill:
static const int DATASIZE=399; static const int DATASIZE=399;
@ -500,7 +500,7 @@ BOOST_AUTO_TEST_CASE(rolling_bloom)
BOOST_CHECK(nHits < 175); BOOST_CHECK(nHits < 175);
BOOST_CHECK(rb1.contains(data[DATASIZE-1])); BOOST_CHECK(rb1.contains(data[DATASIZE-1]));
rb1.clear(); rb1.reset(1);
BOOST_CHECK(!rb1.contains(data[DATASIZE-1])); BOOST_CHECK(!rb1.contains(data[DATASIZE-1]));
// Now roll through data, make sure last 100 entries // Now roll through data, make sure last 100 entries
@ -527,7 +527,7 @@ BOOST_AUTO_TEST_CASE(rolling_bloom)
BOOST_CHECK(nHits < 100); BOOST_CHECK(nHits < 100);
// last-1000-entry, 0.01% false positive: // last-1000-entry, 0.01% false positive:
CRollingBloomFilter rb2(1000, 0.001, 0); CRollingBloomFilter rb2(1000, 0.001, 1);
for (int i = 0; i < DATASIZE; i++) { for (int i = 0; i < DATASIZE; i++) {
rb2.insert(data[i]); rb2.insert(data[i]);
} }