// Copyright (c) 2012-2014 The Bitcoin Core developers
// Copyright (c) 2018-2023 The Zcash developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or https://www.opensource.org/licenses/mit-license.php .
|
2013-04-12 22:13:08 -07:00
|
|
|
|
2012-08-12 20:26:27 -07:00
|
|
|
#ifndef BITCOIN_BLOOM_H
|
|
|
|
#define BITCOIN_BLOOM_H
|
|
|
|
|
|
|
|
#include "serialize.h"
|
|
|
|
|
2013-04-12 22:13:08 -07:00
|
|
|
#include <vector>
|
|
|
|
|
2012-08-12 20:26:27 -07:00
|
|
|
// Forward declarations: this header only uses these types by reference in
// member-function signatures, so their full definitions are not required here.
class COutPoint;
class CTransaction;
class uint256;
|
2012-08-12 20:26:27 -07:00
|
|
|
|
2014-10-30 17:43:19 -07:00
|
|
|
//! Upper bound on the filter size, in bytes.
//! 20,000 items with fp rate < 0.1% or 10,000 items and <0.0001%
static const unsigned int MAX_BLOOM_FILTER_SIZE = 36000; // bytes
//! Upper bound on the number of hash functions a filter may use.
static const unsigned int MAX_HASH_FUNCS = 50;
|
|
|
|
|
2014-10-30 17:43:19 -07:00
|
|
|
/**
 * First two bits of nFlags control how much IsRelevantAndUpdate actually updates.
 * The remaining bits are reserved.
 */
enum bloomflags
{
    BLOOM_UPDATE_NONE = 0,
    BLOOM_UPDATE_ALL = 1,
    // Only adds outpoints to the filter if the output is a pay-to-pubkey/pay-to-multisig script
    BLOOM_UPDATE_P2PUBKEY_ONLY = 2,
    // Covers the two low bits of nFlags that hold the update mode; not itself a mode
    BLOOM_UPDATE_MASK = 3,
};
|
2012-08-12 20:26:27 -07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* BloomFilter is a probabilistic filter which SPV clients provide
|
2015-04-28 07:48:28 -07:00
|
|
|
* so that we can filter the transactions we send them.
|
2012-08-12 20:26:27 -07:00
|
|
|
*
|
|
|
|
* This allows for significantly more efficient transaction and block downloads.
|
|
|
|
*
|
2015-04-28 07:48:28 -07:00
|
|
|
* Because bloom filters are probabilistic, a SPV node can increase the false-
|
|
|
|
* positive rate, making us send it transactions which aren't actually its,
|
2012-08-12 20:26:27 -07:00
|
|
|
* allowing clients to trade more bandwidth for more privacy by obfuscating which
|
2015-04-28 07:48:28 -07:00
|
|
|
* keys are controlled by them.
|
2012-08-12 20:26:27 -07:00
|
|
|
*/
|
|
|
|
class CBloomFilter
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
std::vector<unsigned char> vData;
|
|
|
|
unsigned int nHashFuncs;
|
2012-11-02 15:33:50 -07:00
|
|
|
unsigned int nTweak;
|
2013-01-10 17:23:28 -08:00
|
|
|
unsigned char nFlags;
|
2012-08-12 20:26:27 -07:00
|
|
|
|
|
|
|
unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
|
|
|
|
|
|
|
|
public:
|
2014-10-30 17:43:19 -07:00
|
|
|
/**
|
|
|
|
* Creates a new bloom filter which will provide the given fp rate when filled with the given number of elements
|
|
|
|
* Note that if the given parameters will result in a filter outside the bounds of the protocol limits,
|
|
|
|
* the filter created will be as close to the given parameters as possible within the protocol limits.
|
|
|
|
* This will apply if nFPRate is very low or nElements is unreasonably high.
|
|
|
|
* nTweak is a constant which is added to the seed value passed to the hash function
|
|
|
|
* It should generally always be a random value (and is largely only exposed for unit testing)
|
|
|
|
* nFlags should be one of the BLOOM_UPDATE_* enums (not _MASK)
|
|
|
|
*/
|
2017-02-12 21:39:48 -08:00
|
|
|
CBloomFilter(const unsigned int nElements, const double nFPRate, const unsigned int nTweak, unsigned char nFlagsIn);
|
2020-04-28 10:19:34 -07:00
|
|
|
CBloomFilter() : nHashFuncs(0), nTweak(0), nFlags(0) {}
|
2012-08-12 20:26:27 -07:00
|
|
|
|
2014-09-02 00:58:09 -07:00
|
|
|
ADD_SERIALIZE_METHODS;
|
overhaul serialization code
The implementation of each class' serialization/deserialization is no longer
passed within a macro. The implementation now lies within a template of form:
template <typename T, typename Stream, typename Operation>
inline static size_t SerializationOp(T thisPtr, Stream& s, Operation ser_action, int nType, int nVersion) {
size_t nSerSize = 0;
/* CODE */
return nSerSize;
}
In cases when codepath should depend on whether or not we are just deserializing
(old fGetSize, fWrite, fRead flags) an additional clause can be used:
bool fRead = boost::is_same<Operation, CSerActionUnserialize>();
The IMPLEMENT_SERIALIZE macro will now be a freestanding clause added within
class' body (similiar to Qt's Q_OBJECT) to implement GetSerializeSize,
Serialize and Unserialize. These are now wrappers around
the "SerializationOp" template.
2014-08-19 23:42:31 -07:00
|
|
|
|
2014-08-20 13:44:38 -07:00
|
|
|
template <typename Stream, typename Operation>
|
2016-10-28 16:29:17 -07:00
|
|
|
inline void SerializationOp(Stream& s, Operation ser_action) {
|
2012-08-12 20:26:27 -07:00
|
|
|
READWRITE(vData);
|
|
|
|
READWRITE(nHashFuncs);
|
2012-11-02 15:33:50 -07:00
|
|
|
READWRITE(nTweak);
|
2013-01-10 17:23:28 -08:00
|
|
|
READWRITE(nFlags);
|
overhaul serialization code
The implementation of each class' serialization/deserialization is no longer
passed within a macro. The implementation now lies within a template of form:
template <typename T, typename Stream, typename Operation>
inline static size_t SerializationOp(T thisPtr, Stream& s, Operation ser_action, int nType, int nVersion) {
size_t nSerSize = 0;
/* CODE */
return nSerSize;
}
In cases when codepath should depend on whether or not we are just deserializing
(old fGetSize, fWrite, fRead flags) an additional clause can be used:
bool fRead = boost::is_same<Operation, CSerActionUnserialize>();
The IMPLEMENT_SERIALIZE macro will now be a freestanding clause added within
class' body (similiar to Qt's Q_OBJECT) to implement GetSerializeSize,
Serialize and Unserialize. These are now wrappers around
the "SerializationOp" template.
2014-08-19 23:42:31 -07:00
|
|
|
}
|
2012-08-12 20:26:27 -07:00
|
|
|
|
|
|
|
void insert(const std::vector<unsigned char>& vKey);
|
|
|
|
void insert(const COutPoint& outpoint);
|
|
|
|
void insert(const uint256& hash);
|
|
|
|
|
|
|
|
bool contains(const std::vector<unsigned char>& vKey) const;
|
|
|
|
bool contains(const COutPoint& outpoint) const;
|
|
|
|
bool contains(const uint256& hash) const;
|
|
|
|
|
2014-10-30 17:43:19 -07:00
|
|
|
//! True if the size is <= MAX_BLOOM_FILTER_SIZE and the number of hash functions is <= MAX_HASH_FUNCS
|
|
|
|
//! (catch a filter which was just deserialized which was too big)
|
2012-08-12 20:26:27 -07:00
|
|
|
bool IsWithinSizeConstraints() const;
|
|
|
|
|
2014-10-30 17:43:19 -07:00
|
|
|
//! Also adds any outputs which match the filter to the filter (to match their spending txes)
|
2014-06-09 01:02:00 -07:00
|
|
|
bool IsRelevantAndUpdate(const CTransaction& tx);
|
2012-08-12 20:26:27 -07:00
|
|
|
};
|
|
|
|
|
2015-04-24 10:14:45 -07:00
|
|
|
/**
|
|
|
|
* RollingBloomFilter is a probabilistic "keep track of most recently inserted" set.
|
2015-07-19 12:43:34 -07:00
|
|
|
* Construct it with the number of items to keep track of, and a false-positive
|
|
|
|
* rate. Unlike CBloomFilter, by default nTweak is set to a cryptographically
|
|
|
|
* secure random value for you. Similarly rather than clear() the method
|
|
|
|
* reset() is provided, which also changes nTweak to decrease the impact of
|
|
|
|
* false-positives.
|
2015-04-24 10:14:45 -07:00
|
|
|
*
|
2015-11-27 04:20:29 -08:00
|
|
|
* contains(item) will always return true if item was one of the last N to 1.5*N
|
2015-04-24 10:14:45 -07:00
|
|
|
* insert()'ed ... but may also return true for items that were not inserted.
|
2015-11-27 04:20:29 -08:00
|
|
|
*
|
|
|
|
* It needs around 1.8 bytes per element per factor 0.1 of false positive rate.
|
2020-05-31 21:18:32 -07:00
|
|
|
* For example, if we want 1000 elements, we'd need:
|
|
|
|
* - ~1800 bytes for a false positive rate of 0.1
|
|
|
|
* - ~3600 bytes for a false positive rate of 0.01
|
|
|
|
* - ~5400 bytes for a false positive rate of 0.001
|
|
|
|
*
|
|
|
|
* If we make these simplifying assumptions:
|
|
|
|
* - logFpRate / log(0.5) doesn't get rounded or clamped in the nHashFuncs calculation
|
|
|
|
* - nElements is even, so that nEntriesPerGeneration == nElements / 2
|
|
|
|
*
|
|
|
|
* Then we get a more accurate estimate for filter bytes:
|
|
|
|
*
|
|
|
|
* 3/(log(256)*log(2)) * log(1/fpRate) * nElements
|
2015-04-24 10:14:45 -07:00
|
|
|
*/
|
|
|
|
class CRollingBloomFilter
|
|
|
|
{
|
|
|
|
public:
|
2015-07-27 09:58:00 -07:00
|
|
|
// A random bloom filter calls GetRand() at creation time.
|
|
|
|
// Don't create global CRollingBloomFilter objects, as they may be
|
|
|
|
// constructed before the randomizer is properly initialized.
|
2017-02-12 21:39:48 -08:00
|
|
|
CRollingBloomFilter(const unsigned int nElements, const double nFPRate);
|
2015-04-24 10:14:45 -07:00
|
|
|
|
|
|
|
void insert(const std::vector<unsigned char>& vKey);
|
2015-07-17 03:42:43 -07:00
|
|
|
void insert(const uint256& hash);
|
2015-04-24 10:14:45 -07:00
|
|
|
bool contains(const std::vector<unsigned char>& vKey) const;
|
2015-07-17 03:42:43 -07:00
|
|
|
bool contains(const uint256& hash) const;
|
2015-04-24 10:14:45 -07:00
|
|
|
|
2015-07-27 09:58:00 -07:00
|
|
|
void reset();
|
2015-04-24 10:14:45 -07:00
|
|
|
|
2023-02-17 12:38:04 -08:00
|
|
|
protected:
|
|
|
|
bool is_data_empty() const { return data.empty(); }
|
|
|
|
|
2015-04-24 10:14:45 -07:00
|
|
|
private:
|
2023-02-17 12:38:04 -08:00
|
|
|
void initialize();
|
|
|
|
|
|
|
|
uint32_t nFilterBits;
|
2015-11-27 04:20:29 -08:00
|
|
|
int nEntriesPerGeneration;
|
|
|
|
int nEntriesThisGeneration;
|
|
|
|
int nGeneration;
|
2016-04-24 09:37:29 -07:00
|
|
|
std::vector<uint64_t> data;
|
2015-11-27 04:20:29 -08:00
|
|
|
unsigned int nTweak;
|
|
|
|
int nHashFuncs;
|
|
|
|
};
|
2015-04-24 10:14:45 -07:00
|
|
|
|
2014-08-28 13:21:03 -07:00
|
|
|
#endif // BITCOIN_BLOOM_H
|