diff options
Diffstat (limited to 'src/backend/lib/bloomfilter.c')
-rw-r--r-- | src/backend/lib/bloomfilter.c | 305 |
1 files changed, 305 insertions, 0 deletions
diff --git a/src/backend/lib/bloomfilter.c b/src/backend/lib/bloomfilter.c new file mode 100644 index 00000000000..eb08f4a7b86 --- /dev/null +++ b/src/backend/lib/bloomfilter.c @@ -0,0 +1,305 @@ +/*------------------------------------------------------------------------- + * + * bloomfilter.c + * Space-efficient set membership testing + * + * A Bloom filter is a probabilistic data structure that is used to test an + * element's membership of a set. False positives are possible, but false + * negatives are not; a test of membership of the set returns either "possibly + * in set" or "definitely not in set". This is typically very space efficient, + * which can be a decisive advantage. + * + * Elements can be added to the set, but not removed. The more elements that + * are added, the larger the probability of false positives. Caller must hint + * an estimated total size of the set when the Bloom filter is initialized. + * This is used to balance the use of memory against the final false positive + * rate. + * + * The implementation is well suited to data synchronization problems between + * unordered sets, especially where predictable performance is important and + * some false positives are acceptable. It's also well suited to cache + * filtering problems where a relatively small and/or low cardinality set is + * fingerprinted, especially when many subsequent membership tests end up + * indicating that values of interest are not present. That should save the + * caller many authoritative lookups, such as expensive probes of a much larger + * on-disk structure. + * + * Copyright (c) 2018, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/lib/bloomfilter.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "access/hash.h" +#include "lib/bloomfilter.h" + +#define MAX_HASH_FUNCS 10 + +struct bloom_filter +{ + /* K hash functions are used, seeded by caller's seed */ + int k_hash_funcs; + uint64 seed; + /* m is bitset size, in bits. Must be a power of two <= 2^32. */ + uint64 m; + unsigned char bitset[FLEXIBLE_ARRAY_MEMBER]; +}; + +static int my_bloom_power(uint64 target_bitset_bits); +static int optimal_k(uint64 bitset_bits, int64 total_elems); +static void k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, + size_t len); +static inline uint32 mod_m(uint32 a, uint64 m); + +/* + * Create Bloom filter in caller's memory context. We aim for a false positive + * rate of between 1% and 2% when bitset size is not constrained by memory + * availability. + * + * total_elems is an estimate of the final size of the set. It should be + * approximately correct, but the implementation can cope well with it being + * off by perhaps a factor of five or more. See "Bloom Filters in + * Probabilistic Verification" (Dillinger & Manolios, 2004) for details of why + * this is the case. + * + * bloom_work_mem is sized in KB, in line with the general work_mem convention. + * This determines the size of the underlying bitset (trivial bookkeeping space + * isn't counted). The bitset is always sized as a power of two number of + * bits, and the largest possible bitset is 512MB (2^32 bits). The + * implementation allocates only enough memory to target its standard false + * positive rate, using a simple formula with caller's total_elems estimate as + * an input. The bitset might be as small as 1MB, even when bloom_work_mem is + * much higher. + * + * The Bloom filter is seeded using a value provided by the caller. Using a + * distinct seed value on every call makes it unlikely that the same false + * positives will reoccur when the same set is fingerprinted a second time. + * Callers that don't care about this pass a constant as their seed, typically + * 0. Callers can use a pseudo-random seed in the range of 0 - INT_MAX by + * calling random(). + */ +bloom_filter * +bloom_create(int64 total_elems, int bloom_work_mem, uint64 seed) +{ + bloom_filter *filter; + int bloom_power; + uint64 bitset_bytes; + uint64 bitset_bits; + + /* + * Aim for two bytes per element; this is sufficient to get a false + * positive rate below 1%, independent of the size of the bitset or total + * number of elements. Also, if rounding down the size of the bitset to + * the next lowest power of two turns out to be a significant drop, the + * false positive rate still won't exceed 2% in almost all cases. + */ + bitset_bytes = Min(bloom_work_mem * UINT64CONST(1024), total_elems * 2); + bitset_bytes = Max(1024 * 1024, bitset_bytes); + + /* + * Size in bits should be the highest power of two <= target. bitset_bits + * is uint64 because PG_UINT32_MAX is 2^32 - 1, not 2^32 + */ + bloom_power = my_bloom_power(bitset_bytes * BITS_PER_BYTE); + bitset_bits = UINT64CONST(1) << bloom_power; + bitset_bytes = bitset_bits / BITS_PER_BYTE; + + /* Allocate bloom filter with unset bitset */ + filter = palloc0(offsetof(bloom_filter, bitset) + + sizeof(unsigned char) * bitset_bytes); + filter->k_hash_funcs = optimal_k(bitset_bits, total_elems); + filter->seed = seed; + filter->m = bitset_bits; + + return filter; +} + +/* + * Free Bloom filter + */ +void +bloom_free(bloom_filter *filter) +{ + pfree(filter); +} + +/* + * Add element to Bloom filter + */ +void +bloom_add_element(bloom_filter *filter, unsigned char *elem, size_t len) +{ + uint32 hashes[MAX_HASH_FUNCS]; + int i; + + k_hashes(filter, hashes, elem, len); + + /* Map a bit-wise address to a byte-wise address + bit offset */ + for (i = 0; i < filter->k_hash_funcs; i++) + { + filter->bitset[hashes[i] >> 3] |= 1 << (hashes[i] & 7); + } +} + +/* + * Test if Bloom filter definitely lacks element. + * + * Returns true if the element is definitely not in the set of elements + * observed by bloom_add_element(). Otherwise, returns false, indicating that + * element is probably present in set. + */ +bool +bloom_lacks_element(bloom_filter *filter, unsigned char *elem, size_t len) +{ + uint32 hashes[MAX_HASH_FUNCS]; + int i; + + k_hashes(filter, hashes, elem, len); + + /* Map a bit-wise address to a byte-wise address + bit offset */ + for (i = 0; i < filter->k_hash_funcs; i++) + { + if (!(filter->bitset[hashes[i] >> 3] & (1 << (hashes[i] & 7)))) + return true; + } + + return false; +} + +/* + * What proportion of bits are currently set? + * + * Returns proportion, expressed as a multiplier of filter size. That should + * generally be close to 0.5, even when we have more than enough memory to + * ensure a false positive rate within target 1% to 2% band, since more hash + * functions are used as more memory is available per element. + * + * This is the only instrumentation that is low overhead enough to appear in + * debug traces. When debugging Bloom filter code, it's likely to be far more + * interesting to directly test the false positive rate. + */ +double +bloom_prop_bits_set(bloom_filter *filter) +{ + int bitset_bytes = filter->m / BITS_PER_BYTE; + uint64 bits_set = 0; + int i; + + for (i = 0; i < bitset_bytes; i++) + { + unsigned char byte = filter->bitset[i]; + + while (byte) + { + bits_set++; + byte &= (byte - 1); + } + } + + return bits_set / (double) filter->m; +} + +/* + * Which element in the sequence of powers of two is less than or equal to + * target_bitset_bits? + * + * Value returned here must be generally safe as the basis for actual bitset + * size. + * + * Bitset is never allowed to exceed 2 ^ 32 bits (512MB). This is sufficient + * for the needs of all current callers, and allows us to use 32-bit hash + * functions. It also makes it easy to stay under the MaxAllocSize restriction + * (caller needs to leave room for non-bitset fields that appear before + * flexible array member, so a 1GB bitset would use an allocation that just + * exceeds MaxAllocSize). + */ +static int +my_bloom_power(uint64 target_bitset_bits) +{ + int bloom_power = -1; + + while (target_bitset_bits > 0 && bloom_power < 32) + { + bloom_power++; + target_bitset_bits >>= 1; + } + + return bloom_power; +} + +/* + * Determine optimal number of hash functions based on size of filter in bits, + * and projected total number of elements. The optimal number is the number + * that minimizes the false positive rate. + */ +static int +optimal_k(uint64 bitset_bits, int64 total_elems) +{ + int k = round(log(2.0) * bitset_bits / total_elems); + + return Max(1, Min(k, MAX_HASH_FUNCS)); +} + +/* + * Generate k hash values for element. + * + * Caller passes array, which is filled-in with k values determined by hashing + * caller's element. + * + * Only 2 real independent hash functions are actually used to support an + * interface of up to MAX_HASH_FUNCS hash functions; enhanced double hashing is + * used to make this work. The main reason we prefer enhanced double hashing + * to classic double hashing is that the latter has an issue with collisions + * when using power of two sized bitsets. See Dillinger & Manolios for full + * details. + */ +static void +k_hashes(bloom_filter *filter, uint32 *hashes, unsigned char *elem, size_t len) +{ + uint64 hash; + uint32 x, y; + uint64 m; + int i; + + /* Use 64-bit hashing to get two independent 32-bit hashes */ + hash = DatumGetUInt64(hash_any_extended(elem, len, filter->seed)); + x = (uint32) hash; + y = (uint32) (hash >> 32); + m = filter->m; + + x = mod_m(x, m); + y = mod_m(y, m); + + /* Accumulate hashes */ + hashes[0] = x; + for (i = 1; i < filter->k_hash_funcs; i++) + { + x = mod_m(x + y, m); + y = mod_m(y + i, m); + + hashes[i] = x; + } +} + +/* + * Calculate "val MOD m" inexpensively. + * + * Assumes that m (which is bitset size) is a power of two. + * + * Using a power of two number of bits for bitset size allows us to use bitwise + * AND operations to calculate the modulo of a hash value. It's also a simple + * way of avoiding the modulo bias effect. + */ +static inline uint32 +mod_m(uint32 val, uint64 m) +{ + Assert(m <= PG_UINT32_MAX + UINT64CONST(1)); + Assert(((m - 1) & m) == 0); + + return val & (m - 1); +} |