aboutsummaryrefslogtreecommitdiff
path: root/src/common/container.c
diff options
context:
space:
mode:
authorNick Mathewson <nickm@torproject.org>2008-12-26 17:35:18 +0000
committerNick Mathewson <nickm@torproject.org>2008-12-26 17:35:18 +0000
commit73e1a1d26e587e6a372954fbd5ef535bbb1713db (patch)
tree5c4ad445ea831c39b272d95c753c9f1df312de9e /src/common/container.c
parent0f9f45ff3348b1c2662b9ce55fed8483fb593ea0 (diff)
downloadtor-73e1a1d26e587e6a372954fbd5ef535bbb1713db.tar
tor-73e1a1d26e587e6a372954fbd5ef535bbb1713db.tar.gz
Document our Bloom filter parameter choices.
svn:r17785
Diffstat (limited to 'src/common/container.c')
-rw-r--r--src/common/container.c10
1 files changed, 10 insertions, 0 deletions
diff --git a/src/common/container.c b/src/common/container.c
index f5cde0261..5e1bc63a6 100644
--- a/src/common/container.c
+++ b/src/common/container.c
@@ -1233,6 +1233,16 @@ IMPLEMENT_ORDER_FUNC(find_nth_long, long)
digestset_t *
digestset_new(int max_elements)
{
+ /* The probability of false positivies is about P=(1 - exp(-kn/m))^k, where k
+ * is the number of hash functions per entry, m is the bits in the array,
+ * and n is the number of elements inserted. For us, k==4, n<=max_elements,
+ * and m==n_bits= approximately max_elements*32. This gives
+ * P<(1-exp(-4*n/(32*n)))^4 == (1-exp(1/-8))^4 == .00019
+ *
+ * It would be more optimal in space vs false positives to get this false
+ * positive rate by going for k==13, and m==18.5n, but we also want to
+ * conserve CPU, and k==13 is pretty big.
+ */
int n_bits = 1u << (tor_log2(max_elements)+5);
digestset_t *r = tor_malloc(sizeof(digestset_t));
r->mask = n_bits - 1;