aboutsummaryrefslogtreecommitdiff
path: root/src/or
diff options
context:
space:
mode:
author: Nick Mathewson <nickm@torproject.org> 2008-04-08 17:06:41 +0000
committer: Nick Mathewson <nickm@torproject.org> 2008-04-08 17:06:41 +0000
commit: a627407fcba1d5b1671e5789f420e4b5f8b63f99 (patch)
tree: e15ea16047c2352de2100aaa816ead54232cb236 /src/or
parent: 0c9efd6a1e252c2d6f495d158fdc1ec6877e0f10 (diff)
download: tor-a627407fcba1d5b1671e5789f420e4b5f8b63f99.tar
tor-a627407fcba1d5b1671e5789f420e4b5f8b63f99.tar.gz
r19233@catbus: nickm | 2008-04-08 13:06:34 -0400
When we remove old routers, use Bloom filters rather than a digestmap-based set in order to tell which ones we absolutely need to keep. This will save us roughly a kazillion little short-lived allocations for hash table entries. svn:r14318
Diffstat (limited to 'src/or')
-rw-r--r-- src/or/routerlist.c 51
-rw-r--r-- src/or/test.c 103
2 files changed, 134 insertions, 20 deletions
diff --git a/src/or/routerlist.c b/src/or/routerlist.c
index 46248fb67..60427bb58 100644
--- a/src/or/routerlist.c
+++ b/src/or/routerlist.c
@@ -2957,7 +2957,7 @@ _compare_duration_idx(const void *_d1, const void *_d2)
static void
routerlist_remove_old_cached_routers_with_id(time_t now,
time_t cutoff, int lo, int hi,
- digestmap_t *retain)
+ digestset_t *retain)
{
int i, n = hi-lo+1;
unsigned n_extra, n_rmv = 0;
@@ -2974,10 +2974,9 @@ routerlist_remove_old_cached_routers_with_id(time_t now,
tor_assert(!memcmp(ident, r->identity_digest, DIGEST_LEN));
}
#endif
-
/* Check whether we need to do anything at all. */
{
- int mdpr = directory_caches_dir_info(get_options()) ? 5 : 2;
+ int mdpr = directory_caches_dir_info(get_options()) ? 2 : 1;
if (n <= mdpr)
return;
n_extra = n - mdpr;
@@ -2993,7 +2992,7 @@ routerlist_remove_old_cached_routers_with_id(time_t now,
signed_descriptor_t *r_next;
lifespans[i-lo].idx = i;
if (r->last_listed_as_valid_until >= now ||
- (retain && digestmap_get(retain, r->signed_descriptor_digest))) {
+ (retain && digestset_isin(retain, r->signed_descriptor_digest))) {
must_keep[i-lo] = 1;
}
if (i < hi) {
@@ -3049,10 +3048,11 @@ routerlist_remove_old_routers(void)
time_t cutoff;
routerinfo_t *router;
signed_descriptor_t *sd;
- digestmap_t *retain;
+ digestset_t *retain;
int caches = directory_caches_dir_info(get_options());
const networkstatus_t *consensus = networkstatus_get_latest_consensus();
const smartlist_t *networkstatus_v2_list = networkstatus_get_v2_list();
+ int n_expected_retain = 0;
trusted_dirs_remove_old_certs();
@@ -3061,7 +3061,18 @@ routerlist_remove_old_routers(void)
// routerlist_assert_ok(routerlist);
- retain = digestmap_new();
+ n_expected_retain = smartlist_len(consensus->routerstatus_list);
+ if (caches &&
+ networkstatus_v2_list && smartlist_len(networkstatus_v2_list)) {
+ SMARTLIST_FOREACH(networkstatus_v2_list, networkstatus_v2_t *, ns,
+ n_expected_retain += smartlist_len(ns->entries));
+ /*XXXX021 too much magic. */
+ n_expected_retain /= (smartlist_len(networkstatus_v2_list)/2+1);
+ }
+ //log_notice(LD_DIR,"n_expected_retain=%d",n_expected_retain);
+
+ retain = digestset_new(n_expected_retain);
+
cutoff = now - OLD_ROUTER_DESC_MAX_AGE;
/* Build a list of all the descriptors that _anybody_ lists. */
if (caches) {
@@ -3077,7 +3088,7 @@ routerlist_remove_old_routers(void)
* system will obsolete this whole thing in 0.2.0.x. */
SMARTLIST_FOREACH(ns->entries, routerstatus_t *, rs,
if (rs->published_on >= cutoff)
- digestmap_set(retain, rs->descriptor_digest, (void*)1));
+ digestset_add(retain, rs->descriptor_digest));
});
}
@@ -3085,13 +3096,13 @@ routerlist_remove_old_routers(void)
if (consensus) {
SMARTLIST_FOREACH(consensus->routerstatus_list, routerstatus_t *, rs,
if (rs->published_on >= cutoff)
- digestmap_set(retain, rs->descriptor_digest, (void*)1));
+ digestset_add(retain, rs->descriptor_digest));
}
- /* If we have a bunch of networkstatuses, we should consider pruning current
- * routers that are too old and that nobody recommends. (If we don't have
- * enough networkstatuses, then we should get more before we decide to kill
- * routers.) */
+ /* If we have nearly as many networkstatuses as we want, we should consider
+ * pruning current routers that are too old and that nobody recommends. (If
+ * we don't have enough networkstatuses, then we should get more before we
+ * decide to kill routers.) */
if (!caches ||
smartlist_len(networkstatus_v2_list) > get_n_v2_authorities() / 2) {
cutoff = now - ROUTER_MAX_AGE;
@@ -3100,7 +3111,8 @@ routerlist_remove_old_routers(void)
router = smartlist_get(routerlist->routers, i);
if (router->cache_info.published_on <= cutoff &&
router->cache_info.last_listed_as_valid_until < now &&
- !digestmap_get(retain,router->cache_info.signed_descriptor_digest)) {
+ !digestset_isin(retain,
+ router->cache_info.signed_descriptor_digest)) {
/* Too old: remove it. (If we're a cache, just move it into
* old_routers.) */
log_info(LD_DIR,
@@ -3120,7 +3132,7 @@ routerlist_remove_old_routers(void)
sd = smartlist_get(routerlist->old_routers, i);
if (sd->published_on <= cutoff &&
sd->last_listed_as_valid_until < now &&
- !digestmap_get(retain, sd->signed_descriptor_digest)) {
+ !digestset_isin(retain, sd->signed_descriptor_digest)) {
/* Too old. Remove it. */
routerlist_remove_old(routerlist, sd, i--);
}
@@ -3128,11 +3140,9 @@ routerlist_remove_old_routers(void)
//routerlist_assert_ok(routerlist);
- log_info(LD_DIR, "We have %d live routers and %d old router descriptors. "
- "At most %d must be retained because of networkstatuses.",
+ log_info(LD_DIR, "We have %d live routers and %d old router descriptors.",
smartlist_len(routerlist->routers),
- smartlist_len(routerlist->old_routers),
- digestmap_size(retain));
+ smartlist_len(routerlist->old_routers));
/* Now we might have to look at routerlist->old_routers for extraneous
* members. (We'd keep all the members if we could, but we need to save
@@ -3141,9 +3151,10 @@ routerlist_remove_old_routers(void)
* total number doesn't approach max_descriptors_per_router()*len(router).
*/
if (smartlist_len(routerlist->old_routers) <
- smartlist_len(routerlist->routers) * (caches?4:2))
+ smartlist_len(routerlist->routers))
goto done;
+ /* Sort by identity, then fix indices. */
smartlist_sort(routerlist->old_routers, _compare_old_routers_by_identity);
/* Fix indices. */
for (i = 0; i < smartlist_len(routerlist->old_routers); ++i) {
@@ -3171,7 +3182,7 @@ routerlist_remove_old_routers(void)
//routerlist_assert_ok(routerlist);
done:
- digestmap_free(retain, NULL);
+ digestset_free(retain);
}
/** We just added a new set of descriptors. Take whatever extra steps
diff --git a/src/or/test.c b/src/or/test.c
index c421523a3..41d1d354a 100644
--- a/src/or/test.c
+++ b/src/or/test.c
@@ -1922,6 +1922,40 @@ test_util_bitarray(void)
bitarray_free(ba);
}
+/** Unit test for digestset_t.  Checks that a freshly created set reports
+ * none of 1000 random digests as members, that every one of those digests
+ * is reported as a member once it has been added, and that fewer than 50
+ * out of 1000 further random digests are reported as members. */
+static void
+test_util_digestset(void)
+{
+  smartlist_t *included = smartlist_create();
+  char d[DIGEST_LEN];
+  int i;
+  int ok = 1;
+  int false_positives = 0;
+  digestset_t *set;
+
+  /* Build the list of digests that will be inserted into the set. */
+  for (i = 0; i < 1000; ++i) {
+    crypto_rand(d, DIGEST_LEN);
+    smartlist_add(included, tor_memdup(d, DIGEST_LEN));
+  }
+  set = digestset_new(1000);
+  /* Nothing has been added yet, so no lookup may succeed. */
+  SMARTLIST_FOREACH(included, const char *, cp,
+                    if (digestset_isin(set, cp))
+                      ok = 0);
+  test_assert(ok);
+  SMARTLIST_FOREACH(included, const char *, cp,
+                    digestset_add(set, cp));
+  /* Every digest that was added must now be found. */
+  SMARTLIST_FOREACH(included, const char *, cp,
+                    if (!digestset_isin(set, cp))
+                      ok = 0);
+  test_assert(ok);
+  /* Probe with fresh random digests and count how many are reported as
+   * present even though they were never added. */
+  for (i = 0; i < 1000; ++i) {
+    crypto_rand(d, DIGEST_LEN);
+    if (digestset_isin(set, d))
+      ++false_positives;
+  }
+  test_assert(false_positives < 50); /* Should be far lower. */
+  digestset_free(set);
+  /* Release the digests copied with tor_memdup() and the list that holds
+   * them; previously both were leaked.
+   * NOTE(review): if test_assert() returns from this function on failure,
+   * this cleanup is skipped on the failure path -- confirm. */
+  SMARTLIST_FOREACH(included, char *, cp, tor_free(cp));
+  smartlist_free(included);
+}
+
/* stop threads running at once. */
static tor_mutex_t *_thread_test_mutex = NULL;
/* make sure that threads have to run at the same time. */
@@ -3362,6 +3396,69 @@ bench_aes(void)
}
+/** Benchmark comparing digestmap_t against digestset_t.
+ *
+ * Fills two lists with <b>elts</b> random digests each.  Only the first
+ * list is inserted into the containers; both lists are used for lookups.
+ * Each timed phase is repeated <b>iters</b> times.  Finally,
+ * <b>fpostests</b> fresh random digests are looked up in the digestset.
+ *
+ * Output, in order: "nbits" (the digestset's mask plus one), four elapsed
+ * times as returned by tv_udiff() (digestmap insert, digestmap lookup,
+ * digestset insert, digestset lookup), "--" followed by the sum of all
+ * digestset_isin() results from the timed lookups, and "++" followed by
+ * the fraction of the final random probes that were reported present. */
static void
+bench_dmap(void)
+{
+  smartlist_t *sl = smartlist_create();
+  smartlist_t *sl2 = smartlist_create();
+  struct timeval start, end, pt2, pt3, pt4;
+  const int iters = 10000;
+  const int elts = 4000;
+  const int fpostests = 1000000;
+  char d[DIGEST_LEN];
+  int i,n=0, fp = 0;
+  digestmap_t *dm = digestmap_new();
+  digestset_t *ds = digestset_new(elts);
+
+  for (i = 0; i < elts; ++i) {
+    crypto_rand(d, DIGEST_LEN);
+    smartlist_add(sl, tor_memdup(d, DIGEST_LEN));
+  }
+  for (i = 0; i < elts; ++i) {
+    crypto_rand(d, DIGEST_LEN);
+    smartlist_add(sl2, tor_memdup(d, DIGEST_LEN));
+  }
+  /* NOTE(review): assumes ds->mask is int-compatible with %d -- confirm
+   * against the digestset_t definition. */
+  printf("nbits=%d\n", ds->mask+1);
+
+  tor_gettimeofday(&start);
+  for (i = 0; i < iters; ++i) {
+    SMARTLIST_FOREACH(sl, const char *, cp, digestmap_set(dm, cp, (void*)1));
+  }
+  tor_gettimeofday(&pt2);
+  for (i = 0; i < iters; ++i) {
+    SMARTLIST_FOREACH(sl, const char *, cp, digestmap_get(dm, cp));
+    SMARTLIST_FOREACH(sl2, const char *, cp, digestmap_get(dm, cp));
+  }
+  tor_gettimeofday(&pt3);
+  for (i = 0; i < iters; ++i) {
+    SMARTLIST_FOREACH(sl, const char *, cp, digestset_add(ds, cp));
+  }
+  tor_gettimeofday(&pt4);
+  for (i = 0; i < iters; ++i) {
+    /* Accumulate into n so the lookups have an observable result. */
+    SMARTLIST_FOREACH(sl, const char *, cp, n += digestset_isin(ds, cp));
+    SMARTLIST_FOREACH(sl2, const char *, cp, n += digestset_isin(ds, cp));
+  }
+  tor_gettimeofday(&end);
+
+  for (i = 0; i < fpostests; ++i) {
+    crypto_rand(d, DIGEST_LEN);
+    if (digestset_isin(ds, d)) ++fp;
+  }
+
+  /* The values are cast to unsigned long, so print them with %lu. */
+  printf("%lu\n",(unsigned long)tv_udiff(&start, &pt2));
+  printf("%lu\n",(unsigned long)tv_udiff(&pt2, &pt3));
+  printf("%lu\n",(unsigned long)tv_udiff(&pt3, &pt4));
+  printf("%lu\n",(unsigned long)tv_udiff(&pt4, &end));
+  printf("-- %d\n", n);
+  printf("++ %f\n", fp/(double)fpostests);
+  digestmap_free(dm, NULL);
+  digestset_free(ds);
+  SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp));
+  SMARTLIST_FOREACH(sl2, char *, cp, tor_free(cp));
+  smartlist_free(sl);
+  smartlist_free(sl2);
+}
+
+static void
test_util_mempool(void)
{
mp_pool_t *pool;
@@ -3850,6 +3947,7 @@ static struct {
SUBENT(util, datadir),
SUBENT(util, smartlist),
SUBENT(util, bitarray),
+ SUBENT(util, digestset),
SUBENT(util, mempool),
SUBENT(util, memarea),
SUBENT(util, strmap),
@@ -3960,6 +4058,11 @@ main(int c, char**v)
return 0;
}
+ if (0) {
+ bench_dmap();
+ return 0;
+ }
+
atexit(remove_directory);
printf("Running Tor unit tests on %s\n", get_uname());