diff options
author | Nick Mathewson <nickm@torproject.org> | 2008-04-08 17:06:41 +0000 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2008-04-08 17:06:41 +0000 |
commit | a627407fcba1d5b1671e5789f420e4b5f8b63f99 (patch) | |
tree | e15ea16047c2352de2100aaa816ead54232cb236 /src/or | |
parent | 0c9efd6a1e252c2d6f495d158fdc1ec6877e0f10 (diff) | |
download | tor-a627407fcba1d5b1671e5789f420e4b5f8b63f99.tar tor-a627407fcba1d5b1671e5789f420e4b5f8b63f99.tar.gz |
r19233@catbus: nickm | 2008-04-08 13:06:34 -0400
When we remove old routers, use Bloom filters rather than a digestmap-based set in order to tell which ones we absolutely need to keep. This will save us roughly a kazillion little short-lived allocations for hash table entries.
svn:r14318
Diffstat (limited to 'src/or')
-rw-r--r-- | src/or/routerlist.c | 51 | ||||
-rw-r--r-- | src/or/test.c | 103 |
2 files changed, 134 insertions, 20 deletions
diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 46248fb67..60427bb58 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -2957,7 +2957,7 @@ _compare_duration_idx(const void *_d1, const void *_d2) static void routerlist_remove_old_cached_routers_with_id(time_t now, time_t cutoff, int lo, int hi, - digestmap_t *retain) + digestset_t *retain) { int i, n = hi-lo+1; unsigned n_extra, n_rmv = 0; @@ -2974,10 +2974,9 @@ routerlist_remove_old_cached_routers_with_id(time_t now, tor_assert(!memcmp(ident, r->identity_digest, DIGEST_LEN)); } #endif - /* Check whether we need to do anything at all. */ { - int mdpr = directory_caches_dir_info(get_options()) ? 5 : 2; + int mdpr = directory_caches_dir_info(get_options()) ? 2 : 1; if (n <= mdpr) return; n_extra = n - mdpr; @@ -2993,7 +2992,7 @@ routerlist_remove_old_cached_routers_with_id(time_t now, signed_descriptor_t *r_next; lifespans[i-lo].idx = i; if (r->last_listed_as_valid_until >= now || - (retain && digestmap_get(retain, r->signed_descriptor_digest))) { + (retain && digestset_isin(retain, r->signed_descriptor_digest))) { must_keep[i-lo] = 1; } if (i < hi) { @@ -3049,10 +3048,11 @@ routerlist_remove_old_routers(void) time_t cutoff; routerinfo_t *router; signed_descriptor_t *sd; - digestmap_t *retain; + digestset_t *retain; int caches = directory_caches_dir_info(get_options()); const networkstatus_t *consensus = networkstatus_get_latest_consensus(); const smartlist_t *networkstatus_v2_list = networkstatus_get_v2_list(); + int n_expected_retain = 0; trusted_dirs_remove_old_certs(); @@ -3061,7 +3061,18 @@ routerlist_remove_old_routers(void) // routerlist_assert_ok(routerlist); - retain = digestmap_new(); + n_expected_retain = smartlist_len(consensus->routerstatus_list); + if (caches && + networkstatus_v2_list && smartlist_len(networkstatus_v2_list)) { + SMARTLIST_FOREACH(networkstatus_v2_list, networkstatus_v2_t *, ns, + n_expected_retain += smartlist_len(ns->entries)); + /*XXXX021 too much magic. */ + n_expected_retain /= (smartlist_len(networkstatus_v2_list)/2+1); + } + //log_notice(LD_DIR,"n_expected_retain=%d",n_expected_retain); + + retain = digestset_new(n_expected_retain); + cutoff = now - OLD_ROUTER_DESC_MAX_AGE; /* Build a list of all the descriptors that _anybody_ lists. */ if (caches) { @@ -3077,7 +3088,7 @@ routerlist_remove_old_routers(void) * system will obsolete this whole thing in 0.2.0.x. */ SMARTLIST_FOREACH(ns->entries, routerstatus_t *, rs, if (rs->published_on >= cutoff) - digestmap_set(retain, rs->descriptor_digest, (void*)1)); + digestset_add(retain, rs->descriptor_digest)); }); } @@ -3085,13 +3096,13 @@ routerlist_remove_old_routers(void) if (consensus) { SMARTLIST_FOREACH(consensus->routerstatus_list, routerstatus_t *, rs, if (rs->published_on >= cutoff) - digestmap_set(retain, rs->descriptor_digest, (void*)1)); + digestset_add(retain, rs->descriptor_digest)); } - /* If we have a bunch of networkstatuses, we should consider pruning current - * routers that are too old and that nobody recommends. (If we don't have - * enough networkstatuses, then we should get more before we decide to kill - * routers.) */ + /* If we have nearly as many networkstatuses as we want, we should consider + * pruning current routers that are too old and that nobody recommends. (If + * we don't have enough networkstatuses, then we should get more before we + * decide to kill routers.) */ if (!caches || smartlist_len(networkstatus_v2_list) > get_n_v2_authorities() / 2) { cutoff = now - ROUTER_MAX_AGE; @@ -3100,7 +3111,8 @@ routerlist_remove_old_routers(void) router = smartlist_get(routerlist->routers, i); if (router->cache_info.published_on <= cutoff && router->cache_info.last_listed_as_valid_until < now && - !digestmap_get(retain,router->cache_info.signed_descriptor_digest)) { + !digestset_isin(retain, + router->cache_info.signed_descriptor_digest)) { /* Too old: remove it. (If we're a cache, just move it into * old_routers.) */ log_info(LD_DIR, @@ -3120,7 +3132,7 @@ routerlist_remove_old_routers(void) sd = smartlist_get(routerlist->old_routers, i); if (sd->published_on <= cutoff && sd->last_listed_as_valid_until < now && - !digestmap_get(retain, sd->signed_descriptor_digest)) { + !digestset_isin(retain, sd->signed_descriptor_digest)) { /* Too old. Remove it. */ routerlist_remove_old(routerlist, sd, i--); } @@ -3128,11 +3140,9 @@ routerlist_remove_old_routers(void) //routerlist_assert_ok(routerlist); - log_info(LD_DIR, "We have %d live routers and %d old router descriptors. " - "At most %d must be retained because of networkstatuses.", + log_info(LD_DIR, "We have %d live routers and %d old router descriptors.", smartlist_len(routerlist->routers), - smartlist_len(routerlist->old_routers), - digestmap_size(retain)); + smartlist_len(routerlist->old_routers)); /* Now we might have to look at routerlist->old_routers for extraneous * members. (We'd keep all the members if we could, but we need to save @@ -3141,9 +3151,10 @@ routerlist_remove_old_routers(void) * total number doesn't approach max_descriptors_per_router()*len(router). */ if (smartlist_len(routerlist->old_routers) < - smartlist_len(routerlist->routers) * (caches?4:2)) + smartlist_len(routerlist->routers)) goto done; + /* Sort by identity, then fix indices. */ smartlist_sort(routerlist->old_routers, _compare_old_routers_by_identity); /* Fix indices. */ for (i = 0; i < smartlist_len(routerlist->old_routers); ++i) { @@ -3171,7 +3182,7 @@ routerlist_remove_old_routers(void) //routerlist_assert_ok(routerlist); done: - digestmap_free(retain, NULL); + digestset_free(retain); } /** We just added a new set of descriptors. Take whatever extra steps diff --git a/src/or/test.c b/src/or/test.c index c421523a3..41d1d354a 100644 --- a/src/or/test.c +++ b/src/or/test.c @@ -1922,6 +1922,40 @@ test_util_bitarray(void) bitarray_free(ba); } +static void +test_util_digestset(void) +{ + smartlist_t *included = smartlist_create(); + char d[DIGEST_LEN]; + int i; + int ok = 1; + int false_positives = 0; + digestset_t *set; + + for (i = 0; i < 1000; ++i) { + crypto_rand(d, DIGEST_LEN); + smartlist_add(included, tor_memdup(d, DIGEST_LEN)); + } + set = digestset_new(1000); + SMARTLIST_FOREACH(included, const char *, cp, + if (digestset_isin(set, cp)) + ok = 0); + test_assert(ok); + SMARTLIST_FOREACH(included, const char *, cp, + digestset_add(set, cp)); + SMARTLIST_FOREACH(included, const char *, cp, + if (!digestset_isin(set, cp)) + ok = 0); + test_assert(ok); + for (i = 0; i < 1000; ++i) { + crypto_rand(d, DIGEST_LEN); + if (digestset_isin(set, d)) + ++false_positives; + } + test_assert(false_positives < 50); /* Should be far lower. */ + digestset_free(set); +} + /* stop threads running at once. */ static tor_mutex_t *_thread_test_mutex = NULL; /* make sure that threads have to run at the same time. */ @@ -3362,6 +3396,69 @@ bench_aes(void) } static void +bench_dmap(void) +{ + smartlist_t *sl = smartlist_create(); + smartlist_t *sl2 = smartlist_create(); + struct timeval start, end, pt2, pt3, pt4; + const int iters = 10000; + const int elts = 4000; + const int fpostests = 1000000; + char d[20]; + int i,n=0, fp = 0; + digestmap_t *dm = digestmap_new(); + digestset_t *ds = digestset_new(elts); + + for (i = 0; i < elts; ++i) { + crypto_rand(d, 20); + smartlist_add(sl, tor_memdup(d, 20)); + } + for (i = 0; i < elts; ++i) { + crypto_rand(d, 20); + smartlist_add(sl2, tor_memdup(d, 20)); + } + printf("nbits=%d\n", ds->mask+1); + + tor_gettimeofday(&start); + for (i = 0; i < iters; ++i) { + SMARTLIST_FOREACH(sl, const char *, cp, digestmap_set(dm, cp, (void*)1)); + } + tor_gettimeofday(&pt2); + for (i = 0; i < iters; ++i) { + SMARTLIST_FOREACH(sl, const char *, cp, digestmap_get(dm, cp)); + SMARTLIST_FOREACH(sl2, const char *, cp, digestmap_get(dm, cp)); + } + tor_gettimeofday(&pt3); + for (i = 0; i < iters; ++i) { + SMARTLIST_FOREACH(sl, const char *, cp, digestset_add(ds, cp)); + } + tor_gettimeofday(&pt4); + for (i = 0; i < iters; ++i) { + SMARTLIST_FOREACH(sl, const char *, cp, n += digestset_isin(ds, cp)); + SMARTLIST_FOREACH(sl2, const char *, cp, n += digestset_isin(ds, cp)); + } + tor_gettimeofday(&end); + + for (i = 0; i < fpostests; ++i) { + crypto_rand(d, 20); + if (digestset_isin(ds, d)) ++fp; + } + + printf("%ld\n",(unsigned long)tv_udiff(&start, &pt2)); + printf("%ld\n",(unsigned long)tv_udiff(&pt2, &pt3)); + printf("%ld\n",(unsigned long)tv_udiff(&pt3, &pt4)); + printf("%ld\n",(unsigned long)tv_udiff(&pt4, &end)); + printf("-- %d\n", n); + printf("++ %f\n", fp/(double)fpostests); + digestmap_free(dm, NULL); + digestset_free(ds); + SMARTLIST_FOREACH(sl, char *, cp, tor_free(cp)); + SMARTLIST_FOREACH(sl2, char *, cp, tor_free(cp)); + smartlist_free(sl); + smartlist_free(sl2); +} + +static void test_util_mempool(void) { mp_pool_t *pool; @@ -3850,6 +3947,7 @@ static struct { SUBENT(util, datadir), SUBENT(util, smartlist), SUBENT(util, bitarray), + SUBENT(util, digestset), SUBENT(util, mempool), SUBENT(util, memarea), SUBENT(util, strmap), @@ -3960,6 +4058,11 @@ main(int c, char**v) return 0; } + if (0) { + bench_dmap(); + return 0; + } + atexit(remove_directory); printf("Running Tor unit tests on %s\n", get_uname()); |