From 07df4dd52d3ab2eea2e8a8fc3222a5d297d077de Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 9 Aug 2012 13:47:42 -0400 Subject: Refactor the core of choosing by weights into a function This eliminates duplicated code, and lets us test a hairy piece of functionality. --- src/test/test.h | 4 +++ src/test/test_dir.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+) (limited to 'src/test') diff --git a/src/test/test.h b/src/test/test.h index 0b6e6c60c..6dcb9490b 100644 --- a/src/test/test.h +++ b/src/test/test.h @@ -65,6 +65,10 @@ #define test_memeq_hex(expr1, hex) test_mem_op_hex(expr1, ==, hex) +#define tt_double_op(a,op,b) \ + tt_assert_test_type(a,b,#a" "#op" "#b,double,(val1_ op val2_),"%f", \ + TT_EXIT_TEST_FUNCTION) + const char *get_fname(const char *name); crypto_pk_t *pk_generate(int idx); diff --git a/src/test/test_dir.c b/src/test/test_dir.c index 83c612045..ed0c5a1af 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -7,6 +7,7 @@ #define DIRSERV_PRIVATE #define DIRVOTE_PRIVATE #define ROUTER_PRIVATE +#define ROUTERLIST_PRIVATE #define HIBERNATE_PRIVATE #include "or.h" #include "directory.h" @@ -1381,6 +1382,85 @@ test_dir_v3_networkstatus(void) ns_detached_signatures_free(dsig2); } +static void +test_dir_random_weighted(void *testdata) +{ + int histogram[10]; + uint64_t vals[10] = {3,1,2,4,6,0,7,5,8,9}, total=0; + uint64_t zeros[5] = {0,0,0,0,0}; + int i, choice; + const int n = 50000; + double max_sq_error; + (void) testdata; + + /* Try a ten-element array with values from 0 through 10. The values are + * in a scrambled order to make sure we don't depend on order. */ + memset(histogram,0,sizeof(histogram)); + for (i=0; i<10; ++i) + total += vals[i]; + tt_int_op(total, ==, 45); + for (i=0; i=, 0); + tt_int_op(choice, <, 10); + histogram[choice]++; + } + + /* Now see if we chose things about frequently enough. */ + max_sq_error = 0; + for (i=0; i<10; ++i) { + int expected = (int)(n*vals[i]/total); + double frac_diff = 0, sq; + TT_BLATHER((" %d : %5d vs %5d\n", (int)vals[i], histogram[i], expected)); + if (expected) + frac_diff = (histogram[i] - expected) / ((double)expected); + else + tt_int_op(histogram[i], ==, 0); + + sq = frac_diff * frac_diff; + if (sq > max_sq_error) + max_sq_error = sq; + } + /* It should almost always be much much less than this. If you want to + * figure out the odds, please feel free. */ + tt_double_op(max_sq_error, <, .05); + + /* Now try a singleton; do we choose it? */ + for (i = 0; i < 100; ++i) { + choice = choose_array_element_by_weight(vals, 1, NULL); + tt_int_op(choice, ==, 0); + } + + /* Now try an array of zeros. We should choose randomly. */ + memset(histogram,0,sizeof(histogram)); + for (i = 0; i < n; ++i) { + uint64_t t; + choice = choose_array_element_by_weight(zeros, 5, &t); + tt_int_op(t, ==, 0); + tt_int_op(choice, >=, 0); + tt_int_op(choice, <, 5); + histogram[choice]++; + } + /* Now see if we chose things about frequently enough. */ + max_sq_error = 0; + for (i=0; i<5; ++i) { + int expected = n/5; + double frac_diff = 0, sq; + TT_BLATHER((" %d : %5d vs %5d\n", (int)vals[i], histogram[i], expected)); + frac_diff = (histogram[i] - expected) / ((double)expected); + sq = frac_diff * frac_diff; + if (sq > max_sq_error) + max_sq_error = sq; + } + /* It should almost always be much much less than this. If you want to + * figure out the odds, please feel free. */ + tt_double_op(max_sq_error, <, .05); + done: + ; +} + #define DIR_LEGACY(name) \ { #name, legacy_test_helper, TT_FORK, &legacy_setup, test_dir_ ## name } @@ -1396,6 +1476,7 @@ struct testcase_t dir_tests[] = { DIR_LEGACY(measured_bw), DIR_LEGACY(param_voting), DIR_LEGACY(v3_networkstatus), + DIR(random_weighted), END_OF_TESTCASES }; -- cgit v1.2.3 From 5c3199cda72fbdcf8f801219a0f9932673801da5 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Mon, 27 Aug 2012 17:37:56 -0400 Subject: In choose-by-bw, scale to better use the range of uint64 The smart part of this is based on an approach and a suggestion by rransom. The unsmart part is my own fault. --- src/or/routerlist.c | 109 +++++++++++++++++++++++++++++++++------------------- src/or/routerlist.h | 13 ++++++- src/test/test_dir.c | 60 ++++++++++++++++++++++++----- 3 files changed, 131 insertions(+), 51 deletions(-) (limited to 'src/test') diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 1c0aca8ad..185abf53b 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -1653,15 +1653,41 @@ router_get_advertised_bandwidth_capped(const routerinfo_t *router) return result; } +/** Given an array of double/uint64_t unions that are currently being used as + * doubles, convert them to uint64_t, and try to scale them linearly so as to + * much of the range of uint64_t. If total_out is provided, set it to + * the sum of all elements in the array _before_ scaling. */ +/* private */ void +scale_array_elements_to_u64(u64_dbl_t *entries, int n_entries, + uint64_t *total_out) +{ + double total = 0.0; + double scale_factor; + int i; + /* big, but far away from overflowing an int64_t */ +#define SCALE_TO_U64_MAX (INT64_MAX / 4) + + for (i = 0; i < n_entries; ++i) + total += entries[i].dbl; + + scale_factor = SCALE_TO_U64_MAX / total; + + for (i = 0; i < n_entries; ++i) + entries[i].u64 = tor_llround(entries[i].dbl * scale_factor); + + if (total_out) + *total_out = (uint64_t) total; + +#undef SCALE_TO_U64_MAX +} + /** Pick a random element of n_entries-element array entries, - * choosing each element with a probability proportional to its value, and - * return the index of that element. If all elements are 0, choose an index - * at random. If total_out is provided, set it to the sum of all - * elements in the array. Return -1 on error. + * choosing each element with a probability proportional to its (uint64_t) + * value, and return the index of that element. If all elements are 0, choose + * an index at random. Return -1 on error. */ /* private */ int -choose_array_element_by_weight(const uint64_t *entries, int n_entries, - uint64_t *total_out) +choose_array_element_by_weight(const u64_dbl_t *entries, int n_entries) { int i, i_chosen=-1, n_chosen=0; uint64_t total_so_far = 0; @@ -1669,10 +1695,7 @@ choose_array_element_by_weight(const uint64_t *entries, int n_entries, uint64_t total = 0; for (i = 0; i < n_entries; ++i) - total += entries[i]; - - if (total_out) - *total_out = total; + total += entries[i].u64; if (n_entries < 1) return -1; @@ -1683,7 +1706,7 @@ choose_array_element_by_weight(const uint64_t *entries, int n_entries, rand_val = crypto_rand_uint64(total); for (i = 0; i < n_entries; ++i) { - total_so_far += entries[i]; + total_so_far += entries[i].u64; if (total_so_far > rand_val) { i_chosen = i; n_chosen++; @@ -1753,7 +1776,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, double Wg = -1, Wm = -1, We = -1, Wd = -1; double Wgb = -1, Wmb = -1, Web = -1, Wdb = -1; uint64_t weighted_bw = 0; - uint64_t *bandwidths; + u64_dbl_t *bandwidths; /* Can't choose exit and guard at same time */ tor_assert(rule == NO_WEIGHTING || @@ -1834,7 +1857,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, Web /= weight_scale; Wdb /= weight_scale; - bandwidths = tor_malloc_zero(sizeof(uint64_t)*smartlist_len(sl)); + bandwidths = tor_malloc_zero(sizeof(u64_dbl_t)*smartlist_len(sl)); // Cycle through smartlist and total the bandwidth. SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) { @@ -1879,9 +1902,9 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, if (weight < 0.0) weight = 0.0; - bandwidths[node_sl_idx] = tor_llround(weight*this_bw + 0.5); + bandwidths[node_sl_idx].dbl = weight*this_bw + 0.5; if (is_me) - sl_last_weighted_bw_of_me = bandwidths[node_sl_idx]; + sl_last_weighted_bw_of_me = (uint64_t) bandwidths[node_sl_idx].dbl; } SMARTLIST_FOREACH_END(node); log_debug(LD_CIRC, "Choosing node for rule %s based on weights " @@ -1889,10 +1912,12 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl, bandwidth_weight_rule_to_string(rule), Wg, Wm, We, Wd, U64_PRINTF_ARG(weighted_bw)); + scale_array_elements_to_u64(bandwidths, smartlist_len(sl), + &sl_last_total_weighted_bw); + { int idx = choose_array_element_by_weight(bandwidths, - smartlist_len(sl), - &sl_last_total_weighted_bw); + smartlist_len(sl)); tor_free(bandwidths); return idx < 0 ? NULL : smartlist_get(sl, idx); } @@ -1916,12 +1941,12 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule) { unsigned int i; - uint64_t *bandwidths; + u64_dbl_t *bandwidths; int is_exit; int is_guard; int is_fast; - uint64_t total_nonexit_bw = 0, total_exit_bw = 0; - uint64_t total_nonguard_bw = 0, total_guard_bw = 0; + double total_nonexit_bw = 0, total_exit_bw = 0; + double total_nonguard_bw = 0, total_guard_bw = 0; double exit_weight; double guard_weight; int n_unknown = 0; @@ -1950,7 +1975,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, /* First count the total bandwidth weight, and make a list * of each value. We use UINT64_MAX to indicate "unknown". */ - bandwidths = tor_malloc_zero(sizeof(uint64_t)*smartlist_len(sl)); + bandwidths = tor_malloc_zero(sizeof(u64_dbl_t)*smartlist_len(sl)); fast_bits = bitarray_init_zero(smartlist_len(sl)); exit_bits = bitarray_init_zero(smartlist_len(sl)); guard_bits = bitarray_init_zero(smartlist_len(sl)); @@ -1986,7 +2011,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, bitarray_set(fast_bits, i); if (is_known) { - bandwidths[i] = this_bw; + bandwidths[i].dbl = this_bw; if (is_guard) total_guard_bw += this_bw; else @@ -1997,14 +2022,16 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, total_nonexit_bw += this_bw; } else { ++n_unknown; - bandwidths[i] = UINT64_MAX; + bandwidths[i].dbl = -1.0; } } SMARTLIST_FOREACH_END(node); +#define EPSILON .1 + /* Now, fill in the unknown values. */ if (n_unknown) { int32_t avg_fast, avg_slow; - if (total_exit_bw+total_nonexit_bw) { + if (total_exit_bw+total_nonexit_bw < EPSILON) { /* if there's some bandwidth, there's at least one known router, * so no worries about div by 0 here */ int n_known = smartlist_len(sl)-n_unknown; @@ -2015,25 +2042,25 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, avg_slow = 20000; } for (i=0; i<(unsigned)smartlist_len(sl); ++i) { - if (bandwidths[i] != UINT64_MAX) + if (bandwidths[i].dbl >= 0.0) continue; is_fast = bitarray_is_set(fast_bits, i); is_exit = bitarray_is_set(exit_bits, i); is_guard = bitarray_is_set(guard_bits, i); - bandwidths[i] = is_fast ? avg_fast : avg_slow; + bandwidths[i].dbl = is_fast ? avg_fast : avg_slow; if (is_exit) - total_exit_bw += bandwidths[i]; + total_exit_bw += bandwidths[i].dbl; else - total_nonexit_bw += bandwidths[i]; + total_nonexit_bw += bandwidths[i].dbl; if (is_guard) - total_guard_bw += bandwidths[i]; + total_guard_bw += bandwidths[i].dbl; else - total_nonguard_bw += bandwidths[i]; + total_nonguard_bw += bandwidths[i].dbl; } } /* If there's no bandwidth at all, pick at random. */ - if (!(total_exit_bw+total_nonexit_bw)) { + if (total_exit_bw+total_nonexit_bw < EPSILON) { tor_free(bandwidths); tor_free(fast_bits); tor_free(exit_bits); @@ -2050,12 +2077,12 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, * For detailed derivation of this formula, see * http://archives.seul.org/or/dev/Jul-2007/msg00056.html */ - if (rule == WEIGHT_FOR_EXIT || !total_exit_bw) + if (rule == WEIGHT_FOR_EXIT || total_exit_bw= 0.0); is_exit = bitarray_is_set(exit_bits, i); is_guard = bitarray_is_set(guard_bits, i); if (is_exit && is_guard) - bandwidths[i] = tor_llround(bandwidths[i] * exit_weight * guard_weight); + bandwidths[i].dbl *= exit_weight * guard_weight; else if (is_guard) - bandwidths[i] = tor_llround(bandwidths[i] * guard_weight); + bandwidths[i].dbl *= guard_weight; else if (is_exit) - bandwidths[i] = tor_llround(bandwidths[i] * exit_weight); + bandwidths[i].dbl *= exit_weight; if (i == (unsigned) me_idx) - sl_last_weighted_bw_of_me = bandwidths[i]; + sl_last_weighted_bw_of_me = (uint64_t) bandwidths[i].dbl; } } @@ -2099,10 +2126,12 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl, guard_weight, (int)(rule == WEIGHT_FOR_GUARD)); #endif + scale_array_elements_to_u64(bandwidths, smartlist_len(sl), + &sl_last_total_weighted_bw); + { int idx = choose_array_element_by_weight(bandwidths, - smartlist_len(sl), - &sl_last_total_weighted_bw); + smartlist_len(sl)); tor_free(bandwidths); tor_free(fast_bits); tor_free(exit_bits); diff --git a/src/or/routerlist.h b/src/or/routerlist.h index 0b9b29751..207261189 100644 --- a/src/or/routerlist.h +++ b/src/or/routerlist.h @@ -217,8 +217,17 @@ int hex_digest_nickname_decode(const char *hexdigest, char *nickname_out); #ifdef ROUTERLIST_PRIVATE -int choose_array_element_by_weight(const uint64_t *entries, int n_entries, - uint64_t *total_out); +/** Helper type for choosing routers by bandwidth: contains a union of + * double and uint64_t. Before we call scale_array_elements_to_u64, it holds + * a double; after, it holds a uint64_t. */ +typedef union u64_dbl_t { + uint64_t u64; + double dbl; +} u64_dbl_t; + +int choose_array_element_by_weight(const u64_dbl_t *entries, int n_entries); +void scale_array_elements_to_u64(u64_dbl_t *entries, int n_entries, + uint64_t *total_out); #endif #endif diff --git a/src/test/test_dir.c b/src/test/test_dir.c index ed0c5a1af..3f78ece6a 100644 --- a/src/test/test_dir.c +++ b/src/test/test_dir.c @@ -4,6 +4,8 @@ /* See LICENSE for licensing information */ #include "orconfig.h" +#include + #define DIRSERV_PRIVATE #define DIRVOTE_PRIVATE #define ROUTER_PRIVATE @@ -1382,12 +1384,51 @@ test_dir_v3_networkstatus(void) ns_detached_signatures_free(dsig2); } +static void +test_dir_scale_bw(void *testdata) +{ + double v[8] = { 2.0/3, + 7.0, + 1.0, + 3.0, + 1.0/5, + 1.0/7, + 12.0, + 24.0 }; + u64_dbl_t vals[8]; + uint64_t total; + int i; + + (void) testdata; + + for (i=0; i<8; ++i) + vals[i].dbl = v[i]; + + scale_array_elements_to_u64(vals, 8, &total); + + tt_int_op((int)total, ==, 48); + total = 0; + for (i=0; i<8; ++i) { + total += vals[i].u64; + } + tt_assert(total >= (U64_LITERAL(1)<<60)); + tt_assert(total <= (U64_LITERAL(1)<<62)); + + for (i=0; i<8; ++i) { + double ratio = ((double)vals[i].u64) / vals[2].u64; + tt_double_op(fabs(ratio - v[i]), <, .00001); + } + + done: + ; +} + static void test_dir_random_weighted(void *testdata) { int histogram[10]; uint64_t vals[10] = {3,1,2,4,6,0,7,5,8,9}, total=0; - uint64_t zeros[5] = {0,0,0,0,0}; + u64_dbl_t inp[10]; int i, choice; const int n = 50000; double max_sq_error; @@ -1396,13 +1437,13 @@ test_dir_random_weighted(void *testdata) /* Try a ten-element array with values from 0 through 10. The values are * in a scrambled order to make sure we don't depend on order. */ memset(histogram,0,sizeof(histogram)); - for (i=0; i<10; ++i) + for (i=0; i<10; ++i) { + inp[i].u64 = vals[i]; total += vals[i]; + } tt_int_op(total, ==, 45); for (i=0; i=, 0); tt_int_op(choice, <, 10); histogram[choice]++; @@ -1429,16 +1470,16 @@ test_dir_random_weighted(void *testdata) /* Now try a singleton; do we choose it? */ for (i = 0; i < 100; ++i) { - choice = choose_array_element_by_weight(vals, 1, NULL); + choice = choose_array_element_by_weight(inp, 1); tt_int_op(choice, ==, 0); } /* Now try an array of zeros. We should choose randomly. */ memset(histogram,0,sizeof(histogram)); + for (i = 0; i < 5; ++i) + inp[i].u64 = 0; for (i = 0; i < n; ++i) { - uint64_t t; - choice = choose_array_element_by_weight(zeros, 5, &t); - tt_int_op(t, ==, 0); + choice = choose_array_element_by_weight(inp, 5); tt_int_op(choice, >=, 0); tt_int_op(choice, <, 5); histogram[choice]++; @@ -1477,6 +1518,7 @@ struct testcase_t dir_tests[] = { DIR_LEGACY(param_voting), DIR_LEGACY(v3_networkstatus), DIR(random_weighted), + DIR(scale_bw), END_OF_TESTCASES }; -- cgit v1.2.3