aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/or/circuitbuild.c50
-rw-r--r--src/or/or.h3
2 files changed, 43 insertions, 10 deletions
diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c
index 233d60f15..98f6baf2b 100644
--- a/src/or/circuitbuild.c
+++ b/src/or/circuitbuild.c
@@ -20,6 +20,8 @@
#define MIN(a,b) ((a)<(b)?(a):(b))
#endif
+#define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2))
+
/********* START VARIABLES **********/
/** Global list of circuit build times */
// FIXME: Add this as a member for entry_guard_t instead of global?
@@ -387,25 +389,53 @@ circuit_build_times_create_histogram(circuit_build_times_t *cbt,
}
/**
- * Return the most frequent build time (rounded to CBT_BIN_WIDTH ms).
+ * Return the Pareto start-of-curve parameter Xm.
*
- * Ties go in favor of the slower time.
+ * Because we are not a true Pareto curve, we compute this as the
+ * weighted average of the N=3 most frequent build time bins.
*/
static build_time_t
-circuit_build_times_mode(circuit_build_times_t *cbt)
+circuit_build_times_get_xm(circuit_build_times_t *cbt)
{
- build_time_t i, nbins, max_bin=0;
+ build_time_t i, nbins;
+ build_time_t nth_max_bin[CBT_NUM_XM_MODES];
+ int32_t bin_counts=0;
+ build_time_t ret = 0;
uint32_t *histogram = circuit_build_times_create_histogram(cbt, &nbins);
+ int n=0;
+ int num_modes = CBT_NUM_XM_MODES;
+
+ // Only use one mode if < 1000 buildtimes. Not enough data
+ // for multiple.
+ if (cbt->total_build_times < CBT_NCIRCUITS_TO_OBSERVE)
+ num_modes = 1;
+ memset(nth_max_bin, 0, sizeof(nth_max_bin));
for (i = 0; i < nbins; i++) {
- if (histogram[i] >= histogram[max_bin]) {
- max_bin = i;
+ if (histogram[i] >= histogram[nth_max_bin[0]]) {
+ nth_max_bin[0] = i;
}
+
+ for (n = 1; n < num_modes; n++) {
+ if (histogram[i] >= histogram[nth_max_bin[n]] &&
+ (!histogram[nth_max_bin[n-1]]
+ || histogram[i] < histogram[nth_max_bin[n-1]])) {
+ nth_max_bin[n] = i;
+ }
+ }
+ }
+
+ for (n = 0; n < num_modes; n++) {
+ bin_counts += histogram[nth_max_bin[n]];
+ ret += CBT_BIN_TO_MS(nth_max_bin[n])*histogram[nth_max_bin[n]];
+ log_info(LD_CIRC, "Xm mode #%d: %u %u", n, CBT_BIN_TO_MS(nth_max_bin[n]),
+ histogram[nth_max_bin[n]]);
}
+ ret /= bin_counts;
tor_free(histogram);
- return max_bin*CBT_BIN_WIDTH+CBT_BIN_WIDTH/2;
+ return ret;
}
/**
@@ -436,7 +466,7 @@ circuit_build_times_update_state(circuit_build_times_t *cbt,
line->key = tor_strdup("CircuitBuildTimeBin");
line->value = tor_malloc(25);
tor_snprintf(line->value, 25, "%d %d",
- i*CBT_BIN_WIDTH+CBT_BIN_WIDTH/2, histogram[i]);
+ CBT_BIN_TO_MS(i), histogram[i]);
next = &(line->next);
}
@@ -596,7 +626,7 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt)
/* http://en.wikipedia.org/wiki/Pareto_distribution#Parameter_estimation */
/* We sort of cheat here and make our samples slightly more pareto-like
* and less frechet-like. */
- cbt->Xm = circuit_build_times_mode(cbt);
+ cbt->Xm = circuit_build_times_get_xm(cbt);
for (i=0; i< CBT_NCIRCUITS_TO_OBSERVE; i++) {
if (!x[i]) {
@@ -763,7 +793,7 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt)
(cbt->pre_timeouts+cbt->total_build_times);
/* Make sure it doesn't exceed the synthetic max */
timeout_quantile *= CBT_MAX_SYNTHETIC_QUANTILE;
- cbt->Xm = circuit_build_times_mode(cbt);
+ cbt->Xm = circuit_build_times_get_xm(cbt);
tor_assert(cbt->Xm > 0);
/* Use current timeout to get an estimate on alpha */
circuit_build_times_initial_alpha(cbt, timeout_quantile,
diff --git a/src/or/or.h b/src/or/or.h
index 9c613d28d..0d35de1be 100644
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -3023,6 +3023,9 @@ void entry_guards_free_all(void);
/** Width of the histogram bins in milliseconds */
#define CBT_BIN_WIDTH ((build_time_t)50)
+/** Number of modes to use in the weighted-avg computation of Xm */
+#define CBT_NUM_XM_MODES (3)
+
/** A build_time_t is milliseconds */
typedef uint32_t build_time_t;
#define CBT_BUILD_TIME_MAX ((build_time_t)(INT32_MAX))