From 463f6628d316cecdd612b4a78cd5349ab4a824c5 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Wed, 9 Apr 2014 11:13:37 -0400 Subject: Give each or_connection_t a slightly randomized idle_timeout Instead of killing an or_connection_t that has had no circuits for the last 3 minutes, give every or_connection_t a randomized timeout, so that an observer can't so easily infer from the connection close time the time at which its last circuit closed. Also, increase the base timeout for canonical connections from 3 minutes to 15 minutes. Fix for ticket 6799. --- changes/bug6799 | 13 +++++++++++++ src/or/channeltls.c | 2 +- src/or/connection.c | 2 ++ src/or/connection_or.c | 41 ++++++++++++++++++++++++++++++++++++++++- src/or/connection_or.h | 2 ++ src/or/main.c | 11 +---------- src/or/or.h | 5 ++++- 7 files changed, 63 insertions(+), 13 deletions(-) create mode 100644 changes/bug6799 diff --git a/changes/bug6799 b/changes/bug6799 new file mode 100644 index 000000000..b50762bb0 --- /dev/null +++ b/changes/bug6799 @@ -0,0 +1,13 @@ + o Major features: + + - Increate the base amount of time that a canonical connection + (one that we have made to a known OR) is allowed to stay open + from a 3 minutes to 15 minutes. This leaks less information + about when circuits have closed, and avoids unnecessary overhead + from renegotiating connections. Part of a fix for ticket 6799. + + - Instead of closing connections at a fixed interval after their + last circuit closed, randomly add up to 50% to each connection's + maximum timout. This makes it harder to tell when the last + circuit closed by looking at when a connection closes. Part of a + fix for ticket 6799. diff --git a/src/or/channeltls.c b/src/or/channeltls.c index d5428c1ab..92e51b21a 100644 --- a/src/or/channeltls.c +++ b/src/or/channeltls.c @@ -1514,7 +1514,7 @@ channel_tls_process_netinfo_cell(cell_t *cell, channel_tls_t *chan) return; } if (tor_addr_eq(&addr, &(chan->conn->real_addr))) { - chan->conn->is_canonical = 1; + connection_or_set_canonical(chan->conn, 1); break; } cp = next; diff --git a/src/or/connection.c b/src/or/connection.c index 4f74a1d04..b967eacf2 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -266,6 +266,8 @@ or_connection_new(int socket_family) or_conn->timestamp_last_added_nonpadding = time(NULL); + connection_or_set_canonical(or_conn, 0); + return or_conn; } diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 8e7cd9ea5..f03b18ddf 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -756,6 +756,45 @@ connection_or_update_token_buckets(smartlist_t *conns, }); } +/** How long do we wait before killing non-canonical OR connections with no + * circuits? In Tor versions up to 0.2.1.25 and 0.2.2.12-alpha, we waited 15 + * minutes before cancelling these connections, which caused fast relays to + * accrue many many idle connections. Hopefully 3-4.5 minutes is low enough + * that it kills most idle connections, without being so low that we cause + * clients to bounce on and off. + * + * For canonical connections, the limit is higher, at 15-22.5 minutes. + * + * For each OR connection, we randomly add up to 50% extra to its idle_timeout + * field, to avoid exposing when exactly the last circuit closed. Since we're + * storing idle_timeout in a uint16_t, don't let these values get higher than + * 12 hours or so without revising connection_or_set_canonical and/or expanding + * idle_timeout. + */ +#define IDLE_OR_CONN_TIMEOUT_NONCANONICAL 180 +#define IDLE_OR_CONN_TIMEOUT_CANONICAL 900 + +/* Mark or_conn as canonical if is_canonical is set, and + * non-canonical otherwise. Adjust idle_timeout accordingly. + */ +void +connection_or_set_canonical(or_connection_t *or_conn, + int is_canonical) +{ + const unsigned int timeout_base = is_canonical ? + IDLE_OR_CONN_TIMEOUT_CANONICAL : IDLE_OR_CONN_TIMEOUT_NONCANONICAL; + + if (bool_eq(is_canonical, or_conn->is_canonical) && + or_conn->idle_timeout != 0) { + /* Don't recalculate an existing idle_timeout unless the canonical + * status changed. */ + return; + } + + or_conn->is_canonical = !! is_canonical; /* force to a 1-bit boolean */ + or_conn->idle_timeout = timeout_base + crypto_rand_int(timeout_base / 2); +} + /** If we don't necessarily know the router we're connecting to, but we * have an addr/port/id_digest, then fill in as much as we can. Start * by checking to see if this describes a router we know. @@ -780,7 +819,7 @@ connection_or_init_conn_from_address(or_connection_t *conn, /* XXXX proposal 186 is making this more complex. For now, a conn is canonical when it uses the _preferred_ address. */ if (tor_addr_eq(&conn->base_.addr, &node_ap.addr)) - conn->is_canonical = 1; + connection_or_set_canonical(conn, 1); if (!started_here) { /* Override the addr/port, so our log messages will make sense. * This is dangerous, since if we ever try looking up a conn by diff --git a/src/or/connection_or.h b/src/or/connection_or.h index 85e68f1a3..896556c03 100644 --- a/src/or/connection_or.h +++ b/src/or/connection_or.h @@ -47,6 +47,8 @@ void connection_or_report_broken_states(int severity, int domain); int connection_tls_start_handshake(or_connection_t *conn, int receiving); int connection_tls_continue_handshake(or_connection_t *conn); +void connection_or_set_canonical(or_connection_t *or_conn, + int is_canonical); int connection_init_or_handshake_state(or_connection_t *conn, int started_here); diff --git a/src/or/main.c b/src/or/main.c index bd23141b9..8a653ca40 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -993,15 +993,6 @@ directory_info_has_arrived(time_t now, int from_cache) consider_testing_reachability(1, 1); } -/** How long do we wait before killing OR connections with no circuits? - * In Tor versions up to 0.2.1.25 and 0.2.2.12-alpha, we waited 15 minutes - * before cancelling these connections, which caused fast relays to accrue - * many many idle connections. Hopefully 3 minutes is low enough that - * it kills most idle connections, without being so low that we cause - * clients to bounce on and off. - */ -#define IDLE_OR_CONN_TIMEOUT 180 - /** Perform regular maintenance tasks for a single connection. This * function gets run once per second per connection by run_scheduled_events. */ @@ -1088,7 +1079,7 @@ run_connection_housekeeping(int i, time_t now) connection_or_close_normally(TO_OR_CONN(conn), 1); } else if (!connection_or_get_num_circuits(or_conn) && now >= or_conn->timestamp_last_added_nonpadding + - IDLE_OR_CONN_TIMEOUT) { + or_conn->idle_timeout) { log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) " "[idle %d].", (int)conn->s,conn->address, conn->port, (int)(now - or_conn->timestamp_last_added_nonpadding)); diff --git a/src/or/or.h b/src/or/or.h index 3eaf3447d..21ee1855c 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1424,7 +1424,10 @@ typedef struct or_connection_t { unsigned int wide_circ_ids:1; uint16_t link_proto; /**< What protocol version are we using? 0 for * "none negotiated yet." */ - + uint16_t idle_timeout; /**< How long can this connection sit with no + * circuits on it before we close it? Based on + * IDLE_CIRCUIT_TIMEOUT_{NON,}CANONICAL and + * on is_canonical, randomized. */ or_handshake_state_t *handshake_state; /**< If we are setting this connection * up, state information to do so. */ -- cgit v1.2.3 From 6557e612959dd9a1df4e85df4a11153be38db3ca Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Fri, 16 May 2014 10:32:31 -0400 Subject: Replace last_added_nonpadding with last_had_circuits The point of the "idle timeout" for connections is to kill the connection a while after it has no more circuits. But using "last added a non-padding cell" as a proxy for that is wrong, since if the last circuit is closed from the other side of the connection, we will not have sent anything on that connection since well before the last circuit closed. This is part of fixing 6799. When applied to 0.2.5, it is also a fix for 12023. --- changes/bug6799 | 15 +++++++++++---- src/or/channel.c | 11 +++++------ src/or/channel.h | 6 ++++-- src/or/circuitlist.c | 10 ++++++++-- src/or/connection.c | 4 ---- src/or/connection_or.c | 5 ----- src/or/main.c | 29 ++++++++++++++++++++++------- src/or/or.h | 2 -- 8 files changed, 50 insertions(+), 32 deletions(-) diff --git a/changes/bug6799 b/changes/bug6799 index b50762bb0..14ba4ae0c 100644 --- a/changes/bug6799 +++ b/changes/bug6799 @@ -1,13 +1,20 @@ o Major features: - - Increate the base amount of time that a canonical connection + - Increase the base amount of time that a canonical connection (one that we have made to a known OR) is allowed to stay open from a 3 minutes to 15 minutes. This leaks less information about when circuits have closed, and avoids unnecessary overhead from renegotiating connections. Part of a fix for ticket 6799. - - Instead of closing connections at a fixed interval after their - last circuit closed, randomly add up to 50% to each connection's - maximum timout. This makes it harder to tell when the last + - Instead of closing connections after they have been idle for a + fixed interval, randomly add up to 50% to each connection's + maximum timeout. This makes it harder to tell when the last circuit closed by looking at when a connection closes. Part of a fix for ticket 6799. + + - Base connection idleness tests on the actual time elapsed since + the connection last had circuits, not on the time when we last + added non-padding. This also makes it harder to tell when the last + circuit closed by looking at when a connection closes. Part of a + fix for ticket 6799. + Incidentally fixes bug 12023; bugfix on 0.2.5.1-alpha. diff --git a/src/or/channel.c b/src/or/channel.c index 1270eace7..ce2d01201 100644 --- a/src/or/channel.c +++ b/src/or/channel.c @@ -117,7 +117,9 @@ HT_GENERATE(channel_idmap, channel_idmap_entry_s, node, channel_idmap_hash, static cell_queue_entry_t * cell_queue_entry_dup(cell_queue_entry_t *q); static void cell_queue_entry_free(cell_queue_entry_t *q, int handed_off); +#if 0 static int cell_queue_entry_is_padding(cell_queue_entry_t *q); +#endif static cell_queue_entry_t * cell_queue_entry_new_fixed(cell_t *cell); static cell_queue_entry_t * @@ -729,7 +731,7 @@ channel_init(channel_t *chan) chan->global_identifier = n_channels_allocated++; /* Init timestamp */ - chan->timestamp_last_added_nonpadding = time(NULL); + chan->timestamp_last_had_circuits = time(NULL); /* Init next_circ_id */ chan->next_circ_id = crypto_rand_int(1 << 15); @@ -1597,6 +1599,7 @@ cell_queue_entry_free(cell_queue_entry_t *q, int handed_off) tor_free(q); } +#if 0 /** * Check whether a cell queue entry is padding; this is a helper function * for channel_write_cell_queue_entry() @@ -1625,6 +1628,7 @@ cell_queue_entry_is_padding(cell_queue_entry_t *q) return 0; } +#endif /** * Allocate a new cell queue entry for a fixed-size cell @@ -1683,11 +1687,6 @@ channel_write_cell_queue_entry(channel_t *chan, cell_queue_entry_t *q) chan->state == CHANNEL_STATE_OPEN || chan->state == CHANNEL_STATE_MAINT); - /* Increment the timestamp unless it's padding */ - if (!cell_queue_entry_is_padding(q)) { - chan->timestamp_last_added_nonpadding = approx_time(); - } - /* Can we send it right out? If so, try */ if (TOR_SIMPLEQ_EMPTY(&chan->outgoing_queue) && chan->state == CHANNEL_STATE_OPEN) { diff --git a/src/or/channel.h b/src/or/channel.h index 2dca81705..be40a30cc 100644 --- a/src/or/channel.h +++ b/src/or/channel.h @@ -187,8 +187,10 @@ struct channel_s { time_t timestamp_recv; /* Cell received from lower layer */ time_t timestamp_xmit; /* Cell sent to lower layer */ - /* Timestamp for relay.c */ - time_t timestamp_last_added_nonpadding; + /** Timestamp for run_connection_housekeeping(). We update this once a + * second when we run housekeeping and find a circuit on this channel, and + * whenever we add a circuit to the channel. */ + time_t timestamp_last_had_circuits; /** Unique ID for measuring direct network status requests;vtunneled ones * come over a circuit_t, which has a dirreq_id field as well, but is a diff --git a/src/or/circuitlist.c b/src/or/circuitlist.c index c7b15e40b..3cb429be1 100644 --- a/src/or/circuitlist.c +++ b/src/or/circuitlist.c @@ -217,8 +217,11 @@ circuit_set_p_circid_chan(or_circuit_t *circ, circid_t id, circuit_set_circid_chan_helper(TO_CIRCUIT(circ), CELL_DIRECTION_IN, id, chan); - if (chan) + if (chan) { tor_assert(bool_eq(circ->p_chan_cells.n, circ->next_active_on_p_chan)); + + chan->timestamp_last_had_circuits = approx_time(); + } } /** Set the n_conn field of a circuit circ, along @@ -230,8 +233,11 @@ circuit_set_n_circid_chan(circuit_t *circ, circid_t id, { circuit_set_circid_chan_helper(circ, CELL_DIRECTION_OUT, id, chan); - if (chan) + if (chan) { tor_assert(bool_eq(circ->n_chan_cells.n, circ->next_active_on_n_chan)); + + chan->timestamp_last_had_circuits = approx_time(); + } } /** Change the state of circ to state, adding it to or removing diff --git a/src/or/connection.c b/src/or/connection.c index b967eacf2..d99a54c15 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -249,8 +249,6 @@ dir_connection_new(int socket_family) /** Allocate and return a new or_connection_t, initialized as by * connection_init(). * - * Set timestamp_last_added_nonpadding to now. - * * Assign a pseudorandom next_circ_id between 0 and 2**15. * * Initialize active_circuit_pqueue. @@ -264,8 +262,6 @@ or_connection_new(int socket_family) time_t now = time(NULL); connection_init(now, TO_CONN(or_conn), CONN_TYPE_OR, socket_family); - or_conn->timestamp_last_added_nonpadding = time(NULL); - connection_or_set_canonical(or_conn, 0); return or_conn; diff --git a/src/or/connection_or.c b/src/or/connection_or.c index f03b18ddf..3d239d480 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -1929,9 +1929,6 @@ connection_or_write_cell_to_buf(const cell_t *cell, or_connection_t *conn) if (conn->base_.state == OR_CONN_STATE_OR_HANDSHAKING_V3) or_handshake_state_record_cell(conn, conn->handshake_state, cell, 0); - - if (cell->command != CELL_PADDING) - conn->timestamp_last_added_nonpadding = approx_time(); } /** Pack a variable-length cell into wire-format, and write it onto @@ -1952,8 +1949,6 @@ connection_or_write_var_cell_to_buf(const var_cell_t *cell, cell->payload_len, TO_CONN(conn)); if (conn->base_.state == OR_CONN_STATE_OR_HANDSHAKING_V3) or_handshake_state_record_var_cell(conn, conn->handshake_state, cell, 0); - if (cell->command != CELL_PADDING) - conn->timestamp_last_added_nonpadding = approx_time(); /* Touch the channel's active timestamp if there is one */ if (conn->chan) diff --git a/src/or/main.c b/src/or/main.c index 8a653ca40..70b1340cb 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -1003,6 +1003,8 @@ run_connection_housekeeping(int i, time_t now) connection_t *conn = smartlist_get(connection_array, i); const or_options_t *options = get_options(); or_connection_t *or_conn; + channel_t *chan = NULL; + int have_any_circuits; int past_keepalive = now >= conn->timestamp_lastwritten + options->KeepalivePeriod; @@ -1050,8 +1052,19 @@ run_connection_housekeeping(int i, time_t now) tor_assert(conn->outbuf); #endif + chan = TLS_CHAN_TO_BASE(or_conn->chan); + tor_assert(chan); + + + if (channel_num_circuits(chan) != 0) { + have_any_circuits = 1; + chan->timestamp_last_had_circuits = now; + } else { + have_any_circuits = 0; + } + if (channel_is_bad_for_new_circs(TLS_CHAN_TO_BASE(or_conn->chan)) && - !connection_or_get_num_circuits(or_conn)) { + ! have_any_circuits) { /* It's bad for new circuits, and has no unmarked circuits on it: * mark it now. */ log_info(LD_OR, @@ -1070,19 +1083,21 @@ run_connection_housekeeping(int i, time_t now) connection_or_close_normally(TO_OR_CONN(conn), 0); } } else if (we_are_hibernating() && - !connection_or_get_num_circuits(or_conn) && + ! have_any_circuits && !connection_get_outbuf_len(conn)) { /* We're hibernating, there's no circuits, and nothing to flush.*/ log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) " "[Hibernating or exiting].", (int)conn->s,conn->address, conn->port); connection_or_close_normally(TO_OR_CONN(conn), 1); - } else if (!connection_or_get_num_circuits(or_conn) && - now >= or_conn->timestamp_last_added_nonpadding + - or_conn->idle_timeout) { + } else if (!have_any_circuits && + now - or_conn->idle_timeout >= chan->timestamp_last_had_circuits) { log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) " - "[idle %d].", (int)conn->s,conn->address, conn->port, - (int)(now - or_conn->timestamp_last_added_nonpadding)); + "[no circuits for %d; timeout %d; %scanonical].", + (int)conn->s, conn->address, conn->port, + (int)(now - chan->timestamp_last_had_circuits), + or_conn->idle_timeout, + or_conn->is_canonical ? "" : "non"); connection_or_close_normally(TO_OR_CONN(conn), 0); } else if ( now >= or_conn->timestamp_lastempty + options->KeepalivePeriod*10 && diff --git a/src/or/or.h b/src/or/or.h index 21ee1855c..3ed9f9f58 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -1432,8 +1432,6 @@ typedef struct or_connection_t { * up, state information to do so. */ time_t timestamp_lastempty; /**< When was the outbuf last completely empty?*/ - time_t timestamp_last_added_nonpadding; /** When did we last add a - * non-padding cell to the outbuf? */ /* bandwidth* and *_bucket only used by ORs in OPEN state: */ int bandwidthrate; /**< Bytes/s added to the bucket. (OPEN ORs only.) */ -- cgit v1.2.3 From bbb8f12ee4efd6f1c2bc8b34dfaf50e314863476 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Wed, 11 Jun 2014 11:52:58 -0400 Subject: Tweak changes entry for 6799 --- changes/bug6799 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/changes/bug6799 b/changes/bug6799 index 14ba4ae0c..72b6519a2 100644 --- a/changes/bug6799 +++ b/changes/bug6799 @@ -1,8 +1,8 @@ o Major features: - Increase the base amount of time that a canonical connection - (one that we have made to a known OR) is allowed to stay open - from a 3 minutes to 15 minutes. This leaks less information + (one that we have made to a known OR) is allowed to stay idle + from 3 minutes to 15 minutes. This leaks less information about when circuits have closed, and avoids unnecessary overhead from renegotiating connections. Part of a fix for ticket 6799. @@ -14,7 +14,7 @@ - Base connection idleness tests on the actual time elapsed since the connection last had circuits, not on the time when we last - added non-padding. This also makes it harder to tell when the last - circuit closed by looking at when a connection closes. Part of a - fix for ticket 6799. + added non-padding. This change also makes it harder for an + observer to tell when the last circuit closed by looking at when + a connection closes. Part of a fix for ticket 6799. Incidentally fixes bug 12023; bugfix on 0.2.5.1-alpha. -- cgit v1.2.3