diff options
Diffstat (limited to 'src/or/cpuworker.c')
-rw-r--r-- | src/or/cpuworker.c | 430 |
1 files changed, 321 insertions, 109 deletions
diff --git a/src/or/cpuworker.c b/src/or/cpuworker.c index 0255227e7..61f9faa39 100644 --- a/src/or/cpuworker.c +++ b/src/or/cpuworker.c @@ -1,6 +1,6 @@ /* Copyright (c) 2003-2004, Roger Dingledine. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson. - * Copyright (c) 2007-2012, The Tor Project, Inc. */ + * Copyright (c) 2007-2013, The Tor Project, Inc. */ /* See LICENSE for licensing information */ /** @@ -11,9 +11,10 @@ * * Right now, we only use this for processing onionskins. **/ - #include "or.h" #include "buffers.h" +#include "channel.h" +#include "channeltls.h" #include "circuitbuild.h" #include "circuitlist.h" #include "config.h" @@ -29,10 +30,7 @@ #define MIN_CPUWORKERS 1 /** The tag specifies which circuit this onionskin was from. */ -#define TAG_LEN 10 -/** How many bytes are sent from the cpuworker back to tor? */ -#define LEN_ONION_RESPONSE \ - (1+TAG_LEN+ONIONSKIN_REPLY_LEN+CPATH_KEY_MATERIAL_LEN) +#define TAG_LEN 12 /** How many cpuworkers we have running right now. */ static int num_cpuworkers=0; @@ -68,22 +66,80 @@ connection_cpu_finished_flushing(connection_t *conn) /** Pack global_id and circ_id; set *tag to the result. (See note on * cpuworker_main for wire format.) */ static void -tag_pack(char *tag, uint64_t conn_id, circid_t circ_id) +tag_pack(uint8_t *tag, uint64_t chan_id, circid_t circ_id) { /*XXXX RETHINK THIS WHOLE MESS !!!! !NM NM NM NM*/ - set_uint64(tag, conn_id); - set_uint16(tag+8, circ_id); + /*XXXX DOUBLEPLUSTHIS!!!! AS AS AS AS*/ + set_uint64(tag, chan_id); + set_uint32(tag+8, circ_id); } /** Unpack <b>tag</b> into addr, port, and circ_id. */ static void -tag_unpack(const char *tag, uint64_t *conn_id, circid_t *circ_id) +tag_unpack(const uint8_t *tag, uint64_t *chan_id, circid_t *circ_id) { - *conn_id = get_uint64(tag); - *circ_id = get_uint16(tag+8); + *chan_id = get_uint64(tag); + *circ_id = get_uint32(tag+8); } +/** Magic numbers to make sure our cpuworker_requests don't grow any + * mis-framing bugs. */ +#define CPUWORKER_REQUEST_MAGIC 0xda4afeed +#define CPUWORKER_REPLY_MAGIC 0x5eedf00d + +/** A request sent to a cpuworker. */ +typedef struct cpuworker_request_t { + /** Magic number; must be CPUWORKER_REQUEST_MAGIC. */ + uint32_t magic; + /** Opaque tag to identify the job */ + uint8_t tag[TAG_LEN]; + /** Task code. Must be one of CPUWORKER_TASK_* */ + uint8_t task; + + /** Flag: Are we timing this request? */ + unsigned timed : 1; + /** If we're timing this request, when was it sent to the cpuworker? */ + struct timeval started_at; + + /** A create cell for the cpuworker to process. */ + create_cell_t create_cell; + + /* Turn the above into a tagged union if needed. */ +} cpuworker_request_t; + +/** A reply sent by a cpuworker. */ +typedef struct cpuworker_reply_t { + /** Magic number; must be CPUWORKER_REPLY_MAGIC. */ + uint32_t magic; + /** Opaque tag to identify the job; matches the request's tag.*/ + uint8_t tag[TAG_LEN]; + /** True iff we got a successful request. */ + uint8_t success; + + /** Are we timing this request? */ + unsigned int timed : 1; + /** What handshake type was the request? (Used for timing) */ + uint16_t handshake_type; + /** When did we send the request to the cpuworker? */ + struct timeval started_at; + /** Once the cpuworker received the request, how many microseconds did it + * take? (This shouldn't overflow; 4 billion micoseconds is over an hour, + * and we'll never have an onion handshake that takes so long.) */ + uint32_t n_usec; + + /** Output of processing a create cell + * + * @{ + */ + /** The created cell to send back. */ + created_cell_t created_cell; + /** The keys to use on this circuit. */ + uint8_t keys[CPATH_KEY_MATERIAL_LEN]; + /** Input to use for authenticating introduce1 cells. */ + uint8_t rend_auth_material[DIGEST_LEN]; +} cpuworker_reply_t; + /** Called when the onion key has changed and we need to spawn new * cpuworkers. Close all currently idle cpuworkers, and mark the last * rotation time as now. @@ -122,6 +178,112 @@ connection_cpu_reached_eof(connection_t *conn) return 0; } +/** Indexed by handshake type: how many onionskins have we processed and + * counted of that type? */ +static uint64_t onionskins_n_processed[MAX_ONION_HANDSHAKE_TYPE+1]; +/** Indexed by handshake type, corresponding to the onionskins counted in + * onionskins_n_processed: how many microseconds have we spent in cpuworkers + * processing that kind of onionskin? */ +static uint64_t onionskins_usec_internal[MAX_ONION_HANDSHAKE_TYPE+1]; +/** Indexed by handshake type, corresponding to onionskins counted in + * onionskins_n_processed: how many microseconds have we spent waiting for + * cpuworkers to give us answers for that kind of onionskin? + */ +static uint64_t onionskins_usec_roundtrip[MAX_ONION_HANDSHAKE_TYPE+1]; + +/** If any onionskin takes longer than this, we clip them to this + * time. (microseconds) */ +#define MAX_BELIEVABLE_ONIONSKIN_DELAY (2*1000*1000) + +static tor_weak_rng_t request_sample_rng = TOR_WEAK_RNG_INIT; + +/** Return true iff we'd like to measure a handshake of type + * <b>onionskin_type</b>. Call only from the main thread. */ +static int +should_time_request(uint16_t onionskin_type) +{ + /* If we've never heard of this type, we shouldn't even be here. */ + if (onionskin_type > MAX_ONION_HANDSHAKE_TYPE) + return 0; + /* Measure the first N handshakes of each type, to ensure we have a + * sample */ + if (onionskins_n_processed[onionskin_type] < 4096) + return 1; + /** Otherwise, measure with P=1/128. We avoid doing this for every + * handshake, since the measurement itself can take a little time. */ + return tor_weak_random_one_in_n(&request_sample_rng, 128); +} + +/** Return an estimate of how many microseconds we will need for a single + * cpuworker to to process <b>n_requests</b> onionskins of type + * <b>onionskin_type</b>. */ +uint64_t +estimated_usec_for_onionskins(uint32_t n_requests, uint16_t onionskin_type) +{ + if (onionskin_type > MAX_ONION_HANDSHAKE_TYPE) /* should be impossible */ + return 1000 * (uint64_t)n_requests; + if (PREDICT_UNLIKELY(onionskins_n_processed[onionskin_type] < 100)) { + /* Until we have 100 data points, just asssume everything takes 1 msec. */ + return 1000 * (uint64_t)n_requests; + } else { + /* This can't overflow: we'll never have more than 500000 onionskins + * measured in onionskin_usec_internal, and they won't take anything near + * 1 sec each, and we won't have anything like 1 million queued + * onionskins. But that's 5e5 * 1e6 * 1e6, which is still less than + * UINT64_MAX. */ + return (onionskins_usec_internal[onionskin_type] * n_requests) / + onionskins_n_processed[onionskin_type]; + } +} + +/** Compute the absolute and relative overhead of using the cpuworker + * framework for onionskins of type <b>onionskin_type</b>.*/ +static int +get_overhead_for_onionskins(uint32_t *usec_out, double *frac_out, + uint16_t onionskin_type) +{ + uint64_t overhead; + + *usec_out = 0; + *frac_out = 0.0; + + if (onionskin_type > MAX_ONION_HANDSHAKE_TYPE) /* should be impossible */ + return -1; + if (onionskins_n_processed[onionskin_type] == 0 || + onionskins_usec_internal[onionskin_type] == 0 || + onionskins_usec_roundtrip[onionskin_type] == 0) + return -1; + + overhead = onionskins_usec_roundtrip[onionskin_type] - + onionskins_usec_internal[onionskin_type]; + + *usec_out = (uint32_t)(overhead / onionskins_n_processed[onionskin_type]); + *frac_out = U64_TO_DBL(overhead) / onionskins_usec_internal[onionskin_type]; + + return 0; +} + +/** If we've measured overhead for onionskins of type <b>onionskin_type</b>, + * log it. */ +void +cpuworker_log_onionskin_overhead(int severity, int onionskin_type, + const char *onionskin_type_name) +{ + uint32_t overhead; + double relative_overhead; + int r; + + r = get_overhead_for_onionskins(&overhead, &relative_overhead, + onionskin_type); + if (!overhead || r<0) + return; + + log_fn(severity, LD_OR, + "%s onionskins have averaged %u usec overhead (%.2f%%) in " + "cpuworker code ", + onionskin_type_name, (unsigned)overhead, relative_overhead*100); +} + /** Called when we get data from a cpuworker. If the answer is not complete, * wait for a complete answer. If the answer is complete, * process it as appropriate. @@ -129,12 +291,9 @@ connection_cpu_reached_eof(connection_t *conn) int connection_cpu_process_inbuf(connection_t *conn) { - char success; - char buf[LEN_ONION_RESPONSE]; - uint64_t conn_id; + uint64_t chan_id; circid_t circ_id; - connection_t *tmp_conn; - or_connection_t *p_conn = NULL; + channel_t *p_chan = NULL; circuit_t *circ; tor_assert(conn); @@ -144,25 +303,51 @@ connection_cpu_process_inbuf(connection_t *conn) return 0; if (conn->state == CPUWORKER_STATE_BUSY_ONION) { - if (connection_get_inbuf_len(conn) < LEN_ONION_RESPONSE) + cpuworker_reply_t rpl; + if (connection_get_inbuf_len(conn) < sizeof(cpuworker_reply_t)) return 0; /* not yet */ - tor_assert(connection_get_inbuf_len(conn) == LEN_ONION_RESPONSE); - - connection_fetch_from_buf(&success,1,conn); - connection_fetch_from_buf(buf,LEN_ONION_RESPONSE-1,conn); - + tor_assert(connection_get_inbuf_len(conn) == sizeof(cpuworker_reply_t)); + + connection_fetch_from_buf((void*)&rpl,sizeof(cpuworker_reply_t),conn); + + tor_assert(rpl.magic == CPUWORKER_REPLY_MAGIC); + + if (rpl.timed && rpl.success && + rpl.handshake_type <= MAX_ONION_HANDSHAKE_TYPE) { + /* Time how long this request took. The handshake_type check should be + needless, but let's leave it in to be safe. */ + struct timeval tv_end, tv_diff; + int64_t usec_roundtrip; + tor_gettimeofday(&tv_end); + timersub(&tv_end, &rpl.started_at, &tv_diff); + usec_roundtrip = ((int64_t)tv_diff.tv_sec)*1000000 + tv_diff.tv_usec; + if (usec_roundtrip >= 0 && + usec_roundtrip < MAX_BELIEVABLE_ONIONSKIN_DELAY) { + ++onionskins_n_processed[rpl.handshake_type]; + onionskins_usec_internal[rpl.handshake_type] += rpl.n_usec; + onionskins_usec_roundtrip[rpl.handshake_type] += usec_roundtrip; + if (onionskins_n_processed[rpl.handshake_type] >= 500000) { + /* Scale down every 500000 handshakes. On a busy server, that's + * less impressive than it sounds. */ + onionskins_n_processed[rpl.handshake_type] /= 2; + onionskins_usec_internal[rpl.handshake_type] /= 2; + onionskins_usec_roundtrip[rpl.handshake_type] /= 2; + } + } + } /* parse out the circ it was talking about */ - tag_unpack(buf, &conn_id, &circ_id); + tag_unpack(rpl.tag, &chan_id, &circ_id); circ = NULL; - tmp_conn = connection_get_by_global_id(conn_id); - if (tmp_conn && !tmp_conn->marked_for_close && - tmp_conn->type == CONN_TYPE_OR) - p_conn = TO_OR_CONN(tmp_conn); + log_debug(LD_OR, + "Unpacking cpuworker reply, chan_id is " U64_FORMAT + ", circ_id is %u", + U64_PRINTF_ARG(chan_id), (unsigned)circ_id); + p_chan = channel_find_by_global_id(chan_id); - if (p_conn) - circ = circuit_get_by_circid_orconn(circ_id, p_conn); + if (p_chan) + circ = circuit_get_by_circid_channel(circ_id, p_chan); - if (success == 0) { + if (rpl.success == 0) { log_debug(LD_OR, "decoding onionskin failed. " "(Old key or bad software.) Closing."); @@ -180,8 +365,10 @@ connection_cpu_process_inbuf(connection_t *conn) goto done_processing; } tor_assert(! CIRCUIT_IS_ORIGIN(circ)); - if (onionskin_answer(TO_OR_CIRCUIT(circ), CELL_CREATED, buf+TAG_LEN, - buf+TAG_LEN+ONIONSKIN_REPLY_LEN) < 0) { + if (onionskin_answer(TO_OR_CIRCUIT(circ), + &rpl.created_cell, + (const char*)rpl.keys, + rpl.rend_auth_material) < 0) { log_warn(LD_OR,"onionskin_answer failed. Closing."); circuit_mark_for_close(circ, END_CIRC_REASON_INTERNAL); goto done_processing; @@ -208,32 +395,21 @@ connection_cpu_process_inbuf(connection_t *conn) * Read and writes from fdarray[1]. Reads requests, writes answers. * * Request format: - * Task type [1 byte, always CPUWORKER_TASK_ONION] - * Opaque tag TAG_LEN - * Onionskin challenge ONIONSKIN_CHALLENGE_LEN + * cpuworker_request_t. * Response format: - * Success/failure [1 byte, boolean.] - * Opaque tag TAG_LEN - * Onionskin challenge ONIONSKIN_REPLY_LEN - * Negotiated keys KEY_LEN*2+DIGEST_LEN*2 - * - * (Note: this _should_ be by addr/port, since we're concerned with specific - * connections, not with routers (where we'd use identity).) + * cpuworker_reply_t */ static void cpuworker_main(void *data) { - char question[ONIONSKIN_CHALLENGE_LEN]; - uint8_t question_type; + /* For talking to the parent thread/process */ tor_socket_t *fdarray = data; tor_socket_t fd; /* variables for onion processing */ - char keys[CPATH_KEY_MATERIAL_LEN]; - char reply_to_proxy[ONIONSKIN_REPLY_LEN]; - char buf[LEN_ONION_RESPONSE]; - char tag[TAG_LEN]; - crypto_pk_t *onion_key = NULL, *last_onion_key = NULL; + server_onion_keys_t onion_keys; + cpuworker_request_t req; + cpuworker_reply_t rpl; fd = fdarray[1]; /* this side is ours */ #ifndef TOR_IS_MULTITHREADED @@ -244,68 +420,85 @@ cpuworker_main(void *data) #endif tor_free(data); - dup_onion_keys(&onion_key, &last_onion_key); + setup_server_onion_keys(&onion_keys); for (;;) { - ssize_t r; - - if ((r = recv(fd, (void *)&question_type, 1, 0)) != 1) { -// log_fn(LOG_ERR,"read type failed. Exiting."); - if (r == 0) { - log_info(LD_OR, - "CPU worker exiting because Tor process closed connection " - "(either rotated keys or died)."); - } else { - log_info(LD_OR, - "CPU worker exiting because of error on connection to Tor " - "process."); - log_info(LD_OR,"(Error on %d was %s)", - fd, tor_socket_strerror(tor_socket_errno(fd))); - } + if (read_all(fd, (void *)&req, sizeof(req), 1) != sizeof(req)) { + log_info(LD_OR, "read request failed. Exiting."); goto end; } - tor_assert(question_type == CPUWORKER_TASK_ONION); - - if (read_all(fd, tag, TAG_LEN, 1) != TAG_LEN) { - log_err(LD_BUG,"read tag failed. Exiting."); - goto end; - } - - if (read_all(fd, question, ONIONSKIN_CHALLENGE_LEN, 1) != - ONIONSKIN_CHALLENGE_LEN) { - log_err(LD_BUG,"read question failed. Exiting."); - goto end; - } - - if (question_type == CPUWORKER_TASK_ONION) { - if (onion_skin_server_handshake(question, onion_key, last_onion_key, - reply_to_proxy, keys, CPATH_KEY_MATERIAL_LEN) < 0) { + tor_assert(req.magic == CPUWORKER_REQUEST_MAGIC); + + memset(&rpl, 0, sizeof(rpl)); + + if (req.task == CPUWORKER_TASK_ONION) { + const create_cell_t *cc = &req.create_cell; + created_cell_t *cell_out = &rpl.created_cell; + struct timeval tv_start, tv_end; + int n; + rpl.timed = req.timed; + rpl.started_at = req.started_at; + rpl.handshake_type = cc->handshake_type; + if (req.timed) + tor_gettimeofday(&tv_start); + n = onion_skin_server_handshake(cc->handshake_type, + cc->onionskin, cc->handshake_len, + &onion_keys, + cell_out->reply, + rpl.keys, CPATH_KEY_MATERIAL_LEN, + rpl.rend_auth_material); + if (n < 0) { /* failure */ log_debug(LD_OR,"onion_skin_server_handshake failed."); - *buf = 0; /* indicate failure in first byte */ - memcpy(buf+1,tag,TAG_LEN); - /* send all zeros as answer */ - memset(buf+1+TAG_LEN, 0, LEN_ONION_RESPONSE-(1+TAG_LEN)); + memset(&rpl, 0, sizeof(rpl)); + memcpy(rpl.tag, req.tag, TAG_LEN); + rpl.success = 0; } else { /* success */ log_debug(LD_OR,"onion_skin_server_handshake succeeded."); - buf[0] = 1; /* 1 means success */ - memcpy(buf+1,tag,TAG_LEN); - memcpy(buf+1+TAG_LEN,reply_to_proxy,ONIONSKIN_REPLY_LEN); - memcpy(buf+1+TAG_LEN+ONIONSKIN_REPLY_LEN,keys,CPATH_KEY_MATERIAL_LEN); + memcpy(rpl.tag, req.tag, TAG_LEN); + cell_out->handshake_len = n; + switch (cc->cell_type) { + case CELL_CREATE: + cell_out->cell_type = CELL_CREATED; break; + case CELL_CREATE2: + cell_out->cell_type = CELL_CREATED2; break; + case CELL_CREATE_FAST: + cell_out->cell_type = CELL_CREATED_FAST; break; + default: + tor_assert(0); + goto end; + } + rpl.success = 1; + } + rpl.magic = CPUWORKER_REPLY_MAGIC; + if (req.timed) { + struct timeval tv_diff; + int64_t usec; + tor_gettimeofday(&tv_end); + timersub(&tv_end, &tv_start, &tv_diff); + usec = ((int64_t)tv_diff.tv_sec)*1000000 + tv_diff.tv_usec; + if (usec < 0 || usec > MAX_BELIEVABLE_ONIONSKIN_DELAY) + rpl.n_usec = MAX_BELIEVABLE_ONIONSKIN_DELAY; + else + rpl.n_usec = (uint32_t) usec; } - if (write_all(fd, buf, LEN_ONION_RESPONSE, 1) != LEN_ONION_RESPONSE) { + if (write_all(fd, (void*)&rpl, sizeof(rpl), 1) != sizeof(rpl)) { log_err(LD_BUG,"writing response buf failed. Exiting."); goto end; } log_debug(LD_OR,"finished writing response."); + } else if (req.task == CPUWORKER_TASK_SHUTDOWN) { + log_info(LD_OR,"Clean shutdown: exiting"); + goto end; } + memwipe(&req, 0, sizeof(req)); + memwipe(&rpl, 0, sizeof(req)); } end: - if (onion_key) - crypto_pk_free(onion_key); - if (last_onion_key) - crypto_pk_free(last_onion_key); + memwipe(&req, 0, sizeof(req)); + memwipe(&rpl, 0, sizeof(req)); + release_server_onion_keys(&onion_keys); tor_close_socket(fd); crypto_thread_cleanup(); spawn_exit(); @@ -342,13 +535,16 @@ spawn_cpuworker(void) conn = connection_new(CONN_TYPE_CPUWORKER, AF_UNIX); - set_socket_nonblocking(fd); - /* set up conn so it's got all the data we need to remember */ conn->s = fd; conn->address = tor_strdup("localhost"); tor_addr_make_unspec(&conn->addr); + if (set_socket_nonblocking(fd) == -1) { + connection_free(conn); /* this closes fd */ + return -1; + } + if (connection_add(conn) < 0) { /* no space, forget it */ log_warn(LD_NET,"connection_add for cpuworker failed. Giving up."); connection_free(conn); /* this closes fd */ @@ -368,6 +564,7 @@ static void spawn_enough_cpuworkers(void) { int num_cpuworkers_needed = get_num_cpus(get_options()); + int reseed = 0; if (num_cpuworkers_needed < MIN_CPUWORKERS) num_cpuworkers_needed = MIN_CPUWORKERS; @@ -380,7 +577,11 @@ spawn_enough_cpuworkers(void) return; } num_cpuworkers++; + reseed++; } + + if (reseed) + crypto_seed_weak_rng(&request_sample_rng); } /** Take a pending task from the queue and assign it to 'cpuworker'. */ @@ -388,7 +589,7 @@ static void process_pending_task(connection_t *cpuworker) { or_circuit_t *circ; - char *onionskin = NULL; + create_cell_t *onionskin = NULL; tor_assert(cpuworker); @@ -441,12 +642,13 @@ cull_wedged_cpuworkers(void) */ int assign_onionskin_to_cpuworker(connection_t *cpuworker, - or_circuit_t *circ, char *onionskin) + or_circuit_t *circ, + create_cell_t *onionskin) { - char qbuf[1]; - char tag[TAG_LEN]; + cpuworker_request_t req; time_t now = approx_time(); static time_t last_culled_cpuworkers = 0; + int should_time; /* Checking for wedged cpuworkers requires a linear search over all * connections, so let's do it only once a minute. @@ -475,26 +677,36 @@ assign_onionskin_to_cpuworker(connection_t *cpuworker, tor_assert(cpuworker); - if (!circ->p_conn) { - log_info(LD_OR,"circ->p_conn gone. Failing circ."); + if (!circ->p_chan) { + log_info(LD_OR,"circ->p_chan gone. Failing circ."); tor_free(onionskin); return -1; } - tag_pack(tag, circ->p_conn->_base.global_identifier, + + should_time = should_time_request(onionskin->handshake_type); + memset(&req, 0, sizeof(req)); + req.magic = CPUWORKER_REQUEST_MAGIC; + tag_pack(req.tag, circ->p_chan->global_identifier, circ->p_circ_id); + req.timed = should_time; cpuworker->state = CPUWORKER_STATE_BUSY_ONION; /* touch the lastwritten timestamp, since that's how we check to * see how long it's been since we asked the question, and sometimes * we check before the first call to connection_handle_write(). */ - cpuworker->timestamp_lastwritten = time(NULL); + cpuworker->timestamp_lastwritten = now; num_cpuworkers_busy++; - qbuf[0] = CPUWORKER_TASK_ONION; - connection_write_to_buf(qbuf, 1, cpuworker); - connection_write_to_buf(tag, sizeof(tag), cpuworker); - connection_write_to_buf(onionskin, ONIONSKIN_CHALLENGE_LEN, cpuworker); + req.task = CPUWORKER_TASK_ONION; + memcpy(&req.create_cell, onionskin, sizeof(create_cell_t)); + tor_free(onionskin); + + if (should_time) + tor_gettimeofday(&req.started_at); + + connection_write_to_buf((void*)&req, sizeof(req), cpuworker); + memwipe(&req, 0, sizeof(req)); } return 0; } |