diff options
author | Nick Mathewson <nickm@torproject.org> | 2005-09-22 06:34:29 +0000 |
---|---|---|
committer | Nick Mathewson <nickm@torproject.org> | 2005-09-22 06:34:29 +0000 |
commit | cdc912714eeee2d72f72e1f4446daaa8804e12db (patch) | |
tree | f209e15c65bd3cce88e382b3338e61bd9ba6c21b | |
parent | 66930319473ff63796672f26d52890f60666e770 (diff) | |
download | tor-cdc912714eeee2d72f72e1f4446daaa8804e12db.tar tor-cdc912714eeee2d72f72e1f4446daaa8804e12db.tar.gz |
I love the smell of C in the morning. Make router-download rules smarter (download more so long as we dont duplicate existing requests; relaunch at staggered intervals); relaunch one a minute or on failure; reset 60 minutes; always open 3 requests if we can; add authority opinion to networkstatus; make naming rule correct. There is a remaining bug where we retry servers too quickly; We need to look at that harder.
svn:r5110
-rw-r--r-- | doc/TODO | 24 | ||||
-rw-r--r-- | doc/dir-spec.txt | 1 | ||||
-rw-r--r-- | src/or/directory.c | 37 | ||||
-rw-r--r-- | src/or/dirserv.c | 11 | ||||
-rw-r--r-- | src/or/main.c | 14 | ||||
-rw-r--r-- | src/or/or.h | 3 | ||||
-rw-r--r-- | src/or/routerlist.c | 183 |
7 files changed, 205 insertions, 68 deletions
@@ -153,14 +153,15 @@ R - check reachability as soon as you hear about a new server o Alice sets descriptor status from network-status o Implement o Use -N - Routerdesc download changes - - Refactor combined-status to be its own type. - - Change rule from "do not launch new connections when one exists" to +N . Routerdesc download changes + o Refactor combined-status to be its own type. + o Change rule from "do not launch new connections when one exists" to "do not request any fingerprint that we're currently requesting." - - Launch connections every minute, or whenever a download fails - - Retry failed routerdescs after 0, 1, 5, 10 minutes. - - Mirrors retry harder and more often. - - Reset failure count every 60 minutes + o Launch connections every minute, or whenever a download fails + o Retry failed routerdescs after 0, 1, 5, 10 minutes. + o Mirrors retry harder and more often. (0, 0, 1, 1, 2, 5, and 15) + o Reset failure count every 60 minutes + o Drop fallback to download-all. Also, always split download. - Only use a routerdesc if you recognize its hash. - Must defer till dirservers are upgraded to latest. - Of course, authdirservers must not do this. @@ -170,12 +171,11 @@ N - Routerdesc download changes - If we have a routerdesc for Bob, and he says, "I'm 0.1.0.x", don't fetch a new one if it was published in the last 2 hours. - How does this interact with the 'recognized hash' rule? - - Drop fallback to download-all. Also, always split download. - - Downgrade new directory events from notice to info + . Downgrade new directory events from notice to info - Clients should estimate their skew as median of skew from directory connections over last N seconds. - Call dirport_is_reachable from somewhere else. - - Networkstatus should list who's an authority. + o Networkstatus should list who's an authority. - Add nickname element to dirserver line. Log this along with IP:Port. - Warn when using non-default directory servers. - When giving up on a non-finished dir request, log how many bytes @@ -194,10 +194,10 @@ N . Naming and validation: o Authdirs need to be able to decline to include baased on IP range and key. o Not all authdirs name. - - Change naming rule: N->K iff any naming authdir says N->K, + o Change naming rule: N->K iff any naming authdir says N->K, and none says N->K' or N'->K. - Clients choose names based on network-status options. - - Names are remembered in client state + - Names are remembered in client state (?) - Okay to have two valid servers with same nickname, but not two named servers with same nickname. Update logic. diff --git a/doc/dir-spec.txt b/doc/dir-spec.txt index 1c3247ea0..fadec9bfc 100644 --- a/doc/dir-spec.txt +++ b/doc/dir-spec.txt @@ -120,6 +120,7 @@ $Id$ "Running" if the router is currently usable. "Named" if the router's identity-nickname mapping is canonical. "Valid" if the router has been 'validated'. + "Authority" if the router is a directory authority. The "r" entry for each router must appear first and is required. The 's" entry is optional. Unrecognized flags, or extra elements on the diff --git a/src/or/directory.c b/src/or/directory.c index 8dade1a44..8ca14c5f5 100644 --- a/src/or/directory.c +++ b/src/or/directory.c @@ -320,8 +320,8 @@ connection_dir_download_networkstatus_failed(connection_t *conn) static void connection_dir_download_routerdesc_failed(connection_t *conn) { - /* Try again. */ - /*XXXX011 plays poorly with multiple conns. */ + /* Try again. No need to increment the failure count for routerdescs, since + * it's not their fault.*/ update_router_descriptor_downloads(time(NULL)); } @@ -1558,6 +1558,8 @@ dir_routerdesc_download_failed(smartlist_t *failed) { char digest[DIGEST_LEN]; local_routerstatus_t *rs; + time_t now = time(NULL); + int server = server_mode(get_options()) && get_options()->DirPort; SMARTLIST_FOREACH(failed, const char *, cp, { base16_decode(digest, DIGEST_LEN, cp, strlen(cp)); @@ -1565,7 +1567,38 @@ dir_routerdesc_download_failed(smartlist_t *failed) if (!rs || rs->n_download_failures >= MAX_ROUTERDESC_DOWNLOAD_FAILURES) continue; ++rs->n_download_failures; + if (server) { + switch (rs->n_download_failures) { + case 1: rs->next_attempt_at = 0; break; + case 2: rs->next_attempt_at = 0; break; + case 3: rs->next_attempt_at = now+60; break; + case 4: rs->next_attempt_at = now+60; break; + case 5: rs->next_attempt_at = now+60*2; break; + case 6: rs->next_attempt_at = now+60*5; break; + case 7: rs->next_attempt_at = now+60*15; break; + default: rs->next_attempt_at = TIME_MAX; break; + } + } else { + switch (rs->n_download_failures) { + case 1: rs->next_attempt_at = 0; break; + case 2: rs->next_attempt_at = now+60; break; + case 3: rs->next_attempt_at = now+60*5; break; + case 4: rs->next_attempt_at = now+60*10; break; + default: rs->next_attempt_at = TIME_MAX; break; + } + } + if (rs->next_attempt_at == 0) + log_fn(LOG_NOTICE, "%s failed %d time(s); I'll try again immediately.", + cp, (int)rs->n_download_failures); + else if (rs->next_attempt_at < TIME_MAX) + log_fn(LOG_NOTICE, "%s failed %d time(s); I'll try again in %d seconds.", + cp, (int)rs->n_download_failures, (int)(rs->next_attempt_at-now)); + else + log_fn(LOG_NOTICE, "%s failed %d time(s); Giving up for a while.", + cp, (int)rs->n_download_failures); }); + + update_router_descriptor_downloads(time(NULL)); } /* DOCDOC */ diff --git a/src/or/dirserv.c b/src/or/dirserv.c index 7416af010..a52935757 100644 --- a/src/or/dirserv.c +++ b/src/or/dirserv.c @@ -1215,11 +1215,11 @@ generate_v2_networkstatus(void) int f_stable = !router_is_unreliable(ri, 1, 0); int f_fast = !router_is_unreliable(ri, 0, 1); int f_running; + int f_authority = router_digest_is_trusted_dir(ri->identity_digest); int f_named = naming && ri->is_named; int f_valid = ri->is_verified; char identity64[BASE64_DIGEST_LEN+1]; char digest64[BASE64_DIGEST_LEN+1]; - if (options->AuthoritativeDir) { ri->is_running = dirserv_thinks_router_is_reachable(ri, now); } @@ -1235,7 +1235,7 @@ generate_v2_networkstatus(void) if (tor_snprintf(outp, endp-outp, "r %s %s %s %s %s %d %d\n" - "s%s%s%s%s%s%s\n", + "s%s%s%s%s%s%s%s\n", ri->nickname, identity64, digest64, @@ -1243,15 +1243,16 @@ generate_v2_networkstatus(void) ipaddr, ri->or_port, ri->dir_port, + f_authority?" Authority":"", f_exit?" Exit":"", - f_stable?" Stable":"", f_fast?" Fast":"", - f_running?" Running":"", f_named?" Named":"", + f_stable?" Stable":"", + f_running?" Running":"", f_valid?" Valid":"")<0) { log_fn(LOG_WARN, "Unable to print router status."); goto done; - } + } outp += strlen(outp); }); diff --git a/src/or/main.c b/src/or/main.c index 801742f6f..869424e96 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -95,6 +95,8 @@ static char* nt_strerror(uint32_t errnum); #define FORCE_REGENERATE_DESCRIPTOR_INTERVAL 18*60*60 /* 18 hours */ #define CHECK_DESCRIPTOR_INTERVAL 60 /* one minute */ #define BUF_SHRINK_INTERVAL 60 /* one minute */ +#define DESCRIPTOR_RETRY_INTERVAL 60 +#define DESCRIPTOR_FAILURE_RESET_INTERVAL 60*60 #define TIMEOUT_UNTIL_UNREACHABILITY_COMPLAINT (20*60) /* 20 minutes */ /********* END VARIABLES ************/ @@ -628,6 +630,8 @@ run_scheduled_events(time_t now) static time_t time_to_check_listeners = 0; static time_t time_to_check_descriptor = 0; static time_t time_to_shrink_buffers = 0; + static time_t time_to_try_getting_descriptors = 0; + static time_t time_to_reset_descriptor_failures = 0; or_options_t *options = get_options(); int i; @@ -653,6 +657,16 @@ run_scheduled_events(time_t now) router_upload_dir_desc_to_dirservers(0); } + if (time_to_try_getting_descriptors < now) { + update_router_descriptor_downloads(now); + time_to_try_getting_descriptors = now + DESCRIPTOR_RETRY_INTERVAL; + } + + if (time_to_reset_descriptor_failures < now) { + router_reset_descriptor_download_failures(); + time_to_try_getting_descriptors = now + DESCRIPTOR_FAILURE_RESET_INTERVAL; + } + /** 1b. Every MAX_SSL_KEY_LIFETIME seconds, we change our TLS context. */ if (!last_rotated_certificate) last_rotated_certificate = now; diff --git a/src/or/or.h b/src/or/or.h index 485fc171f..092def5aa 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -807,6 +807,8 @@ typedef struct local_routerstatus_t { routerstatus_t status; uint8_t n_download_failures; /**< Number of failures trying to download the * most recent descriptor. */ + time_t next_attempt_at; /**< When should we try this descriptor again? */ + unsigned int should_download:1; /**< DOCDOC */ } local_routerstatus_t; /*XXXX011 make this configurable? */ @@ -2130,6 +2132,7 @@ void routers_update_status_from_networkstatus(smartlist_t *routers, smartlist_t *router_list_superseded(void); int router_have_minimum_dir_info(void); void networkstatus_list_update_recent(time_t now); +void router_reset_descriptor_download_failures(void); /********************************* routerparse.c ************************/ diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 5f9b5ad07..cf48ad151 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -2032,8 +2032,6 @@ routers_update_all_from_networkstatus(void) helper_nodes_set_status_from_directory(); - update_router_descriptor_downloads(time(NULL)); - if (!have_warned_about_old_version) { int n_recent = 0; int n_recommended = 0; @@ -2136,6 +2134,8 @@ routerstatus_list_update_from_networkstatus(time_t now) int *index, *size; networkstatus_t **networkstatus; smartlist_t *result; + strmap_t *name_map; + char conflict[DIGEST_LEN]; networkstatus_list_update_recent(now); @@ -2173,6 +2173,30 @@ routerstatus_list_update_from_networkstatus(time_t now) ++n_recent; } + name_map = strmap_new(); + memset(conflict, 0xff, sizeof(conflict)); + for (i = 0; i < n_statuses; ++i) { + if (!networkstatus[i]->binds_names) + continue; + SMARTLIST_FOREACH(networkstatus[i]->entries, routerstatus_t *, rs, + { + const char *other_digest; + if (!rs->is_named) + continue; + other_digest = strmap_get_lc(name_map, rs->nickname); + if (!other_digest) + strmap_set_lc(name_map, rs->nickname, rs->identity_digest); + else if (memcmp(other_digest, rs->identity_digest, DIGEST_LEN) && + other_digest != conflict) { + /*XXXX011 rate-limit this?*/ + log_fn(LOG_WARN, + "Naming authorities disagree about which key goes with %s.", + rs->nickname); + strmap_set_lc(name_map, rs->nickname, conflict); + } + }); + } + result = smartlist_create(); /* Iterate through all of the sorted routerstatus lists in step. @@ -2226,7 +2250,11 @@ routerstatus_list_update_from_networkstatus(time_t now) the_name = rs->nickname; if (!strcasecmp(rs->nickname, the_name)) { ++n_named; - } else { + } else if (strcmp(the_name,"**mismatch**")) { + char hd[HEX_DIGEST_LEN+1]; + base16_encode(hd, HEX_DIGEST_LEN+1, rs->identity_digest, DIGEST_LEN); + log_fn(LOG_WARN, "Naming authorities disagree about nicknames for $%s", + hd); the_name = "**mismatch**"; } } @@ -2239,6 +2267,7 @@ routerstatus_list_update_from_networkstatus(time_t now) memcpy(&rs_out->status, most_recent, sizeof(routerstatus_t)); if ((rs_old = router_get_combined_status_by_digest(lowest))) { rs_out->n_download_failures = rs_old->n_download_failures; + rs_out->next_attempt_at = rs_old->next_attempt_at; } smartlist_add(result, rs_out); log_fn(LOG_DEBUG, "Router '%s' is listed by %d/%d directories, " @@ -2247,8 +2276,12 @@ routerstatus_list_update_from_networkstatus(time_t now) rs_out->status.nickname, n_listing, n_statuses, n_named, n_naming, n_valid, n_statuses, n_running, n_recent); - rs_out->status.is_named = the_name && strcmp(the_name, "**mismatch**") && - n_named > n_naming/2; + rs_out->status.is_named = 0; + if (the_name && strcmp(the_name, "**mismatch**") && n_named > 0) { + const char *d = strmap_get_lc(name_map, the_name); + if (d && d != conflict) + rs_out->status.is_named = 1; + } if (rs_out->status.is_named) strlcpy(rs_out->status.nickname, the_name, sizeof(rs_out->status.nickname)); rs_out->status.is_valid = n_valid > n_statuses/2; @@ -2262,6 +2295,7 @@ routerstatus_list_update_from_networkstatus(time_t now) tor_free(networkstatus); tor_free(index); tor_free(size); + strmap_free(name_map, NULL); networkstatus_list_has_changed = 0; routerstatus_list_has_changed = 1; @@ -2290,8 +2324,10 @@ routers_update_status_from_networkstatus(smartlist_t *routers, int reset_failure if (!rs) continue; - if (reset_failures) + if (reset_failures) { rs->n_download_failures = 0; + rs->next_attempt_at = 0; + } if (!namingdir) router->is_named = rs->status.is_named; @@ -2318,39 +2354,76 @@ routers_update_status_from_networkstatus(smartlist_t *routers, int reset_failure static smartlist_t * router_list_downloadable(void) { + int n_conns, i, n_downloadable = 0; + connection_t **carray; smartlist_t *superseded = smartlist_create(); - strmap_iter_t *iter; + smartlist_t *downloading; time_t now = time(NULL); - strmap_t *status_map = NULL; - char fp[HEX_DIGEST_LEN+1]; if (!routerstatus_list) return superseded; + get_connection_array(&carray, &n_conns); + routerstatus_list_update_from_networkstatus(now); - status_map = strmap_new(); SMARTLIST_FOREACH(routerstatus_list, local_routerstatus_t *, rs, { - base16_encode(fp, sizeof(fp), rs->status.identity_digest, DIGEST_LEN); - strmap_set(status_map, fp, rs); + if (rs->next_attempt_at < now) { + rs->should_download = 1; + ++n_downloadable; + } else { + rs->should_download = 0; + } }); + downloading = smartlist_create(); + for (i = 0; i < n_conns; ++i) { + connection_t *conn = carray[i]; + if (conn->type == CONN_TYPE_DIR && + conn->purpose == DIR_PURPOSE_FETCH_SERVERDESC && + !conn->marked_for_close) { + if (!strcmpstart(conn->requested_resource, "all")) + n_downloadable = 0; + dir_split_resource_into_fingerprints(conn->requested_resource, + downloading, NULL); + } + } + if (n_downloadable) { + SMARTLIST_FOREACH(downloading, const char *, dl, + { + char d[DIGEST_LEN]; + local_routerstatus_t *rs; + base16_decode(d, DIGEST_LEN, dl, strlen(dl)); + if ((rs = router_get_combined_status_by_digest(d)) && rs->should_download) { + rs->should_download = 0; + --n_downloadable; + } + }); + } + SMARTLIST_FOREACH(downloading, char *, cp, tor_free(cp)); + smartlist_free(downloading); + if (!n_downloadable) + return superseded; + if (routerlist) { SMARTLIST_FOREACH(routerlist->routers, routerinfo_t *, ri, { - routerstatus_t *rs; - base16_encode(fp, sizeof(fp), ri->identity_digest, DIGEST_LEN); - if (!(rs = strmap_get(status_map, fp))) { + local_routerstatus_t *rs; + if (!(rs = router_get_combined_status_by_digest(ri->identity_digest)) || + !rs->should_download) { // log_fn(LOG_NOTICE, "No status for %s", fp); continue; } - /*XXXX001 reset max_routerdesc_download_failures somewhere! */ - if (!memcmp(ri->signed_descriptor_digest,rs->descriptor_digest,DIGEST_LEN)|| - rs->published_on <= ri->published_on) { + /* Change this "or" to be an "and" once dirs generate hashes right. + * XXXXX. NM */ + if (!memcmp(ri->signed_descriptor_digest, rs->status.descriptor_digest, + DIGEST_LEN) || + rs->status.published_on <= ri->published_on) { /* Same digest, or earlier. No need to download it. */ // log_fn(LOG_NOTICE, "Up-to-date status for %s", fp); - strmap_remove(status_map, fp); + rs->should_download = 0; + --n_downloadable; } #if 0 else { @@ -2369,24 +2442,17 @@ router_list_downloadable(void) }); } - /* For all remaining entries in statuses, we either have no descriptor, or - * our descriptor is out of date. */ - for (iter = strmap_iter_init(status_map); !strmap_iter_done(iter); - iter = strmap_iter_next(status_map, iter)) { - const char *key; - void *val; - local_routerstatus_t *rs; - strmap_iter_get(iter, &key, &val); - rs = val; - if (rs->n_download_failures >= MAX_ROUTERDESC_DOWNLOAD_FAILURES) - continue; - smartlist_add(superseded, tor_strdup(key)); - } - - strmap_free(status_map, NULL); + if (!n_downloadable) + return superseded; - /* Send the keys in sorted order. */ - smartlist_sort_strings(superseded); + SMARTLIST_FOREACH(routerstatus_list, local_routerstatus_t *, rs, + { + if (rs->should_download) { + char *fp = tor_malloc(HEX_DIGEST_LEN+1); + base16_encode(fp, HEX_DIGEST_LEN+1, rs->status.identity_digest, DIGEST_LEN); + smartlist_add(superseded, fp); + } + }); return superseded; } @@ -2402,53 +2468,60 @@ router_list_downloadable(void) void update_router_descriptor_downloads(time_t now) { -#define AVG_ROUTER_LEN 1300 -#define DL_PER_REQUEST 128 +#define MAX_DL_PER_REQUEST 128 +#define MIN_REQUESTS 3 smartlist_t *downloadable = NULL; int get_all = 0; - if (connection_get_by_type_purpose(CONN_TYPE_DIR, - DIR_PURPOSE_FETCH_SERVERDESC)) - return; - if (!networkstatus_list || smartlist_len(networkstatus_list)<2) get_all = 1; if (!get_all) { /* Check whether we aren't just better off downloading everybody. */ - int excess; downloadable = router_list_downloadable(); +#if 0 + /* Actually, asking a single source for "all" routers can be horribly + * dangerous. Let's disable this. + */ +#define AVG_ROUTER_LEN 1300 + int excess; excess = smartlist_len(routerstatus_list)-smartlist_len(downloadable); if (smartlist_len(downloadable)*(HEX_DIGEST_LEN+1) > excess*AVG_ROUTER_LEN) { get_all = 1; } +#endif } if (get_all) { log_fn(LOG_NOTICE, "Launching request for all routers"); directory_get_from_dirserver(DIR_PURPOSE_FETCH_SERVERDESC,"all.z",1); } else if (smartlist_len(downloadable)) { - int i, j, n; - size_t r_len = DL_PER_REQUEST*(HEX_DIGEST_LEN+1)+16; + int i, j, n, n_per_request; + size_t r_len = MAX_DL_PER_REQUEST*(HEX_DIGEST_LEN+1)+16; char *resource = tor_malloc(r_len); n = smartlist_len(downloadable); - log_fn(LOG_NOTICE, "Launching request for %d routers", n); - for (i=0; i < n; i += DL_PER_REQUEST) { + n_per_request = (n+MIN_REQUESTS-1) / MIN_REQUESTS; + if (n_per_request > MAX_DL_PER_REQUEST) + n_per_request = MAX_DL_PER_REQUEST; + if (n_per_request < 1) + n_per_request = 1; + for (i=0; i < n; i += n_per_request) { char *cp = resource; memcpy(resource, "fp/", 3); cp = resource + 3; - for (j=i; j < i+DL_PER_REQUEST && j < n; ++j) { + for (j=i; j < i+n_per_request && j < n; ++j) { memcpy(cp, smartlist_get(downloadable, j), HEX_DIGEST_LEN); cp += HEX_DIGEST_LEN; *cp++ = '+'; } memcpy(cp-1, ".z", 3); + log_fn(LOG_NOTICE, "Launching request for %d routers", j-i); directory_get_from_dirserver(DIR_PURPOSE_FETCH_SERVERDESC,resource,1); } tor_free(resource); } else { - log_fn(LOG_NOTICE, "No routers to download."); + log_fn(LOG_DEBUG, "No routers to download."); } if (downloadable) { @@ -2473,3 +2546,15 @@ router_have_minimum_dir_info(void) return smartlist_len(routerlist->routers) > (avg/4); } +/** DOCDOC */ +void +router_reset_descriptor_download_failures(void) +{ + if (!routerstatus_list) + return; + SMARTLIST_FOREACH(routerstatus_list, local_routerstatus_t *, rs, + { + rs->n_download_failures = 0; + rs->next_attempt_at = 0; + }); +} |