From e1e5c1b3aba96afbd6fa3b5ece262720dd45a80e Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Sat, 12 Dec 2009 08:32:46 +0100 Subject: Fix bridge statistics. Fix statistics on client numbers by country as seen by bridges that were broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour intervals instead of variable 12-to-48-hour intervals. --- ChangeLog | 3 + doc/spec/control-spec.txt | 10 +-- doc/spec/dir-spec.txt | 31 ++++++++- src/or/control.c | 22 ++---- src/or/geoip.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++ src/or/main.c | 27 +++++++- src/or/or.h | 8 ++- src/or/router.c | 48 +++---------- 8 files changed, 251 insertions(+), 65 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2cb238393..56c8958c9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -25,6 +25,9 @@ Changes in version 0.2.2.7-alpha - 2009-??-?? - Do not segfault when writing buffer stats when we haven't observed a single circuit to report about. Found by Fabian Lanze. Bugfix on 0.2.2.1-alpha. + - Fix statistics on client numbers by country as seen by bridges that + were broken in 0.2.2.1-alpha. Also switch to reporting full 24-hour + intervals instead of variable 12-to-48-hour intervals. Changes in version 0.2.2.6-alpha - 2009-11-19 diff --git a/doc/spec/control-spec.txt b/doc/spec/control-spec.txt index 33d465847..73f0b020a 100644 --- a/doc/spec/control-spec.txt +++ b/doc/spec/control-spec.txt @@ -1637,11 +1637,11 @@ TimeStarted is a quoted string indicating when the reported summary counts from (in GMT). - The CountrySummary keyword has as its argument a comma-separated - set of "countrycode=count" pairs. For example, - 650-CLIENTS_SEEN TimeStarted="Thu Dec 25 23:50:43 EST 2008" - 650 CountrySummary=us=16,de=8,uk=8 -[XXX Matt Edman informs me that the time format above is wrong. -RD] + The CountrySummary keyword has as its argument a comma-separated, + possibly empty set of "countrycode=count" pairs. For example (without + linebreak), + 650-CLIENTS_SEEN TimeStarted="2008-12-25 23:50:43" + CountrySummary=us=16,de=8,uk=8 4.1.15. New consensus networkstatus has arrived. diff --git a/doc/spec/dir-spec.txt b/doc/spec/dir-spec.txt index 5eedbd534..0bfc758ee 100644 --- a/doc/spec/dir-spec.txt +++ b/doc/spec/dir-spec.txt @@ -627,8 +627,8 @@ As documented in 2.1 above. See migration notes in section 2.2.1. - "geoip-start" YYYY-MM-DD HH:MM:SS NL - "geoip-client-origins" CC=N,CC=N,... NL + ("geoip-start" YYYY-MM-DD HH:MM:SS NL) + ("geoip-client-origins" CC=N,CC=N,... NL) Only generated by bridge routers (see blocking.pdf), and only when they have been configured with a geoip database. @@ -641,6 +641,33 @@ "geoip-start" is the time at which we began collecting geoip statistics. + "geoip-start" and "geoip-client-origins" have been replaced by + "bridge-stats-end" and "bridge-stats-ips" in 0.2.2.4-alpha. The + reason is that the measurement interval with "geoip-stats" as + determined by subtracting "geoip-start" from "published" could + have had a variable length, whereas the measurement interval in + 0.2.2.4-alpha and later is set to be exactly 24 hours long. In + order to clearly distinguish the new measurement intervals from + the old ones, the new keywords have been introduced. + + "bridge-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL + [At most once.] + + YYYY-MM-DD HH:MM:SS defines the end of the included measurement + interval of length NSEC seconds (86400 seconds by default). + + A "bridge-stats-end" line, as well as any other "bridge-*" line, + is only added when the relay has been running as a bridge for at + least 24 hours. + + "bridge-ips" CC=N,CC=N,... NL + [At most once.] + + List of mappings from two-letter country codes to the number of + unique IP addresses that have connected from that country to the + bridge and which are no known relays, rounded up to the nearest + multiple of 8. + "dirreq-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL [At most once.] diff --git a/src/or/control.c b/src/or/control.c index c3567bdaf..d3ed8587b 100644 --- a/src/or/control.c +++ b/src/or/control.c @@ -1755,21 +1755,10 @@ getinfo_helper_events(control_connection_t *control_conn, "information", question); } } else if (!strcmp(question, "status/clients-seen")) { - char geoip_start[ISO_TIME_LEN+1]; - size_t answer_len; - char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL)); - - if (!geoip_summary) + char *bridge_stats = geoip_get_bridge_stats_controller(time(NULL)); + if (!bridge_stats) return -1; - - answer_len = strlen("TimeStarted=\"\" CountrySummary=") + - ISO_TIME_LEN + strlen(geoip_summary) + 1; - *answer = tor_malloc(answer_len); - format_iso_time(geoip_start, geoip_get_history_start()); - tor_snprintf(*answer, answer_len, - "TimeStarted=\"%s\" CountrySummary=%s", - geoip_start, geoip_summary); - tor_free(geoip_summary); + *answer = bridge_stats; } else { return 0; } @@ -3846,10 +3835,9 @@ control_event_bootstrap_problem(const char *warn, int reason) * from recently. Send a copy to the controller in case it wants to * display it for the user. */ void -control_event_clients_seen(const char *timestarted, const char *countries) +control_event_clients_seen(const char *controller_str) { send_control_event(EVENT_CLIENTS_SEEN, 0, - "650 CLIENTS_SEEN TimeStarted=\"%s\" CountrySummary=%s\r\n", - timestarted, countries); + "650 CLIENTS_SEEN %s\r\n", controller_str); } diff --git a/src/or/geoip.c b/src/or/geoip.c index a57e1fb72..a43daaf06 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -1081,6 +1081,173 @@ geoip_dirreq_stats_write(time_t now) tor_free(data_v3); } +/** Start time of bridge stats. */ +static time_t start_of_bridge_stats_interval; + +/** Initialize bridge stats. */ +void +geoip_bridge_stats_init(time_t now) +{ + start_of_bridge_stats_interval = now; +} + +/** Parse the bridge statistics as they are written to extra-info + * descriptors for being returned to controller clients. Return the + * controller string if successful, or NULL otherwise. */ +static char * +parse_bridge_stats_controller(const char *stats_str, time_t now) +{ + char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1], + *controller_str, *eos; + const char *stats_end_line = "bridge-stats-end", + *ips_line = "bridge-ips", *tmp; + time_t stats_end_time; + size_t controller_len; + int seconds; + tor_assert(stats_str); + + /* Parse timestamp and number of seconds from + "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */ + tmp = strstr(stats_str, stats_end_line); + if (!tmp || strlen(tmp) < strlen(stats_end_line) + 1 + ISO_TIME_LEN + 6) + return NULL; + strlcpy(stats_end_str, tmp + strlen(stats_end_line) + 1, + sizeof(stats_end_str)); + if (parse_iso_time(stats_end_str, &stats_end_time) < 0) + return NULL; + if (stats_end_time < now - (25*60*60) || + stats_end_time > now + (1*60*60)) + return NULL; + seconds = (int)strtol(tmp + strlen(stats_end_line) + 1 + + ISO_TIME_LEN + 2, &eos, 10); + if (!eos || seconds < 23*60*60) + return NULL; + format_iso_time(stats_start_str, stats_end_time - seconds); + + /* Parse: "bridge-ips CC=N,CC=N,..." */ + tmp = strstr(tmp, ips_line); + if (!tmp || strlen(tmp) < strlen(ips_line)) + return NULL; + if (strlen(tmp) > strlen(ips_line) + 2) + tmp += strlen(ips_line) + 1; + else + tmp = ""; + controller_len = strlen("TimeStarted=\"\" CountrySummary=") + + ISO_TIME_LEN + strlen(tmp) + 1; + controller_str = tor_malloc(controller_len); + if (tor_snprintf(controller_str, controller_len, + "TimeStarted=\"%s\" CountrySummary=%s", + stats_start_str, tmp) < 0) { + tor_free(controller_str); + return NULL; + } + return controller_str; +} + +/** Most recent bridge statistics formatted to be written to extra-info + * descriptors. */ +static char *bridge_stats_extrainfo = NULL; + +/** Most recent bridge statistics formatted to be returned to controller + * clients. */ +static char *bridge_stats_controller = NULL; + +/** Write bridge statistics to $DATADIR/stats/bridge-stats and return + * when we should next try to write statistics. */ +time_t +geoip_bridge_stats_write(time_t now) +{ + char *statsdir = NULL, *filename = NULL, *data = NULL, + written[ISO_TIME_LEN+1], *out = NULL, *controller_str; + size_t len; + + /* If we changed from relay to bridge recently, adapt starting time + * of current measurements. */ + if (start_of_bridge_stats_interval < client_history_starts) + start_of_bridge_stats_interval = client_history_starts; + + /* Check if 24 hours have passed since starting measurements. */ + if (now < start_of_bridge_stats_interval + + DIR_ENTRY_RECORD_USAGE_RETAIN_IPS) + return start_of_bridge_stats_interval + + DIR_ENTRY_RECORD_USAGE_RETAIN_IPS; + + /* Discard all items in the client history that are too old. */ + geoip_remove_old_clients(start_of_bridge_stats_interval); + + statsdir = get_datadir_fname("stats"); + if (check_private_dir(statsdir, CPD_CREATE) < 0) + goto done; + filename = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats"); + data = geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT); + format_iso_time(written, now); + len = strlen("bridge-stats-end (999999 s)\nbridge-ips \n") + + ISO_TIME_LEN + (data ? strlen(data) : 0) + 1; + out = tor_malloc(len); + if (tor_snprintf(out, len, "bridge-stats-end %s (%u s)\nbridge-ips %s\n", + written, (unsigned) (now - start_of_bridge_stats_interval), + data ? data : "") < 0) + goto done; + write_str_to_file(filename, out, 0); + controller_str = parse_bridge_stats_controller(out, now); + if (!controller_str) + goto done; + start_of_bridge_stats_interval = now; + tor_free(bridge_stats_extrainfo); + tor_free(bridge_stats_controller); + bridge_stats_extrainfo = out; + out = NULL; + bridge_stats_controller = controller_str; + control_event_clients_seen(controller_str); + done: + tor_free(filename); + tor_free(statsdir); + tor_free(data); + tor_free(out); + return start_of_bridge_stats_interval + + DIR_ENTRY_RECORD_USAGE_RETAIN_IPS; +} + +/** Try to load the most recent bridge statistics from disk, unless we + * have finished a measurement interval lately. */ +static void +load_bridge_stats(time_t now) +{ + char *fname, *contents, *controller_str; + if (bridge_stats_extrainfo) + return; + fname = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats"); + contents = read_file_to_str(fname, 0, NULL); + if (contents) { + controller_str = parse_bridge_stats_controller(contents, now); + if (controller_str) { + bridge_stats_extrainfo = contents; + bridge_stats_controller = controller_str; + } else { + tor_free(contents); + } + } + tor_free(fname); +} + +/** Return most recent bridge statistics for inclusion in extra-info + * descriptors, or NULL if we don't have recent bridge statistics. */ +char * +geoip_get_bridge_stats_extrainfo(time_t now) +{ + load_bridge_stats(now); + return bridge_stats_extrainfo; +} + +/** Return most recent bridge statistics to be returned to controller + * clients, or NULL if we don't have recent bridge statistics. */ +char * +geoip_get_bridge_stats_controller(time_t now) +{ + load_bridge_stats(now); + return bridge_stats_controller; +} + /** Start time of entry stats. */ static time_t start_of_entry_stats_interval; diff --git a/src/or/main.c b/src/or/main.c index 4dc182efe..101c08429 100644 --- a/src/or/main.c +++ b/src/or/main.c @@ -832,6 +832,8 @@ run_scheduled_events(time_t now) static time_t time_to_recheck_bandwidth = 0; static time_t time_to_check_for_expired_networkstatus = 0; static time_t time_to_write_stats_files = 0; + static time_t time_to_write_bridge_stats = 0; + static int should_init_bridge_stats = 1; static time_t time_to_retry_dns_init = 0; or_options_t *options = get_options(); int i; @@ -966,7 +968,8 @@ run_scheduled_events(time_t now) if (options->CellStatistics || options->DirReqStatistics || options->EntryStatistics || options->ExitPortStatistics) { if (!time_to_write_stats_files) { - /* Initialize stats. */ + /* Initialize stats. We're doing this here and not in options_act, + * so that we know exactly when the 24 hours interval ends. */ if (options->CellStatistics) rep_hist_buffer_stats_init(now); if (options->DirReqStatistics) @@ -998,6 +1001,28 @@ run_scheduled_events(time_t now) } } + /* 1h. Check whether we should write bridge statistics to disk. + */ + if (should_record_bridge_info(options)) { + if (time_to_write_bridge_stats < now) { + if (should_init_bridge_stats) { + /* (Re-)initialize bridge statistics. */ + geoip_bridge_stats_init(now); + time_to_write_bridge_stats = now + WRITE_STATS_INTERVAL; + should_init_bridge_stats = 0; + } else { + /* Possibly write bridge statistics to disk and ask when to write + * them next time. */ + time_to_write_bridge_stats = geoip_bridge_stats_write( + time_to_write_bridge_stats); + } + } + } else if (!should_init_bridge_stats) { + /* Bridge mode was turned off. Ensure that stats are re-initialized + * next time bridge mode is turned on. */ + should_init_bridge_stats = 1; + } + /* Remove old information from rephist and the rend cache. */ if (time_to_clean_caches < now) { rep_history_clean(now - options->RephistTrackTime); diff --git a/src/or/or.h b/src/or/or.h index 736e66a52..85f549fc4 100644 --- a/src/or/or.h +++ b/src/or/or.h @@ -3630,8 +3630,7 @@ typedef enum { void control_event_bootstrap(bootstrap_status_t status, int progress); void control_event_bootstrap_problem(const char *warn, int reason); -void control_event_clients_seen(const char *timestarted, - const char *countries); +void control_event_clients_seen(const char *controller_str); #ifdef CONTROL_PRIVATE /* Used only by control.c and test.c */ @@ -4089,6 +4088,10 @@ void geoip_dirreq_stats_init(time_t now); void geoip_dirreq_stats_write(time_t now); void geoip_entry_stats_init(time_t now); void geoip_entry_stats_write(time_t now); +void geoip_bridge_stats_init(time_t now); +time_t geoip_bridge_stats_write(time_t now); +char *geoip_get_bridge_stats_extrainfo(time_t); +char *geoip_get_bridge_stats_controller(time_t); /********************************* hibernate.c **********************/ @@ -4749,7 +4752,6 @@ int router_dump_router_to_string(char *s, size_t maxlen, routerinfo_t *router, crypto_pk_env_t *ident_key); int extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, crypto_pk_env_t *ident_key); -char *extrainfo_get_client_geoip_summary(time_t); int is_legal_nickname(const char *s); int is_legal_nickname_or_hexdigest(const char *s); int is_legal_hexdigest(const char *s); diff --git a/src/or/router.c b/src/or/router.c index 8b6d1d9bb..7362e60f6 100644 --- a/src/or/router.c +++ b/src/or/router.c @@ -1882,6 +1882,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, int result; size_t len; static int write_stats_to_extrainfo = 1; + time_t now = time(NULL); base16_encode(identity, sizeof(identity), extrainfo->cache_info.identity_digest, DIGEST_LEN); @@ -1896,7 +1897,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, if (options->ExtraInfoStatistics && write_stats_to_extrainfo) { char *contents = NULL; - time_t now = time(NULL); log_info(LD_GENERAL, "Adding stats to extra-info descriptor."); if (options->DirReqStatistics && load_stats_file("stats"PATH_SEPARATOR"dirreq-stats", @@ -1953,18 +1953,16 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, return -1; if (should_record_bridge_info(options)) { - char *geoip_summary = extrainfo_get_client_geoip_summary(time(NULL)); - if (geoip_summary) { - char geoip_start[ISO_TIME_LEN+1]; - format_iso_time(geoip_start, geoip_get_history_start()); - result = tor_snprintf(s+strlen(s), maxlen-strlen(s), - "geoip-start-time %s\n" - "geoip-client-origins %s\n", - geoip_start, geoip_summary); - control_event_clients_seen(geoip_start, geoip_summary); - tor_free(geoip_summary); - if (result<0) - return -1; + char *bridge_stats = geoip_get_bridge_stats_extrainfo(now); + if (bridge_stats) { + size_t pos = strlen(s); + if (strlcpy(s + pos, bridge_stats, maxlen - strlen(s)) != + strlen(bridge_stats)) { + log_warn(LD_DIR, "Could not write bridge-stats to extra-info " + "descriptor."); + s[pos] = '\0'; + } + tor_free(bridge_stats); } } @@ -2013,30 +2011,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo, return (int)strlen(s)+1; } -/** Wrapper function for geoip_get_client_history(). It first discards - * any items in the client history that are too old -- it dumps anything - * more than 48 hours old, but it only considers whether to dump at most - * once per 48 hours, so we aren't too precise to an observer (see also - * r14780). - */ -char * -extrainfo_get_client_geoip_summary(time_t now) -{ - static time_t last_purged_at = 0; - int geoip_purge_interval = - (get_options()->DirReqStatistics || get_options()->EntryStatistics) ? - DIR_ENTRY_RECORD_USAGE_RETAIN_IPS : 48*60*60; - if (now > last_purged_at+geoip_purge_interval) { - /* (Note that this also discards items in the client history with - * action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter - * because bridge and directory stats are independent. Keep in mind - * for future extensions, though.) */ - geoip_remove_old_clients(now-geoip_purge_interval); - last_purged_at = now; - } - return geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT); -} - /** Return true iff s is a legally valid server nickname. */ int is_legal_nickname(const char *s) -- cgit v1.2.3 From 498c293afe3570210e964456e6cf5a8f315dbfc6 Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Thu, 17 Dec 2009 11:20:31 +0100 Subject: Make changes to latest bridge-stats fixes as suggested by Nick. --- src/or/connection_or.c | 1 - src/or/geoip.c | 62 +++++++++++++++++++++++++++++++++----------------- src/or/rephist.c | 4 ++-- 3 files changed, 43 insertions(+), 24 deletions(-) diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 4eaf74277..60c5c7fca 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -80,7 +80,6 @@ connection_or_clear_identity_map(void) } }); - digestmap_free(orconn_identity_map, NULL); orconn_identity_map = NULL; } diff --git a/src/or/geoip.c b/src/or/geoip.c index a43daaf06..535e9afff 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -988,7 +988,7 @@ geoip_dirreq_stats_write(time_t now) statsdir = get_datadir_fname("stats"); if (check_private_dir(statsdir, CPD_CREATE) < 0) goto done; - filename = get_datadir_fname("stats"PATH_SEPARATOR"dirreq-stats"); + filename = get_datadir_fname2("stats", "dirreq-stats"); data_v2 = geoip_get_client_history_dirreq(now, GEOIP_CLIENT_NETWORKSTATUS_V2); data_v3 = geoip_get_client_history_dirreq(now, @@ -1098,9 +1098,12 @@ static char * parse_bridge_stats_controller(const char *stats_str, time_t now) { char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1], - *controller_str, *eos; - const char *stats_end_line = "bridge-stats-end", - *ips_line = "bridge-ips", *tmp; + *controller_str, *eos, *eol, *summary; + const char *stats_end_first = "bridge-stats-end ", + *stats_end_middle = "\nbridge-stats-end ", + *ips_first = "bridge-ips", + *ips_middle = "\nbridge-ips", + *tmp; time_t stats_end_time; size_t controller_len; int seconds; @@ -1108,39 +1111,56 @@ parse_bridge_stats_controller(const char *stats_str, time_t now) /* Parse timestamp and number of seconds from "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */ - tmp = strstr(stats_str, stats_end_line); - if (!tmp || strlen(tmp) < strlen(stats_end_line) + 1 + ISO_TIME_LEN + 6) + if (!strncmp(stats_str, stats_end_first, strlen(stats_end_first))) { + tmp = stats_str + strlen(stats_end_first); + } else { + tmp = strstr(stats_str, stats_end_middle); + if (!tmp) + return NULL; + tmp += strlen(stats_end_middle); + } + if (strlen(tmp) < ISO_TIME_LEN + 6) return NULL; - strlcpy(stats_end_str, tmp + strlen(stats_end_line) + 1, - sizeof(stats_end_str)); + strlcpy(stats_end_str, tmp, sizeof(stats_end_str)); if (parse_iso_time(stats_end_str, &stats_end_time) < 0) return NULL; if (stats_end_time < now - (25*60*60) || stats_end_time > now + (1*60*60)) return NULL; - seconds = (int)strtol(tmp + strlen(stats_end_line) + 1 + - ISO_TIME_LEN + 2, &eos, 10); + seconds = (int)strtol(tmp + ISO_TIME_LEN + 2, &eos, 10); if (!eos || seconds < 23*60*60) return NULL; format_iso_time(stats_start_str, stats_end_time - seconds); /* Parse: "bridge-ips CC=N,CC=N,..." */ - tmp = strstr(tmp, ips_line); - if (!tmp || strlen(tmp) < strlen(ips_line)) - return NULL; - if (strlen(tmp) > strlen(ips_line) + 2) - tmp += strlen(ips_line) + 1; + if (!strncmp(stats_str, ips_first, strlen(ips_first))) { + tmp = stats_str + strlen(ips_first); + } else { + tmp = strstr(stats_str, ips_middle); + if (!tmp) + return NULL; + tmp += strlen(ips_middle); + } + if (strlen(tmp) > 2 && !strncmp(tmp, " ", 1)) + tmp += 1; else tmp = ""; + summary = tor_malloc(strlen(tmp) + 1); + strlcpy(summary, tmp, strlen(tmp) + 1); + eol = strstr(summary, "\n"); + if (eol) + eol[0] = '\0'; controller_len = strlen("TimeStarted=\"\" CountrySummary=") + - ISO_TIME_LEN + strlen(tmp) + 1; + strlen(summary) + 42; controller_str = tor_malloc(controller_len); if (tor_snprintf(controller_str, controller_len, "TimeStarted=\"%s\" CountrySummary=%s", - stats_start_str, tmp) < 0) { + stats_start_str, summary) < 0) { tor_free(controller_str); + tor_free(summary); return NULL; } + tor_free(summary); return controller_str; } @@ -1178,11 +1198,11 @@ geoip_bridge_stats_write(time_t now) statsdir = get_datadir_fname("stats"); if (check_private_dir(statsdir, CPD_CREATE) < 0) goto done; - filename = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats"); + filename = get_datadir_fname2("stats", "bridge-stats"); data = geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT); format_iso_time(written, now); len = strlen("bridge-stats-end (999999 s)\nbridge-ips \n") + - ISO_TIME_LEN + (data ? strlen(data) : 0) + 1; + ISO_TIME_LEN + (data ? strlen(data) : 0) + 42; out = tor_malloc(len); if (tor_snprintf(out, len, "bridge-stats-end %s (%u s)\nbridge-ips %s\n", written, (unsigned) (now - start_of_bridge_stats_interval), @@ -1216,7 +1236,7 @@ load_bridge_stats(time_t now) char *fname, *contents, *controller_str; if (bridge_stats_extrainfo) return; - fname = get_datadir_fname("stats"PATH_SEPARATOR"bridge-stats"); + fname = get_datadir_fname2("stats", "bridge-stats"); contents = read_file_to_str(fname, 0, NULL); if (contents) { controller_str = parse_bridge_stats_controller(contents, now); @@ -1277,7 +1297,7 @@ geoip_entry_stats_write(time_t now) statsdir = get_datadir_fname("stats"); if (check_private_dir(statsdir, CPD_CREATE) < 0) goto done; - filename = get_datadir_fname("stats"PATH_SEPARATOR"entry-stats"); + filename = get_datadir_fname2("stats", "entry-stats"); data = geoip_get_client_history_dirreq(now, GEOIP_CLIENT_CONNECT); format_iso_time(written, now); out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND, diff --git a/src/or/rephist.c b/src/or/rephist.c index 36318c755..681588bff 100644 --- a/src/or/rephist.c +++ b/src/or/rephist.c @@ -1377,7 +1377,7 @@ rep_hist_exit_stats_write(time_t now) statsdir = get_datadir_fname("stats"); if (check_private_dir(statsdir, CPD_CREATE) < 0) goto done; - filename = get_datadir_fname("stats"PATH_SEPARATOR"exit-stats"); + filename = get_datadir_fname2("stats", "exit-stats"); format_iso_time(t, now); log_info(LD_HIST, "Writing exit port statistics to disk for period " "ending at %s.", t); @@ -2734,7 +2734,7 @@ rep_hist_buffer_stats_write(time_t now) statsdir = get_datadir_fname("stats"); if (check_private_dir(statsdir, CPD_CREATE) < 0) goto done; - filename = get_datadir_fname("stats"PATH_SEPARATOR"buffer-stats"); + filename = get_datadir_fname2("stats", "buffer-stats"); out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND, 0600, &open_file); if (!out) -- cgit v1.2.3 From 235f1e1a967cb070c7246617461f58f0413394b3 Mon Sep 17 00:00:00 2001 From: Nick Mathewson Date: Thu, 17 Dec 2009 18:29:37 -0500 Subject: Refactor out the 'find string at start of any line' logic. We do this in too many places throughout the code; it's time to start clamping down. Also, refactor Karsten's patch to use strchr-then-strndup, rather than malloc-then-strlcpy-then-strchr-then-clear. --- src/common/util.c | 23 +++++++++++++++++++++++ src/common/util.h | 2 ++ src/or/geoip.c | 50 +++++++++++++++++++++----------------------------- src/test/test_util.c | 31 +++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 29 deletions(-) diff --git a/src/common/util.c b/src/common/util.c index e70a9ea5f..6177a3e73 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -682,6 +682,29 @@ find_whitespace_eos(const char *s, const char *eos) return s; } +/** Return the the first occurrence of needle in haystack that + * occurs at the start of a line (that is, at the beginning of haystack + * or immediately after a newline). Return NULL if no such string is found. + */ +const char * +find_str_at_start_of_line(const char *haystack, const char *needle) +{ + size_t needle_len = strlen(needle); + + do { + if (!strncmp(haystack, needle, needle_len)) + return haystack; + + haystack = strchr(haystack, '\n'); + if (!haystack) + return NULL; + else + ++haystack; + } while (*haystack); + + return NULL; +} + /** Return true iff the 'len' bytes at 'mem' are all zero. */ int tor_mem_is_zero(const char *mem, size_t len) diff --git a/src/common/util.h b/src/common/util.h index 17cbb4a44..b55bb91c5 100644 --- a/src/common/util.h +++ b/src/common/util.h @@ -195,6 +195,8 @@ const char *eat_whitespace_no_nl(const char *s) ATTR_PURE; const char *eat_whitespace_eos_no_nl(const char *s, const char *eos) ATTR_PURE; const char *find_whitespace(const char *s) ATTR_PURE; const char *find_whitespace_eos(const char *s, const char *eos) ATTR_PURE; +const char *find_str_at_start_of_line(const char *haystack, const char *needle) + ATTR_PURE; int tor_mem_is_zero(const char *mem, size_t len) ATTR_PURE; int tor_digest_is_zero(const char *digest) ATTR_PURE; int tor_digest256_is_zero(const char *digest) ATTR_PURE; diff --git a/src/or/geoip.c b/src/or/geoip.c index 535e9afff..d99e47211 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -1099,11 +1099,10 @@ parse_bridge_stats_controller(const char *stats_str, time_t now) { char stats_end_str[ISO_TIME_LEN+1], stats_start_str[ISO_TIME_LEN+1], *controller_str, *eos, *eol, *summary; - const char *stats_end_first = "bridge-stats-end ", - *stats_end_middle = "\nbridge-stats-end ", - *ips_first = "bridge-ips", - *ips_middle = "\nbridge-ips", - *tmp; + + const char *BRIDGE_STATS_END = "bridge-stats-end "; + const char *BRIDGE_IPS = "bridge-ips "; + const char *tmp; time_t stats_end_time; size_t controller_len; int seconds; @@ -1111,14 +1110,11 @@ parse_bridge_stats_controller(const char *stats_str, time_t now) /* Parse timestamp and number of seconds from "bridge-stats-end YYYY-MM-DD HH:MM:SS (N s)" */ - if (!strncmp(stats_str, stats_end_first, strlen(stats_end_first))) { - tmp = stats_str + strlen(stats_end_first); - } else { - tmp = strstr(stats_str, stats_end_middle); - if (!tmp) - return NULL; - tmp += strlen(stats_end_middle); - } + tmp = find_str_at_start_of_line(stats_str, BRIDGE_STATS_END); + if (!tmp) + return NULL; + tmp += strlen(BRIDGE_STATS_END); + if (strlen(tmp) < ISO_TIME_LEN + 6) return NULL; strlcpy(stats_end_str, tmp, sizeof(stats_end_str)); @@ -1133,23 +1129,19 @@ parse_bridge_stats_controller(const char *stats_str, time_t now) format_iso_time(stats_start_str, stats_end_time - seconds); /* Parse: "bridge-ips CC=N,CC=N,..." */ - if (!strncmp(stats_str, ips_first, strlen(ips_first))) { - tmp = stats_str + strlen(ips_first); - } else { - tmp = strstr(stats_str, ips_middle); - if (!tmp) - return NULL; - tmp += strlen(ips_middle); - } - if (strlen(tmp) > 2 && !strncmp(tmp, " ", 1)) - tmp += 1; - else - tmp = ""; - summary = tor_malloc(strlen(tmp) + 1); - strlcpy(summary, tmp, strlen(tmp) + 1); - eol = strstr(summary, "\n"); + tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS); + if (!tmp) + return NULL; + tmp += strlen(BRIDGE_IPS); + + tmp = eat_whitespace_no_nl(tmp); + + eol = strchr(tmp, '\n'); if (eol) - eol[0] = '\0'; + summary = tor_strndup(tmp, eol-tmp); + else + summary = tor_strdup(tmp); + controller_len = strlen("TimeStarted=\"\" CountrySummary=") + strlen(summary) + 42; controller_str = tor_malloc(controller_len); diff --git a/src/test/test_util.c b/src/test/test_util.c index 51b788e72..3ed6a7aab 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -1023,9 +1023,39 @@ test_util_strtok(void) ; } +static void +test_util_find_str_at_start_of_line(void *ptr) +{ + const char *long_string = + "hello world. hello world. hello hello. howdy.\n" + "hello hello world\n"; + + (void)ptr; + + /* not-found case. */ + tt_assert(! find_str_at_start_of_line(long_string, "fred")); + + /* not-found case where haystack doesn't end with \n */ + tt_assert(! find_str_at_start_of_line("foobar\nbaz", "fred")); + + /* start-of-string case */ + tt_assert(long_string == + find_str_at_start_of_line(long_string, "hello world.")); + + /* start-of-line case */ + tt_assert(strchr(long_string,'\n')+1 == + find_str_at_start_of_line(long_string, "hello hello")); + done: + ; +} + + #define UTIL_LEGACY(name) \ { #name, legacy_test_helper, 0, &legacy_setup, test_util_ ## name } +#define UTIL_TEST(name, flags) \ + { #name, test_util_ ## name, flags, NULL, NULL } + struct testcase_t util_tests[] = { UTIL_LEGACY(time), UTIL_LEGACY(config_line), @@ -1040,6 +1070,7 @@ struct testcase_t util_tests[] = { UTIL_LEGACY(threads), UTIL_LEGACY(sscanf), UTIL_LEGACY(strtok), + UTIL_TEST(find_str_at_start_of_line, 0), END_OF_TESTCASES }; -- cgit v1.2.3 From f80672d74786c05a776fdbd8581d553c98d75e5a Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Fri, 18 Dec 2009 12:55:05 +0100 Subject: Remove duplicate words and a duplicate newline. --- src/common/compat_libevent.c | 2 +- src/common/util.c | 6 +++--- src/or/connection.c | 2 +- src/or/connection_or.c | 2 +- src/or/control.c | 2 +- src/or/router.c | 2 +- src/or/routerlist.c | 2 +- src/or/routerparse.c | 2 +- src/test/test_util.c | 1 - 9 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/common/compat_libevent.c b/src/common/compat_libevent.c index 32c6d4c8b..dcf51cbbd 100644 --- a/src/common/compat_libevent.c +++ b/src/common/compat_libevent.c @@ -26,7 +26,7 @@ /** A number representing a version of Libevent. This is a 4-byte number, with the first three bytes representing the - major, minor, and patchlevel respectively of the the library. The fourth + major, minor, and patchlevel respectively of the library. The fourth byte is unused. This is equivalent to the format of LIBEVENT_VERSION_NUMBER on Libevent diff --git a/src/common/util.c b/src/common/util.c index 6177a3e73..a15af7ed5 100644 --- a/src/common/util.c +++ b/src/common/util.c @@ -682,7 +682,7 @@ find_whitespace_eos(const char *s, const char *eos) return s; } -/** Return the the first occurrence of needle in haystack that +/** Return the first occurrence of needle in haystack that * occurs at the start of a line (that is, at the beginning of haystack * or immediately after a newline). Return NULL if no such string is found. */ @@ -1237,7 +1237,7 @@ format_rfc1123_time(char *buf, time_t t) memcpy(buf+8, MONTH_NAMES[tm.tm_mon], 3); } -/** Parse the the RFC1123 encoding of some time (in GMT) from buf, +/** Parse the RFC1123 encoding of some time (in GMT) from buf, * and store the result in *t. * * Return 0 on success, -1 on failure. @@ -1778,7 +1778,7 @@ write_str_to_file(const char *fname, const char *str, int bin) } /** Represents a file that we're writing to, with support for atomic commit: - * we can write into a a temporary file, and either remove the file on + * we can write into a temporary file, and either remove the file on * failure, or replace the original file on success. */ struct open_file_t { char *tempname; /**< Name of the temporary file. */ diff --git a/src/or/connection.c b/src/or/connection.c index fdbe86741..8de7ad9e5 100644 --- a/src/or/connection.c +++ b/src/or/connection.c @@ -1375,7 +1375,7 @@ connection_proxy_connect(connection_t *conn, int type) /* Send a SOCKS4 connect request with empty user id */ if (tor_addr_family(&conn->addr) != AF_INET) { - log_warn(LD_NET, "SOCKS4 client is incompatible with with IPv6"); + log_warn(LD_NET, "SOCKS4 client is incompatible with IPv6"); return -1; } diff --git a/src/or/connection_or.c b/src/or/connection_or.c index 60c5c7fca..84023f5a7 100644 --- a/src/or/connection_or.c +++ b/src/or/connection_or.c @@ -886,7 +886,7 @@ connection_or_nonopen_was_started_here(or_connection_t *conn) * return -1 if he is lying, broken, or otherwise something is wrong. * * If we initiated this connection (started_here is true), make sure - * the other side sent sent a correctly formed certificate. If I initiated the + * the other side sent a correctly formed certificate. If I initiated the * connection, make sure it's the right guy. * * Otherwise (if we _didn't_ initiate this connection), it's okay for diff --git a/src/or/control.c b/src/or/control.c index d3ed8587b..be1e921f3 100644 --- a/src/or/control.c +++ b/src/or/control.c @@ -279,7 +279,7 @@ connection_write_str_to_buf(const char *s, control_connection_t *conn) /** Given a len-character string in data, made of lines * terminated by CRLF, allocate a new string in *out, and copy the * contents of data into *out, adding a period before any period - * that that appears at the start of a line, and adding a period-CRLF line at + * that appears at the start of a line, and adding a period-CRLF line at * the end. Replace all LF characters sequences with CRLF. Return the number * of bytes in *out. */ diff --git a/src/or/router.c b/src/or/router.c index 7362e60f6..e14f23772 100644 --- a/src/or/router.c +++ b/src/or/router.c @@ -1660,7 +1660,7 @@ router_dump_router_to_string(char *s, size_t maxlen, routerinfo_t *router, return -1; } - /* PEM-encode the identity key key */ + /* PEM-encode the identity key */ if (crypto_pk_write_public_key_to_string(router->identity_pkey, &identity_pkey,&identity_pkeylen)<0) { log_warn(LD_BUG,"write identity_pkey to string failed!"); diff --git a/src/or/routerlist.c b/src/or/routerlist.c index 7275e1d5c..133e1933e 100644 --- a/src/or/routerlist.c +++ b/src/or/routerlist.c @@ -3937,7 +3937,7 @@ client_would_use_router(routerstatus_t *rs, time_t now, or_options_t *options) * this number per server. */ #define MIN_DL_PER_REQUEST 4 /** To prevent a single screwy cache from confusing us by selective reply, - * try to split our requests into at least this this many requests. */ + * try to split our requests into at least this many requests. */ #define MIN_REQUESTS 3 /** If we want fewer than this many descriptors, wait until we * want more, or until MAX_CLIENT_INTERVAL_WITHOUT_REQUEST has diff --git a/src/or/routerparse.c b/src/or/routerparse.c index bc59a62b5..13dc10c0e 100644 --- a/src/or/routerparse.c +++ b/src/or/routerparse.c @@ -2928,7 +2928,7 @@ networkstatus_parse_detached_signatures(const char *s, const char *eos) } if (base16_decode(digests->d[alg], DIGEST256_LEN, hexdigest, strlen(hexdigest)) < 0) { - log_warn(LD_DIR, "Bad encoding on on consensus-digest in detached " + log_warn(LD_DIR, "Bad encoding on consensus-digest in detached " "networkstatus signatures"); goto err; } diff --git a/src/test/test_util.c b/src/test/test_util.c index 3ed6a7aab..ba0f8cdf2 100644 --- a/src/test/test_util.c +++ b/src/test/test_util.c @@ -1049,7 +1049,6 @@ test_util_find_str_at_start_of_line(void *ptr) ; } - #define UTIL_LEGACY(name) \ { #name, legacy_test_helper, 0, &legacy_setup, test_util_ ## name } -- cgit v1.2.3 From 3a5a728d4a5490ac24819df54aca4a7d0dde0d10 Mon Sep 17 00:00:00 2001 From: Karsten Loesing Date: Fri, 18 Dec 2009 13:29:51 +0100 Subject: Permit an empty "bridge-ips" line when parsing bridge stats. --- src/or/geoip.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/or/geoip.c b/src/or/geoip.c index d99e47211..45dc8cf34 100644 --- a/src/or/geoip.c +++ b/src/or/geoip.c @@ -1102,6 +1102,7 @@ parse_bridge_stats_controller(const char *stats_str, time_t now) const char *BRIDGE_STATS_END = "bridge-stats-end "; const char *BRIDGE_IPS = "bridge-ips "; + const char *BRIDGE_IPS_EMPTY_LINE = "bridge-ips\n"; const char *tmp; time_t stats_end_time; size_t controller_len; @@ -1130,17 +1131,21 @@ parse_bridge_stats_controller(const char *stats_str, time_t now) /* Parse: "bridge-ips CC=N,CC=N,..." */ tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS); - if (!tmp) - return NULL; - tmp += strlen(BRIDGE_IPS); - - tmp = eat_whitespace_no_nl(tmp); - - eol = strchr(tmp, '\n'); - if (eol) - summary = tor_strndup(tmp, eol-tmp); - else - summary = tor_strdup(tmp); + if (tmp) { + tmp += strlen(BRIDGE_IPS); + tmp = eat_whitespace_no_nl(tmp); + eol = strchr(tmp, '\n'); + if (eol) + summary = tor_strndup(tmp, eol-tmp); + else + summary = tor_strdup(tmp); + } else { + /* Look if there is an empty "bridge-ips" line */ + tmp = find_str_at_start_of_line(stats_str, BRIDGE_IPS_EMPTY_LINE); + if (!tmp) + return NULL; + summary = tor_strdup(""); + } controller_len = strlen("TimeStarted=\"\" CountrySummary=") + strlen(summary) + 42; -- cgit v1.2.3