From a9edb0b4f67825a11647c8faefa613d704be44ae Mon Sep 17 00:00:00 2001 From: Mike Perry Date: Tue, 6 Jul 2010 12:08:13 -0700 Subject: More gracefully handle corrupt state files. Save a backup if we get odd circuitbuildtimes and other state info. In the case of circuit build times, we no longer assert, and reset our state. --- changes/cbt-bugfixes | 2 ++ src/or/circuitbuild.c | 27 +++++++++++++++++--- src/or/config.c | 71 +++++++++++++++++++++++++++++++-------------------- 3 files changed, 69 insertions(+), 31 deletions(-) diff --git a/changes/cbt-bugfixes b/changes/cbt-bugfixes index 31241c90e..12e62a8b3 100644 --- a/changes/cbt-bugfixes +++ b/changes/cbt-bugfixes @@ -29,5 +29,7 @@ parameter and via a LearnCircuitBuildTimeout config option. Also automatically disable circuit build time calculation if we are either a AuthoritativeDirectory, or if we fail to write our state file. Bug 1296. + - More gracefully handle corrupt state files, removing asserts in favor + of saving a backup and resetting state. diff --git a/src/or/circuitbuild.c b/src/or/circuitbuild.c index 07a66cf55..4090054c1 100644 --- a/src/or/circuitbuild.c +++ b/src/or/circuitbuild.c @@ -193,12 +193,11 @@ circuit_build_times_new_consensus_params(circuit_build_times_t *cbt, int32_t num = networkstatus_get_param(ns, "cbtrecentcount", CBT_DEFAULT_RECENT_CIRCUITS); - if (num != cbt->liveness.num_recent_circs) { + if (num > 0 && num != cbt->liveness.num_recent_circs) { int8_t *recent_circs; log_notice(LD_CIRC, "Changing recent timeout size from %d to %d", cbt->liveness.num_recent_circs, num); - tor_assert(num > 0); tor_assert(cbt->liveness.timeouts_after_firsthop); /* @@ -675,6 +674,10 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, "Corrupt state file? Build times count mismatch. " "Read %d times, but file says %d", loaded_cnt, state->TotalBuildTimes); + *msg = tor_strdup("Build times count mismatch."); + circuit_build_times_reset(cbt); + tor_free(loaded_times); + return -1; } circuit_build_times_shuffle_and_store_array(cbt, loaded_times, loaded_cnt); @@ -688,8 +691,18 @@ circuit_build_times_parse_state(circuit_build_times_t *cbt, log_info(LD_CIRC, "Loaded %d/%d values from %d lines in circuit time histogram", tot_values, cbt->total_build_times, N); - tor_assert(cbt->total_build_times == tot_values); - tor_assert(cbt->total_build_times <= CBT_NCIRCUITS_TO_OBSERVE); + + if (cbt->total_build_times != tot_values + || cbt->total_build_times > CBT_NCIRCUITS_TO_OBSERVE) { + log_warn(LD_CIRC, + "Corrupt state file? Shuffled build times mismatch. " + "Read %d times, but file says %d", tot_values, + state->TotalBuildTimes); + *msg = tor_strdup("Build times count mismatch."); + circuit_build_times_reset(cbt); + tor_free(loaded_times); + return -1; + } circuit_build_times_set_timeout(cbt); @@ -742,6 +755,12 @@ circuit_build_times_update_alpha(circuit_build_times_t *cbt) n++; } + /* + * We are erring and asserting here because this can only happen + * in codepaths other than startup. The startup state parsing code + * performs this same check, and resets state if it hits it. If we + * hit it at runtime, something serious has gone wrong. + */ if (n!=cbt->total_build_times) { log_err(LD_CIRC, "Discrepancy in build times count: %d vs %d", n, cbt->total_build_times); diff --git a/src/or/config.c b/src/or/config.c index 771fa03bf..7dee8ffab 100644 --- a/src/or/config.c +++ b/src/or/config.c @@ -4837,25 +4837,63 @@ or_state_validate(or_state_t *old_state, or_state_t *state, } /** Replace the current persistent state with new_state */ -static void +static int or_state_set(or_state_t *new_state) { char *err = NULL; + int ret = 0; tor_assert(new_state); config_free(&state_format, global_state); global_state = new_state; if (entry_guards_parse_state(global_state, 1, &err)<0) { log_warn(LD_GENERAL,"%s",err); tor_free(err); + ret = -1; } if (rep_hist_load_state(global_state, &err)<0) { log_warn(LD_GENERAL,"Unparseable bandwidth history state: %s",err); tor_free(err); + ret = -1; } if (circuit_build_times_parse_state(&circ_times, global_state, &err) < 0) { log_warn(LD_GENERAL,"%s",err); tor_free(err); + ret = -1; } + return ret; +} + +/** + * Save a broken state file to a backup location. + */ +static void +or_state_save_broken(char *fname) +{ + int i; + file_status_t status; + size_t len = strlen(fname)+16; + char *fname2 = tor_malloc(len); + for (i = 0; i < 100; ++i) { + tor_snprintf(fname2, len, "%s.%d", fname, i); + status = file_status(fname2); + if (status == FN_NOENT) + break; + } + if (i == 100) { + log_warn(LD_BUG, "Unable to parse state in \"%s\"; too many saved bad " + "state files to move aside. Discarding the old state file.", + fname); + unlink(fname); + } else { + log_warn(LD_BUG, "Unable to parse state in \"%s\". Moving it aside " + "to \"%s\". This could be a bug in Tor; please tell " + "the developers.", fname, fname2); + if (rename(fname, fname2) < 0) { + log_warn(LD_BUG, "Weirdly, I couldn't even move the state aside. The " + "OS gave an error of %s", strerror(errno)); + } + } + tor_free(fname2); } /** Reload the persistent state from disk, generating a new state as needed. @@ -4917,31 +4955,8 @@ or_state_load(void) " This is a bug in Tor."); goto done; } else if (badstate && contents) { - int i; - file_status_t status; - size_t len = strlen(fname)+16; - char *fname2 = tor_malloc(len); - for (i = 0; i < 100; ++i) { - tor_snprintf(fname2, len, "%s.%d", fname, i); - status = file_status(fname2); - if (status == FN_NOENT) - break; - } - if (i == 100) { - log_warn(LD_BUG, "Unable to parse state in \"%s\"; too many saved bad " - "state files to move aside. Discarding the old state file.", - fname); - unlink(fname); - } else { - log_warn(LD_BUG, "Unable to parse state in \"%s\". Moving it aside " - "to \"%s\". This could be a bug in Tor; please tell " - "the developers.", fname, fname2); - if (rename(fname, fname2) < 0) { - log_warn(LD_BUG, "Weirdly, I couldn't even move the state aside. The " - "OS gave an error of %s", strerror(errno)); - } - } - tor_free(fname2); + or_state_save_broken(fname); + tor_free(contents); config_free(&state_format, new_state); @@ -4953,7 +4968,9 @@ or_state_load(void) } else { log_info(LD_GENERAL, "Initialized state"); } - or_state_set(new_state); + if (or_state_set(new_state) == -1) { + or_state_save_broken(fname); + } new_state = NULL; if (!contents) { global_state->next_write = 0; -- cgit v1.2.3