From 0269a0c17723fd3e22738f7495fe017225b924a4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 13 Nov 2015 22:27:14 -0500 Subject: [PATCH] mon/OSDMonitor: simplify failure reporters vs reports logic Since each OSD only sends a failure report for a given peer once, we don't need to count reports vs reporters separately. (This was probably a bad idea anyway.) Remove this logic and the associated config option. Reported-by: Greg Farnum Signed-off-by: Sage Weil --- src/common/config_opts.h | 1 - src/mon/OSDMonitor.cc | 29 ++++++++++++++++------------- src/mon/OSDMonitor.h | 18 ++++++------------ 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 0730d5c4e8d24..3ae21b5260d21 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -277,7 +277,6 @@ OPTION(mon_sync_debug_provider, OPT_INT, -1) // monitor to be used as the sync p OPTION(mon_sync_debug_provider_fallback, OPT_INT, -1) // monitor to be used as fallback if sync provider fails OPTION(mon_inject_sync_get_chunk_delay, OPT_DOUBLE, 0) // inject N second delay on each get_chunk request OPTION(mon_osd_min_down_reporters, OPT_INT, 1) // number of OSDs who need to report a down OSD for it to count -OPTION(mon_osd_min_down_reports, OPT_INT, 3) // number of times a down OSD must be reported for it to count OPTION(mon_osd_force_trim_to, OPT_INT, 0) // force mon to trim maps to this point, regardless of min_last_epoch_clean (dangerous, use with care) OPTION(mon_mds_force_trim_to, OPT_INT, 0) // force mon to trim mdsmaps to this point (dangerous, use with care) diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 93c17004afdcf..c3c684f6ce5d4 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1673,9 +1673,9 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi) } dout(10) << " osd." << target_osd << " has " - << fi.reporters.size() << " reporters and " - << fi.num_reports << " reports, " - << grace << " grace (" << orig_grace << " + " << my_grace << " + " << peer_grace << "), max_failed_since " << max_failed_since + << fi.reporters.size() << " reporters, " + << grace << " grace (" << orig_grace << " + " << my_grace + << " + " << peer_grace << "), max_failed_since " << max_failed_since << dendl; // already pending failure? @@ -1686,13 +1686,13 @@ bool OSDMonitor::check_failure(utime_t now, int target_osd, failure_info_t& fi) } if (failed_for >= grace && - ((int)fi.reporters.size() >= g_conf->mon_osd_min_down_reporters) && - (fi.num_reports >= g_conf->mon_osd_min_down_reports)) { - dout(1) << " we have enough reports/reporters to mark osd." << target_osd << " down" << dendl; + ((int)fi.reporters.size() >= g_conf->mon_osd_min_down_reporters)) { + dout(1) << " we have enough reporters to mark osd." << target_osd + << " down" << dendl; pending_inc.new_state[target_osd] = CEPH_OSD_UP; mon->clog->info() << osdmap.get_inst(target_osd) << " failed (" - << fi.num_reports << " reports from " << (int)fi.reporters.size() << " peers after " + << (int)fi.reporters.size() << " reporters after " << failed_for << " >= grace " << grace << ")\n"; return true; } @@ -1703,7 +1703,8 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op) { op->mark_osdmon_event(__func__); MOSDFailure *m = static_cast(op->get_req()); - dout(1) << "prepare_failure " << m->get_target() << " from " << m->get_orig_source_inst() + dout(1) << "prepare_failure " << m->get_target() + << " from " << m->get_orig_source_inst() << " is reporting failure:" << m->if_osd_failed() << dendl; int target_osd = m->get_target().name.num(); @@ -1713,7 +1714,9 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op) // calculate failure time utime_t now = ceph_clock_now(g_ceph_context); - utime_t failed_since = m->get_recv_stamp() - utime_t(m->failed_for ? m->failed_for : g_conf->osd_heartbeat_grace, 0); + utime_t failed_since = + m->get_recv_stamp() - + utime_t(m->failed_for ? m->failed_for : g_conf->osd_heartbeat_grace, 0); if (m->if_osd_failed()) { // add a report @@ -1729,7 +1732,7 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op) } else { // remove the report mon->clog->debug() << m->get_target() << " failure report canceled by " - << m->get_orig_source_inst() << "\n"; + << m->get_orig_source_inst() << "\n"; if (failure_info.count(target_osd)) { failure_info_t& fi = failure_info[target_osd]; list ls; @@ -1741,12 +1744,12 @@ bool OSDMonitor::prepare_failure(MonOpRequestRef op) ls.pop_front(); } if (fi.reporters.empty()) { - dout(10) << " removing last failure_info for osd." << target_osd << dendl; + dout(10) << " removing last failure_info for osd." << target_osd + << dendl; failure_info.erase(target_osd); } else { dout(10) << " failure_info for osd." << target_osd << " now " - << fi.reporters.size() << " reporters and " - << fi.num_reports << " reports" << dendl; + << fi.reporters.size() << " reporters" << dendl; } } else { dout(10) << " no failure_info for osd." << target_osd << dendl; diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 517c34fd81b49..7638b6add95d1 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -51,22 +51,20 @@ class PGMap; /// information about a particular peer's failure reports for one osd struct failure_reporter_t { - int num_reports; ///< reports from this reporter utime_t failed_since; ///< when they think it failed - MonOpRequestRef op; ///< most recent failure op request + MonOpRequestRef op; ///< failure op request - failure_reporter_t() : num_reports(0) {} - failure_reporter_t(utime_t s) : num_reports(1), failed_since(s) {} + failure_reporter_t() {} + failure_reporter_t(utime_t s) : failed_since(s) {} ~failure_reporter_t() { } }; /// information about all failure reports for one osd struct failure_info_t { - map reporters; ///< reporter -> # reports + map reporters; ///< reporter -> failed_since etc utime_t max_failed_since; ///< most recent failed_since - int num_reports; - failure_info_t() : num_reports(0) {} + failure_info_t() {} utime_t get_failed_since() { if (max_failed_since == utime_t() && !reporters.empty()) { @@ -83,7 +81,7 @@ struct failure_info_t { // set the message for the latest report. return any old op request we had, // if any, so we can discard it. MonOpRequestRef add_report(int who, utime_t failed_since, - MonOpRequestRef op) { + MonOpRequestRef op) { map::iterator p = reporters.find(who); if (p == reporters.end()) { if (max_failed_since == utime_t()) @@ -91,10 +89,7 @@ struct failure_info_t { else if (max_failed_since < failed_since) max_failed_since = failed_since; p = reporters.insert(map::value_type(who, failure_reporter_t(failed_since))).first; - } else { - p->second.num_reports++; } - num_reports++; MonOpRequestRef ret = p->second.op; p->second.op = op; @@ -116,7 +111,6 @@ struct failure_info_t { map::iterator p = reporters.find(who); if (p == reporters.end()) return; - num_reports -= p->second.num_reports; reporters.erase(p); if (reporters.empty()) max_failed_since = utime_t(); -- 2.39.5