git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: locally apply osd heartbeat grace to failure checks
author Sage Weil <sage@inktank.com>
Tue, 4 Sep 2012 18:50:30 +0000 (11:50 -0700)
committer Sage Weil <sage@inktank.com>
Tue, 18 Sep 2012 21:39:00 +0000 (14:39 -0700)
Aggregate the failure reports into a single mon 'failed_since' value (the
max, currently), and wait until we have exceeded the grace period to
consider the osd failed.

WARNING: This slightly changes the semantics.  Previously, the grace could
be adjusted in the [osd] section.  Now, the [osd] option controls when the
failure messages are sent, and the [mon] option controls when it is marked
down, and sane users should set it once in [global].

Signed-off-by: Sage Weil <sage@inktank.com>
src/mon/OSDMonitor.cc
src/mon/OSDMonitor.h

index 51fe4ef6c41b69c7e857c580d08063b359d03820..9935dcb03fa6df6c672bc5c56a4a4d75e2ac5058 100644 (file)
@@ -693,11 +693,18 @@ bool OSDMonitor::prepare_failure(MOSDFailure *m)
     // add a report
     failure_info_t& fi = failure_info[target_osd];
     fi.add_report(reporter, failed_since);
+
+    utime_t grace(g_conf->osd_heartbeat_grace, 0);
+    utime_t max_failed_since = fi.get_failed_since();
+
     dout(10) << " osd." << target_osd << " has "
             << fi.reporters.size() << " reporters and "
-            << fi.num_reports << " reports" << dendl;
+            << fi.num_reports << " reports, "
+            << grace << " grace, max_failed_since " << max_failed_since
+            << dendl;
 
-    if (((int)fi.reporters.size() >= g_conf->osd_min_down_reporters) &&
+    if (max_failed_since + grace < now &&
+       ((int)fi.reporters.size() >= g_conf->osd_min_down_reporters) &&
         (fi.num_reports >= g_conf->osd_min_down_reports)) {
       dout(1) << " we have enough reports/reporters to mark osd." << target_osd << " down" << dendl;
       pending_inc.new_state[target_osd] = CEPH_OSD_UP;
index b121ee9718314b9615db5b8913b6bd9989b7f1a2..10bc44cde5158342d58611b0b577eec8bd4f3dfb 100644 (file)
@@ -53,6 +53,18 @@ struct failure_info_t {
 
   failure_info_t() : num_reports(0) {}
 
+  utime_t get_failed_since() {  // returns max_failed_since; recomputes it first if it is unset while reporters exist
+    if (max_failed_since == utime_t() && reporters.size()) {  // default-constructed utime_t is treated as "no max recorded"
+      // the old max must have canceled; recalculate.
+      for (map<int, failure_reporter_t>::iterator p = reporters.begin();
+          p != reporters.end();
+          ++p)
+       if (p->second.failed_since > max_failed_since)  // keep the largest failed_since seen across all reporters
+         max_failed_since = p->second.failed_since;
+    }
+    return max_failed_since;  // still utime_t() when there are no reporters
+  }
+
   void add_report(int who, utime_t failed_since) {
     map<int, failure_reporter_t>::iterator p = reporters.find(who);
     if (p == reporters.end()) {