git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: include failed_for in MOSDFailure reports
author: Sage Weil <sage@inktank.com>
Tue, 18 Sep 2012 21:37:55 +0000 (14:37 -0700)
committer: Sage Weil <sage@inktank.com>
Tue, 18 Sep 2012 21:38:59 +0000 (14:38 -0700)
The monitor will need this to dynamically adjust the heartbeat grace.

Closes: #3044
Signed-off-by: Sage Weil <sage@inktank.com>
src/messages/MOSDFailure.h
src/osd/OSD.cc
src/osd/OSD.h

index 3b8ed0b4a2cceaab72d8b10053fb95522625cfbf..807ab2b679f2a7d22e5215d90d97605b48aa3b11 100644 (file)
 
 class MOSDFailure : public PaxosServiceMessage {
 
-  static const int HEAD_VERSION = 2;
+  static const int HEAD_VERSION = 3;
 
  public:
   uuid_d fsid;
   entity_inst_t target_osd;
   __u8 is_failed;
   epoch_t       epoch;
+  int32_t failed_for;  // known to be failed since at least this long
 
   MOSDFailure() : PaxosServiceMessage(MSG_OSD_FAILURE, 0, HEAD_VERSION) { }
-  MOSDFailure(const uuid_d &fs, entity_inst_t f, epoch_t e)
+  MOSDFailure(const uuid_d &fs, entity_inst_t f, int duration, epoch_t e)
     : PaxosServiceMessage(MSG_OSD_FAILURE, e, HEAD_VERSION),
-      fsid(fs), target_osd(f), is_failed(true), epoch(e) { }
+      fsid(fs), target_osd(f), is_failed(true), epoch(e), failed_for(duration) { }
 private:
   ~MOSDFailure() {}
 
@@ -47,10 +48,14 @@ public:
     ::decode(fsid, p);
     ::decode(target_osd, p);
     ::decode(epoch, p);
-    if (header.version >=2)
+    if (header.version >= 2)
       ::decode(is_failed, p);
     else
       is_failed = true;
+    if (header.version >= 3)
+      ::decode(failed_for, p);
+    else
+      failed_for = 0;
   }
   void encode_payload(uint64_t features) {
     paxos_encode();
@@ -58,11 +63,12 @@ public:
     ::encode(target_osd, payload);
     ::encode(epoch, payload);
     ::encode(is_failed, payload);
+    ::encode(failed_for, payload);
   }
 
   const char *get_type_name() const { return "osd_failure"; }
   void print(ostream& out) const {
-    out << "osd_failure(" << target_osd << " e" << epoch << " v" << version << ")";
+    out << "osd_failure(" << target_osd << " for " << failed_for << " e" << epoch << " v" << version << ")";
   }
 };
 
index 5add67505589267ec5056900abf4c9eedc9d05fc..675665ff8f47260c8417c1acf3dfc30fc989c4fb 100644 (file)
@@ -1957,16 +1957,19 @@ void OSD::heartbeat_check()
       derr << "heartbeat_check: no reply from osd." << p->first
           << " ever, first ping sent " << p->second.first_tx
           << " (cutoff " << cutoff << ")" << dendl;
+
+      // fail
+      failure_queue[p->first] = p->second.last_tx;
     } else {
       if (p->second.last_rx > cutoff)
        continue;  // got recent reply
       derr << "heartbeat_check: no reply from osd." << p->first
           << " since " << p->second.last_rx
           << " (cutoff " << cutoff << ")" << dendl;
-    }
 
-    // fail!
-    queue_failure(p->first);
+      // fail
+      failure_queue[p->first] = p->second.last_rx;
+    }
   }
 }
 
@@ -2459,10 +2462,12 @@ void OSD::send_failures()
     heartbeat_lock.Lock();
     locked = true;
   }
+  utime_t now = ceph_clock_now(g_ceph_context);
   while (!failure_queue.empty()) {
-    int osd = *failure_queue.begin();
+    int osd = failure_queue.begin()->first;
+    int failed_for = (int)(double)(now - failure_queue.begin()->second);
     entity_inst_t i = osdmap->get_inst(osd);
-    monc->send_mon_message(new MOSDFailure(monc->get_fsid(), i, osdmap->get_epoch()));
+    monc->send_mon_message(new MOSDFailure(monc->get_fsid(), i, failed_for, osdmap->get_epoch()));
     failure_pending[osd] = i;
     failure_queue.erase(osd);
   }
@@ -2471,7 +2476,7 @@ void OSD::send_failures()
 
 void OSD::send_still_alive(epoch_t epoch, entity_inst_t i)
 {
-  MOSDFailure *m = new MOSDFailure(monc->get_fsid(), i, epoch);
+  MOSDFailure *m = new MOSDFailure(monc->get_fsid(), i, 0, epoch);
   m->is_failed = false;
   monc->send_mon_message(m);
 }
index 9cf4c43df70ae4e694bc88ffeacb0236c2ca1d36..a644cf6a811d902b40a63c24256bcbf167aed9e8 100644 (file)
@@ -825,13 +825,10 @@ protected:
   void send_alive();
 
   // -- failures --
-  set<int> failure_queue;
+  map<int,utime_t> failure_queue;
   map<int,entity_inst_t> failure_pending;
 
 
-  void queue_failure(int n) {
-    failure_queue.insert(n);
-  }
   void send_failures();
   void send_still_alive(epoch_t epoch, entity_inst_t i);