git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
MDS: MDSMonitor: if MDS is in standby-replay and its leader goes down,
author: Greg Farnum <gregf@hq.newdream.net>
Wed, 8 Dec 2010 17:39:59 +0000 (09:39 -0800)
committer: Greg Farnum <gregf@hq.newdream.net>
Thu, 6 Jan 2011 19:12:13 +0000 (11:12 -0800)
take over as the MDS!

This means we can now exit standby-replay.

Signed-off-by: Greg Farnum <gregf@hq.newdream.net>
src/mds/MDS.cc
src/mds/MDS.h
src/mds/MDSMap.h
src/mon/MDSMonitor.cc

index 38e57a028a310b03fbc1e036ec364ae6d8229a68..672b9cc5c7889afdfa2337628108b7c22b6656c4 100644 (file)
@@ -83,6 +83,7 @@ MDS::MDS(const char *n, Messenger *m, MonClient *mc) :
   whoami(-1), incarnation(0),
   standby_for_rank(-1),
   standby_type(0),
+  continue_replay(false),
   messenger(m),
   monc(mc),
   logclient(messenger, &mc->monmap, mc),
@@ -895,33 +896,38 @@ void MDS::handle_mds_map(MMDSMap *m)
            << ceph_mds_state_name(state) << dendl;
     want_state = state;
 
-    // did i just recover?
-    if ((is_active() || is_clientreplay()) &&
-       (oldstate == MDSMap::STATE_REJOIN ||
-        oldstate == MDSMap::STATE_RECONNECT)) 
-      recovery_done();
-
-    if (is_active()) {
-      active_start();
-    } else if (is_replay() || is_oneshot_replay() ||
-        is_standby_replay()) {
-      replay_start();
-    } else if (is_resolve()) {
-      resolve_start();
-    } else if (is_reconnect()) {
-      reconnect_start();
-    } else if (is_clientreplay()) {
-      clientreplay_start();
-    } else if (is_creating()) {
-      boot_create();
-    } else if (is_starting()) {
-      boot_start();
-    } else if (is_stopping()) {
-      assert(oldstate == MDSMap::STATE_ACTIVE);
-      stopping_start();
+    if (oldstate == MDSMap::STATE_STANDBY_REPLAY) {
+        dout(10) << "Monitor activated us! Deactivating replay loop" << dendl;
+        assert (state == MDSMap::STATE_REPLAY);
+        standby_for_rank = -1;
+    } else {
+      // did i just recover?
+      if ((is_active() || is_clientreplay()) &&
+          (oldstate == MDSMap::STATE_REJOIN ||
+              oldstate == MDSMap::STATE_RECONNECT))
+        recovery_done();
+
+      if (is_active()) {
+        active_start();
+      } else if (is_replay() || is_oneshot_replay() ||
+          is_standby_replay()) {
+        replay_start();
+      } else if (is_resolve()) {
+        resolve_start();
+      } else if (is_reconnect()) {
+        reconnect_start();
+      } else if (is_clientreplay()) {
+        clientreplay_start();
+      } else if (is_creating()) {
+        boot_create();
+      } else if (is_starting()) {
+        boot_start();
+      } else if (is_stopping()) {
+        assert(oldstate == MDSMap::STATE_ACTIVE);
+        stopping_start();
+      }
     }
   }
-
   
   // RESOLVE
   // is someone else newly resolving?
@@ -1189,6 +1195,8 @@ void MDS::calc_recovery_set()
 void MDS::replay_start()
 {
   dout(1) << "replay_start" << dendl;
+  if (is_standby_replay())
+    continue_replay = true;
   
   calc_recovery_set();
 
@@ -1235,13 +1243,19 @@ void MDS::replay_done()
 
   if (is_standby_replay()) {
     standby_trim_segments();
-    mdlog->get_journaler()->set_writeable();
     dout(10) << "setting replay timer" << dendl;
     timer.add_event_after(g_conf.mds_replay_interval,
                           new C_Standby_replay_start(this));
     return;
   }
 
+  if (continue_replay) {
+    mdlog->get_journaler()->set_writeable();
+    continue_replay = false;
+    standby_replay_restart();
+    return;
+  }
+
   if (g_conf.mds_wipe_sessions) {
     dout(1) << "wiping out client sessions" << dendl;
     sessionmap.wipe();
index 9b37105c08891b4a934137953a30008e5847a43a..3eb95a13f009632161aade297c7204278cf7779d 100644 (file)
@@ -149,6 +149,9 @@ class MDS : public Dispatcher {
   int standby_for_rank;
   int standby_type;
   string standby_for_name;
+  bool continue_replay; /* set to true by replay_start if we're a hot standby,
+                           remains true until leader MDS fails and we need to
+                           take over*/
 
   Messenger    *messenger;
   MonClient    *monc;
index b77920a57c7369baadf88db2dd9d18cb5be7e47b..7966b4f42dac28bfc5e7f5fef31a98baf6d942ae 100644 (file)
@@ -299,7 +299,8 @@ public:
       if (p->second.rank == -1 &&
          (p->second.standby_for_rank == mds ||
           p->second.standby_for_name == name) &&
-         p->second.state == MDSMap::STATE_STANDBY &&
+         (p->second.state == MDSMap::STATE_STANDBY ||
+             p->second.state == MDSMap::STATE_STANDBY_REPLAY) &&
          !p->second.laggy()) {
        return p->first;
       }
index 42c674a1a526c5e2aba60d7cc2cbc00f949e2cf2..a9c167c49dfdf7498be004221077c4297f296116 100644 (file)
@@ -837,10 +837,10 @@ void MDSMonitor::tick()
        switch (info.state) {
        case MDSMap::STATE_CREATING:
        case MDSMap::STATE_STARTING:
-       case MDSMap::STATE_STANDBY_REPLAY:
        case MDSMap::STATE_ONESHOT_REPLAY:
          si.state = info.state;
          break;
+        case MDSMap::STATE_STANDBY_REPLAY:
        case MDSMap::STATE_REPLAY:
        case MDSMap::STATE_RESOLVE:
        case MDSMap::STATE_RECONNECT: