git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: tear down standby replays on MDS rank stop 10628/head
author John Spray <john.spray@redhat.com>
Tue, 9 Aug 2016 13:28:02 +0000 (14:28 +0100)
committer John Spray <john.spray@redhat.com>
Tue, 9 Aug 2016 13:29:44 +0000 (14:29 +0100)
Fixes: http://tracker.ceph.com/issues/16909
Signed-off-by: John Spray <john.spray@redhat.com>
src/mds/FSMap.cc
src/mds/FSMap.h
src/mon/MDSMonitor.cc

index 53899cadc3e4c4c3d0504e399bdb96f736178476..af798db1a2165317f5463b0489ee066b1249decb 100644 (file)
@@ -801,7 +801,7 @@ void FSMap::insert(const MDSMap::mds_info_t &new_info)
   standby_epochs[new_info.global_id] = epoch;
 }
 
-void FSMap::stop(mds_gid_t who)
+std::list<mds_gid_t> FSMap::stop(mds_gid_t who)
 {
   assert(mds_roles.at(who) != FS_CLUSTER_ID_NONE);
   auto fs = filesystems.at(mds_roles.at(who));
@@ -810,10 +810,24 @@ void FSMap::stop(mds_gid_t who)
   fs->mds_map.in.erase(info.rank);
   fs->mds_map.stopped.insert(info.rank);
 
+  // Also drop any standby replays that were following this rank
+  std::list<mds_gid_t> standbys;
+  for (const auto &i : fs->mds_map.mds_info) {
+    const auto &other_gid = i.first;
+    const auto &other_info = i.second;
+    if (other_info.rank == info.rank
+        && other_info.state == MDSMap::STATE_STANDBY_REPLAY) {
+      standbys.push_back(other_gid);
+      erase(other_gid, 0);
+    }
+  }
+
   fs->mds_map.mds_info.erase(who);
   mds_roles.erase(who);
 
   fs->mds_map.epoch = epoch;
+
+  return standbys;
 }
 
 
index 5bf27838be6eb20e850fa93a2de12d248582f14c..c71182c24c07913ecae03c723487f56c1c3e923a 100644 (file)
@@ -269,8 +269,11 @@ public:
   /**
    * A daemon reports that it is STATE_STOPPED: remove it,
    * and the rank it held.
+   *
+   * @returns a list of any additional GIDs that were removed from the map
+   * as a side effect (like standby replays)
    */
-  void stop(mds_gid_t who);
+  std::list<mds_gid_t> stop(mds_gid_t who);
 
   /**
    * The rank held by 'who', if any, is to be relinquished, and
index c72fd5a05a82748288340ed633f4457fac9fbec3..84414d1d83884f07e43db665ab8f402593bbc7e9 100644 (file)
@@ -599,8 +599,16 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op)
             << "  standby_for_rank=" << m->get_standby_for_rank()
             << dendl;
     if (state == MDSMap::STATE_STOPPED) {
-      pending_fsmap.stop(gid);
-      last_beacon.erase(gid);
+      auto erased = pending_fsmap.stop(gid);
+      erased.push_back(gid);
+
+      for (const auto &erased_gid : erased) {
+        last_beacon.erase(erased_gid);
+        if (pending_daemon_health.count(erased_gid)) {
+          pending_daemon_health.erase(erased_gid);
+          pending_daemon_health_rm.insert(erased_gid);
+        }
+      }
     } else if (state == MDSMap::STATE_DAMAGED) {
       if (!mon->osdmon()->is_writeable()) {
         dout(4) << __func__ << ": DAMAGED from rank " << info.rank