From: Sage Weil <sage.weil@dreamhost.com>
Date: Thu, 27 Jan 2011 16:47:48 +0000 (-0800)
Subject: mds: cluster_fail instead of reset_cluster
X-Git-Tag: v0.25~231^2~43^2~5
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bb72d95dc0905a8bb445cb1bdad53101d88e480e;p=ceph.git

mds: cluster_fail instead of reset_cluster

Mark all cluster members as failed and blacklist them.  Do not force
up/failed ranks to stopped, as that requires the admin to do other
trickery.  This keeps the cluster fail orthogonal to any journal
discard/reset.

Signed-off-by: Sage Weil <sage@newdream.net>
---

diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index cf2f958f2685..63305d55392f 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -630,9 +630,9 @@ int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
   return 0;
 }
 
-int MDSMonitor::reset_cluster(std::ostream &ss)
+int MDSMonitor::cluster_fail(std::ostream &ss)
 {
-  dout(10) << "reset_cluster" << dendl;
+  dout(10) << "cluster_fail" << dendl;
 
   if (!pending_mdsmap.test_flag(CEPH_MDSMAP_DOWN)) {
     ss << "mdsmap must be marked DOWN first ('mds cluster_down')";
@@ -644,17 +644,6 @@ int MDSMonitor::reset_cluster(std::ostream &ss)
   }
 
   // --- reset the cluster map ---
-  pending_mdsmap.stopped.insert(pending_mdsmap.in.begin(),
-				pending_mdsmap.in.end());
-  pending_mdsmap.in.clear();
-  pending_mdsmap.stopped.insert(pending_mdsmap.failed.begin(),
-				pending_mdsmap.failed.end());
-  pending_mdsmap.failed.clear();
-  
-  pending_mdsmap.stopped.erase(0);
-  pending_mdsmap.failed.insert(0);
-  pending_mdsmap.in.insert(0);
-
   if (pending_mdsmap.mds_info.size()) {
     // blacklist all old mds's
     utime_t until = g_clock.now();
@@ -669,9 +658,12 @@ int MDSMonitor::reset_cluster(std::ostream &ss)
     mon->osdmon()->propose_pending();
   }
   pending_mdsmap.up.clear();
+  pending_mdsmap.failed.insert(pending_mdsmap.in.begin(),
+			       pending_mdsmap.in.end());
+  pending_mdsmap.in.clear();
   pending_mdsmap.mds_info.clear();
 
-  ss << "reset mds cluster to single mds";
+  ss << "failed all mds cluster members";
   return 0;
 }
 
@@ -753,8 +745,8 @@ bool MDSMonitor::prepare_command(MMonCommand *m)
       paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version()));
       return true;
     }
-    else if (m->cmd[1] == "cluster_reset") {
-      r = reset_cluster(ss);
+    else if (m->cmd[1] == "cluster_fail") {
+      r = cluster_fail(ss);
     }
     else if (m->cmd[1] == "cluster_down") {
       if (pending_mdsmap.test_flag(CEPH_MDSMAP_DOWN)) {
diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h
index 372804b419ee..0f0b2e88581e 100644
--- a/src/mon/MDSMonitor.h
+++ b/src/mon/MDSMonitor.h
@@ -88,7 +88,7 @@ class MDSMonitor : public PaxosService {
 
   enum health_status_t get_health(std::ostream &ss) const;
   int fail_mds(std::ostream &ss, const std::string &arg);
-  int reset_cluster(std::ostream &ss);
+  int cluster_fail(std::ostream &ss);
 
   bool preprocess_command(MMonCommand *m);
   bool prepare_command(MMonCommand *m);