From: Sage Weil <sage@newdream.net>
Date: Thu, 27 Jan 2011 16:15:23 +0000 (-0800)
Subject: mon: add 'mds cluster_reset' command
X-Git-Tag: v0.25~231^2~43^2~8
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9ebd773529896565200089aa4548463a55dc3861;p=ceph.git

mon: add 'mds cluster_reset' command

Reset an MDS cluster back to a single node.  The idea is:

 - wipe out mds journals
 - maybe set recovery flag
 - mds cluster_reset (this)

Then mds0 only needs to recover from an (empty) journal.  Other MDS nodes
would only rejoin the cluster later.

See: #602
Signed-off-by: Sage Weil <sage@newdream.net>
---

diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index a9d181249e65..340edce178df 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -625,6 +625,47 @@ int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg)
   return 0;
 }
 
+// Reset the pending mdsmap to one rank: ranks in 'in'/'failed' move to 'stopped',
+// rank 0 is re-added as in+failed, up mds's are blacklisted, and up/mds_info are
+// cleared.  Returns 0, or -EAGAIN if an mds is up but the osdmap isn't writeable.
+int MDSMonitor::reset_cluster(std::ostream &ss)
+{
+  dout(10) << "reset_cluster" << dendl;
+
+  const bool have_up = !pending_mdsmap.up.empty();
+  if (have_up && !mon->osdmon()->paxos->is_writeable()) {
+    ss << "osdmap not writeable, can't blacklist up mds's";
+    return -EAGAIN;
+  }
+
+  // --- reset the cluster map: retire every in/failed rank, then revive rank 0 ---
+  pending_mdsmap.stopped.insert(pending_mdsmap.in.begin(), pending_mdsmap.in.end());
+  pending_mdsmap.in.clear();
+  pending_mdsmap.stopped.insert(pending_mdsmap.failed.begin(), pending_mdsmap.failed.end());
+  pending_mdsmap.failed.clear();
+  pending_mdsmap.stopped.erase(0);
+  pending_mdsmap.failed.insert(0);
+  pending_mdsmap.in.insert(0);
+
+  // blacklist all up mds's; gated on the same 'up' test the writeability check used
+  if (have_up) {
+    utime_t until = g_clock.now();
+    until += g_conf.mds_blacklist_interval;
+    for (map<int32_t,uint64_t>::iterator p = pending_mdsmap.up.begin();
+         p != pending_mdsmap.up.end(); ++p) {
+      MDSMap::mds_info_t& info = pending_mdsmap.mds_info[p->second];
+      dout(10) << " blacklisting gid " << p->second << " " << info.addr << dendl;
+      pending_mdsmap.last_failure_osd_epoch = mon->osdmon()->blacklist(info.addr, until);
+    }
+    mon->osdmon()->propose_pending();
+  }
+  pending_mdsmap.up.clear();
+  pending_mdsmap.mds_info.clear();
+
+  ss << "reset mds cluster to single mds";
+  return 0;
+}
+
 bool MDSMonitor::prepare_command(MMonCommand *m)
 {
   int r = -EINVAL;
@@ -703,6 +744,9 @@ bool MDSMonitor::prepare_command(MMonCommand *m)
       paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version()));
       return true;
     }
+    else if (m->cmd[1] == "cluster_reset") {
+      r = reset_cluster(ss);
+    }
     else if (m->cmd[1] == "compat" && m->cmd.size() == 4) {
       uint64_t f = atoll(m->cmd[3].c_str());
       if (m->cmd[2] == "rm_compat") {
diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h
index 528acec1abf8..372804b419ee 100644
--- a/src/mon/MDSMonitor.h
+++ b/src/mon/MDSMonitor.h
@@ -87,8 +87,10 @@ class MDSMonitor : public PaxosService {
   bool prepare_offload_targets(MMDSLoadTargets *m);
 
   enum health_status_t get_health(std::ostream &ss) const;
-  bool preprocess_command(MMonCommand *m);
   int fail_mds(std::ostream &ss, const std::string &arg);
+  int reset_cluster(std::ostream &ss);
+
+  bool preprocess_command(MMonCommand *m);
   bool prepare_command(MMonCommand *m);
 
   // beacons