From: Sage Weil Date: Thu, 27 Jan 2011 16:15:23 +0000 (-0800) Subject: mon: add 'mds reset_cluster' command X-Git-Tag: v0.25~231^2~43^2~8 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9ebd773529896565200089aa4548463a55dc3861;p=ceph.git mon: add 'mds reset_cluster' command Reset an MDS cluster back to a single node. The idea is: - wipe out mds journals - maybe set recovery flag - mds reset_cluster (this; note that the command string matched in prepare_command is "cluster_reset") Then mds0 would only recover from an (empty) journal. Other MDS nodes would only rejoin the cluster later. See: #602 Signed-off-by: Sage Weil --- diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index a9d181249e65..340edce178df 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -625,6 +625,47 @@ int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg) return 0; } +int MDSMonitor::reset_cluster(std::ostream &ss) +{ + dout(10) << "reset_cluster" << dendl; + + if (pending_mdsmap.up.size() && !mon->osdmon()->paxos->is_writeable()) { + ss << "osdmap not writeable, can't blacklist up mds's"; + return -EAGAIN; + } + + // --- reset the cluster map --- + pending_mdsmap.stopped.insert(pending_mdsmap.in.begin(), + pending_mdsmap.in.end()); + pending_mdsmap.in.clear(); + pending_mdsmap.stopped.insert(pending_mdsmap.failed.begin(), + pending_mdsmap.failed.end()); + pending_mdsmap.failed.clear(); + + pending_mdsmap.stopped.erase(0); + pending_mdsmap.failed.insert(0); + pending_mdsmap.in.insert(0); + + if (pending_mdsmap.mds_info.size()) { + // blacklist all old mds's + utime_t until = g_clock.now(); + until += g_conf.mds_blacklist_interval; + for (map::iterator p = pending_mdsmap.up.begin(); + p != pending_mdsmap.up.end(); + ++p) { + MDSMap::mds_info_t& info = pending_mdsmap.mds_info[p->second]; + dout(10) << " blacklisting gid " << p->second << " " << info.addr << dendl; + pending_mdsmap.last_failure_osd_epoch = mon->osdmon()->blacklist(info.addr, until); + } + mon->osdmon()->propose_pending(); + } + 
pending_mdsmap.up.clear(); + pending_mdsmap.mds_info.clear(); + + ss << "reset mds cluster to single mds"; + return 0; +} + bool MDSMonitor::prepare_command(MMonCommand *m) { int r = -EINVAL; @@ -703,6 +744,9 @@ bool MDSMonitor::prepare_command(MMonCommand *m) paxos->wait_for_commit(new Monitor::C_Command(mon, m, 0, rs, paxos->get_version())); return true; } + else if (m->cmd[1] == "cluster_reset") { + r = reset_cluster(ss); + } else if (m->cmd[1] == "compat" && m->cmd.size() == 4) { uint64_t f = atoll(m->cmd[3].c_str()); if (m->cmd[2] == "rm_compat") { diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h index 528acec1abf8..372804b419ee 100644 --- a/src/mon/MDSMonitor.h +++ b/src/mon/MDSMonitor.h @@ -87,8 +87,10 @@ class MDSMonitor : public PaxosService { bool prepare_offload_targets(MMDSLoadTargets *m); enum health_status_t get_health(std::ostream &ss) const; - bool preprocess_command(MMonCommand *m); int fail_mds(std::ostream &ss, const std::string &arg); + int reset_cluster(std::ostream &ss); + + bool preprocess_command(MMonCommand *m); bool prepare_command(MMonCommand *m); // beacons