From 7b2e24f543a1ed6687d64295ee1e0a0a8828e1cc Mon Sep 17 00:00:00 2001 From: Douglas Fuller Date: Tue, 25 Jul 2017 14:06:51 -0400 Subject: [PATCH] mon/FSCommands: Set extra MDS to standby When reducing max_mds, deactivate any MDS rank greater than or equal to the new value of max_mds (ranks are zero-based). Signed-off-by: Douglas Fuller --- PendingReleaseNotes | 8 +++---- src/mon/FSCommands.cc | 13 ++++++++--- src/mon/MDSMonitor.cc | 54 ++++++++++++++++++++++++++++++++----------- src/mon/MDSMonitor.h | 2 +- 4 files changed, 55 insertions(+), 22 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index c9eaa94ed4564..c8e3734626013 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -13,10 +13,10 @@ (even as standby). Operators may ignore the error messages and continue upgrading/restarting or follow this upgrade sequence: - Reduce the number of ranks to 1 (`ceph fs set max_mds 1`), - deactivate all other ranks (`ceph mds deactivate :`), shutdown - standbys leaving the one active MDS, upgrade the single active MDS, then - upgrade/start standbys. Finally, restore the previous max_mds. + Reduce the number of ranks to 1 (`ceph fs set max_mds 1`), wait + for all other MDS to deactivate, leaving the one active MDS, upgrade the + single active MDS, then upgrade/start standbys. Finally, restore the + previous max_mds. See also: https://tracker.ceph.com/issues/23172 diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 0d90f663c8f45..4ac5655352e23 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -20,7 +20,6 @@ #include "MgrStatMonitor.h" - static const string EXPERIMENTAL_WARNING("Warning! This feature is experimental." "It may cause problems up to and including data loss." "Consult the documentation at ceph.com, and if unsure, do not proceed." 
@@ -264,7 +263,9 @@ public: return -EINVAL; } - if (!fs->mds_map.allows_multimds() && n > fs->mds_map.get_max_mds() && + mds_rank_t oldmax = fs->mds_map.get_max_mds(); + + if (!fs->mds_map.allows_multimds() && n > oldmax && n > 1) { ss << "multi-MDS clusters are not enabled; set 'allow_multimds' to enable"; return -EINVAL; @@ -273,6 +274,7 @@ public: ss << "may not have more than " << MAX_MDS << " MDS ranks"; return -EINVAL; } + fsmap.modify_filesystem( fs->fscid, [n](std::shared_ptr fs) @@ -437,7 +439,12 @@ public: } }); - ss << "marked " << (is_down ? "down" : "up"); + if (is_down) { + ss << " marked down. "; + } else { + ss << " marked up, max_mds = " << fs->mds_map.get_max_mds(); + } + } else if (var == "standby_count_wanted") { if (interr.length()) { ss << var << " requires an integer value"; diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 24159e70833a2..bb15aab4bcdac 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -1323,8 +1323,6 @@ int MDSMonitor::filesystem_command( r = 0; mds_gid_t gid = fs->mds_map.up.at(role.rank); ss << "telling mds." << role << " " - << pending.get_info_gid(gid).addr << " to deactivate"; - pending.modify_daemon(gid, [](MDSMap::mds_info_t *info) { info->state = MDSMap::STATE_STOPPING; }); @@ -1762,19 +1760,29 @@ int MDSMonitor::print_nodes(Formatter *f) /** * If a cluster is undersized (with respect to max_mds), then - * attempt to find daemons to grow it. + * attempt to find daemons to grow it. If the cluster is oversized + * (with respect to max_mds) then shrink it by stopping its highest rank. 
*/ -bool MDSMonitor::maybe_expand_cluster(std::shared_ptr &fs) +bool MDSMonitor::maybe_resize_cluster(std::shared_ptr &fs) { - bool do_propose = false; auto &pending = get_pending_fsmap_writeable(); + int in = fs->mds_map.get_num_in_mds(); + int max = fs->mds_map.get_max_mds(); - if (fs->mds_map.test_flag(CEPH_MDSMAP_DOWN)) { - return do_propose; + dout(20) << __func__ << " in " << in << " max " << max << dendl; + + if (fs->mds_map.is_degraded()) { + dout(5) << "not resizing degraded MDS cluster " + << fs->mds_map.fs_name << dendl; + return false; } - while (fs->mds_map.get_num_in_mds() < size_t(fs->mds_map.get_max_mds()) && - !fs->mds_map.is_degraded()) { + if (fs->mds_map.get_num_mds(CEPH_MDS_STATE_STOPPING)) { + dout(5) << "An MDS for " << fs->mds_map.fs_name + << " is stopping; waiting to resize" << dendl; + } + + if (in < max) { mds_rank_t mds = mds_rank_t(0); string name; while (fs->mds_map.is_in(mds)) { @@ -1783,7 +1791,7 @@ bool MDSMonitor::maybe_expand_cluster(std::shared_ptr &fs) mds_gid_t newgid = pending.find_replacement_for({fs->fscid, mds}, name, g_conf->mon_force_standby_active); if (newgid == MDS_GID_NONE) { - break; + return false; } const auto &new_info = pending.get_info_gid(newgid); @@ -1795,10 +1803,28 @@ bool MDSMonitor::maybe_expand_cluster(std::shared_ptr &fs) << mds << " (now has " << fs->mds_map.get_num_in_mds() + 1 << " ranks)"; pending.promote(newgid, fs, mds); - do_propose = true; + return true; } - return do_propose; + if (in > max) { + mds_rank_t target = in - 1; + mds_gid_t target_gid = fs->mds_map.get_info(target).global_id; + if (fs->mds_map.get_state(target) == CEPH_MDS_STATE_ACTIVE) { + dout(1) << "deactivating " << target << dendl; + mon->clog->info() << "deactivating " + << fs->mds_map.get_info(target).human_name(); + fsmap.modify_daemon(target_gid, + [] (MDSMap::mds_info_t *info) { + info->state = MDSMap::STATE_STOPPING; + }); + return true; + } else { + dout(20) << "skipping deactivate on " << target << dendl; + return false; 
+ } + } + + return false; } @@ -2012,9 +2038,9 @@ void MDSMonitor::tick() do_propose |= pending.check_health(); - // expand mds cluster (add new nodes to @in)? + // resize mds cluster (adjust @in)? for (auto &p : pending.filesystems) { - do_propose |= maybe_expand_cluster(p.second); + do_propose |= maybe_resize_cluster(p.second); } const auto now = ceph_clock_now(); diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h index d02af4b2d94fe..e7a80b331cd4f 100644 --- a/src/mon/MDSMonitor.h +++ b/src/mon/MDSMonitor.h @@ -113,7 +113,7 @@ class MDSMonitor : public PaxosService, public PaxosFSMap { std::list > handlers; bool maybe_promote_standby(std::shared_ptr &fs); - bool maybe_expand_cluster(std::shared_ptr &fs); + bool maybe_resize_cluster(std::shared_ptr &fs); void maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info, bool *mds_propose, bool *osd_propose); void tick() override; // check state, take actions -- 2.39.5