From ab4f33bd248bf192c1a81fea05a81c7f85ede52d Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Thu, 12 Apr 2018 23:04:31 -0700
Subject: [PATCH] cephfs: add new down/joinable fs flags

Fixes: http://tracker.ceph.com/issues/20609
Signed-off-by: Patrick Donnelly
---
 PendingReleaseNotes                   |  8 +++++
 qa/tasks/cephfs/filesystem.py         | 46 +++++++++++++++---------
 src/include/ceph_fs.h                 |  7 ++--
 src/mon/FSCommands.cc                 | 50 ++++++++++++++++++++++-----
 src/mon/MDSMonitor.cc                 | 37 ++++++++++----------
 src/mon/MonCommands.h                 |  3 +-
 src/test/pybind/test_ceph_argparse.py | 10 ++++--
 7 files changed, 110 insertions(+), 51 deletions(-)

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index 562c5494846..7bbeff7f810 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -45,6 +45,14 @@
   max_mds. Accordingly, ceph mds deactivate has been deprecated as it is now
   redundant.
 
+* Taking a CephFS cluster down is now done by setting the down flag which
+  deactivates all MDS.
+
+* Preventing standbys from joining as new actives (formerly the cluster_down
+  flag) on a file system is now accomplished by setting the joinable flag.
+  This is useful mostly for testing so that a file system may be quickly
+  brought down and deleted.
+
 * New CephFS file system attributes session_timeout and session_autoclose
   are configurable via `ceph fs set`. The MDS config options
   mds_session_timeout, mds_session_autoclose, and mds_max_file_size are now
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index cc877d24e62..a90c99d215f 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -274,6 +274,12 @@ class MDSCluster(CephCluster):
     def status(self):
         return FSStatus(self.mon_manager)
 
+    def set_down(self, down=True):
+        self.mon_manager.raw_cluster_cmd("fs", "set", str(self.name), "down", str(down).lower())
+
+    def set_joinable(self, joinable=True):
+        self.mon_manager.raw_cluster_cmd("fs", "set", str(self.name), "joinable", str(joinable).lower())
+
     def delete_all_filesystems(self):
         """
         Remove all filesystems that exist, and any pools in use by them.
@@ -286,7 +292,7 @@ class MDSCluster(CephCluster):
         # mark cluster down for each fs to prevent churn during deletion
         status = self.status()
         for fs in status.get_filesystems():
-            self.mon_manager.raw_cluster_cmd("fs", "set", fs['mdsmap']['fs_name'], "cluster_down", "true")
+            self.mon_manager.raw_cluster_cmd("fs", "set", str(fs['mdsmap']['fs_name']), "joinable", "false")
 
         # get a new copy as actives may have since changed
         status = self.status()
@@ -553,8 +559,10 @@ class Filesystem(MDSCluster):
     def _df(self):
         return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty"))
 
-    def get_mds_map(self):
-        return self.status().get_fsmap(self.id)['mdsmap']
+    def get_mds_map(self, status=None):
+        if status is None:
+            status = self.status()
+        return status.get_fsmap(self.id)['mdsmap']
 
     def get_var(self, var):
         return self.status().get_fsmap(self.id)['mdsmap'][var]
@@ -630,7 +638,7 @@ class Filesystem(MDSCluster):
     def get_usage(self):
         return self._df()['stats']['total_used_bytes']
 
-    def are_daemons_healthy(self):
+    def are_daemons_healthy(self, status=None):
        """
        Return true if all daemons are in one of active, standby, standby-replay, and
        at least max_mds daemons are in 'active'.
@@ -648,7 +656,7 @@ class Filesystem(MDSCluster):
 
         active_count = 0
         try:
-            mds_map = self.get_mds_map()
+            mds_map = self.get_mds_map(status=status)
         except CommandFailedError as cfe:
             # Old version, fall back to non-multi-fs commands
             if cfe.exitstatus == errno.EINVAL:
@@ -670,7 +678,10 @@ class Filesystem(MDSCluster):
                     active_count, mds_map['max_mds']
                 ))
 
-            if active_count >= mds_map['max_mds']:
+            if active_count > mds_map['max_mds']:
+                log.info("are_daemons_healthy: number of actives is greater than max_mds: {0}".format(mds_map))
+                return False
+            elif active_count == mds_map['max_mds']:
                 # The MDSMap says these guys are active, but let's check they really are
                 for mds_id, mds_status in mds_map['info'].items():
                     if mds_status['state'] == 'up:active':
@@ -692,15 +703,15 @@ class Filesystem(MDSCluster):
         else:
             return False
 
-    def get_daemon_names(self, state=None):
+    def get_daemon_names(self, state=None, status=None):
        """
        Return MDS daemon names of those daemons in the given state
 
        :param state:
        :return:
        """
-        status = self.get_mds_map()
+        mdsmap = self.get_mds_map(status)
         result = []
-        for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
+        for mds_status in sorted(mdsmap['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
             if mds_status['state'] == state or state is None:
                 result.append(mds_status['name'])
@@ -715,25 +726,25 @@ class Filesystem(MDSCluster):
        """
        return self.get_daemon_names("up:active")
 
-    def get_all_mds_rank(self):
-        status = self.get_mds_map()
+    def get_all_mds_rank(self, status=None):
+        mdsmap = self.get_mds_map(status)
         result = []
-        for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
+        for mds_status in sorted(mdsmap['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
             if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
                 result.append(mds_status['rank'])
 
         return result
 
-    def get_rank_names(self):
+    def get_rank_names(self, status=None):
        """
        Return MDS daemon names of those daemons holding a rank,
        sorted by rank. This includes e.g. up:replay/reconnect
        as well as active, but does not include standby or
        standby-replay.
""" - status = self.get_mds_map() + mdsmap = self.get_mds_map(status) result = [] - for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])): + for mds_status in sorted(mdsmap['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])): if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay': result.append(mds_status['name']) @@ -750,8 +761,9 @@ class Filesystem(MDSCluster): elapsed = 0 while True: - if self.are_daemons_healthy(): - return + status = self.status() + if self.are_daemons_healthy(status=status): + return status else: time.sleep(1) elapsed += 1 diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 060c890ae93..2be246397bd 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -231,12 +231,13 @@ struct ceph_mon_subscribe_ack { /* * mdsmap flags */ -#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */ -#define CEPH_MDSMAP_ALLOW_SNAPS (1<<1) /* cluster allowed to create snapshots */ +#define CEPH_MDSMAP_NOT_JOINABLE (1<<0) /* standbys cannot join */ +#define CEPH_MDSMAP_DOWN (CEPH_MDSMAP_NOT_JOINABLE) /* backwards compat */ +#define CEPH_MDSMAP_ALLOW_SNAPS (1<<1) /* cluster allowed to create snapshots */ /* deprecated #define CEPH_MDSMAP_ALLOW_MULTIMDS (1<<2) cluster allowed to have >1 active MDS */ /* deprecated #define CEPH_MDSMAP_ALLOW_DIRFRAGS (1<<3) cluster allowed to fragment directories */ -#define CEPH_MDSMAP_DEFAULTS 0 +#define CEPH_MDSMAP_DEFAULTS (0) /* * mds states diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index a6dcd0763b1..09c4892dd2d 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -261,9 +261,7 @@ public: if (n <= 0) { ss << "You must specify at least one MDS"; return -EINVAL; - } - - if (n > MAX_MDS) { + } else if (n > MAX_MDS) { ss << "may not have more than " << MAX_MDS << " MDS ranks"; return -EINVAL; } @@ -272,7 +270,7 @@ public: fs->fscid, [n](std::shared_ptr fs) { - fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN); + fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE); fs->mds_map.set_max_mds(n); }); } else if (var == "inline_data") { @@ -370,10 +368,14 @@ public: ss << "enabled new snapshots"; } } else if (var == "allow_multimds") { - ss << "Multiple MDS is always enabled. Use the max_mds parameter to control the number of active MDSs allowed. This command is DEPRECATED and will be REMOVED from future releases."; + ss << "Multiple MDS is always enabled. Use the max_mds" + << " parameter to control the number of active MDSs" + << " allowed. This command is DEPRECATED and will be" + << " REMOVED from future releases."; } else if (var == "allow_dirfrags") { - ss << "Directory fragmentation is now permanently enabled. This command is DEPRECATED and will be REMOVED from future releases."; - } else if (var == "cluster_down") { + ss << "Directory fragmentation is now permanently enabled." + << " This command is DEPRECATED and will be REMOVED from future releases."; + } else if (var == "down") { bool is_down = false; int r = parse_bool(val, &is_down, ss); if (r != 0) { @@ -387,12 +389,10 @@ public: [is_down](std::shared_ptr fs) { if (is_down) { - fs->mds_map.set_flag(CEPH_MDSMAP_DOWN); fs->mds_map.set_old_max_mds(); fs->mds_map.set_max_mds(0); } else { mds_rank_t oldmax = fs->mds_map.get_old_max_mds(); - fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN); fs->mds_map.set_max_mds(oldmax ? 
         }
       });
@@ -402,7 +402,39 @@ public:
       } else {
         ss << " marked up, max_mds = " << fs->mds_map.get_max_mds();
       }
+    } else if (var == "cluster_down" || var == "joinable") {
+      bool joinable = true;
+      int r = parse_bool(val, &joinable, ss);
+      if (r != 0) {
+        return r;
+      }
+      if (var == "cluster_down") {
+        joinable = !joinable;
+      }
+
+      ss << fs->mds_map.get_fs_name();
+      fsmap.modify_filesystem(
+          fs->fscid,
+          [joinable](std::shared_ptr<Filesystem> fs)
+      {
+        if (joinable) {
+          fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE);
+        } else {
+          fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
+        }
+      });
+
+      if (joinable) {
+        ss << " marked joinable; MDS may join as newly active.";
+      } else {
+        ss << " marked not joinable; MDS cannot join as newly active.";
+      }
+
+      if (var == "cluster_down") {
+        ss << " WARNING: cluster_down flag is deprecated and will be"
+           << " removed in a future version. Please use \"joinable\".";
+      }
     } else if (var == "standby_count_wanted") {
       if (interr.length()) {
         ss << var << " requires an integer value";
         return -EINVAL;
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc
index 71da3575d59..e2bbdb2252e 100644
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -1295,7 +1295,9 @@ int MDSMonitor::filesystem_command(
   auto &pending = get_pending_fsmap_writeable();
 
   if (prefix == "mds deactivate") {
-    ss << "This command is deprecated because it is obsolete; to deactivate one or more MDS, decrease max_mds appropriately (ceph fs set <fs_name> max_mds)";
+    ss << "This command is deprecated because it is obsolete;"
+       << " to deactivate one or more MDS, decrease max_mds appropriately"
+       << " (ceph fs set <fs_name> max_mds)";
   } else if (prefix == "mds set_state") {
     mds_gid_t gid;
     if (!cmd_getval(g_ceph_context, cmdmap, "gid", gid)) {
@@ -1749,9 +1751,10 @@ bool MDSMonitor::maybe_resize_cluster(std::shared_ptr<Filesystem> &fs)
   if (fs->mds_map.get_num_mds(CEPH_MDS_STATE_STOPPING)) {
     dout(5) << "An MDS for " << fs->mds_map.fs_name
             << " is stopping; waiting to resize" << dendl;
+    return false;
   }
 
-  if (in < max) {
+  if (in < max && !fs->mds_map.test_flag(CEPH_MDSMAP_NOT_JOINABLE)) {
     mds_rank_t mds = mds_rank_t(0);
     string name;
     while (fs->mds_map.is_in(mds)) {
@@ -1773,19 +1776,16 @@ bool MDSMonitor::maybe_resize_cluster(std::shared_ptr<Filesystem> &fs)
             << " ranks)";
     pending.promote(newgid, fs, mds);
     return true;
-  }
-
-  if (in > max) {
+  } else if (in > max) {
     mds_rank_t target = in - 1;
-    mds_gid_t target_gid = fs->mds_map.get_info(target).global_id;
-    if (fs->mds_map.get_state(target) == CEPH_MDS_STATE_ACTIVE) {
+    const auto &info = fs->mds_map.get_info(target);
+    if (fs->mds_map.is_active(target)) {
       dout(1) << "deactivating " << target << dendl;
-      mon->clog->info() << "deactivating "
-                        << fs->mds_map.get_info(target).human_name();
-      fsmap.modify_daemon(target_gid,
-          [] (MDSMap::mds_info_t *info) {
-            info->state = MDSMap::STATE_STOPPING;
-          });
+      mon->clog->info() << "deactivating " << info.human_name();
+      pending.modify_daemon(info.global_id,
+          [] (MDSMap::mds_info_t *info) {
+            info->state = MDSMap::STATE_STOPPING;
+          });
       return true;
     } else {
       dout(20) << "skipping deactivate on " << target << dendl;
@@ -1829,7 +1829,7 @@ void MDSMonitor::maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info
       info.state != MDSMap::STATE_STANDBY &&
       info.state != MDSMap::STATE_STANDBY_REPLAY &&
       may_replace &&
-      !pending.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_DOWN) &&
+      !pending.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_NOT_JOINABLE) &&
       (sgid = pending.find_replacement_for({fscid, info.rank}, info.name,
                g_conf->mon_force_standby_active)) != MDS_GID_NONE)
   {
@@ -1878,7 +1878,9 @@
 
 bool MDSMonitor::maybe_promote_standby(std::shared_ptr<Filesystem> &fs)
 {
-  assert(!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN));
+  if (fs->mds_map.test_flag(CEPH_MDSMAP_NOT_JOINABLE)) {
+    return false;
+  }
 
   auto &pending = get_pending_fsmap_writeable();
 
@@ -2075,10 +2077,7 @@ void MDSMonitor::tick()
   }
 
   for (auto &p : pending.filesystems) {
-    auto &fs = p.second;
-    if (!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN)) {
-      do_propose |= maybe_promote_standby(fs);
-    }
+    do_propose |= maybe_promote_standby(p.second);
   }
 
   if (do_propose) {
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 01983d6d391..d68b3e220d9 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -386,7 +386,8 @@ COMMAND("fs set " \
 	"name=fs_name,type=CephString " \
 	"name=var,type=CephChoices,strings=max_mds|max_file_size" \
 	"|allow_new_snaps|inline_data|cluster_down|allow_dirfrags|balancer" \
-	"|standby_count_wanted|session_timeout|session_autoclose " \
+	"|standby_count_wanted|session_timeout|session_autoclose" \
+	"|down|joinable " \
 	"name=val,type=CephString " \
 	"name=confirm,type=CephString,req=false", \
 	"set fs parameter <var> to <val>", "mds", "rw", "cli,rest")
diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py
index b636cb52115..146fa58eb80 100755
--- a/src/test/pybind/test_ceph_argparse.py
+++ b/src/test/pybind/test_ceph_argparse.py
@@ -445,10 +445,16 @@ class TestFS(TestArgparse):
         self.assert_valid_command(['fs', 'set', 'default', 'max_mds', '2'])
 
     def test_fs_set_cluster_down(self):
-        self.assert_valid_command(['fs', 'set', 'default', 'cluster_down', 'true'])
+        self.assert_valid_command(['fs', 'set', 'default', 'down', 'true'])
 
     def test_fs_set_cluster_up(self):
-        self.assert_valid_command(['fs', 'set', 'default', 'cluster_down', 'false'])
+        self.assert_valid_command(['fs', 'set', 'default', 'down', 'false'])
+
+    def test_fs_set_cluster_joinable(self):
+        self.assert_valid_command(['fs', 'set', 'default', 'joinable', 'true'])
+
+    def test_fs_set_cluster_not_joinable(self):
+        self.assert_valid_command(['fs', 'set', 'default', 'joinable', 'false'])
 
     def test_fs_set(self):
         self.assert_valid_command(['fs', 'set', 'default', 'max_file_size', '2'])
-- 
2.39.5
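
Usage sketch (a reviewer aid, not part of the diff): the QA helpers added to
qa/tasks/cephfs/filesystem.py map directly onto the new monitor commands.
The snippet below assumes `fs` is a Filesystem instance from that module,
attached to a live test cluster; the joinable flow mirrors what
delete_all_filesystems now does during teardown.

    # "ceph fs set <fs_name> down true": stash max_mds, set it to 0, and
    # deactivate every rank (per the FSCommands.cc "down" handler above).
    fs.set_down()

    # "ceph fs set <fs_name> down false": restore the stashed max_mds so
    # standbys may take ranks again.
    fs.set_down(down=False)

    # "ceph fs set <fs_name> joinable false": set CEPH_MDSMAP_NOT_JOINABLE so
    # no standby may become active (replaces "cluster_down true").
    fs.set_joinable(False)

    # "ceph fs set <fs_name> joinable true": clear the flag.
    fs.set_joinable()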