cephfs: add new down/joinable fs flags

author Patrick Donnelly <pdonnell@redhat.com>

Fri, 13 Apr 2018 06:04:31 +0000 (23:04 -0700)

committer Patrick Donnelly <pdonnell@redhat.com>

Tue, 17 Apr 2018 18:25:59 +0000 (11:25 -0700)
author Patrick Donnelly <pdonnell@redhat.com>
Fri, 13 Apr 2018 06:04:31 +0000 (23:04 -0700)
committer Patrick Donnelly <pdonnell@redhat.com>
Tue, 17 Apr 2018 18:25:59 +0000 (11:25 -0700)
diff --git a/PendingReleaseNotes b/PendingReleaseNotes

index 562c54948460c843c35e00429d76746148d4fc3c..7bbeff7f810e4bafaa3239a4915f8871c2cd5581 100644 (file)
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -45,6 +45,14 @@
      max_mds. Accordingly, ceph mds deactivate has been deprecated as it
      is now redundant.
  
+  * Taking a CephFS cluster down is now done by setting the down flag, which
+    deactivates all MDS daemons.
+
+  * Preventing standbys from joining as new actives (formerly the cluster_down
+    flag) on a file system is now accomplished by setting the joinable flag to
+    false. This is useful mostly for testing so that a file system may be
+    quickly brought down and deleted.
+
    * New CephFS file system attributes session_timeout and session_autoclose
      are configurable via `ceph fs set`. The MDS config options
      mds_session_timeout, mds_session_autoclose, and mds_max_file_size are now
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py

index cc877d24e6299277dba7785775d081fc9877744b..a90c99d215fb60c9155b0dc8200a63853f604c25 100644 (file)
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -274,6 +274,12 @@ class MDSCluster(CephCluster):
      def status(self):
          return FSStatus(self.mon_manager)
  
+    def set_down(self, down=True):
+        self.mon_manager.raw_cluster_cmd("fs", "set", str(self.name), "down", str(down).lower())
+
+    def set_joinable(self, joinable=True):
+        self.mon_manager.raw_cluster_cmd("fs", "set", str(self.name), "joinable", str(joinable).lower())
+
      def delete_all_filesystems(self):
          """
          Remove all filesystems that exist, and any pools in use by them.
@@ -286,7 +292,7 @@ class MDSCluster(CephCluster):
          # mark cluster down for each fs to prevent churn during deletion
          status = self.status()
          for fs in status.get_filesystems():
-            self.mon_manager.raw_cluster_cmd("fs", "set", fs['mdsmap']['fs_name'], "cluster_down", "true")
+            self.mon_manager.raw_cluster_cmd("fs", "set", str(fs['mdsmap']['fs_name']), "joinable", "false")
  
          # get a new copy as actives may have since changed
          status = self.status()
@@ -553,8 +559,10 @@ class Filesystem(MDSCluster):
      def _df(self):
          return json.loads(self.mon_manager.raw_cluster_cmd("df", "--format=json-pretty"))
  
-    def get_mds_map(self):
-        return self.status().get_fsmap(self.id)['mdsmap']
+    def get_mds_map(self, status=None):
+        if status is None:
+            status = self.status()
+        return status.get_fsmap(self.id)['mdsmap']
  
      def get_var(self, var):
          return self.status().get_fsmap(self.id)['mdsmap'][var]
@@ -630,7 +638,7 @@ class Filesystem(MDSCluster):
      def get_usage(self):
          return self._df()['stats']['total_used_bytes']
  
-    def are_daemons_healthy(self):
+    def are_daemons_healthy(self, status=None):
          """
          Return true if all daemons are in one of active, standby, standby-replay, and
          at least max_mds daemons are in 'active'.
@@ -648,7 +656,7 @@ class Filesystem(MDSCluster):
  
          active_count = 0
          try:
-            mds_map = self.get_mds_map()
+            mds_map = self.get_mds_map(status=status)
          except CommandFailedError as cfe:
              # Old version, fall back to non-multi-fs commands
              if cfe.exitstatus == errno.EINVAL:
@@ -670,7 +678,10 @@ class Filesystem(MDSCluster):
              active_count, mds_map['max_mds']
          ))
  
-        if active_count >= mds_map['max_mds']:
+        if active_count > mds_map['max_mds']:
+            log.info("are_daemons_healthy: number of actives is grater than max_mds: {0}".format(mds_map))
+            return False
+        elif active_count == mds_map['max_mds']:
              # The MDSMap says these guys are active, but let's check they really are
              for mds_id, mds_status in mds_map['info'].items():
                  if mds_status['state'] == 'up:active':
@@ -692,15 +703,15 @@ class Filesystem(MDSCluster):
          else:
              return False
  
-    def get_daemon_names(self, state=None):
+    def get_daemon_names(self, state=None, status=None):
          """
          Return MDS daemon names of those daemons in the given state
          :param state:
          :return:
          """
-        status = self.get_mds_map()
+        mdsmap = self.get_mds_map(status)
          result = []
-        for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
+        for mds_status in sorted(mdsmap['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
              if mds_status['state'] == state or state is None:
                  result.append(mds_status['name'])
  
@@ -715,25 +726,25 @@ class Filesystem(MDSCluster):
          """
          return self.get_daemon_names("up:active")
  
-    def get_all_mds_rank(self):
-        status = self.get_mds_map()
+    def get_all_mds_rank(self, status=None):
+        mdsmap = self.get_mds_map(status)
          result = []
-        for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
+        for mds_status in sorted(mdsmap['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
              if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
                  result.append(mds_status['rank'])
  
          return result
  
-    def get_rank_names(self):
+    def get_rank_names(self, status=None):
          """
          Return MDS daemon names of those daemons holding a rank,
          sorted by rank.  This includes e.g. up:replay/reconnect
          as well as active, but does not include standby or
          standby-replay.
          """
-        status = self.get_mds_map()
+        mdsmap = self.get_mds_map(status)
          result = []
-        for mds_status in sorted(status['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
+        for mds_status in sorted(mdsmap['info'].values(), lambda a, b: cmp(a['rank'], b['rank'])):
              if mds_status['rank'] != -1 and mds_status['state'] != 'up:standby-replay':
                  result.append(mds_status['name'])
  
@@ -750,8 +761,9 @@ class Filesystem(MDSCluster):
  
          elapsed = 0
          while True:
-            if self.are_daemons_healthy():
-                return
+            status = self.status()
+            if self.are_daemons_healthy(status=status):
+                return status
              else:
                  time.sleep(1)
                  elapsed += 1
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h

index 060c890ae93051b87042d6f4c492410a8a8c2f70..2be246397bd9f8562ace45c8bde9d9c3bdc6681f 100644 (file)
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -231,12 +231,13 @@ struct ceph_mon_subscribe_ack {
  /*
   * mdsmap flags
   */
-#define CEPH_MDSMAP_DOWN    (1<<0)  /* cluster deliberately down */
-#define CEPH_MDSMAP_ALLOW_SNAPS   (1<<1)  /* cluster allowed to create snapshots */
+#define CEPH_MDSMAP_NOT_JOINABLE                 (1<<0)  /* standbys cannot join */
+#define CEPH_MDSMAP_DOWN                         (CEPH_MDSMAP_NOT_JOINABLE) /* backwards compat */
+#define CEPH_MDSMAP_ALLOW_SNAPS                  (1<<1)  /* cluster allowed to create snapshots */
  /* deprecated #define CEPH_MDSMAP_ALLOW_MULTIMDS (1<<2) cluster allowed to have >1 active MDS */
  /* deprecated #define CEPH_MDSMAP_ALLOW_DIRFRAGS (1<<3) cluster allowed to fragment directories */
  
-#define CEPH_MDSMAP_DEFAULTS 0
+#define CEPH_MDSMAP_DEFAULTS (0)
  
  /*
   * mds states
diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc

index a6dcd0763b1b5b439fbb3459d16e1132a78c7ad8..09c4892dd2de83ea444909c9ccb5dfd5e23298ed 100644 (file)
--- a/src/mon/FSCommands.cc
+++ b/src/mon/FSCommands.cc
@@ -261,9 +261,7 @@ public:
        if (n <= 0) {
          ss << "You must specify at least one MDS";
          return -EINVAL;
-      }
-
-      if (n > MAX_MDS) {
+      } else if (n > MAX_MDS) {
          ss << "may not have more than " << MAX_MDS << " MDS ranks";
          return -EINVAL;
        }
@@ -272,7 +270,7 @@ public:
            fs->fscid,
            [n](std::shared_ptr<Filesystem> fs)
        {
-       fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN);
+       fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE);
          fs->mds_map.set_max_mds(n);
        });
      } else if (var == "inline_data") {
@@ -370,10 +368,14 @@ public:
         ss << "enabled new snapshots";
        }
      } else if (var == "allow_multimds") {
-          ss << "Multiple MDS is always enabled. Use the max_mds parameter to control the number of active MDSs allowed. This command is DEPRECATED and will be REMOVED from future releases.";
+        ss << "Multiple MDS is always enabled. Use the max_mds"
+           << " parameter to control the number of active MDSs"
+           << " allowed. This command is DEPRECATED and will be"
+           << " REMOVED from future releases.";
      } else if (var == "allow_dirfrags") {
-       ss << "Directory fragmentation is now permanently enabled. This command is DEPRECATED and will be REMOVED from future releases.";
-    } else if (var == "cluster_down") {
+        ss << "Directory fragmentation is now permanently enabled."
+           << " This command is DEPRECATED and will be REMOVED from future releases.";
+    } else if (var == "down") {
        bool is_down = false;
        int r = parse_bool(val, &is_down, ss);
        if (r != 0) {
@@ -387,12 +389,10 @@ public:
            [is_down](std::shared_ptr<Filesystem> fs)
        {
         if (is_down) {
-         fs->mds_map.set_flag(CEPH_MDSMAP_DOWN);
           fs->mds_map.set_old_max_mds();
           fs->mds_map.set_max_mds(0);
         } else {
           mds_rank_t oldmax = fs->mds_map.get_old_max_mds();
-         fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN);
           fs->mds_map.set_max_mds(oldmax ? oldmax : 1);
         }
        });
@@ -402,7 +402,39 @@ public:
        } else {
         ss << " marked up, max_mds = " << fs->mds_map.get_max_mds();
        }
+    } else if (var == "cluster_down" || var == "joinable") {
+      bool joinable = true;
+      int r = parse_bool(val, &joinable, ss);
+      if (r != 0) {
+        return r;
+      }
+      if (var == "cluster_down") {
+        joinable = !joinable;
+      }
+
+      ss << fs->mds_map.get_fs_name();
  
+      fsmap.modify_filesystem(
+          fs->fscid,
+          [joinable](std::shared_ptr<Filesystem> fs)
+      {
+       if (joinable) {
+         fs->mds_map.clear_flag(CEPH_MDSMAP_NOT_JOINABLE);
+       } else {
+         fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
+       }
+      });
+
+      if (joinable) {
+       ss << " marked joinable; MDS may join as newly active.";
+      } else {
+       ss << " marked not joinable; MDS cannot join as newly active.";
+      }
+
+      if (var == "cluster_down") {
+        ss << " WARNING: cluster_down flag is deprecated and will be"
+           << " removed in a future version. Please use \"joinable\".";
+      }
      } else if (var == "standby_count_wanted") {
        if (interr.length()) {
         ss << var << " requires an integer value";
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc

index 71da3575d5975a9859d43d2754b22ac220157bb7..e2bbdb2252ebae88508886ee8f00fe045f485054 100644 (file)
--- a/src/mon/MDSMonitor.cc
+++ b/src/mon/MDSMonitor.cc
@@ -1295,7 +1295,9 @@ int MDSMonitor::filesystem_command(
    auto &pending = get_pending_fsmap_writeable();
  
    if (prefix == "mds deactivate") {
-    ss << "This command is deprecated because it is obsolete; to deactivate one or more MDS, decrease max_mds appropriately (ceph fs set <fsname> max_mds)";
+    ss << "This command is deprecated because it is obsolete;"
+       << " to deactivate one or more MDS, decrease max_mds appropriately"
+       << " (ceph fs set <fsname> max_mds)";
    } else if (prefix == "mds set_state") {
      mds_gid_t gid;
      if (!cmd_getval(g_ceph_context, cmdmap, "gid", gid)) {
@@ -1749,9 +1751,10 @@ bool MDSMonitor::maybe_resize_cluster(std::shared_ptr<Filesystem> &fs)
    if (fs->mds_map.get_num_mds(CEPH_MDS_STATE_STOPPING)) {
      dout(5) << "An MDS for " << fs->mds_map.fs_name
                  << " is stopping; waiting to resize" << dendl;
+    return false;
    }
  
-  if (in < max) {
+  if (in < max && !fs->mds_map.test_flag(CEPH_MDSMAP_NOT_JOINABLE)) {
      mds_rank_t mds = mds_rank_t(0);
      string name;
      while (fs->mds_map.is_in(mds)) {
@@ -1773,19 +1776,16 @@ bool MDSMonitor::maybe_resize_cluster(std::shared_ptr<Filesystem> &fs)
                        << " ranks)";
      pending.promote(newgid, fs, mds);
      return true;
-  }
-
-  if (in > max) {
+  } else if (in > max) {
      mds_rank_t target = in - 1;
-    mds_gid_t target_gid = fs->mds_map.get_info(target).global_id;
-    if (fs->mds_map.get_state(target) == CEPH_MDS_STATE_ACTIVE) {
+    const auto &info = fs->mds_map.get_info(target);
+    if (fs->mds_map.is_active(target)) {
        dout(1) << "deactivating " << target << dendl;
-      mon->clog->info() << "deactivating "
-                       << fs->mds_map.get_info(target).human_name();
-      fsmap.modify_daemon(target_gid,
-                                 [] (MDSMap::mds_info_t *info) {
-                                   info->state = MDSMap::STATE_STOPPING;
-                                 });
+      mon->clog->info() << "deactivating " << info.human_name();
+      pending.modify_daemon(info.global_id,
+                            [] (MDSMap::mds_info_t *info) {
+                                info->state = MDSMap::STATE_STOPPING;
+                            });
        return true;
      } else {
        dout(20) << "skipping deactivate on " << target << dendl;
@@ -1829,7 +1829,7 @@ void MDSMonitor::maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info
        info.state != MDSMap::STATE_STANDBY &&
        info.state != MDSMap::STATE_STANDBY_REPLAY &&
        may_replace &&
-      !pending.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_DOWN) &&
+      !pending.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_NOT_JOINABLE) &&
        (sgid = pending.find_replacement_for({fscid, info.rank}, info.name,
                  g_conf->mon_force_standby_active)) != MDS_GID_NONE)
    {
@@ -1878,7 +1878,9 @@ void MDSMonitor::maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info
  
  bool MDSMonitor::maybe_promote_standby(std::shared_ptr<Filesystem> &fs)
  {
-  assert(!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN));
+  if (fs->mds_map.test_flag(CEPH_MDSMAP_NOT_JOINABLE)) {
+    return false;
+  }
  
    auto &pending = get_pending_fsmap_writeable();
  
@@ -2075,10 +2077,7 @@ void MDSMonitor::tick()
    }
  
    for (auto &p : pending.filesystems) {
-    auto &fs = p.second;
-    if (!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN)) {
-      do_propose |= maybe_promote_standby(fs);
-    }
+    do_propose |= maybe_promote_standby(p.second);
    }
  
    if (do_propose) {
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h

index 01983d6d391673e504f4ff12e2c669cb9dac5a46..d68b3e220d95af30be7742a86967bdeca5e0c5e1 100644 (file)
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -386,7 +386,8 @@ COMMAND("fs set " \
         "name=fs_name,type=CephString " \
         "name=var,type=CephChoices,strings=max_mds|max_file_size"
          "|allow_new_snaps|inline_data|cluster_down|allow_dirfrags|balancer" \
-        "|standby_count_wanted|session_timeout|session_autoclose " \
+        "|standby_count_wanted|session_timeout|session_autoclose" \
+        "|down|joinable " \
         "name=val,type=CephString "                                     \
         "name=confirm,type=CephString,req=false",                       \
         "set fs parameter <var> to <val>", "mds", "rw", "cli,rest")
diff --git a/src/test/pybind/test_ceph_argparse.py b/src/test/pybind/test_ceph_argparse.py

index b636cb5211566783c80e7449d42498fd75d2b0a4..146fa58eb80a3bb79c33f409a1a3023d4e789dd9 100755 (executable)
--- a/src/test/pybind/test_ceph_argparse.py
+++ b/src/test/pybind/test_ceph_argparse.py
@@ -445,10 +445,16 @@ class TestFS(TestArgparse):
          self.assert_valid_command(['fs', 'set', 'default', 'max_mds', '2'])
  
      def test_fs_set_cluster_down(self):
-        self.assert_valid_command(['fs', 'set', 'default', 'cluster_down', 'true'])
+        self.assert_valid_command(['fs', 'set', 'default', 'down', 'true'])
  
      def test_fs_set_cluster_up(self):
-        self.assert_valid_command(['fs', 'set', 'default', 'cluster_down', 'false'])
+        self.assert_valid_command(['fs', 'set', 'default', 'down', 'false'])
+
+    def test_fs_set_cluster_joinable(self):
+        self.assert_valid_command(['fs', 'set', 'default', 'joinable', 'true'])
+
+    def test_fs_set_cluster_not_joinable(self):
+        self.assert_valid_command(['fs', 'set', 'default', 'joinable', 'false'])
  
      def test_fs_set(self):
          self.assert_valid_command(['fs', 'set', 'default', 'max_file_size', '2'])
author	Patrick Donnelly <pdonnell@redhat.com>
	Fri, 13 Apr 2018 06:04:31 +0000 (23:04 -0700)
committer	Patrick Donnelly <pdonnell@redhat.com>
	Tue, 17 Apr 2018 18:25:59 +0000 (11:25 -0700)
PendingReleaseNotes		patch \| blob \| history
qa/tasks/cephfs/filesystem.py		patch \| blob \| history
src/include/ceph_fs.h		patch \| blob \| history
src/mon/FSCommands.cc		patch \| blob \| history
src/mon/MDSMonitor.cc		patch \| blob \| history
src/mon/MonCommands.h		patch \| blob \| history
src/test/pybind/test_ceph_argparse.py		patch \| blob \| history