git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: add configurable snapshot limit 33295/head
author Milind Changire <mchangir@redhat.com>
Fri, 13 Mar 2020 07:44:35 +0000 (13:14 +0530)
committer Milind Changire <mchangir@redhat.com>
Fri, 13 Mar 2020 07:44:35 +0000 (13:14 +0530)
Added config option "mds_max_snaps_per_dir" defaulting to 100

Fixes: https://tracker.ceph.com/issues/41209
Signed-off-by: Milind Changire <mchangir@redhat.com>
(cherry picked from commit c15905cd9774a28d07d9a1151e18feb89d024d97)

Conflicts:
        src/common/options.cc (differences between octopus and nautilus)

qa/tasks/cephfs/test_snapshots.py
src/common/options.cc
src/mds/MDSRank.cc
src/mds/Server.cc
src/mds/Server.h

index 7c45ed877eedfd3624472e708e488dfc7782dba6..7f81a76b992dea93eb20ecd542b1771cf651ba68 100644 (file)
@@ -1,6 +1,8 @@
+import sys
 import logging
 import signal
 import time
+import errno
 from textwrap import dedent
 from tasks.cephfs.fuse_mount import FuseMount
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
@@ -13,6 +15,7 @@ MDS_RESTART_GRACE = 60
 
 class TestSnapshots(CephFSTestCase):
     MDSS_REQUIRED = 3
+    LOAD_SETTINGS = ["mds_max_snaps_per_dir"]
 
     def _check_subtree(self, rank, path, status=None):
         got_subtrees = self.fs.rank_asok(["get", "subtrees"], rank=rank, status=status)
@@ -443,3 +446,89 @@ class TestSnapshots(CephFSTestCase):
 
         self.mount_a.run_shell(["rmdir", "d1/.snap/s1"])
         self.mount_a.run_shell(["rm", "-rf", "d0", "d1"])
+
+    class SnapLimitViolationException(Exception):
+        """
+        Raised by create_dir_and_snaps() when creating the last requested
+        snapshot fails. `failed_snapshot_number` holds that snapshot's number.
+        """
+        # class-level default; every instance overrides it in __init__
+        failed_snapshot_number = -1
+
+        def __init__(self, num):
+            # initialise the base class with a message so the exception does
+            # not render as an empty string in logs and tracebacks
+            Exception.__init__(
+                self, "failed to create snapshot #{}".format(num))
+            self.failed_snapshot_number = num
+
+    def get_snap_name(self, dir_name, sno):
+        """
+        Return the path of snapshot number `sno` of `dir_name`, i.e.
+        "<dir_name>/.snap/s_<sno>".
+        """
+        return "{0}/.snap/s_{1}".format(dir_name, sno)
+
+    def create_snap_dir(self, sname):
+        """Run `mkdir <sname>` via self.mount_a.run_shell()."""
+        mkdir_cmd = ["mkdir", sname]
+        self.mount_a.run_shell(mkdir_cmd)
+
+    def delete_dir_and_snaps(self, dir_name, snaps):
+        """
+        rmdir snapshots s_1 .. s_<snaps> of `dir_name` in ascending order,
+        then rmdir `dir_name` itself.
+        """
+        snap_paths = [self.get_snap_name(dir_name, n)
+                      for n in range(1, snaps + 1)]
+        for path in snap_paths:
+            self.mount_a.run_shell(["rmdir", path])
+        # the directory itself goes last, after all of its snapshots
+        self.mount_a.run_shell(["rmdir", dir_name])
+
+    def create_dir_and_snaps(self, dir_name, snaps):
+        """
+        Create `dir_name`, then snapshots s_1 .. s_<snaps> inside it.
+
+        Raises TestSnapshots.SnapLimitViolationException when creating the
+        last snapshot (number `snaps`) fails. A CommandFailedError on any
+        earlier snapshot is re-raised unchanged.
+        """
+        self.mount_a.run_shell(["mkdir", dir_name])
+
+        for sno in range(1, snaps + 1):
+            sname = self.get_snap_name(dir_name, sno)
+            try:
+                self.create_snap_dir(sname)
+            except CommandFailedError as e:
+                if sno != snaps:
+                    # only the last mkdir is allowed to fail; an earlier
+                    # failure must not be hidden from the caller
+                    raise
+                # failing at the last mkdir beyond the limit is expected
+                log.info("failed while creating snap #{}: {}".format(sno, repr(e)))
+                raise TestSnapshots.SnapLimitViolationException(sno)
+
+    def test_mds_max_snaps_per_dir_default_limit(self):
+        """
+        Test the option named mds_max_snaps_per_dir at its loaded value
+        (the option defaults to 100): create that many snapshots in one
+        directory, then remove the snapshots and the directory again.
+        """
+        limit = int(self.mds_max_snaps_per_dir)
+        self.create_dir_and_snaps("accounts", limit)
+        self.delete_dir_and_snaps("accounts", limit)
+
+    def test_mds_max_snaps_per_dir_with_increased_limit(self):
+        """
+        Test raising mds_max_snaps_per_dir at runtime.
+        With the limit at N, try to create N + 1 snapshots and require that
+        creation of snapshot N + 1 is rejected. Then raise the limit to
+        N + 1 and check that the additional snapshot can now be created.
+        """
+        # first test the current limit
+        new_limit = int(self.mds_max_snaps_per_dir)
+        self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
+        try:
+            self.create_dir_and_snaps("accounts", new_limit + 1)
+        except TestSnapshots.SnapLimitViolationException as e:
+            # only a failure on the snapshot just past the limit is expected
+            if e.failed_snapshot_number != new_limit + 1:
+                raise
+        else:
+            # no exception means the over-limit snapshot was created
+            raise AssertionError(
+                "snapshot #{} was created although the limit is {}".format(
+                    new_limit + 1, new_limit))
+        # then increase the limit by one and test
+        new_limit = new_limit + 1
+        self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
+        sname = self.get_snap_name("accounts", new_limit)
+        self.create_snap_dir(sname)
+        self.delete_dir_and_snaps("accounts", new_limit)
+
+    def test_mds_max_snaps_per_dir_with_reduced_limit(self):
+        """
+        Test lowering mds_max_snaps_per_dir at runtime.
+        With the limit at N, create N - 1 snapshots. Then reduce the limit
+        to N - 2, try to create one more snapshot and require that this
+        creation fails.
+        """
+        # first create one snapshot fewer than the current limit
+        new_limit = int(self.mds_max_snaps_per_dir) - 1
+        self.create_dir_and_snaps("accounts", new_limit)
+        sname = self.get_snap_name("accounts", new_limit + 1)
+        # then reduce the limit by one and test
+        new_limit = new_limit - 1
+        self.fs.rank_asok(['config', 'set', 'mds_max_snaps_per_dir', repr(new_limit)])
+        try:
+            self.create_snap_dir(sname)
+        except CommandFailedError:
+            # after reducing limit we expect the new snapshot creation to fail
+            pass
+        else:
+            # the mkdir went through, so the reduced limit was not enforced
+            raise AssertionError(
+                "{} was created although the limit is {}".format(
+                    sname, new_limit))
+        # new_limit + 1 is the number of snapshots created above
+        self.delete_dir_and_snaps("accounts", new_limit + 1)
index 95ddada8ef942d60fd298bd66e9213de8d6be4d0..0ed33f2b039ff09dc1d7daaa2b1b827736292921 100644 (file)
@@ -8160,6 +8160,13 @@ std::vector<Option> get_mds_options() {
      .set_default(0)
      .set_description("threshold for cache usage to disallow \"dump cache\" operation to file")
      .set_long_description("Disallow MDS from dumping caches to file via \"dump cache\" command if cache usage exceeds this threshold."),
+
+    Option("mds_max_snaps_per_dir", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+     .set_default(100)
+     .set_min_max(0, 4096)
+     .set_flag(Option::FLAG_RUNTIME)
+     .set_description("max snapshots per directory")
+     .set_long_description("maximum number of snapshots that can be created per directory"),
   });
 }
 
index 9fc70fb3ebdd84f57f88fb2c334cae2c8d51de25..1d3306f52cae14651b2a346efa9d04c30dbf962f 100644 (file)
@@ -3690,6 +3690,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const
     "mds_max_purge_files",
     "mds_max_purge_ops",
     "mds_max_purge_ops_per_pg",
+    "mds_max_snaps_per_dir",
     "mds_op_complaint_time",
     "mds_op_history_duration",
     "mds_op_history_size",
index 42d15f96427931f589dedd1fbd83e9987ec007bf..5fc238cf98f72ed52c90008206053ea756eea61f 100644 (file)
@@ -193,6 +193,7 @@ Server::Server(MDSRank *m) :
   terminating_sessions(false),
   recall_throttle(g_conf().get_val<double>("mds_recall_max_decay_rate"))
 {
+  max_snaps_per_dir = g_conf().get_val<uint64_t>("mds_max_snaps_per_dir");
   replay_unsafe_with_closed_session = g_conf().get_val<bool>("mds_replay_unsafe_with_closed_session");
   cap_revoke_eviction_timeout = g_conf().get_val<double>("mds_cap_revoke_eviction_timeout");
   supported_features = feature_bitset_t(CEPHFS_FEATURES_MDS_SUPPORTED);
@@ -1110,6 +1111,11 @@ void Server::handle_conf_change(const std::set<std::string>& changed) {
   if (changed.count("mds_recall_max_decay_rate")) {
     recall_throttle = DecayCounter(g_conf().get_val<double>("mds_recall_max_decay_rate"));
   }
+  if (changed.count("mds_max_snaps_per_dir")) {
+    max_snaps_per_dir = g_conf().get_val<uint64_t>("mds_max_snaps_per_dir");
+    dout(20) << __func__ << " max snapshots per directory changed to "
+            << max_snaps_per_dir << dendl;
+  }
 }
 
 /*
@@ -9682,6 +9688,14 @@ void Server::handle_client_mksnap(MDRequestRef& mdr)
   if (!check_access(mdr, diri, MAY_WRITE|MAY_SNAPSHOT))
     return;
 
+  // check if we can create any more snapshots
+  // we don't allow any more if we are already at or beyond the limit
+  if (diri->snaprealm &&
+      diri->snaprealm->get_snaps().size() >= max_snaps_per_dir) {
+    respond_to_request(mdr, -EMLINK);
+    return;
+  }
+
   // make sure name is unique
   if (diri->snaprealm &&
       diri->snaprealm->exists(snapname)) {
index 074887ac4c2e0a7f68ef8658dae62b82538bf672..b41c83b2f6d7aeaf6e764c37da4661983f13cf53 100644 (file)
@@ -107,6 +107,7 @@ private:
 
   bool replay_unsafe_with_closed_session = false;
   double cap_revoke_eviction_timeout = 0;
+  uint64_t max_snaps_per_dir = 100;
 
   friend class MDSContinuation;
   friend class ServerContext;