git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: trim pinned and empty subtrees
author: Patrick Donnelly <pdonnell@redhat.com>
Fri, 5 Jun 2020 02:00:04 +0000 (19:00 -0700)
committer: Patrick Donnelly <pdonnell@redhat.com>
Wed, 24 Jun 2020 22:43:31 +0000 (15:43 -0700)
Before this change, export (and ephemeral) pinned subtrees were stuck in
cache forever.

Add a qa test checking that export pinned directories can be trimmed.

Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
qa/tasks/cephfs/filesystem.py
qa/tasks/cephfs/test_exports.py
src/mds/MDCache.cc

index 0f72c8ee7eb74cde2361eca1c113eaa7684c8354..bb8e41e83d2a10dde4f7097a767ce98a7fc29cca 100644 (file)
@@ -844,9 +844,11 @@ class Filesystem(MDSCluster):
 
         return result
 
-    def get_rank(self, rank=0, status=None):
+    def get_rank(self, rank=None, status=None):
         if status is None:
             status = self.getinfo()
+        if rank is None:
+            rank = 0
         return status.get_rank(self.id, rank)
 
     def rank_restart(self, rank=0, status=None):
@@ -1016,6 +1018,12 @@ class Filesystem(MDSCluster):
         info = self.get_rank(rank=rank, status=status)
         return json.loads(self.mon_manager.raw_cluster_cmd("tell", 'mds.{0}'.format(info['name']), *command))
 
+    def ranks_tell(self, command, status=None):
+        if status is None:
+            status = self.status()
+        for r in status.get_ranks(self.id):
+            self.rank_tell(command, rank=r['rank'], status=status)
+
     def read_cache(self, path, depth=None):
         cmd = ["dump", "tree", path]
         if depth is not None:
index d53a33ef3a11397896398e0910f0b5016375acd4..76eb9fa518b4afaca2afa0d33250226728f41186 100644 (file)
@@ -137,6 +137,23 @@ class TestExports(CephFSTestCase):
         if (len(self.fs.get_active_names()) > 2):
             self.assertEqual(self.mount_a.getfattr("1/2/3", "ceph.dir.pin"), '2')
 
+    def test_export_pin_cache_drop(self):
+        """
+        That the export pin does not prevent empty (nothing in cache) subtree merging.
+        """
+
+        self.fs.set_max_mds(2)
+        status = self.fs.wait_for_daemons()
+        self.mount_a.run_shell(f"mkdir -p foo")
+        self.mount_a.setfattr(f"foo", "ceph.dir.pin", "0")
+        self.mount_a.run_shell(["bash", "-c", Raw(f"'mkdir -p foo/bar/baz && setfattr -n ceph.dir.pin -v 1 foo/bar'")])
+        self._wait_subtrees([('/foo/bar', 1), ('/foo', 0)], status=status)
+        self.mount_a.umount_wait() # release all caps
+        def _drop():
+            self.fs.ranks_tell(["cache", "drop"], status=status)
+        # drop cache multiple times to clear replica pins
+        self._wait_subtrees([], status=status, action=_drop)
+
     def test_session_race(self):
         """
         Test session creation race.
index 980967f925a486c55581b3be2b90c95965629c6c..c7045663c1e70ebd6813500a95658753acf40c6e 100644 (file)
@@ -6754,32 +6754,38 @@ std::pair<bool, uint64_t> MDCache::trim(uint64_t count)
     ++p;
     CInode *diri = dir->get_inode();
     if (dir->is_auth()) {
-      if (!diri->is_auth() && !diri->is_base() &&
-         dir->get_num_head_items() == 0) {
-       if (dir->state_test(CDir::STATE_EXPORTING) ||
-           !(mds->is_active() || mds->is_stopping()) ||
-           dir->is_freezing() || dir->is_frozen())
-         continue;
+      if (diri->is_auth() && !diri->is_base()) {
+        /* this situation should correspond to an export pin */
+        if (dir->get_num_head_items() == 0 && dir->get_num_ref() == 1) {
+          /* pinned empty subtree, try to drop */
+          if (dir->state_test(CDir::STATE_AUXSUBTREE)) {
+            dout(20) << "trimming empty pinned subtree " << *dir << dendl;
+            dir->state_clear(CDir::STATE_AUXSUBTREE);
+            remove_subtree(dir);
+            diri->close_dirfrag(dir->dirfrag().frag);
+          }
+        }
+      } else if (!diri->is_auth() && !diri->is_base() && dir->get_num_head_items() == 0) {
+        if (dir->state_test(CDir::STATE_EXPORTING) ||
+           !(mds->is_active() || mds->is_stopping()) ||
+           dir->is_freezing() || dir->is_frozen())
+          continue;
 
-       migrator->export_empty_import(dir);
+        migrator->export_empty_import(dir);
         ++trimmed;
       }
-    } else {
-      if (!diri->is_auth()) {
-       if (dir->get_num_ref() > 1)  // only subtree pin
-         continue;
-       auto&& ls = diri->get_subtree_dirfrags();
-       if (diri->get_num_ref() > (int)ls.size()) // only pinned by subtrees
-         continue;
+    } else if (!diri->is_auth() && dir->get_num_ref() <= 1) {
+      // only subtree pin
+      auto&& ls = diri->get_subtree_dirfrags();
+      if (diri->get_num_ref() > (int)ls.size()) // only pinned by subtrees
+        continue;
 
-       // don't trim subtree root if its auth MDS is recovering.
-       // This simplify the cache rejoin code.
-       if (dir->is_subtree_root() &&
-           rejoin_ack_gather.count(dir->get_dir_auth().first))
-         continue;
-       trim_dirfrag(dir, 0, expiremap);
-        ++trimmed;
-      }
+      // don't trim subtree root if its auth MDS is recovering.
+      // This simplify the cache rejoin code.
+      if (dir->is_subtree_root() && rejoin_ack_gather.count(dir->get_dir_auth().first))
+        continue;
+      trim_dirfrag(dir, 0, expiremap);
+      ++trimmed;
     }
   }