From 5eea6761cc7e5950810cb0a625d49bd3f788183c Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Thu, 4 Jun 2020 19:00:04 -0700 Subject: [PATCH] mds: trim pinned and empty subtrees Previously, export (and ephemeral) pinned subtrees were stuck in cache forever. Add qa test for checking export pinned directories can be trimmed. Signed-off-by: Patrick Donnelly --- qa/tasks/cephfs/filesystem.py | 10 ++++++- qa/tasks/cephfs/test_exports.py | 17 +++++++++++ src/mds/MDCache.cc | 50 ++++++++++++++++++--------------- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py index 0f72c8ee7eb..bb8e41e83d2 100644 --- a/qa/tasks/cephfs/filesystem.py +++ b/qa/tasks/cephfs/filesystem.py @@ -844,9 +844,11 @@ class Filesystem(MDSCluster): return result - def get_rank(self, rank=0, status=None): + def get_rank(self, rank=None, status=None): if status is None: status = self.getinfo() + if rank is None: + rank = 0 return status.get_rank(self.id, rank) def rank_restart(self, rank=0, status=None): @@ -1016,6 +1018,12 @@ class Filesystem(MDSCluster): info = self.get_rank(rank=rank, status=status) return json.loads(self.mon_manager.raw_cluster_cmd("tell", 'mds.{0}'.format(info['name']), *command)) + def ranks_tell(self, command, status=None): + if status is None: + status = self.status() + for r in status.get_ranks(self.id): + self.rank_tell(command, rank=r['rank'], status=status) + def read_cache(self, path, depth=None): cmd = ["dump", "tree", path] if depth is not None: diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py index d53a33ef3a1..76eb9fa518b 100644 --- a/qa/tasks/cephfs/test_exports.py +++ b/qa/tasks/cephfs/test_exports.py @@ -137,6 +137,23 @@ class TestExports(CephFSTestCase): if (len(self.fs.get_active_names()) > 2): self.assertEqual(self.mount_a.getfattr("1/2/3", "ceph.dir.pin"), '2') + def test_export_pin_cache_drop(self): + """ + That the export pin does not prevent empty (nothing 
in cache) subtree merging. + """ + + self.fs.set_max_mds(2) + status = self.fs.wait_for_daemons() + self.mount_a.run_shell(f"mkdir -p foo") + self.mount_a.setfattr(f"foo", "ceph.dir.pin", "0") + self.mount_a.run_shell(["bash", "-c", Raw(f"'mkdir -p foo/bar/baz && setfattr -n ceph.dir.pin -v 1 foo/bar'")]) + self._wait_subtrees([('/foo/bar', 1), ('/foo', 0)], status=status) + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=status) + # drop cache multiple times to clear replica pins + self._wait_subtrees([], status=status, action=_drop) + def test_session_race(self): """ Test session creation race. diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 980967f925a..c7045663c1e 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -6754,32 +6754,38 @@ std::pair MDCache::trim(uint64_t count) ++p; CInode *diri = dir->get_inode(); if (dir->is_auth()) { - if (!diri->is_auth() && !diri->is_base() && - dir->get_num_head_items() == 0) { - if (dir->state_test(CDir::STATE_EXPORTING) || - !(mds->is_active() || mds->is_stopping()) || - dir->is_freezing() || dir->is_frozen()) - continue; + if (diri->is_auth() && !diri->is_base()) { + /* this situation should correspond to an export pin */ + if (dir->get_num_head_items() == 0 && dir->get_num_ref() == 1) { + /* pinned empty subtree, try to drop */ + if (dir->state_test(CDir::STATE_AUXSUBTREE)) { + dout(20) << "trimming empty pinned subtree " << *dir << dendl; + dir->state_clear(CDir::STATE_AUXSUBTREE); + remove_subtree(dir); + diri->close_dirfrag(dir->dirfrag().frag); + } + } + } else if (!diri->is_auth() && !diri->is_base() && dir->get_num_head_items() == 0) { + if (dir->state_test(CDir::STATE_EXPORTING) || + !(mds->is_active() || mds->is_stopping()) || + dir->is_freezing() || dir->is_frozen()) + continue; - migrator->export_empty_import(dir); + migrator->export_empty_import(dir); ++trimmed; } - } else { - if (!diri->is_auth()) { - if (dir->get_num_ref() > 1) // 
only subtree pin - continue; - auto&& ls = diri->get_subtree_dirfrags(); - if (diri->get_num_ref() > (int)ls.size()) // only pinned by subtrees - continue; + } else if (!diri->is_auth() && dir->get_num_ref() <= 1) { + // only subtree pin + auto&& ls = diri->get_subtree_dirfrags(); + if (diri->get_num_ref() > (int)ls.size()) // only pinned by subtrees + continue; - // don't trim subtree root if its auth MDS is recovering. - // This simplify the cache rejoin code. - if (dir->is_subtree_root() && - rejoin_ack_gather.count(dir->get_dir_auth().first)) - continue; - trim_dirfrag(dir, 0, expiremap); - ++trimmed; - } + // don't trim subtree root if its auth MDS is recovering. + // This simplify the cache rejoin code. + if (dir->is_subtree_root() && rejoin_ack_gather.count(dir->get_dir_auth().first)) + continue; + trim_dirfrag(dir, 0, expiremap); + ++trimmed; } } -- 2.39.5