From: Patrick Donnelly
Date: Tue, 9 Jun 2020 22:28:21 +0000 (-0700)
Subject: qa: add more ephemeral pin tests
X-Git-Tag: v15.2.5~172^2~5
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8fc589eb02d1207247ad96f7a96654f28d67ecc0;p=ceph.git

qa: add more ephemeral pin tests

Signed-off-by: Patrick Donnelly
(cherry picked from commit aef549e49e6fe69635307a0c1f66221e75d81f5d)
---

diff --git a/qa/suites/multimds/basic/tasks/cephfs_test_exports.yaml b/qa/suites/multimds/basic/tasks/cephfs_test_exports.yaml
index 46334fe16d0..6eb6c987c36 100644
--- a/qa/suites/multimds/basic/tasks/cephfs_test_exports.yaml
+++ b/qa/suites/multimds/basic/tasks/cephfs_test_exports.yaml
@@ -1,3 +1,7 @@
+overrides:
+  ceph:
+    log-whitelist:
+      - Replacing daemon mds
 tasks:
 - cephfs_test_runner:
     fail_on_skip: false
diff --git a/qa/tasks/cephfs/cephfs_test_case.py b/qa/tasks/cephfs/cephfs_test_case.py
index eee6bb7a036..1c206dc592e 100644
--- a/qa/tasks/cephfs/cephfs_test_case.py
+++ b/qa/tasks/cephfs/cephfs_test_case.py
@@ -288,13 +288,22 @@ class CephFSTestCase(CephTestCase):
         else:
             log.info("No core_pattern directory set, nothing to clear (internal.coredump not enabled?)")
 
-    def _get_subtrees(self, status=None, rank=None):
+    def _get_subtrees(self, status=None, rank=None, path=None):
+        if path is None:
+            path = "/"
         try:
             with contextutil.safe_while(sleep=1, tries=3) as proceed:
                 while proceed():
                     try:
-                        subtrees = self.fs.rank_asok(["get", "subtrees"], status=status, rank=rank)
-                        subtrees = filter(lambda s: s['dir']['path'].startswith('/'), subtrees)
+                        if rank == "all":
+                            subtrees = []
+                            for r in self.fs.get_ranks(status=status):
+                                s = self.fs.rank_asok(["get", "subtrees"], status=status, rank=r['rank'])
+                                s = filter(lambda s: s['auth_first'] == r['rank'] and s['auth_second'] == -2, s)
+                                subtrees += s
+                        else:
+                            subtrees = self.fs.rank_asok(["get", "subtrees"], status=status, rank=rank)
+                        subtrees = filter(lambda s: s['dir']['path'].startswith(path), subtrees)
                         return list(subtrees)
                     except CommandFailedError as e:
                         # Sometimes we get transient errors
@@ -305,12 +314,12 @@ class CephFSTestCase(CephTestCase):
         except contextutil.MaxWhileTries as e:
             raise RuntimeError(f"could not get subtree state from rank {rank}") from e
 
-    def _wait_subtrees(self, test, status=None, rank=None, timeout=30, sleep=2, action=None):
+    def _wait_subtrees(self, test, status=None, rank=None, timeout=30, sleep=2, action=None, path=None):
         test = sorted(test)
         try:
             with contextutil.safe_while(sleep=sleep, tries=timeout//sleep) as proceed:
                 while proceed():
-                    subtrees = self._get_subtrees(status=status, rank=rank)
+                    subtrees = self._get_subtrees(status=status, rank=rank, path=path)
                     filtered = sorted([(s['dir']['path'], s['auth_first']) for s in subtrees])
                     log.info("%s =?= %s", filtered, test)
                     if filtered == test:
@@ -332,42 +341,26 @@ class CephFSTestCase(CephTestCase):
             if out_json['status'] == "no active scrubs running":
                 break;
 
-    def _wait_distributed_subtrees(self, status, rank, count):
-        timeout = 30
-        pause = 2
-        for i in range(timeout//pause):
-            subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=status.get_rank(self.fs.id, rank)['name'])
-            subtrees = filter(lambda s: s['dir']['path'].startswith('/'), subtrees)
-            subtrees = list(filter(lambda s: s['distributed_ephemeral_pin'] == 1, subtrees))
-            if (len(subtrees) == count):
-                return subtrees
-            time.sleep(pause)
-        raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank))
-
-    def get_auth_subtrees(self, status, rank):
-        subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=status.get_rank(self.fs.id, rank)['name'])
self.fs.mds_asok(["get", "subtrees"], mds_id=status.get_rank(self.fs.id, rank)['name']) - subtrees = filter(lambda s: s['dir']['path'].startswith('/'), subtrees) - subtrees = filter(lambda s: s['auth_first'] == rank, subtrees) - - return list(subtrees) - - def get_ephemerally_pinned_auth_subtrees(self, status, rank): - subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=status.get_rank(self.fs.id, rank)['name']) - subtrees = filter(lambda s: s['dir']['path'].startswith('/'), subtrees) - subtrees = filter(lambda s: (s['distributed_ephemeral_pin'] == 1 or s['random_ephemeral_pin'] == 1) and (s['auth_first'] == rank), subtrees) - - return list(subtrees) - - def get_distributed_auth_subtrees(self, status, rank): - subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=status.get_rank(self.fs.id, rank)['name']) - subtrees = filter(lambda s: s['dir']['path'].startswith('/'), subtrees) - subtrees = filter(lambda s: (s['distributed_ephemeral_pin'] == 1) and (s['auth_first'] == rank), subtrees) - - return list(subtrees) - - def get_random_auth_subtrees(self, status, rank): - subtrees = self.fs.mds_asok(["get", "subtrees"], mds_id=status.get_rank(self.fs.id, rank)['name']) - subtrees = filter(lambda s: s['dir']['path'].startswith('/'), subtrees) - subtrees = filter(lambda s: (s['random_ephemeral_pin'] == 1) and (s['auth_first'] == rank), subtrees) - - return list(subtrees) + def _wait_distributed_subtrees(self, count, status=None, rank=None, path=None): + try: + with contextutil.safe_while(sleep=5, tries=20) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + subtrees = list(filter(lambda s: s['distributed_ephemeral_pin'] == True, subtrees)) + log.info(f"len={len(subtrees)} {subtrees}") + if len(subtrees) >= count: + return subtrees + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e + + def _wait_random_subtrees(self, count, status=None, rank=None, path=None): + try: + with contextutil.safe_while(sleep=5, tries=20) as proceed: + while proceed(): + subtrees = self._get_subtrees(status=status, rank=rank, path=path) + subtrees = list(filter(lambda s: s['random_ephemeral_pin'] == True, subtrees)) + log.info(f"len={len(subtrees)} {subtrees}") + if len(subtrees) >= count: + return subtrees + except contextutil.MaxWhileTries as e: + raise RuntimeError("rank {0} failed to reach desired subtree state".format(rank)) from e diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py index 3ae1db50001..a59c7dc23df 100644 --- a/qa/tasks/cephfs/filesystem.py +++ b/qa/tasks/cephfs/filesystem.py @@ -1021,8 +1021,18 @@ class Filesystem(MDSCluster): def ranks_tell(self, command, status=None): if status is None: status = self.status() + out = [] for r in status.get_ranks(self.id): - self.rank_tell(command, rank=r['rank'], status=status) + result = self.rank_tell(command, rank=r['rank'], status=status) + out.append((r['rank'], result)) + return sorted(out) + + def ranks_perf(self, f, status=None): + perf = self.ranks_tell(["perf", "dump"], status=status) + out = [] + for rank, perf in perf: + out.append((rank, f(perf))) + return out def read_cache(self, path, depth=None): cmd = ["dump", "tree", path] diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py index 47fb0e29925..3b0f5f3352c 100644 --- a/qa/tasks/cephfs/test_exports.py +++ b/qa/tasks/cephfs/test_exports.py @@ -1,5 +1,7 @@ import logging +import random import time +import unittest from 
diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py
index 47fb0e29925..3b0f5f3352c 100644
--- a/qa/tasks/cephfs/test_exports.py
+++ b/qa/tasks/cephfs/test_exports.py
@@ -1,5 +1,7 @@
 import logging
+import random
 import time
+import unittest
 from tasks.cephfs.fuse_mount import FuseMount
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
 from teuthology.orchestra.run import CommandFailedError, Raw
@@ -144,9 +146,9 @@ class TestExports(CephFSTestCase):
         self.fs.set_max_mds(2)
         status = self.fs.wait_for_daemons()
 
-        self.mount_a.run_shell(f"mkdir -p foo")
+        self.mount_a.run_shell_payload(f"mkdir -p foo")
         self.mount_a.setfattr(f"foo", "ceph.dir.pin", "0")
-        self.mount_a.run_shell(["bash", "-c", Raw(f"'mkdir -p foo/bar/baz && setfattr -n ceph.dir.pin -v 1 foo/bar'")])
+        self.mount_a.run_shell_payload(f"mkdir -p foo/bar/baz && setfattr -n ceph.dir.pin -v 1 foo/bar")
         self._wait_subtrees([('/foo/bar', 1), ('/foo', 0)], status=status)
         self.mount_a.umount_wait() # release all caps
         def _drop():
@@ -191,199 +193,358 @@ class TestExports(CephFSTestCase):
         new_rank1 = self.fs.get_rank(rank=1)
         self.assertEqual(rank1['gid'], new_rank1['gid'])
 
-    def test_ephememeral_pin_distribution(self):
+class TestEphemeralPins(CephFSTestCase):
+    MDSS_REQUIRED = 3
+    CLIENTS_REQUIRED = 1
 
-        # Check if the subtree distribution under ephemeral distributed pin is fairly uniform
+    def setUp(self):
+        CephFSTestCase.setUp(self)
 
-        self.fs.set_max_mds(3)
-        self.fs.wait_for_daemons()
-
-        status = self.fs.status()
+        self.config_set('mds', 'mds_export_ephemeral_random', True)
+        self.config_set('mds', 'mds_export_ephemeral_distributed', True)
+        self.config_set('mds', 'mds_export_ephemeral_random_max', 1.0)
 
-        self.mount_a.run_shell(["mkdir", "-p", "a"])
-        self._wait_subtrees(status, 0, [])
+        self.mount_a.run_shell_payload(f"""
+set -e
 
-        for i in range(0,100):
-            self.mount_a.run_shell(["mkdir", "-p", "a/" + str(i) + "/d"])
-
-        self._wait_subtrees(status, 0, [])
-
-        self.mount_b.setfattr(["a", "ceph.dir.pin.distributed", "1"])
-
-        self._wait_distributed_subtrees([status, 0, 100])
-
-        # Check if distribution is uniform
-        rank0_distributed_subtree_ratio = len(self.get_distributed_auth_subtrees(status, 0))/len(self.get_auth_subtrees(status, 0))
-        self.assertGreaterEqual(rank0_distributed_subtree_ratio, 0.2)
+# Use up a random number of inode numbers so the ephemeral pinning is not the same every test.
+mkdir .inode_number_thrash
+count=$((RANDOM % 1024))
+for ((i = 0; i < count; i++)); do touch .inode_number_thrash/$i; done
+rm -rf .inode_number_thrash
+""")
 
-        rank1_distributed_subtree_ratio = len(self.get_distributed_auth_subtrees(status, 1))/len(self.get_auth_subtrees(status, 1))
-        self.assertGreaterEqual(rank1_distributed_subtree_ratio, 0.2)
-
-        rank2_distributed_subtree_ratio = len(self.get_distributed_auth_subtrees(status, 2))/len(self.get_auth_subtrees(status, 2))
-        self.assertGreaterEqual(rank2_distributed_subtree_ratio, 0.2)
-
-    def test_ephemeral_random(self):
-
-        # Check if export ephemeral random is applied hierarchically
-        self.fs.set_max_mds(3)
-        self.fs.wait_for_daemons()
+        self.status = self.fs.wait_for_daemons()
+
+    def _setup_tree(self, path="tree", export=-1, distributed=False, random=0.0, count=100, wait=True):
+        return self.mount_a.run_shell_payload(f"""
+set -e
+mkdir -p {path}
+{f"setfattr -n ceph.dir.pin -v {export} {path}" if export >= 0 else ""}
+{f"setfattr -n ceph.dir.pin.distributed -v 1 {path}" if distributed else ""}
+{f"setfattr -n ceph.dir.pin.random -v {random} {path}" if random > 0.0 else ""}
+for ((i = 0; i < {count}; i++)); do
+    mkdir -p "{path}/$i"
+    echo file > "{path}/$i/file"
+done
+""", wait=wait)
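Every test in TestEphemeralPins drives directory creation through _setup_tree,
which builds the tree on the client and applies at most one pin policy per call.
Typical invocations, drawn from the tests that follow:

    self._setup_tree(distributed=True)                      # 100 dirs under "tree", distributed pin set
    self._setup_tree(count=1000, random=0.5)                # each new child pinned with probability 0.5
    self._setup_tree(path="tree/pin", count=10, export=1)   # conventional export pin to rank 1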
+ """ - status = self.fs.status() + self._setup_tree(distributed=True) + subtrees = self._wait_distributed_subtrees(100, status=self.status, rank="all") + for s in subtrees: + path = s['dir']['path'] + if path == '/tree': + self.assertEqual(s['export_pin'], 0) + self.assertEqual(s['auth_first'], 0) + elif path.startswith('/tree/'): + self.assertEqual(s['export_pin'], -1) + self.assertTrue(s['distributed_ephemeral_pin']) + + def test_ephemeral_pin_dist_override_pin(self): + """ + That an export pin overrides an ephemerally pinned directory. + """ - tmp_dir = "" - for i in range(0, 100): - tmp_dir = tmp_dir + str(i) + "/" - self.mount_a.run_shell(["mkdir", "-p", tmp_dir]) - self.mount_b.setfattr([temp_dir, "ceph.dir.pin.random", "1"]) + self._setup_tree(distributed=True, export=0) + subtrees = self._wait_distributed_subtrees(100, status=self.status, rank="all", path="/tree/") + which = None + for s in subtrees: + if s['auth_first'] == 1: + path = s['dir']['path'] + self.mount_a.setfattr(path[1:], "ceph.dir.pin", "0") + which = path + break + self.assertIsNotNone(which) + time.sleep(15) + subtrees = self._get_subtrees(status=self.status, rank=0) + for s in subtrees: + path = s['dir']['path'] + if path == which: + self.assertEqual(s['auth_first'], 0) + self.assertFalse(s['distributed_ephemeral_pin']) + return + # it has been merged into /tree + + def test_ephemeral_pin_dist_off(self): + """ + That turning off ephemeral distributed pin merges subtrees. + """ - count = len(get_random_auth_subtrees(status,0)) - self.assertEqual(count, 100) + self._setup_tree(distributed=True, export=0) + self._wait_distributed_subtrees(100, status=self.status, rank="all") + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "0") + self._wait_subtrees([('/tree', 0)], status=self.status) - def test_ephemeral_pin_grow_mds(self): - - # Increase the no of MDSs and verify that the no of subtrees that migrate are less than 1/3 of the total no of subtrees that are ephemerally pinned + def test_ephemeral_pin_dist_conf_off(self): + """ + That turning off ephemeral distributed pin config prevents distribution. + """ - self.fs.set_max_mds(3) - self.fs.wait_for_daemons() + self._setup_tree(export=0) + self.config_set('mds', 'mds_export_ephemeral_distributed', False) + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1") + time.sleep(30) + self._wait_subtrees([('/tree', 0)], status=self.status) - status = self.fs.status() + def test_ephemeral_pin_dist_conf_off_merge(self): + """ + That turning off ephemeral distributed pin config merges subtrees. 
+ """ - for i in range(0,100): - self.mount_a.run_shell(["mkdir", "-p", "a/" + str(i) + "/d"]) - self._wait_subtrees(status, 0, []) - self.mount_b.setfattr(["a", "ceph.dir.pin.distributed", "1"]) - self._wait_distributed_subtrees([status, 0, 100]) + self._setup_tree(distributed=True, export=0) + self._wait_distributed_subtrees(100, status=self.status) + self.config_set('mds', 'mds_export_ephemeral_distributed', False) + self._wait_subtrees([('/tree', 0)], timeout=60, status=self.status) - subtrees_old = dict(get_ephemrally_pinned_auth_subtrees(status, 0).items() + get_ephemrally_pinned_auth_subtrees(status, 1).items() + get_ephemrally_pinned_auth_subtrees(status, 2).items()) - self.fs.set_max_mds(4) - self.fs.wait_for_daemons() - # Sleeping for a while to allow the ephemeral pin migrations to complete - time.sleep(15) - subtrees_new = dict(get_ephemrally_pinned_auth_subtrees(status, 0).items() + get_ephemrally_pinned_auth_subtrees(status, 1).items() + get_ephemrally_pinned_auth_subtrees(status, 2).items()) - for old_subtree in subtrees_old: - for new_subtree in subtrees_new: - if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): - count = count + 1 - break + def test_ephemeral_pin_dist_override_before(self): + """ + That a conventional export pin overrides the distributed policy _before_ distributed policy is set. + """ - assertLessEqual((count/subtrees_old), 0.33) + count = 10 + self._setup_tree(count=count) + test = [] + for i in range(count): + path = f"tree/{i}" + self.mount_a.setfattr(path, "ceph.dir.pin", "1") + test.append(("/"+path, 1)) + self.mount_a.setfattr("tree", "ceph.dir.pin.distributed", "1") + time.sleep(10) # for something to not happen... + self._wait_subtrees(test, timeout=60, status=self.status, rank="all", path="/tree/") + + def test_ephemeral_pin_dist_override_after(self): + """ + That a conventional export pin overrides the distributed policy _after_ distributed policy is set. + """ - def test_ephemeral_pin_shrink_mds(self): + self._setup_tree(count=10, distributed=True) + subtrees = self._wait_distributed_subtrees(10, status=self.status, rank="all") + victim = None + test = [] + for s in subtrees: + path = s['dir']['path'] + auth = s['auth_first'] + if auth in (0, 2) and victim is None: + victim = path + self.mount_a.setfattr(victim[1:], "ceph.dir.pin", "1") + test.append((victim, 1)) + else: + test.append((path, auth)) + self.assertIsNotNone(victim) + self._wait_subtrees(test, status=self.status, rank="all", path="/tree/") + + def test_ephemeral_pin_dist_failover(self): + """ + That MDS failover does not cause unnecessary migrations. + """ - # Shrink the no of MDSs + # pin /tree so it does not export during failover + self._setup_tree(distributed=True, export=0) + subtrees = self._wait_distributed_subtrees(100, status=self.status, rank="all") + test = [(s['dir']['path'], s['auth_first']) for s in subtrees] + before = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {before}") + self.fs.rank_fail(rank=1) + self.status = self.fs.wait_for_daemons() + time.sleep(10) # waiting for something to not happen + after = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {after}") + self.assertEqual(before, after) + + def test_ephemeral_pin_distribution(self): + """ + That ephemerally pinned subtrees are somewhat evenly distributed. 
+ """ self.fs.set_max_mds(3) - self.fs.wait_for_daemons() + self.status = self.fs.wait_for_daemons() - status = self.fs.status() - - for i in range(0,100): - self.mount_a.run_shell(["mkdir", "-p", "a/" + str(i) + "/d"]) - self._wait_subtrees(status, 0, []) - self.mount_b.setfattr(["a", "ceph.dir.pin.distributed", "1"]) - self._wait_distributed_subtrees([status, 0, 100]) + count = 1000 + self._setup_tree(count=count, distributed=True) + subtrees = self._wait_distributed_subtrees(count, status=self.status, rank="all") + nsubtrees = len(subtrees) - subtrees_old = dict(get_ephemrally_pinned_auth_subtrees(status, 0).items() + get_ephemrally_pinned_auth_subtrees(status, 1).items() + get_ephemrally_pinned_auth_subtrees(status, 2).items()) - self.fs.set_max_mds(2) - self.fs.wait_for_daemons() - time.sleep(15) + # Check if distribution is uniform + rank0 = list(filter(lambda x: x['auth_first'] == 0, subtrees)) + rank1 = list(filter(lambda x: x['auth_first'] == 1, subtrees)) + rank2 = list(filter(lambda x: x['auth_first'] == 2, subtrees)) + self.assertGreaterEqual(len(rank0)/nsubtrees, 0.2) + self.assertGreaterEqual(len(rank1)/nsubtrees, 0.2) + self.assertGreaterEqual(len(rank2)/nsubtrees, 0.2) - subtrees_new = dict(get_ephemrally_pinned_auth_subtrees(status, 0).items() + get_ephemrally_pinned_auth_subtrees(status, 1).items() + get_ephemrally_pinned_auth_subtrees(status, 2).items()) - for old_subtree in subtrees_old: - for new_subtree in subtrees_new: - if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): - count = count + 1 - break + def test_ephemeral_random(self): + """ + That 100% randomness causes all children to be pinned. + """ + self._setup_tree(random=1.0) + self._wait_random_subtrees(100, status=self.status, rank="all") - assertLessEqual((count/subtrees_old), 0.33) + def test_ephemeral_random_max(self): + """ + That the config mds_export_ephemeral_random_max is not exceeded. + """ - def test_ephemeral_pin_unset_config(self): + r = 0.5 + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.75), status=self.status, rank="all") + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + self._setup_tree(path="tree/new", count=count) + time.sleep(30) # for something not to happen... + subtrees = self._get_subtrees(status=self.status, rank="all", path="tree/new/") + self.assertLessEqual(len(subtrees), int(.01*count*1.25)) + + def test_ephemeral_random_max_config(self): + """ + That the config mds_export_ephemeral_random_max config rejects new OOB policies. + """ - # Check if unsetting the distributed pin config results in every distributed pin being unset + self.config_set('mds', 'mds_export_ephemeral_random_max', 0.01) + try: + p = self._setup_tree(count=1, random=0.02, wait=False) + p.wait() + except CommandFailedError as e: + log.info(f"{e}") + self.assertIn("Invalid", p.stderr.getvalue()) + else: + raise RuntimeError("mds_export_ephemeral_random_max ignored!") - self.fs.set_max_mds(3) - self.fs.wait_for_daemons() + def test_ephemeral_random_dist(self): + """ + That ephemeral random and distributed can coexist with each other. 
+ """ - status = self.fs.status() + self._setup_tree(random=1.0, distributed=True, export=0) + self._wait_distributed_subtrees(100, status=self.status) + self._wait_random_subtrees(100, status=self.status) - for i in range(0, 10): - self.mount_a.run_shell(["mkdir", "-p", i +"/dummy_dir"]) - self.mount_b.setfattr([i, "ceph.dir.pin.distributed", "1"]) + def test_ephemeral_random_pin_override_before(self): + """ + That a conventional export pin overrides the random policy before creating new directories. + """ - self._wait_distributed_subtrees([status, 0, 10]) + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=10, export=1) + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") - self.fs.mds_asok(["config", "set", "mds_export_ephemeral_distributed_config", "false"]) - # Sleep for a while to facilitate unsetting of the pins - time.sleep(15) - - for i in range(0, 10): - self.assertTrue(self.mount_a.getfattr(i, "ceph.dir.pin.distributed") == "0") + def test_ephemeral_random_pin_override_after(self): + """ + That a conventional export pin overrides the random policy after creating new directories. + """ - def test_ephemeral_distributed_pin_unset(self): + count = 10 + self._setup_tree(count=0, random=1.0) + self._setup_tree(path="tree/pin", count=count) + self._wait_random_subtrees(count+1, status=self.status, rank="all") + self.mount_a.setfattr(f"tree/pin", "ceph.dir.pin", "1") + self._wait_subtrees([("/tree/pin", 1)], status=self.status, rank=1, path="/tree/pin") - # Test if unsetting the distributed ephemeral pin on a parent directory then the children directory should not be ephemerally pinned anymore + def test_ephemeral_randomness(self): + """ + That the randomness is reasonable. + """ - self.fs.set_max_mds(3) - self.fs.wait_for_daemons() + r = random.uniform(0.25, 0.75) # ratios don't work for small r! + count = 1000 + self._setup_tree(count=count, random=r) + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + time.sleep(30) # for max to not be exceeded + subtrees = self._wait_random_subtrees(int(r*count*.50), status=self.status, rank="all") + self.assertLessEqual(len(subtrees), int(r*count*1.50)) - status = self.fs.status() + def test_ephemeral_random_cache_drop(self): + """ + That the random ephemeral pin does not prevent empty (nothing in cache) subtree merging. + """ - for i in range(0, 10): - self.mount_a.run_shell(["mkdir", "-p", i +"/a/b"]) - self.mount_b.setfattr([i, "ceph.dir.pin.distributed", "1"]) + count = 100 + self._setup_tree(count=count, random=1.0) + subtrees = self._wait_random_subtrees(count, status=self.status, rank="all") + self.mount_a.umount_wait() # release all caps + def _drop(): + self.fs.ranks_tell(["cache", "drop"], status=self.status) + self._wait_subtrees([], status=self.status, action=_drop) - self._wait_distributed_subtrees([status, 0, 10]) + def test_ephemeral_random_failover(self): + """ + That the random ephemeral pins stay pinned across MDS failover. 
+ """ - for i in range(0, 10): - self.mount_a.run_shell(["mkdir", "-p", i +"/a/b"]) - self.mount_b.setfattr([i, "ceph.dir.pin.distributed", "0"]) + count = 100 + r = 0.5 + self._setup_tree(count=count, random=r, export=0) + # wait for all random subtrees to be created, not a specific count + time.sleep(30) + subtrees = self._wait_random_subtrees(1, status=self.status, rank=1) + test = [(s['dir']['path'], s['auth_first']) for s in subtrees] + before = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {before}") + self.fs.rank_fail(rank=1) + self.status = self.fs.wait_for_daemons() + time.sleep(30) # waiting for something to not happen + self._wait_subtrees(test, status=self.status, rank=1) + after = self.fs.ranks_perf(lambda p: p['mds']['exported']) + log.info(f"export stats: {after}") + self.assertEqual(before, after) - time.sleep(15) + def test_ephemeral_pin_grow_mds(self): + """ + That consistent hashing works to reduce the number of migrations. + """ - subtree_count = len(get_distributed_auth_subtrees(status, 0)) - assertEqual(subtree_count, 0) + self.fs.set_max_mds(2) + self.status = self.fs.wait_for_daemons() - def test_ephemeral_standby(self): + self._setup_tree(distributed=True) + subtrees_old = self._wait_distributed_subtrees(100, status=self.status, rank="all") - # Test if the distribution is unaltered when a Standby MDS takes up a failed rank + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() - # Need all my standbys up as well as the active daemons - self.wait_for_daemon_start() - status = self.fs.status() - - for i in range(0, 10): - self.mount_a.run_shell(["mkdir", "-p", i +"/a/b"]) - self.mount_b.setfattr([i, "ceph.dir.pin.distributed", "1"]) - - self._wait_distributed_subtrees([status, 0, 10]) - - original_subtrees = get_ephemerally_pinned_auth_subtrees(status, 0) - - # Flush the journal for rank 0 - self.fs.rank_asok(["flush", "journal"], rank=0, status=status) - - (original_active, ) = self.fs.get_active_names() - original_standbys = self.mds_cluster.get_standby_daemons() + # Sleeping for a while to allow the ephemeral pin migrations to complete + time.sleep(30) + + subtrees_new = self._wait_distributed_subtrees(100, status=self.status, rank="all") + count = 0 + for old_subtree in subtrees_old: + for new_subtree in subtrees_new: + if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): + count = count + 1 + break - # Kill the rank 0 daemon's physical process - self.fs.mds_stop(original_active) + log.info("{0} migrations have occured due to the cluster resizing".format(count)) + # ~50% of subtrees from the two rank will migrate to another rank + self.assertLessEqual((count/len(subtrees_old)), (0.5)*1.25) # with 25% overbudget - grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon")) + def test_ephemeral_pin_shrink_mds(self): + """ + That consistent hashing works to reduce the number of migrations. 
+ """ - # Wait until the monitor promotes his replacement - def promoted(): - active = self.fs.get_active_names() - return active and active[0] in original_standbys + self.fs.set_max_mds(3) + self.status = self.fs.wait_for_daemons() - log.info("Waiting for promotion of one of the original standbys {0}".format( - original_standbys)) - self.wait_until_true( - promoted, - timeout=grace*2) + self._setup_tree(distributed=True) + subtrees_old = self._wait_distributed_subtrees(100, status=self.status, rank="all") - self.fs.wait_for_state('up:active', rank=0, timeout=MDS_RESTART_GRACE) + self.fs.set_max_mds(2) + self.status = self.fs.wait_for_daemons() + time.sleep(30) - new_subtrees = get_ephemerally_pinned_auth_subtrees(status, 0) + subtrees_new = self._wait_distributed_subtrees(100, status=self.status, rank="all") + count = 0 + for old_subtree in subtrees_old: + for new_subtree in subtrees_new: + if (old_subtree['dir']['path'] == new_subtree['dir']['path']) and (old_subtree['auth_first'] != new_subtree['auth_first']): + count = count + 1 + break - assertEqual(original_subtrees, new_subtrees) + log.info("{0} migrations have occured due to the cluster resizing".format(count)) + # rebalancing from 3 -> 2 may cause half of rank 0/1 to move and all of rank 2 + self.assertLessEqual((count/len(subtrees_old)), (1.0/3.0/2.0 + 1.0/3.0/2.0 + 1.0/3.0)*1.25) # aka .66 with 25% overbudget