From 1eb33745a894d238e451f0605f2cad330e7893bb Mon Sep 17 00:00:00 2001
From: Venky Shankar
Date: Tue, 18 Dec 2018 08:29:41 -0500
Subject: [PATCH] test: switch to using the "scrub start" tell interface to
 initiate scrub

... and fix up the doc too.

Signed-off-by: Venky Shankar
---
 doc/cephfs/disaster-recovery-experts.rst |  2 +-
 qa/tasks/cephfs/filesystem.py            |  4 +++
 qa/tasks/cephfs/test_damage.py           |  2 +-
 qa/tasks/cephfs/test_forward_scrub.py    |  6 ++--
 qa/tasks/cephfs/test_recovery_pool.py    |  2 +-
 qa/tasks/cephfs/test_scrub.py            |  4 +--
 qa/tasks/cephfs/test_scrub_checks.py     | 40 ++++++++++++++++--------
 qa/tasks/cephfs_upgrade_snap.py          |  4 +--
 8 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/doc/cephfs/disaster-recovery-experts.rst b/doc/cephfs/disaster-recovery-experts.rst
index 8db85a88611..1c585a664f3 100644
--- a/doc/cephfs/disaster-recovery-experts.rst
+++ b/doc/cephfs/disaster-recovery-experts.rst
@@ -259,4 +259,4 @@ run a forward scrub to repair them. Ensure you have an MDS running and issue:
 
 ::
 
-    ceph daemon mds.a scrub_path / recursive repair
+    ceph tell mds.a scrub start / recursive repair
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index 8764753c21c..cac566a252f 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -921,6 +921,10 @@ class Filesystem(MDSCluster):
         info = self.get_rank(rank=rank, status=status)
         return self.json_asok(command, 'mds', info['name'], timeout=timeout)
 
+    def rank_tell(self, command, rank=0, status=None):
+        info = self.get_rank(rank=rank, status=status)
+        return json.loads(self.mon_manager.raw_cluster_cmd("tell", 'mds.{0}'.format(info['name']), *command))
+
     def read_cache(self, path, depth=None):
         cmd = ["dump", "tree", path]
         if depth is not None:
diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py
index 459077b0428..43e94a2acf2 100644
--- a/qa/tasks/cephfs/test_damage.py
+++ b/qa/tasks/cephfs/test_damage.py
@@ -434,7 +434,7 @@ class TestDamage(CephFSTestCase):
         self.mount_a.umount_wait()
 
         # Now repair the stats
-        scrub_json = self.fs.mds_asok(["scrub_path", "/subdir", "repair"])
+        scrub_json = self.fs.rank_tell(["scrub", "start", "/subdir", "repair"])
         log.info(json.dumps(scrub_json, indent=2))
 
         self.assertEqual(scrub_json["passed_validation"], False)
diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py
index e165780f31f..b0f85e3213f 100644
--- a/qa/tasks/cephfs/test_forward_scrub.py
+++ b/qa/tasks/cephfs/test_forward_scrub.py
@@ -232,7 +232,7 @@ class TestForwardScrub(CephFSTestCase):
         self.mount_a.umount_wait()
 
         with self.assert_cluster_log("inode table repaired", invert_match=True):
-            out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
+            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
         self.assertNotEqual(out_json, None)
 
         self.mds_cluster.mds_stop()
@@ -255,7 +255,7 @@
         self.fs.wait_for_daemons()
 
         with self.assert_cluster_log("inode table repaired"):
-            out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
+            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
         self.assertNotEqual(out_json, None)
 
         self.mds_cluster.mds_stop()
@@ -286,7 +286,7 @@
                                   "oh i'm sorry did i overwrite your xattr?")
 
         with self.assert_cluster_log("bad backtrace on inode"):
-            out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
+            out_json = self.fs.rank_tell(["scrub", "start", "/", "repair", "recursive"])
"recursive"]) self.assertNotEqual(out_json, None) self.fs.mds_asok(["flush", "journal"]) backtrace = self.fs.read_backtrace(file_ino) diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py index 97049b9c0a3..1684d170c8e 100644 --- a/qa/tasks/cephfs/test_recovery_pool.py +++ b/qa/tasks/cephfs/test_recovery_pool.py @@ -186,7 +186,7 @@ class TestRecoveryPool(CephFSTestCase): for rank in self.recovery_fs.get_ranks(status=status): self.fs.mon_manager.raw_cluster_cmd('tell', "mds." + rank['name'], 'injectargs', '--debug-mds=20') - self.fs.rank_asok(['scrub_path', '/', 'recursive', 'repair'], rank=rank['rank'], status=status) + self.fs.rank_tell(['scrub', 'start', '/', 'recursive', 'repair'], rank=rank['rank'], status=status) log.info(str(self.mds_cluster.status())) # Mount a client diff --git a/qa/tasks/cephfs/test_scrub.py b/qa/tasks/cephfs/test_scrub.py index 9469dfce6e4..d96f5691ba2 100644 --- a/qa/tasks/cephfs/test_scrub.py +++ b/qa/tasks/cephfs/test_scrub.py @@ -103,7 +103,7 @@ class DupInodeWorkload(Workload): self._filesystem.wait_for_daemons() def validate(self): - out_json = self._filesystem.mds_asok(["scrub_path", "/", "recursive", "repair"]) + out_json = self._filesystem.rank_tell(["scrub", "start", "/", "recursive", "repair"]) self.assertNotEqual(out_json, None) self.assertTrue(self._filesystem.are_daemons_healthy()) return self._errors @@ -129,7 +129,7 @@ class TestScrub(CephFSTestCase): # Apply any data damage the workload wants workload.damage() - out_json = self.fs.mds_asok(["scrub_path", "/", "recursive", "repair"]) + out_json = self.fs.rank_tell(["scrub", "start", "/", "recursive", "repair"]) self.assertNotEqual(out_json, None) # See that the files are present and correct diff --git a/qa/tasks/cephfs/test_scrub_checks.py b/qa/tasks/cephfs/test_scrub_checks.py index a2de5271972..e6921c8c8b9 100644 --- a/qa/tasks/cephfs/test_scrub_checks.py +++ b/qa/tasks/cephfs/test_scrub_checks.py @@ -63,7 +63,7 @@ class TestScrubChecks(CephFSTestCase): nep = "{test_path}/i/dont/exist".format(test_path=abs_test_path) self.asok_command(mds_rank, "flush_path {nep}".format(nep=nep), lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) - self.asok_command(mds_rank, "scrub_path {nep}".format(nep=nep), + self.tell_command(mds_rank, "scrub start {nep}".format(nep=nep), lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=abs_test_path) @@ -73,8 +73,8 @@ class TestScrubChecks(CephFSTestCase): log.info("First run: flushing {dirpath}".format(dirpath=dirpath)) command = "flush_path {dirpath}".format(dirpath=dirpath) self.asok_command(mds_rank, command, success_validator) - command = "scrub_path {dirpath}".format(dirpath=dirpath) - self.asok_command(mds_rank, command, success_validator) + command = "scrub start {dirpath}".format(dirpath=dirpath) + self.tell_command(mds_rank, command, success_validator) filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format( repo_path=test_repo_path) @@ -82,13 +82,13 @@ class TestScrubChecks(CephFSTestCase): log.info("First run: flushing {filepath}".format(filepath=filepath)) command = "flush_path {filepath}".format(filepath=filepath) self.asok_command(mds_rank, command, success_validator) - command = "scrub_path {filepath}".format(filepath=filepath) - self.asok_command(mds_rank, command, success_validator) + command = "scrub start {filepath}".format(filepath=filepath) + self.tell_command(mds_rank, command, success_validator) 
filepath = "{repo_path}/suites/fs/basic/clusters/fixed-3-cephfs.yaml". \ format(repo_path=test_repo_path) - command = "scrub_path {filepath}".format(filepath=filepath) - self.asok_command(mds_rank, command, + command = "scrub start {filepath}".format(filepath=filepath) + self.tell_command(mds_rank, command, lambda j, r: self.json_validator(j, r, "performed_validation", False)) @@ -96,8 +96,8 @@ class TestScrubChecks(CephFSTestCase): log.info("First run: flushing base dir /") command = "flush_path /" self.asok_command(mds_rank, command, success_validator) - command = "scrub_path /" - self.asok_command(mds_rank, command, success_validator) + command = "scrub start /" + self.tell_command(mds_rank, command, success_validator) new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq) test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path, @@ -118,16 +118,16 @@ class TestScrubChecks(CephFSTestCase): # check that scrub fails on errors ino = self.mount_a.path_to_ino(new_file) rados_obj_name = "{ino:x}.00000000".format(ino=ino) - command = "scrub_path {file}".format(file=test_new_file) + command = "scrub start {file}".format(file=test_new_file) # Missing parent xattr -> ENODATA self.fs.rados(["rmxattr", rados_obj_name, "parent"], pool=self.fs.get_data_pool_name()) - self.asok_command(mds_rank, command, + self.tell_command(mds_rank, command, lambda j, r: self.json_validator(j, r, "return_code", -errno.ENODATA)) # Missing object -> ENOENT self.fs.rados(["rm", rados_obj_name], pool=self.fs.get_data_pool_name()) - self.asok_command(mds_rank, command, + self.tell_command(mds_rank, command, lambda j, r: self.json_validator(j, r, "return_code", -errno.ENOENT)) command = "flush_path /" @@ -162,7 +162,7 @@ class TestScrubChecks(CephFSTestCase): self.mount_a.run_shell(["sudo", "rmdir", test_dir]) self.assertEqual(ar.exception.exitstatus, 1) - self.asok_command(mds_rank, "scrub_path /{0} repair".format(test_dir), + self.tell_command(mds_rank, "scrub start /{0} repair".format(test_dir), lambda j, r: self.json_validator(j, r, "return_code", 0)) # wait a few second for background repair @@ -181,6 +181,20 @@ class TestScrubChecks(CephFSTestCase): jv=element_value, ev=expected_value) return True, "Succeeded" + def tell_command(self, mds_rank, command, validator): + log.info("Running command '{command}'".format(command=command)) + + command_list = command.split() + jout = self.fs.rank_tell(command_list, mds_rank) + + log.info("command '{command}' returned '{jout}'".format( + command=command, jout=jout)) + + success, errstring = validator(jout, 0) + if not success: + raise AsokCommandFailedError(command, rout, jout, errstring) + return jout + def asok_command(self, mds_rank, command, validator): log.info("Running command '{command}'".format(command=command)) diff --git a/qa/tasks/cephfs_upgrade_snap.py b/qa/tasks/cephfs_upgrade_snap.py index a11b1d7ee75..1708d43cfe7 100644 --- a/qa/tasks/cephfs_upgrade_snap.py +++ b/qa/tasks/cephfs_upgrade_snap.py @@ -24,13 +24,13 @@ def task(ctx, config): mds_map = fs.get_mds_map() assert(mds_map['max_mds'] == 1) - json = fs.rank_asok(["scrub_path", "/", "force", "recursive", "repair"]) + json = fs.rank_tell(["scrub", "start", "/", "force", "recursive", "repair"]) if not json or json['return_code'] == 0: log.info("scrub / completed") else: log.info("scrub / failed: {}".format(json)) - json = fs.rank_asok(["scrub_path", "~mdsdir", "force", "recursive", "repair"]) + json = fs.rank_tell(["scrub", "start", "~mdsdir", "force", "recursive", "repair"]) 
     if not json or json['return_code'] == 0:
         log.info("scrub ~mdsdir completed")
     else:
-- 
2.39.5
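
For reference, a minimal standalone sketch of the invocation path this patch
moves the tests onto. It is illustrative only, not part of the patch: the MDS
name "a" and the scrub target "/" are placeholders, and the helper mds_tell()
is invented for the example; it mirrors what the new Filesystem.rank_tell()
helper does via raw_cluster_cmd().

    #!/usr/bin/env python
    # Illustrative sketch (not part of the patch above). Drives the "scrub
    # start" tell interface from a plain script. "ceph tell mds.<name> ..."
    # is routed through the cluster, so it works from any host with admin
    # credentials; the old "ceph daemon mds.<name> scrub_path ..." form
    # required access to the daemon's local admin socket instead.
    import json
    import subprocess

    def mds_tell(name, *args):
        # build e.g. ["ceph", "tell", "mds.a", "scrub", "start", "/"]
        out = subprocess.check_output(
            ["ceph", "tell", "mds.{0}".format(name)] + list(args))
        # the tell command prints its result as JSON on stdout, which is
        # why rank_tell() above can simply json.loads() the raw output
        return json.loads(out)

    # equivalent of: ceph tell mds.a scrub start / recursive repair
    result = mds_tell("a", "scrub", "start", "/", "recursive", "repair")
    print(json.dumps(result, indent=2))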