git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/cephfs: Add more tests for multimds scrub
author Sidharth Anupkrishnan <sanupkri@redhat.com>
Tue, 25 Aug 2020 13:33:47 +0000 (19:03 +0530)
committer Yan, Zheng <zyan@redhat.com>
Mon, 16 Nov 2020 01:02:18 +0000 (09:02 +0800)
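The test subtrees are now pinned to ranks 0, 1 and 2 (previously 1, 2 and 0),
and new tests exercise scrub abort, pause/resume, and pause followed by abort
with multiple active MDSs. Each test verifies the per-rank "scrub status"
tell output as well as the task status reported in "ceph status".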
Signed-off-by: Sidharth Anupkrishnan <sanupkri@redhat.com>
qa/tasks/cephfs/test_multimds_misc.py

index 8071c1186d18a615972d80aae2f6eb2aaed8528c..eecd2e488403246e82b9d5da77604c14fcec4f15 100644 (file)
@@ -1,6 +1,7 @@
 import logging
 import errno
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
+from teuthology.contextutil import safe_while
 from teuthology.orchestra.run import CommandFailedError
 
 log = logging.getLogger(__name__)
@@ -9,12 +10,35 @@ class TestScrub2(CephFSTestCase):
     MDSS_REQUIRED = 3
     CLIENTS_REQUIRED = 1
 
-    def _get_scrub_status(self):
-        return self.fs.rank_tell(["scrub", "status"], 0)
+    def _get_scrub_status(self, rank=0):
+        return self.fs.rank_tell(["scrub", "status"], rank)
 
     def _wait_until_scrubbed(self, timeout):
         self.wait_until_true(lambda: "no active" in self._get_scrub_status()['status'], timeout)
 
+    def _check_task_status_na(self, timo=120):
+        """ check absence of scrub status in ceph status """
+        with safe_while(sleep=1, tries=120, action='wait for task status') as proceed:
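+            # safe_while raises MaxWhileTries once the tries are exhausted,
+            # which fails the test; returning True signals success.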
+            while proceed():
+                active = self.fs.get_active_names()
+                log.debug("current active={0}".format(active))
+                task_status = self.fs.get_task_status("scrub status")
+                if active[0] not in task_status:
+                    return True
+
+    def _check_task_status(self, expected_status, timo=120):
+        """ check scrub status for current active mds in ceph status """
+        with safe_while(sleep=1, tries=120, action='wait for task status') as proceed:
+            while proceed():
+                active = self.fs.get_active_names()
+                log.debug("current active={0}".format(active))
+                task_status = self.fs.get_task_status("scrub status")
+                try:
+                    if task_status[active[0]].startswith(expected_status):
+                        return True
+                except KeyError:
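+                    # no scrub task status for this MDS yet; keep polling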
+                    pass
+
     def _find_path_inos(self, root_path):
         inos = []
         p = self.mount_a.run_shell(["find", root_path])
@@ -32,10 +56,13 @@ class TestScrub2(CephFSTestCase):
         self.mount_a.run_shell(['mkdir', '-p', path])
         self.mount_a.run_shell(['sync', path])
 
-        self.mount_a.setfattr("d1/d2", "ceph.dir.pin", "1")
-        self.mount_a.setfattr("d1/d2/d3/d4", "ceph.dir.pin", "2")
-        self.mount_a.setfattr("d1/d2/d3/d4/d5/d6", "ceph.dir.pin", "0")
-        self._wait_subtrees([('/d1/d2', 1), ('/d1/d2/d3/d4', 2), ('/d1/d2/d3/d4/d5/d6', 0)], status=status, rank=0)
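+        # Pin the nested subtrees to ranks 0, 1 and 2 so the scrub has to
+        # traverse all three active MDSs.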
+        self.mount_a.setfattr("d1/d2", "ceph.dir.pin", "0")
+        self.mount_a.setfattr("d1/d2/d3/d4", "ceph.dir.pin", "1")
+        self.mount_a.setfattr("d1/d2/d3/d4/d5/d6", "ceph.dir.pin", "2")
+
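+        # Each rank reports only the subtrees it participates in, so the
+        # expected subtree map is verified on all three ranks.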
+        self._wait_subtrees([('/d1/d2', 0), ('/d1/d2/d3/d4', 1)], status, 0)
+        self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 1)
+        self._wait_subtrees([('/d1/d2/d3/d4', 1), ('/d1/d2/d3/d4/d5/d6', 2)], status, 2)
 
         for rank in range(3):
             self.fs.rank_tell(["flush", "journal"], rank)
@@ -71,9 +98,9 @@ class TestScrub2(CephFSTestCase):
             damage = [d for d in all_damage if d['ino'] in inos and d['damage_type'] == "backtrace"]
             return len(damage) >= len(inos)
 
-        self.assertTrue(_check_damage(1, inos[0:2]))
-        self.assertTrue(_check_damage(2, inos[2:4]))
-        self.assertTrue(_check_damage(0, inos[4:6]))
+        self.assertTrue(_check_damage(0, inos[0:2]))
+        self.assertTrue(_check_damage(1, inos[2:4]))
+        self.assertTrue(_check_damage(2, inos[4:6]))
 
     def test_scrub_non_mds0(self):
         self._setup_subtrees()
@@ -94,3 +121,108 @@ class TestScrub2(CephFSTestCase):
         expect_exdev(["scrub", "abort"], rank1["name"])
         expect_exdev(["scrub", "pause"], rank1["name"])
         expect_exdev(["scrub", "resume"], rank1["name"])
+
+    def test_scrub_abort_mds0(self):
+        self._setup_subtrees()
+
+        inos = self._find_path_inos('d1/d2/d3/')
+
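+        # Remove the backtrace (the "parent" xattr on each file's first data
+        # object) so a recursive scrub will detect backtrace damage.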
+        for ino in inos:
+            file_obj_name = "{0:x}.00000000".format(ino)
+            self.fs.rados(["rmxattr", file_obj_name, "parent"])
+
+        out_json = self.fs.rank_tell(["scrub", "start", "/d1/d2/d3", "recursive", "force"], 0)
+        self.assertNotEqual(out_json, None)
+
+        res = self.fs.rank_tell(["scrub", "abort"])
+        self.assertEqual(res['return_code'], 0)
+
+        # Verify that the scrub was aborted on all three ranks. Rank 0 is
+        # checked as well because it is supposed to gather the scrub status
+        # from the other MDSs.
+        self.wait_until_true(lambda: "no active" in self._get_scrub_status(1)['status']
+                and "no active" in self._get_scrub_status(2)['status']
+                and "no active" in self._get_scrub_status(0)['status'], 30)
+
+        # wait long enough for the updated task status to be reported
+        checked = self._check_task_status_na()
+        self.assertTrue(checked)
+
+    def test_scrub_pause_and_resume_mds0(self):
+        self._setup_subtrees()
+
+        inos = self._find_path_inos('d1/d2/d3/')
+
+        for ino in inos:
+            file_obj_name = "{0:x}.00000000".format(ino)
+            self.fs.rados(["rmxattr", file_obj_name, "parent"])
+
+        out_json = self.fs.rank_tell(["scrub", "start", "/d1/d2/d3", "recursive", "force"], 0)
+        self.assertNotEqual(out_json, None)
+
+        res = self.fs.rank_tell(["scrub", "pause"])
+        self.assertEqual(res['return_code'], 0)
+
+        self.wait_until_true(lambda: "PAUSED" in self._get_scrub_status(1)['status']
+                and "PAUSED" in self._get_scrub_status(2)['status']
+                and "PAUSED" in self._get_scrub_status(0)['status'], 30)
+
+        checked = self._check_task_status("paused")
+        self.assertTrue(checked)
+
+        # resume and verify
+        res = self.fs.rank_tell(["scrub", "resume"])
+        self.assertEqual(res['return_code'], 0)
+
+        self.wait_until_true(lambda: "PAUSED" not in self._get_scrub_status(1)['status']
+                and "PAUSED" not in self._get_scrub_status(2)['status']
+                and "PAUSED" not in self._get_scrub_status(0)['status'], 30)
+
+        checked = self._check_task_status_na()
+        self.assertTrue(checked)
+
+    def test_scrub_pause_and_resume_with_abort_mds0(self):
+        self._setup_subtrees()
+
+        inos = self._find_path_inos('d1/d2/d3/')
+
+        for ino in inos:
+            file_obj_name = "{0:x}.00000000".format(ino)
+            self.fs.rados(["rmxattr", file_obj_name, "parent"])
+
+        out_json = self.fs.rank_tell(["scrub", "start", "/d1/d2/d3", "recursive", "force"], 0)
+        self.assertNotEqual(out_json, None)
+
+        res = self.fs.rank_tell(["scrub", "pause"])
+        self.assertEqual(res['return_code'], 0)
+
+        self.wait_until_true(lambda: "PAUSED" in self._get_scrub_status(1)['status']
+                and "PAUSED" in self._get_scrub_status(2)['status']
+                and "PAUSED" in self._get_scrub_status(0)['status'], 30)
+
+        checked = self._check_task_status("paused")
+        self.assertTrue(checked)
+
+        res = self.fs.rank_tell(["scrub", "abort"])
+        self.assertEqual(res['return_code'], 0)
+
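+        # Aborting a paused scrub drops the queued inodes ("0 inodes" below)
+        # while the scrub itself remains PAUSED until explicitly resumed.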
+        self.wait_until_true(lambda: "PAUSED" in self._get_scrub_status(1)['status']
+                and "0 inodes" in self._get_scrub_status(1)['status']
+                and "PAUSED" in self._get_scrub_status(2)['status']
+                and "0 inodes" in self._get_scrub_status(2)['status']
+                and "PAUSED" in self._get_scrub_status(0)['status']
+                and "0 inodes" in self._get_scrub_status(0)['status'], 30)
+
+        # scrub status should still be paused...
+        checked = self._check_task_status("paused")
+        self.assertTrue(checked)
+
+        # resume and verify
+        res = self.fs.rank_tell(["scrub", "resume"])
+        self.assertEqual(res['return_code'], 0)
+
+        self.wait_until_true(lambda: "PAUSED" not in self._get_scrub_status(1)['status']
+                and "PAUSED" not in self._get_scrub_status(2)['status']
+                and "PAUSED" not in self._get_scrub_status(0)['status'], 30)
+
+        checked = self._check_task_status_na()
+        self.assertTrue(checked)