git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa: add data scan tests for ancestry rebuild
author: Patrick Donnelly <pdonnell@ibm.com>
Tue, 28 Jan 2025 18:40:39 +0000 (13:40 -0500)
committer: Patrick Donnelly <pdonnell@ibm.com>
Thu, 30 Jan 2025 04:19:52 +0000 (23:19 -0500)
When one PG is lost in the metadata pool, it will seem as if arbitrary dirfrags
have been lost (and possibly other things, ignored for now). The
cephfs-data-scan tool does not presently try to reconstruct those missing
parents so these tests will fail.

Importantly, also test the case where multiple ancestry paths may exist due to
out-of-date backtraces on regular files.

Signed-off-by: Patrick Donnelly <pdonnell@ibm.com>
qa/tasks/cephfs/filesystem.py
qa/tasks/cephfs/test_data_scan.py

index 3846ef23f9719e0bece05f7754726cb11e0bed1a..0ff5a28475c56d55db524390959b5586cb92fc1a 100644 (file)
@@ -61,6 +61,14 @@ class ObjectNotFound(Exception):
     def __str__(self):
         return "Object not found: '{0}'".format(self._object_name)
 
+class FSDamaged(Exception):
+    def __init__(self, ident, ranks):
+        self.ident = ident
+        self.ranks = ranks
+
+    def __str__(self):
+        return f"File system {self.ident} has damaged ranks {self.ranks}"
+
 class FSMissing(Exception):
     def __init__(self, ident):
         self.ident = ident
@@ -1092,10 +1100,17 @@ class FilesystemBase(MDSClusterBase):
             mds.check_status()
 
         active_count = 0
-        mds_map = self.get_mds_map(status=status)
 
+        if status is None:
+            status = self.status()
+
+        mds_map = self.get_mds_map(status=status)
         log.debug("are_daemons_healthy: mds map: {0}".format(mds_map))
 
+        damaged = self.get_damaged(status=status)
+        if damaged:
+            raise FSDamaged(self.id, damaged)
+
         for mds_id, mds_status in mds_map['info'].items():
             if mds_status['state'] not in ["up:active", "up:standby", "up:standby-replay"]:
                 log.warning("Unhealthy mds state {0}:{1}".format(mds_id, mds_status['state']))
index 64f8d691c4fefa7e352f4cee16bc1f4141051cd3..b8f371d432eb394c1525f4abc5331ddbb69152ee 100644 (file)
@@ -15,6 +15,8 @@ from collections import namedtuple, defaultdict
 from textwrap import dedent
 
 from teuthology.exceptions import CommandFailedError
+from teuthology import contextutil
+from tasks.cephfs.filesystem import FSDamaged
 from tasks.cephfs.cephfs_test_case import CephFSTestCase, for_teuthology
 
 log = logging.getLogger(__name__)
@@ -84,6 +86,18 @@ class Workload(object):
         pool = self._filesystem.get_metadata_pool_name()
         self._filesystem.rados(["purge", pool, '--yes-i-really-really-mean-it'])
 
+    def is_damaged(self):
+        sleep = 2
+        timeout = 120
+        with contextutil.safe_while(sleep=sleep, tries=timeout/sleep) as proceed:
+            while proceed():
+                try:
+                    self._filesystem.wait_for_daemons()
+                except FSDamaged as e:
+                    if 0 in e.ranks:
+                        return True
+        return False
+
     def flush(self):
         """
         Called after client unmount, after write: flush whatever you want
@@ -150,6 +164,90 @@ class SymlinkWorkload(Workload):
         self.assert_equal(target, "symdir/onemegs")
         return self._errors
 
+class NestedDirWorkload(Workload):
+    """
+    Nested directories, one is lost.
+    """
+
+    def write(self):
+        self._mount.run_shell_payload("mkdir -p dir_x/dir_xx/dir_xxx/")
+        self._mount.run_shell_payload("dd if=/dev/urandom of=dir_x/dir_xx/dir_xxx/file_y conv=fsync bs=1 count=1")
+        self._initial_state = self._filesystem.read_cache("dir_x/dir_xx", depth=0)
+
+    def damage(self):
+        dirfrag_obj = "{0:x}.00000000".format(self._initial_state[0]['ino'])
+        self._filesystem.radosm(["rm", dirfrag_obj])
+
+    def is_damaged(self):
+        # workload runner expects MDS to be offline
+        self._filesystem.fail()
+        return True
+
+    def validate(self):
+        self._mount.run_shell_payload("find dir_x -execdir stat {} +")
+        self._mount.run_shell_payload("stat dir_x/dir_xx/dir_xxx/file_y")
+        return self._errors
+
+class NestedDirWorkloadRename(Workload):
+    """
+    Nested directories, one is lost. With renames.
+    """
+
+    def write(self):
+        self._mount.run_shell_payload("mkdir -p dir_x/dir_xx/dir_xxx/; mkdir -p dir_y")
+        self._mount.run_shell_payload("dd if=/dev/urandom of=dir_x/dir_xx/dir_xxx/file_y conv=fsync bs=1 count=1")
+        self._initial_state = self._filesystem.read_cache("dir_x/dir_xx", depth=0)
+        self._filesystem.flush()
+        self._mount.run_shell_payload("mv dir_x/dir_xx dir_y/dir_yy; sync dir_y")
+
+    def damage(self):
+        dirfrag_obj = "{0:x}.00000000".format(self._initial_state[0]['ino'])
+        self._filesystem.radosm(["rm", dirfrag_obj])
+
+    def is_damaged(self):
+        # workload runner expects MDS to be offline
+        self._filesystem.fail()
+        return True
+
+    def validate(self):
+        self._mount.run_shell_payload("find . -execdir stat {} +")
+        self._mount.run_shell_payload("stat dir_y/dir_yy/dir_xxx/file_y")
+        return self._errors
+
+class NestedDoubleDirWorkloadRename(Workload):
+    """
+    Nested directories, two lost with backtraces to rebuild. With renames.
+    """
+
+    def write(self):
+        self._mount.run_shell_payload("mkdir -p dir_x/dir_xx/dir_xxx/; mkdir -p dir_y")
+        self._mount.run_shell_payload("dd if=/dev/urandom of=dir_x/dir_xx/dir_xxx/file_y conv=fsync bs=1 count=1")
+        self._initial_state = []
+        self._initial_state.append(self._filesystem.read_cache("dir_x/dir_xx", depth=0))
+        self._initial_state.append(self._filesystem.read_cache("dir_y", depth=0))
+        self._filesystem.flush()
+        self._mount.run_shell_payload("""
+        mv dir_x/dir_xx dir_y/dir_yy
+        sync dir_y
+        dd if=/dev/urandom of=dir_y/dir_yy/dir_xxx/file_z conv=fsync bs=1 count=1
+        """)
+
+    def damage(self):
+        for o in self._initial_state:
+            dirfrag_obj = "{0:x}.00000000".format(o[0]['ino'])
+            self._filesystem.radosm(["rm", dirfrag_obj])
+
+    def is_damaged(self):
+        # workload runner expects MDS to be offline
+        self._filesystem.fail()
+        return True
+
+    def validate(self):
+        self._mount.run_shell_payload("find . -execdir stat {} +")
+        # during recovery: we may get dir_x/dir_xx or dir_y/dir_yy; depending on rados pg iteration order
+        self._mount.run_shell_payload("stat dir_y/dir_yy/dir_xxx/file_y || stat dir_x/dir_xx/dir_xxx/file_y")
+        return self._errors
+
 
 class MovedFile(Workload):
     def write(self):
@@ -391,10 +489,6 @@ class NonDefaultLayout(Workload):
 class TestDataScan(CephFSTestCase):
     MDSS_REQUIRED = 2
 
-    def is_marked_damaged(self, rank):
-        mds_map = self.fs.get_mds_map()
-        return rank in mds_map['damaged']
-
     def _rebuild_metadata(self, workload, workers=1, unmount=True):
         """
         That when all objects in metadata pool are removed, we can rebuild a metadata pool
@@ -425,19 +519,11 @@ class TestDataScan(CephFSTestCase):
         # Reset the MDS map in case multiple ranks were in play: recovery procedure
         # only understands how to rebuild metadata under rank 0
         self.fs.reset()
+        self.assertEqual(self.fs.get_var('max_mds'), 1)
 
         self.fs.set_joinable() # redundant with reset
 
-        def get_state(mds_id):
-            info = self.mds_cluster.get_mds_info(mds_id)
-            return info['state'] if info is not None else None
-
-        self.wait_until_true(lambda: self.is_marked_damaged(0), 60)
-        for mds_id in self.fs.mds_ids:
-            self.wait_until_equal(
-                    lambda: get_state(mds_id),
-                    "up:standby",
-                    timeout=60)
+        self.assertTrue(workload.is_damaged())
 
         self.fs.table_tool([self.fs.name + ":0", "reset", "session"])
         self.fs.table_tool([self.fs.name + ":0", "reset", "snap"])
@@ -450,7 +536,7 @@ class TestDataScan(CephFSTestCase):
                 self.fs.journal_tool(["journal", "reset", "--yes-i-really-really-mean-it"], 0)
 
         self.fs.journal_tool(["journal", "reset", "--force", "--yes-i-really-really-mean-it"], 0)
-        self.fs.data_scan(["init"])
+        self.fs.data_scan(["init", "--force-init"])
         self.fs.data_scan(["scan_extents"], worker_count=workers)
         self.fs.data_scan(["scan_inodes"], worker_count=workers)
         self.fs.data_scan(["scan_links"])
@@ -461,6 +547,7 @@ class TestDataScan(CephFSTestCase):
         self.run_ceph_cmd('mds', 'repaired', '0')
 
         # Start the MDS
+        self.fs.set_joinable() # necessary for some tests without damage
         self.fs.wait_for_daemons()
         log.info(str(self.mds_cluster.status()))
 
@@ -489,6 +576,15 @@ class TestDataScan(CephFSTestCase):
     def test_rebuild_symlink(self):
         self._rebuild_metadata(SymlinkWorkload(self.fs, self.mount_a))
 
+    def test_rebuild_nested(self):
+        self._rebuild_metadata(NestedDirWorkload(self.fs, self.mount_a))
+
+    def test_rebuild_nested_rename(self):
+        self._rebuild_metadata(NestedDirWorkloadRename(self.fs, self.mount_a))
+
+    def test_rebuild_nested_double_rename(self):
+        self._rebuild_metadata(NestedDoubleDirWorkloadRename(self.fs, self.mount_a))
+
     def test_rebuild_moved_file(self):
         self._rebuild_metadata(MovedFile(self.fs, self.mount_a))