From 293b90fe63665666ca2de8b65c38f987a4d0b2a6 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Tue, 17 Jan 2023 21:22:10 -0500
Subject: [PATCH] qa/tasks/cephfs: test damage to dentry's first is caught

Signed-off-by: Patrick Donnelly
---
 qa/suites/fs/functional/tasks/damage.yaml |  1 +
 qa/tasks/cephfs/filesystem.py             |  3 +
 qa/tasks/cephfs/test_damage.py            | 97 +++++++++++++++++++++++
 3 files changed, 101 insertions(+)

diff --git a/qa/suites/fs/functional/tasks/damage.yaml b/qa/suites/fs/functional/tasks/damage.yaml
index 917c4b1a3d63e..ff8b3a58a767a 100644
--- a/qa/suites/fs/functional/tasks/damage.yaml
+++ b/qa/suites/fs/functional/tasks/damage.yaml
@@ -18,6 +18,7 @@ overrides:
       - Metadata damage detected
       - MDS_READ_ONLY
       - force file system read-only
+      - with standby daemon mds
 tasks:
 - cephfs_test_runner:
     modules:
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index d9ccfc51266b0..111a99fc4c873 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -1648,6 +1648,9 @@ class Filesystem(MDSCluster):
     def get_scrub_status(self, rank=0):
         return self.run_scrub(["status"], rank)
 
+    def flush(self, rank=0):
+        return self.rank_tell(["flush", "journal"], rank=rank)
+
     def wait_until_scrub_complete(self, result=None, tag=None, rank=0, sleep=30, timeout=300,
                                   reverse=False):
         # time out after "timeout" seconds and assume as done
diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py
index 4d019442976b6..d83187017e3ff 100644
--- a/qa/tasks/cephfs/test_damage.py
+++ b/qa/tasks/cephfs/test_damage.py
@@ -3,6 +3,7 @@ import json
 import logging
 import errno
 import re
+import time
 from teuthology.contextutil import MaxWhileTries
 from teuthology.exceptions import CommandFailedError
 from teuthology.orchestra.run import wait
@@ -562,3 +563,99 @@ class TestDamage(CephFSTestCase):
         self.fs.mon_manager.raw_cluster_cmd(
             'tell', 'mds.{0}'.format(self.fs.get_active_names()[0]),
             "damage", "rm", str(entry['id']))
+
+    def test_dentry_first_existing(self):
+        """
+        That the MDS won't abort when the dentry is already known to be damaged.
+        """
+
+        def verify_corrupt():
+            info = self.fs.read_cache("/a", 0)
+            log.debug('%s', info)
+            self.assertEqual(len(info), 1)
+            dirfrags = info[0]['dirfrags']
+            self.assertEqual(len(dirfrags), 1)
+            dentries = dirfrags[0]['dentries']
+            self.assertEqual([dn['path'] for dn in dentries if dn['is_primary']], ['a/c'])
+            self.assertEqual(dentries[0]['snap_first'], 18446744073709551606) # SNAP_HEAD
+
+        self.mount_a.run_shell_payload("mkdir -p a/b")
+        self.fs.flush()
+        self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", False)
+        self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0")
+        time.sleep(5) # for conf to percolate
+        self.mount_a.run_shell_payload("mv a/b a/c; sync .")
+        self.mount_a.umount()
+        verify_corrupt()
+        self.fs.fail()
+        self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first")
+        self.config_set("mds", "mds_abort_on_newly_corrupt_dentry", True)
+        self.fs.set_joinable()
+        status = self.fs.status()
+        self.fs.flush()
+        self.assertFalse(self.fs.status().hadfailover(status))
+        verify_corrupt()
+
+    def test_dentry_first_preflush(self):
+        """
+        That the MDS won't write a dentry with new damage to CDentry::first
+        to the journal.
+ """ + + rank0 = self.fs.get_rank() + self.fs.rank_freeze(True, rank=0) + self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d") + self.fs.flush() + self.config_set("mds", "mds_inject_rename_corrupt_dentry_first", "1.0") + time.sleep(5) # for conf to percolate + p = self.mount_a.run_shell_payload("timeout 60 mv a/b a/z", wait=False) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout) + self.config_rm("mds", "mds_inject_rename_corrupt_dentry_first") + self.fs.rank_freeze(False, rank=0) + self.delete_mds_coredump(rank0['name']) + self.fs.mds_restart(rank0['name']) + self.fs.wait_for_daemons() + p.wait() + self.mount_a.run_shell_payload("stat a/ && find a/") + self.fs.flush() + + def test_dentry_first_precommit(self): + """ + That the MDS won't write a dentry with new damage to CDentry::first + to the directory object. + """ + + fscid = self.fs.id + self.mount_a.run_shell_payload("mkdir -p a/{b,c}/d; sync .") + self.mount_a.umount() # allow immediate scatter write back + self.fs.flush() + # now just twiddle some inode metadata on a regular file + self.mount_a.mount_wait() + self.mount_a.run_shell_payload("chmod 711 a/b/d; sync .") + self.mount_a.umount() # avoid journaling session related things + # okay, now cause the dentry to get damaged after loading from the journal + self.fs.fail() + self.config_set("mds", "mds_inject_journal_corrupt_dentry_first", "1.0") + time.sleep(5) # for conf to percolate + self.fs.set_joinable() + self.fs.wait_for_daemons() + rank0 = self.fs.get_rank() + self.fs.rank_freeze(True, rank=0) + # so now we want to trigger commit but this will crash, so: + c = ['--connect-timeout=60', 'tell', f"mds.{fscid}:0", "flush", "journal"] + p = self.ceph_cluster.mon_manager.run_cluster_cmd(args=c, wait=False, timeoutcmd=30) + self.wait_until_true(lambda: "laggy_since" in self.fs.get_rank(), timeout=self.fs.beacon_timeout) + self.config_rm("mds", "mds_inject_journal_corrupt_dentry_first") + self.fs.rank_freeze(False, rank=0) + self.delete_mds_coredump(rank0['name']) + self.fs.mds_restart(rank0['name']) + self.fs.wait_for_daemons() + try: + p.wait() + except CommandFailedError as e: + print(e) + else: + self.fail("flush journal should fail!") + self.mount_a.mount_wait() + self.mount_a.run_shell_payload("stat a/ && find a/") + self.fs.flush() -- 2.39.5