From: Xiubo Li Date: Sat, 18 Sep 2021 02:34:19 +0000 (+0800) Subject: qa: add test support for the alloc ino failing X-Git-Tag: v16.2.14~44^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=35bc2dac2866c145b03d95a1fe0f1c2a2ead94e9;p=ceph.git qa: add test support for the alloc ino failing Fixes: https://tracker.ceph.com/issues/52280 Signed-off-by: Xiubo Li (cherry picked from commit 71797091a25d3153d12def815ee7bd9b361593cd) Conflicts: doc/cephfs/mds-config-ref.rst: format changed src/common/options/mds.yaml.in: no such file --- diff --git a/doc/cephfs/mds-config-ref.rst b/doc/cephfs/mds-config-ref.rst index 2efc83b410e..2b22a844f3a 100644 --- a/doc/cephfs/mds-config-ref.rst +++ b/doc/cephfs/mds-config-ref.rst @@ -501,6 +501,25 @@ :Type: 32-bit Integer :Default: ``0`` +``mds_inject_skip_replaying_inotable`` + +:Description: Ceph will skip replaying the inotable when replaying the journal, + and the primary MDS will crash, while the replacing MDS won't. + (for developers only). + +:Type: Boolean +:Default: ``false`` + + +``mds_kill_skip_replaying_inotable`` + +:Description: Ceph will skip replaying the inotable when replaying the journal, + and the primary MDS will crash, while the replacing MDS won't. + (for developers only). 
+ +:Type: Boolean +:Default: ``false`` + ``mds_wipe_sessions`` diff --git a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml index 40d63ba792b..e6d6ef99b15 100644 --- a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml +++ b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml @@ -11,3 +11,4 @@ overrides: - has not responded to cap revoke by MDS for over - MDS_CLIENT_LATE_RELEASE - responding to mclientcaps + - RECENT_CRASH diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py index 0bd8ad6217e..4fde5fb8f5e 100644 --- a/qa/tasks/cephfs/test_misc.py +++ b/qa/tasks/cephfs/test_misc.py @@ -414,3 +414,45 @@ class TestCacheDrop(CephFSTestCase): # particular operation causing this is journal flush which causes the # MDS to wait wait for cap revoke. self.mount_a.resume_netns() + +class TestSkipReplayInoTable(CephFSTestCase): + MDSS_REQUIRED = 1 + CLIENTS_REQUIRED = 1 + + def test_alloc_cinode_assert(self): + """ + Test alloc CInode assert. + + See: https://tracker.ceph.com/issues/52280 + """ + + # Create a directory and the mds will journal this and then crash + self.mount_a.run_shell(["rm", "-rf", "test_alloc_ino"]) + self.mount_a.run_shell(["mkdir", "test_alloc_ino"]) + + status = self.fs.status() + rank0 = self.fs.get_rank(rank=0, status=status) + + self.fs.mds_asok(['config', 'set', 'mds_kill_skip_replaying_inotable', "true"]) + # This will make the MDS crash, since we only have one MDS in the + # cluster and without the "wait=False" it will get stuck here forever. 
+ self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir1"], wait=False) + self.fs.mds_asok(['flush', 'journal']) + + # Now set the mds config to skip replaying the inotable + self.fs.set_ceph_conf('mds', 'mds_inject_skip_replaying_inotable', True) + self.fs.set_ceph_conf('mds', 'mds_wipe_sessions', True) + + # sleep 5 seconds to make sure the journal log is flushed and applied + time.sleep(5) + self.fs.mds_restart() + # sleep 5 seconds to make sure the mds tell command won't get stuck + time.sleep(5) + self.fs.wait_for_daemons() + + self.delete_mds_coredump(rank0['name']); + + self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir2"]) + + ls_out = set(self.mount_a.ls("test_alloc_ino/")) + self.assertEqual(ls_out, set({"dir1", "dir2"})) diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h index ea103d7de9e..03b0973db09 100644 --- a/src/common/legacy_config_opts.h +++ b/src/common/legacy_config_opts.h @@ -130,6 +130,8 @@ OPTION(ms_connection_idle_timeout, OPT_U64) OPTION(ms_pq_max_tokens_per_priority, OPT_U64) OPTION(ms_pq_min_cost, OPT_U64) OPTION(ms_inject_socket_failures, OPT_U64) +OPTION(mds_inject_skip_replaying_inotable, OPT_BOOL) +OPTION(mds_kill_skip_replaying_inotable, OPT_BOOL) SAFE_OPTION(ms_inject_delay_type, OPT_STR) // "osd mds mon client" allowed OPTION(ms_inject_delay_max, OPT_DOUBLE) // seconds OPTION(ms_inject_delay_probability, OPT_DOUBLE) // range [0, 1] diff --git a/src/common/options.cc b/src/common/options.cc index 4dc69cb0db3..3f4527795f8 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -8775,6 +8775,14 @@ std::vector