From: Xiubo Li
Date: Sat, 18 Sep 2021 02:34:19 +0000 (+0800)
Subject: qa: add test support for the alloc ino failing
X-Git-Tag: v18.1.3~24^2~1
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=bdcce54873f2f4c2f355688b51c4b22db7aa2c70;p=ceph-ci.git

qa: add test support for the alloc ino failing

Fixes: https://tracker.ceph.com/issues/52280
Signed-off-by: Xiubo Li
(cherry picked from commit 71797091a25d3153d12def815ee7bd9b361593cd)
---
diff --git a/doc/cephfs/mds-config-ref.rst b/doc/cephfs/mds-config-ref.rst
index a5e0bba9177..5b68053a05e 100644
--- a/doc/cephfs/mds-config-ref.rst
+++ b/doc/cephfs/mds-config-ref.rst
@@ -57,6 +57,8 @@
 .. confval:: mds_kill_import_at
 .. confval:: mds_kill_link_at
 .. confval:: mds_kill_rename_at
+.. confval:: mds_inject_skip_replaying_inotable
+.. confval:: mds_kill_skip_replaying_inotable
 .. confval:: mds_wipe_sessions
 .. confval:: mds_wipe_ino_prealloc
 .. confval:: mds_skip_ino
diff --git a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml
index 40d63ba792b..e6d6ef99b15 100644
--- a/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml
+++ b/qa/suites/fs/multiclient/tasks/cephfs_misc_tests.yaml
@@ -11,3 +11,4 @@ overrides:
       - has not responded to cap revoke by MDS for over
       - MDS_CLIENT_LATE_RELEASE
       - responding to mclientcaps
+      - RECENT_CRASH
diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py
index 53150acbf03..5c513b5c99a 100644
--- a/qa/tasks/cephfs/test_misc.py
+++ b/qa/tasks/cephfs/test_misc.py
@@ -572,3 +572,45 @@ class TestCacheDrop(CephFSTestCase):
         # particular operation causing this is journal flush which causes the
         # MDS to wait wait for cap revoke.
         self.mount_a.resume_netns()
+
+class TestSkipReplayInoTable(CephFSTestCase):
+    MDSS_REQUIRED = 1
+    CLIENTS_REQUIRED = 1
+
+    def test_alloc_cinode_assert(self):
+        """
+        Test alloc CInode assert.
+
+        See: https://tracker.ceph.com/issues/52280
+        """
+
+        # Create a directory; the MDS will journal this and then crash.
+        self.mount_a.run_shell(["rm", "-rf", "test_alloc_ino"])
+        self.mount_a.run_shell(["mkdir", "test_alloc_ino"])
+
+        status = self.fs.status()
+        rank0 = self.fs.get_rank(rank=0, status=status)
+
+        self.fs.mds_asok(['config', 'set', 'mds_kill_skip_replaying_inotable', "true"])
+        # This will make the MDS crash. Since there is only one MDS in the
+        # cluster, this call would block forever without "wait=False".
+        self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir1"], wait=False)
+        self.fs.mds_asok(['flush', 'journal'])
+
+        # Now set the mds config to skip replaying the inotable
+        self.fs.set_ceph_conf('mds', 'mds_inject_skip_replaying_inotable', True)
+        self.fs.set_ceph_conf('mds', 'mds_wipe_sessions', True)
+
+        # Sleep 5 seconds to make sure the journal is flushed and applied.
+        time.sleep(5)
+        self.fs.mds_restart()
+        # Sleep 5 seconds to make sure the MDS tell command won't get stuck.
+        time.sleep(5)
+        self.fs.wait_for_daemons()
+
+        self.delete_mds_coredump(rank0['name'])
+
+        self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir2"])
+
+        ls_out = set(self.mount_a.ls("test_alloc_ino/"))
+        self.assertEqual(ls_out, {"dir1", "dir2"})
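The fixed time.sleep(5) calls in the test above are the usual source of flakiness on slow machines. Below is a minimal, stdlib-only polling helper of the kind that could replace them; wait_until() and the is_journal_flushed() probe are illustrative names and are not part of the qa framework:

    import time

    def wait_until(pred, timeout=60, interval=1):
        # Poll pred() until it returns True or the timeout expires.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if pred():
                return True
            time.sleep(interval)
        return False

    # Hypothetical usage, replacing a bare time.sleep(5):
    # assert wait_until(lambda: is_journal_flushed(), timeout=30)

The same helper would also cover the "make sure the mds tell command won't get stuck" sleep by polling daemon liveness instead of waiting a fixed interval.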
diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in
index 28814401e54..eeb895615d6 100644
--- a/src/common/options/mds.yaml.in
+++ b/src/common/options/mds.yaml.in
@@ -1094,6 +1094,26 @@ options:
   default: false
   services:
   - mds
+- name: mds_kill_skip_replaying_inotable
+  type: bool
+  level: dev
+  default: false
+  services:
+  - mds
+  fmt_desc: Ceph will skip replaying the inotable when replaying the journal,
+    and the primary MDS will crash, while the replacement MDS won't
+    (for testing only).
+  with_legacy: true
+- name: mds_inject_skip_replaying_inotable
+  type: bool
+  level: dev
+  default: false
+  services:
+  - mds
+  fmt_desc: Ceph will skip replaying the inotable when replaying the journal,
+    and the primary MDS will crash, while the replacement MDS won't
+    (for testing only).
+  with_legacy: true
 # percentage of MDS modify replies to skip sending the client a trace on [0-1]
 - name: mds_inject_traceless_reply_probability
   type: float
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 582cf33dcba..a99bed72a8f 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -211,6 +211,9 @@ class MDCache {
   bool test_and_clear_taken_inos(inodeno_t ino) {
     return replay_taken_inos.erase(ino) != 0;
   }
+  bool is_taken_inos_empty(void) {
+    return replay_taken_inos.empty();
+  }
 
   uint64_t cache_limit_memory(void) {
     return cache_memory_limit;
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index d34f4fb719a..50e1c3f2cef 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -4479,6 +4479,9 @@ public:
   void finish(int r) override {
     ceph_assert(r == 0);
 
+    // Crash the current MDS; the replacement MDS will then test journal replay.
+    ceph_assert(!g_conf()->mds_kill_skip_replaying_inotable);
+
     dn->pop_projected_linkage();
 
     // dirty inode, dn, dir
@@ -6825,6 +6828,9 @@ public:
   void finish(int r) override {
     ceph_assert(r == 0);
 
+    // Crash the current MDS; the replacement MDS will then test journal replay.
+    ceph_assert(!g_conf()->mds_kill_skip_replaying_inotable);
+
     // link the inode
     dn->pop_projected_linkage();
 
@@ -7131,6 +7137,11 @@ void Server::handle_client_symlink(MDRequestRef& mdr)
 
   journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi));
   mds->balancer->maybe_fragment(dir, false);
+
+  // flush the journal as soon as possible
+  if (g_conf()->mds_kill_skip_replaying_inotable) {
+    mdlog->flush();
+  }
 }
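The two ceph_assert(!g_conf()->mds_kill_skip_replaying_inotable) hunks above are "kill points" in the style of the existing mds_kill_*_at options: a dev-level flag that deliberately aborts the MDS after the mknod/symlink journal entry commits but before the in-memory state is applied, so that the replacement MDS must recover the new inode purely from the journal. A rough Python model of the pattern; class and key names here are illustrative, not Ceph APIs:

    class Daemon:
        def __init__(self, conf):
            self.conf = conf  # test-only injection flags

        def finish_journal_commit(self):
            # Mirrors C_MDS_mknod_finish::finish() in the hunk above: abort
            # before applying the in-memory state, so only the journal
            # records the newly allocated inode.
            assert not self.conf.get('kill_skip_replaying_inotable', False), \
                "injected crash: replacement daemon must replay the journal"
            # ... apply projected linkage, mark inode/dentry/dir dirty ...

The mdlog->flush() hunk complements the kill point: flushing immediately guarantees the entry the test depends on is durable before the assert fires.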
diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h
index 69e15d9e9c0..736a509ea21 100644
--- a/src/mds/events/EMetaBlob.h
+++ b/src/mds/events/EMetaBlob.h
@@ -601,7 +601,7 @@ private:
   }
 
   void update_segment(LogSegment *ls);
-  void replay(MDSRank *mds, LogSegment *ls, MDPeerUpdate *su=NULL);
+  void replay(MDSRank *mds, LogSegment *ls, int type, MDPeerUpdate *su=NULL);
 };
 WRITE_CLASS_ENCODER_FEATURES(EMetaBlob)
 WRITE_CLASS_ENCODER_FEATURES(EMetaBlob::fullbit)
diff --git a/src/mds/journal.cc b/src/mds/journal.cc
index c5a28050ec5..4b9e73b5a7e 100644
--- a/src/mds/journal.cc
+++ b/src/mds/journal.cc
@@ -1163,7 +1163,7 @@ void EMetaBlob::generate_test_instances(std::list<EMetaBlob*>& ls)
   ls.push_back(new EMetaBlob());
 }
 
-void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup)
+void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, int type, MDPeerUpdate *peerup)
 {
   dout(10) << "EMetaBlob.replay " << lump_map.size() << " dirlumps by " << client_name << dendl;
 
@@ -1567,9 +1567,12 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup)
       logseg->open_files.push_back(&in->item_open_file);
   }
 
+  bool skip_replaying_inotable = g_conf()->mds_inject_skip_replaying_inotable;
+
   // allocated_inos
   if (inotablev) {
-    if (mds->inotable->get_version() >= inotablev) {
+    if (mds->inotable->get_version() >= inotablev ||
+        unlikely(type == EVENT_UPDATE && skip_replaying_inotable)) {
       dout(10) << "EMetaBlob.replay inotable tablev " << inotablev
                << " <= table " << mds->inotable->get_version() << dendl;
       if (allocated_ino)
@@ -1597,7 +1600,8 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup)
     }
   }
   if (sessionmapv) {
-    if (mds->sessionmap.get_version() >= sessionmapv) {
+    if (mds->sessionmap.get_version() >= sessionmapv ||
+        unlikely(type == EVENT_UPDATE && skip_replaying_inotable)) {
       dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv
                << " <= table " << mds->sessionmap.get_version() << dendl;
       if (used_preallocated_ino)
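The two journal.cc hunks above carry the actual injection: when a client-request event (EVENT_UPDATE) is replayed with mds_inject_skip_replaying_inotable set, the MDS pretends its inotable and sessionmap already cover the journaled versions, so the inos allocated before the crash are never marked used; the unlikely() wrapper is only a branch-prediction hint for this test-only path. A condensed Python model of the decision; the constant value and function name are illustrative:

    EVENT_UPDATE = 'update'  # stand-in for the C++ event-type constant

    def skip_table_replay(table_v, journal_v, event_type, inject):
        # Fast path: the on-disk table already reflects this journal entry.
        # Injected path: only client updates (EVENT_UPDATE) skip the replay.
        return table_v >= journal_v or (event_type == EVENT_UPDATE and inject)

Passing the event type explicitly, rather than keying off per-event state, keeps this skip decision local to EMetaBlob::replay().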
<< leader << ": applying rollback commit blob" << dendl; - commit.replay(mds, segment); + commit.replay(mds, segment, EVENT_PEERUPDATE); mds->mdcache->finish_uncommitted_peer(reqid, false); break; @@ -2831,7 +2836,7 @@ void ESubtreeMap::replay(MDSRank *mds) // first, stick the spanning tree in my cache //metablob.print(*_dout); - metablob.replay(mds, get_segment()); + metablob.replay(mds, get_segment(), EVENT_SUBTREEMAP); // restore import/export maps for (map >::iterator p = subtrees.begin(); @@ -2906,7 +2911,7 @@ void EFragment::replay(MDSRank *mds) ceph_abort(); } - metablob.replay(mds, segment); + metablob.replay(mds, segment, EVENT_FRAGMENT); if (in && g_conf()->mds_debug_frag) in->verify_dirfrags(); } @@ -2990,7 +2995,7 @@ void EExport::replay(MDSRank *mds) { dout(10) << "EExport.replay " << base << dendl; auto&& segment = get_segment(); - metablob.replay(mds, segment); + metablob.replay(mds, segment, EVENT_EXPORT); CDir *dir = mds->mdcache->get_dirfrag(base); ceph_assert(dir); @@ -3069,7 +3074,7 @@ void EImportStart::replay(MDSRank *mds) dout(10) << "EImportStart.replay " << base << " bounds " << bounds << dendl; //metablob.print(*_dout); auto&& segment = get_segment(); - metablob.replay(mds, segment); + metablob.replay(mds, segment, EVENT_IMPORTSTART); // put in ambiguous import list mds->mdcache->add_ambiguous_import(base, bounds);