# particular operation causing this is journal flush which causes the
# MDS to wait for cap revoke.
self.mount_a.resume_netns()
+
+class TestSkipReplayInoTable(CephFSTestCase):
+ MDSS_REQUIRED = 1
+ CLIENTS_REQUIRED = 1
+
+ def test_alloc_cinode_assert(self):
+ """
+ Test alloc CInode assert.
+
+ See: https://tracker.ceph.com/issues/52280
+ """
+
+ # Create a directory; the MDS will journal this and then crash
+ self.mount_a.run_shell(["rm", "-rf", "test_alloc_ino"])
+ self.mount_a.run_shell(["mkdir", "test_alloc_ino"])
+
+ status = self.fs.status()
+ rank0 = self.fs.get_rank(rank=0, status=status)
+
+ self.fs.mds_asok(['config', 'set', 'mds_kill_skip_replaying_inotable', "true"])
+ # This will make the MDS crash. Since we only have one MDS in the
+ # cluster, without "wait=False" this call would get stuck here forever.
+ self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir1"], wait=False)
+ self.fs.mds_asok(['flush', 'journal'])
+
+ # Now set the mds config to skip replaying the inotable
+ self.fs.set_ceph_conf('mds', 'mds_inject_skip_replaying_inotable', True)
+ self.fs.set_ceph_conf('mds', 'mds_wipe_sessions', True)
+
+ # sleep 5 seconds to make sure the journal log is flushed and applied
+ time.sleep(5)
+ self.fs.mds_restart()
+ # sleep 5 seconds to make sure the mds tell command won't get stuck
+ time.sleep(5)
+ self.fs.wait_for_daemons()
+
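+ # The MDS crash above is expected; clean up its core dump so the run
+ # isn't flagged as a failure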
+ self.delete_mds_coredump(rank0['name'])
+
+ self.mount_a.run_shell(["mkdir", "test_alloc_ino/dir2"])
+
+ ls_out = set(self.mount_a.ls("test_alloc_ino/"))
+ self.assertEqual(ls_out, {"dir1", "dir2"})
void finish(int r) override {
ceph_assert(r == 0);
+ // crash the current MDS; the replacing MDS will then exercise journal replay
+ ceph_assert(!g_conf()->mds_kill_skip_replaying_inotable);
+
dn->pop_projected_linkage();
// dirty inode, dn, dir
void finish(int r) override {
ceph_assert(r == 0);
+ // crash the current MDS; the replacing MDS will then exercise journal replay
+ ceph_assert(!g_conf()->mds_kill_skip_replaying_inotable);
+
// link the inode
dn->pop_projected_linkage();
journal_and_reply(mdr, newi, dn, le, new C_MDS_mknod_finish(this, mdr, dn, newi));
mds->balancer->maybe_fragment(dir, false);
+
+ // flush the journal as soon as possible
+ if (g_conf()->mds_kill_skip_replaying_inotable) {
+ mdlog->flush();
+ }
}
ls.push_back(new EMetaBlob());
}
-void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup)
+void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, int type, MDPeerUpdate *peerup)
{
dout(10) << "EMetaBlob.replay " << lump_map.size() << " dirlumps by " << client_name << dendl;
logseg->open_files.push_back(&in->item_open_file);
}
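+ // QA-only injection: when mds_inject_skip_replaying_inotable is set, the
+ // EUpdate replay below skips applying the inotable and sessionmap updates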
+ bool skip_replaying_inotable = g_conf()->mds_inject_skip_replaying_inotable;
+
// allocated_inos
if (inotablev) {
- if (mds->inotable->get_version() >= inotablev) {
+ if (mds->inotable->get_version() >= inotablev ||
+ unlikely(type == EVENT_UPDATE && skip_replaying_inotable)) {
dout(10) << "EMetaBlob.replay inotable tablev " << inotablev
<< " <= table " << mds->inotable->get_version() << dendl;
if (allocated_ino)
}
}
if (sessionmapv) {
- if (mds->sessionmap.get_version() >= sessionmapv) {
+ if (mds->sessionmap.get_version() >= sessionmapv ||
+ unlikely(type == EVENT_UPDATE && skip_replaying_inotable)) {
dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv
<< " <= table " << mds->sessionmap.get_version() << dendl;
if (used_preallocated_ino)
void EUpdate::replay(MDSRank *mds)
{
auto&& segment = get_segment();
- metablob.replay(mds, segment);
+ dout(10) << "EUpdate::replay" << dendl;
+ metablob.replay(mds, segment, EVENT_UPDATE);
if (had_peers) {
dout(10) << "EUpdate.replay " << reqid << " had peers, expecting a matching ECommitted" << dendl;
{
dout(10) << "EOpen.replay " << dendl;
auto&& segment = get_segment();
- metablob.replay(mds, segment);
+ metablob.replay(mds, segment, EVENT_OPEN);
// note which segments inodes belong to, so we don't have to start rejournaling them
for (const auto &ino : inos) {
dout(10) << "EPeerUpdate.replay prepare " << reqid << " for mds." << leader
<< ": applying commit, saving rollback info" << dendl;
su = new MDPeerUpdate(origop, rollback);
- commit.replay(mds, segment, su);
+ commit.replay(mds, segment, EVENT_PEERUPDATE, su);
mds->mdcache->add_uncommitted_peer(reqid, segment, leader, su);
break;
case EPeerUpdate::OP_ROLLBACK:
dout(10) << "EPeerUpdate.replay abort " << reqid << " for mds." << leader
<< ": applying rollback commit blob" << dendl;
- commit.replay(mds, segment);
+ commit.replay(mds, segment, EVENT_PEERUPDATE);
mds->mdcache->finish_uncommitted_peer(reqid, false);
break;
// first, stick the spanning tree in my cache
//metablob.print(*_dout);
- metablob.replay(mds, get_segment());
+ metablob.replay(mds, get_segment(), EVENT_SUBTREEMAP);
// restore import/export maps
for (map<dirfrag_t, vector<dirfrag_t> >::iterator p = subtrees.begin();
ceph_abort();
}
- metablob.replay(mds, segment);
+ metablob.replay(mds, segment, EVENT_FRAGMENT);
if (in && g_conf()->mds_debug_frag)
in->verify_dirfrags();
}
{
dout(10) << "EExport.replay " << base << dendl;
auto&& segment = get_segment();
- metablob.replay(mds, segment);
+ metablob.replay(mds, segment, EVENT_EXPORT);
CDir *dir = mds->mdcache->get_dirfrag(base);
ceph_assert(dir);
dout(10) << "EImportStart.replay " << base << " bounds " << bounds << dendl;
//metablob.print(*_dout);
auto&& segment = get_segment();
- metablob.replay(mds, segment);
+ metablob.replay(mds, segment, EVENT_IMPORTSTART);
// put in ambiguous import list
mds->mdcache->add_ambiguous_import(base, bounds);