From 3d420afcb0b4a9196144b6729be308888fe377f3 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 15 Sep 2021 20:58:22 +0800 Subject: [PATCH] mds: do not take the ino which has been used When replaying the journals, if the inodetable or the sessionmap versions do not match, the CInode will be added to the inode_map, but the ino may still be in the inodetable or sessions' prealloc inos list. So when allocating new CInode we should skip them. Fixes: https://tracker.ceph.com/issues/52280 Signed-off-by: Xiubo Li (cherry picked from commit a8c63c7919a78497190995b1a542fbcc05e68d92) --- src/mds/CDir.cc | 1 + src/mds/MDCache.cc | 2 ++ src/mds/MDCache.h | 12 ++++++++++++ src/mds/Server.cc | 41 ++++++++++++++++++++++++++++++----------- src/mds/journal.cc | 4 ++++ 5 files changed, 49 insertions(+), 11 deletions(-) diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 344da96322d91..502651d14ee66 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1911,6 +1911,7 @@ CDentry *CDir::_load_dentry( if (!undef_inode) { mdcache->add_inode(in); // add + mdcache->insert_taken_inos(in->ino()); dn = add_primary_dentry(dname, in, std::move(alternate_name), first, last); // link } dout(12) << "_fetched got " << *dn << " " << *in << dendl; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 7ceea6769b383..f680db5e8f4b9 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -329,6 +329,8 @@ void MDCache::remove_inode(CInode *o) snap_inode_map.erase(o->vino()); } + clear_taken_inos(o->ino()); + if (o->ino() < MDS_INO_SYSTEM_BASE) { if (o == root) root = 0; if (o == myin) myin = 0; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 66eae6c293327..4899110452171 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -193,6 +193,16 @@ class MDCache { explicit MDCache(MDSRank *m, PurgeQueue &purge_queue_); ~MDCache(); + void insert_taken_inos(inodeno_t ino) { + replay_taken_inos.insert(ino); + } + void clear_taken_inos(inodeno_t ino) { + replay_taken_inos.erase(ino); + } + 
bool test_and_clear_taken_inos(inodeno_t ino) { + return replay_taken_inos.erase(ino) != 0; + } + uint64_t cache_limit_memory(void) { return cache_memory_limit; } @@ -1220,6 +1230,8 @@ class MDCache { StrayManager stray_manager; private: + std::set<inodeno_t> replay_taken_inos; // the inos have been taken when replaying + // -- fragmenting -- struct ufragment { ufragment() {} diff --git a/src/mds/Server.cc b/src/mds/Server.cc index a4b3ef60904b7..e03ea522b5608 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3324,17 +3324,36 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino // while session is opening. bool allow_prealloc_inos = mdr->session->is_open(); + inodeno_t _useino = useino; + // assign ino - if (allow_prealloc_inos && (mdr->used_prealloc_ino = _inode->ino = mdr->session->take_ino(useino))) { - mds->sessionmap.mark_projected(mdr->session); - dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino - << " (" << mdr->session->info.prealloc_inos.size() << " left)" - << dendl; - } else { - mdr->alloc_ino = - _inode->ino = mds->inotable->project_alloc_id(useino); - dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino << dendl; - } + do { + if (allow_prealloc_inos && (mdr->used_prealloc_ino = _inode->ino = mdr->session->take_ino(_useino))) { + if (mdcache->test_and_clear_taken_inos(_inode->ino)) { + _inode->ino = 0; + dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino + << " (" << mdr->session->info.prealloc_inos.size() << " left)" + << " but has been taken, will try again!" 
<< dendl; + } else { + mds->sessionmap.mark_projected(mdr->session); + dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino + << " (" << mdr->session->info.prealloc_inos.size() << " left)" + << dendl; + } + } else { + mdr->alloc_ino = + _inode->ino = mds->inotable->project_alloc_id(_useino); + if (mdcache->test_and_clear_taken_inos(_inode->ino)) { + mds->inotable->apply_alloc_id(_inode->ino); + _inode->ino = 0; + dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino + << " but has been taken, will try again!" << dendl; + } else { + dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino << dendl; + } + } + _useino = 0; + } while (!_inode->ino); if (useino && useino != _inode->ino) { dout(0) << "WARNING: client specified " << useino << " and i allocated " << _inode->ino << dendl; @@ -3343,7 +3362,7 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino << " but mds." << mds->get_nodeid() << " allocated " << _inode->ino; //ceph_abort(); // just for now. 
} - + if (allow_prealloc_inos && mdr->session->get_num_projected_prealloc_inos() < g_conf()->mds_client_prealloc_inos / 2) { int need = g_conf()->mds_client_prealloc_inos - mdr->session->get_num_projected_prealloc_inos(); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index f7c161fc69ae5..7849f0b28bd15 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -1572,6 +1572,8 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup) if (mds->inotable->get_version() >= inotablev) { dout(10) << "EMetaBlob.replay inotable tablev " << inotablev << " <= table " << mds->inotable->get_version() << dendl; + if (allocated_ino) + mds->mdcache->insert_taken_inos(allocated_ino); } else { dout(10) << "EMetaBlob.replay inotable v " << inotablev << " - 1 == table " << mds->inotable->get_version() @@ -1598,6 +1600,8 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup) if (mds->sessionmap.get_version() >= sessionmapv) { dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv << " <= table " << mds->sessionmap.get_version() << dendl; + if (used_preallocated_ino) + mds->mdcache->insert_taken_inos(used_preallocated_ino); } else { dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv << ", table " << mds->sessionmap.get_version() -- 2.39.5