From: Xiubo Li Date: Wed, 15 Sep 2021 12:58:22 +0000 (+0800) Subject: mds: do not take the ino which has been used X-Git-Tag: v16.2.14~44^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cf627d001c3d14e17ef6e04e4800dba79e7251f7;p=ceph.git mds: do not take the ino which has been used When replaying the journals, if the inodetable or the sessionmap versions do not match, the CInode will be added to the inode_map, but the ino may still be in the inodetable or sessions' prealloc inos list. So when allocating an ino for a new CInode, we should skip any ino that has already been taken. Fixes: https://tracker.ceph.com/issues/52280 Signed-off-by: Xiubo Li (cherry picked from commit a8c63c7919a78497190995b1a542fbcc05e68d92) --- diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index baf79d4c6c4a..718b5aaece6d 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1909,6 +1909,7 @@ CDentry *CDir::_load_dentry( if (!undef_inode) { mdcache->add_inode(in); // add + mdcache->insert_taken_inos(in->ino()); dn = add_primary_dentry(dname, in, std::move(alternate_name), first, last); // link } dout(12) << "_fetched got " << *dn << " " << *in << dendl; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index ec5c32468434..4847d4846c7d 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -320,6 +320,8 @@ void MDCache::remove_inode(CInode *o) snap_inode_map.erase(o->vino()); } + clear_taken_inos(o->ino()); + if (o->ino() < MDS_INO_SYSTEM_BASE) { if (o == root) root = 0; if (o == myin) myin = 0; diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 56fe1164b443..3257067340f0 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -193,6 +193,16 @@ class MDCache { explicit MDCache(MDSRank *m, PurgeQueue &purge_queue_); ~MDCache(); + void insert_taken_inos(inodeno_t ino) { + replay_taken_inos.insert(ino); + } + void clear_taken_inos(inodeno_t ino) { + replay_taken_inos.erase(ino); + } + bool test_and_clear_taken_inos(inodeno_t ino) { + return replay_taken_inos.erase(ino) != 0; + } + 
uint64_t cache_limit_memory(void) { return cache_memory_limit; } @@ -1216,6 +1226,8 @@ class MDCache { StrayManager stray_manager; private: + std::set replay_taken_inos; // the inos have been taken when replaying + // -- fragmenting -- struct ufragment { ufragment() {} diff --git a/src/mds/Server.cc b/src/mds/Server.cc index b8627e8e071c..edaf5fb3e7d1 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -3316,17 +3316,36 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino // while session is opening. bool allow_prealloc_inos = mdr->session->is_open(); + inodeno_t _useino = useino; + // assign ino - if (allow_prealloc_inos && (mdr->used_prealloc_ino = _inode->ino = mdr->session->take_ino(useino))) { - mds->sessionmap.mark_projected(mdr->session); - dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino - << " (" << mdr->session->info.prealloc_inos.size() << " left)" - << dendl; - } else { - mdr->alloc_ino = - _inode->ino = mds->inotable->project_alloc_id(useino); - dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino << dendl; - } + do { + if (allow_prealloc_inos && (mdr->used_prealloc_ino = _inode->ino = mdr->session->take_ino(_useino))) { + if (mdcache->test_and_clear_taken_inos(_inode->ino)) { + _inode->ino = 0; + dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino + << " (" << mdr->session->info.prealloc_inos.size() << " left)" + << " but has been taken, will try again!" 
<< dendl; + } else { + mds->sessionmap.mark_projected(mdr->session); + dout(10) << "prepare_new_inode used_prealloc " << mdr->used_prealloc_ino + << " (" << mdr->session->info.prealloc_inos.size() << " left)" + << dendl; + } + } else { + mdr->alloc_ino = + _inode->ino = mds->inotable->project_alloc_id(_useino); + if (mdcache->test_and_clear_taken_inos(_inode->ino)) { + mds->inotable->apply_alloc_id(_inode->ino); + _inode->ino = 0; + dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino + << " but has been taken, will try again!" << dendl; + } else { + dout(10) << "prepare_new_inode alloc " << mdr->alloc_ino << dendl; + } + } + _useino = 0; + } while (!_inode->ino); if (useino && useino != _inode->ino) { dout(0) << "WARNING: client specified " << useino << " and i allocated " << _inode->ino << dendl; @@ -3335,7 +3354,7 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino << " but mds." << mds->get_nodeid() << " allocated " << _inode->ino; //ceph_abort(); // just for now. 
} - + if (allow_prealloc_inos && mdr->session->get_num_projected_prealloc_inos() < g_conf()->mds_client_prealloc_inos / 2) { int need = g_conf()->mds_client_prealloc_inos - mdr->session->get_num_projected_prealloc_inos(); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 6a05dee42a8d..74e86ec79bac 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -1565,6 +1565,8 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup) if (mds->inotable->get_version() >= inotablev) { dout(10) << "EMetaBlob.replay inotable tablev " << inotablev << " <= table " << mds->inotable->get_version() << dendl; + if (allocated_ino) + mds->mdcache->insert_taken_inos(allocated_ino); } else { dout(10) << "EMetaBlob.replay inotable v " << inotablev << " - 1 == table " << mds->inotable->get_version() @@ -1591,6 +1593,8 @@ void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDPeerUpdate *peerup) if (mds->sessionmap.get_version() >= sessionmapv) { dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv << " <= table " << mds->sessionmap.get_version() << dendl; + if (used_preallocated_ino) + mds->mdcache->insert_taken_inos(used_preallocated_ino); } else { dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv << ", table " << mds->sessionmap.get_version()