From 5cc44b63f433c29c6fb933ae3072ac6270e7d333 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 11 May 2021 12:03:51 +0800 Subject: [PATCH] mds: save the metadata pool id MDSRank class's private member There is one rare case that when mds daemon received a new mdsmap and during decoding it, the metadata_pool will be reset to -1, if other threads try to get it from old mdsmap it will get a invalid pool id. This can also help get rid of the mds_lock else where. Fixes: https://tracker.ceph.com/issues/50389 Signed-off-by: Xiubo Li Signed-off-by: Xiubo Li --- src/mds/CDir.cc | 2 +- src/mds/CInode.cc | 6 +++--- src/mds/MDBalancer.cc | 2 +- src/mds/MDCache.cc | 8 ++++---- src/mds/MDLog.cc | 12 ++++++------ src/mds/MDSRank.cc | 10 +++++----- src/mds/MDSRank.h | 9 ++++++++- src/mds/MDSTable.cc | 4 ++-- src/mds/OpenFileTable.cc | 10 +++++----- src/mds/Server.cc | 6 +++--- src/mds/SessionMap.cc | 10 +++++----- 11 files changed, 43 insertions(+), 36 deletions(-) diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 0242cdc67ca..ef3e6a0d0c2 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -2193,7 +2193,7 @@ public: vector &&r, mempool::mds_co::compact_set &&stales) : dir(d), op_prio(pr) { - metapool = dir->mdcache->mds->mdsmap->get_metadata_pool(); + metapool = dir->mdcache->mds->get_metadata_pool(); version = dir->get_version(); is_new = dir->is_new(); to_set.swap(s); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 550de651f5e..4e47fdf7869 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1149,7 +1149,7 @@ void CInode::store(MDSContext *fin) m.write_full(bl); object_t oid = CInode::get_object_name(ino(), frag_t(), ".inode"); - object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mdcache->mds->get_metadata_pool()); Context *newfin = new C_OnFinisher(new C_IO_Inode_Stored(this, get_version(), fin), @@ -1224,7 +1224,7 @@ void CInode::fetch(MDSContext *fin) C_GatherBuilder gather(g_ceph_context, new C_OnFinisher(c, mdcache->mds->finisher)); object_t oid = CInode::get_object_name(ino(), frag_t(), ""); - object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mdcache->mds->get_metadata_pool()); // Old on-disk format: inode stored in xattr of a dirfrag ObjectOperation rd; @@ -5188,7 +5188,7 @@ void CInode::scrub_finished() { int64_t CInode::get_backtrace_pool() const { if (is_dir()) { - return mdcache->mds->mdsmap->get_metadata_pool(); + return mdcache->mds->get_metadata_pool(); } else { // Files are required to have an explicit layout that specifies // a pool diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc index fe36831b82b..90e67127cb0 100644 --- a/src/mds/MDBalancer.cc +++ b/src/mds/MDBalancer.cc @@ -368,7 +368,7 @@ int MDBalancer::localize_balancer() /* we assume that balancer is in the metadata pool */ object_t oid = object_t(mds->mdsmap->get_balancer()); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); ceph_tid_t tid = mds->objecter->read(oid, oloc, 0, 0, CEPH_NOSNAP, &lua_src, 0, new C_SafeCond(lock, cond, &ack, &r)); dout(15) << "launched non-blocking read tid=" << tid diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 7fd265fefce..6be4415ff7c 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -8648,7 +8648,7 @@ void MDCache::open_remote_dentry(CDentry *dn, bool projected, MDSContext *fin, b dout(10) << "open_remote_dentry " << *dn << dendl; CDentry::linkage_t *dnl = projected ? dn->get_projected_linkage() : dn->get_linkage(); inodeno_t ino = dnl->get_remote_ino(); - int64_t pool = dnl->get_remote_d_type() == DT_DIR ? mds->mdsmap->get_metadata_pool() : -1; + int64_t pool = dnl->get_remote_d_type() == DT_DIR ? mds->get_metadata_pool() : -1; open_ino(ino, pool, new C_MDC_OpenRemoteDentry(this, dn, ino, fin, want_xlocked), true, want_xlocked); // backtrace } @@ -8778,7 +8778,7 @@ void MDCache::_open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err return; } } else if (err == -CEPHFS_ENOENT) { - int64_t meta_pool = mds->mdsmap->get_metadata_pool(); + int64_t meta_pool = mds->get_metadata_pool(); if (info.pool != meta_pool) { dout(10) << " no object in pool " << info.pool << ", retrying pool " << meta_pool << dendl; @@ -9023,7 +9023,7 @@ void MDCache::do_open_ino(inodeno_t ino, open_ino_info_t& info, int err) } else { ceph_assert(!info.ancestors.empty()); info.checking = mds->get_nodeid(); - open_ino(info.ancestors[0].dirino, mds->mdsmap->get_metadata_pool(), + open_ino(info.ancestors[0].dirino, mds->get_metadata_pool(), new C_MDC_OpenInoParentOpened(this, ino), info.want_replica); } } @@ -11949,7 +11949,7 @@ void MDCache::_fragment_committed(dirfrag_t basedirfrag, const MDRequestRef& mdr mds->finisher)); SnapContext nullsnapc; - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); for (const auto& fg : uf.old_frags) { object_t oid = CInode::get_object_name(basedirfrag.ino, fg, ""); ObjectOperation op; diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 1fc4b58f17e..1a08109d926 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -161,7 +161,7 @@ void MDLog::create(MDSContext *c) // Instantiate Journaler and start async write to RADOS ceph_assert(journaler == NULL); - journaler = new Journaler("mdlog", ino, mds->mdsmap->get_metadata_pool(), + journaler = new Journaler("mdlog", ino, mds->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter, logger, l_mdl_jlat, mds->finisher); ceph_assert(journaler->is_readonly()); @@ -171,7 +171,7 @@ void MDLog::create(MDSContext *c) journaler->write_head(gather.new_sub()); // Async write JournalPointer to RADOS - JournalPointer jp(mds->get_nodeid(), mds->mdsmap->get_metadata_pool()); + JournalPointer jp(mds->get_nodeid(), mds->get_metadata_pool()); jp.front = ino; jp.back = 0; jp.save(mds->objecter, gather.new_sub()); @@ -966,7 +966,7 @@ void MDLog::_recovery_thread(MDSContext *completion) // First, read the pointer object. // If the pointer object is not present, then create it with // front = default ino and back = null - JournalPointer jp(mds->get_nodeid(), mds->mdsmap->get_metadata_pool()); + JournalPointer jp(mds->get_nodeid(), mds->get_metadata_pool()); const int read_result = jp.load(mds->objecter); if (read_result == -CEPHFS_ENOENT) { inodeno_t const default_log_ino = MDS_INO_LOG_OFFSET + mds->get_nodeid(); @@ -1001,7 +1001,7 @@ void MDLog::_recovery_thread(MDSContext *completion) } dout(1) << "Erasing journal " << jp.back << dendl; C_SaferCond erase_waiter; - Journaler back("mdlog", jp.back, mds->mdsmap->get_metadata_pool(), + Journaler back("mdlog", jp.back, mds->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter, logger, l_mdl_jlat, mds->finisher); @@ -1041,7 +1041,7 @@ void MDLog::_recovery_thread(MDSContext *completion) /* Read the header from the front journal */ Journaler *front_journal = new Journaler("mdlog", jp.front, - mds->mdsmap->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter, + mds->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter, logger, l_mdl_jlat, mds->finisher); // Assign to ::journaler so that we can be aborted by ::shutdown while @@ -1128,7 +1128,7 @@ void MDLog::_reformat_journal(JournalPointer const &jp_in, Journaler *old_journa /* Create the new Journaler file */ Journaler *new_journal = new Journaler("mdlog", jp.back, - mds->mdsmap->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter, logger, l_mdl_jlat, mds->finisher); + mds->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter, logger, l_mdl_jlat, mds->finisher); dout(4) << "Writing new journal header " << jp.back << dendl; file_layout_t new_layout = old_journal->get_layout(); new_journal->set_writeable(); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 14607b9514a..f085e266979 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -513,6 +513,11 @@ MDSRank::MDSRank( { hb = g_ceph_context->get_heartbeat_map()->add_worker("MDSRank", pthread_self()); + // The metadata pool won't change in the whole life time + // of the fs, with this we can get rid of the mds_lock + // in many places too. + metadata_pool = mdsmap->get_metadata_pool(); + purge_queue.update_op_limit(*mdsmap); objecter->unset_honor_pool_full(); @@ -883,11 +888,6 @@ class C_MDS_VoidFn : public MDSInternalContext } }; -int64_t MDSRank::get_metadata_pool() -{ - return mdsmap->get_metadata_pool(); -} - MDSTableClient *MDSRank::get_table_client(int t) { switch (t) { diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h index 8e97c634da4..7ec785c14ff 100644 --- a/src/mds/MDSRank.h +++ b/src/mds/MDSRank.h @@ -179,7 +179,10 @@ class MDSRank { mds_rank_t get_nodeid() const { return whoami; } std::string_view get_fs_name() const { return fs_name; } - int64_t get_metadata_pool(); + int64_t get_metadata_pool() const + { + return metadata_pool; + } mono_time get_starttime() const { return starttime; @@ -601,6 +604,10 @@ class MDSRank { private: bool send_status = true; + // The metadata pool won't change in the whole life time of the fs, + // with this we can get rid of the mds_lock in many places too. + int64_t metadata_pool = -1; + // "task" string that gets displayed in ceph status inline static const std::string SCRUB_STATUS_KEY = "scrub status"; diff --git a/src/mds/MDSTable.cc b/src/mds/MDSTable.cc index 635e8bb5db1..679633f0c78 100644 --- a/src/mds/MDSTable.cc +++ b/src/mds/MDSTable.cc @@ -83,7 +83,7 @@ void MDSTable::save(MDSContext *onfinish, version_t v) // write (async) SnapContext snapc; object_t oid = get_object_name(); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); mds->objecter->write_full(oid, oloc, snapc, bl, ceph::real_clock::now(), 0, @@ -159,7 +159,7 @@ void MDSTable::load(MDSContext *onfinish) C_IO_MT_Load *c = new C_IO_MT_Load(this, onfinish); object_t oid = get_object_name(); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); mds->objecter->read_full(oid, oloc, CEPH_NOSNAP, &c->bl, 0, new C_OnFinisher(c, mds->finisher)); } diff --git a/src/mds/OpenFileTable.cc b/src/mds/OpenFileTable.cc index 1c853ada788..d3b0a33bbb5 100644 --- a/src/mds/OpenFileTable.cc +++ b/src/mds/OpenFileTable.cc @@ -317,7 +317,7 @@ void OpenFileTable::_journal_finish(int r, uint64_t log_seq, MDSContext *c, new C_OnFinisher(new C_IO_OFT_Save(this, log_seq, c), mds->finisher)); SnapContext snapc; - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); for (auto& [idx, vops] : ops_map) { object_t oid = get_object_name(idx); for (auto& op : vops) { @@ -345,7 +345,7 @@ void OpenFileTable::commit(MDSContext *c, uint64_t log_seq, int op_prio) C_GatherBuilder gather(g_ceph_context); SnapContext snapc; - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); const unsigned max_write_size = mds->mdcache->max_dir_commit_size; @@ -728,7 +728,7 @@ void OpenFileTable::_read_omap_values(const std::string& key, unsigned idx, { object_t oid = get_object_name(idx); dout(10) << __func__ << ": load from '" << oid << ":" << key << "'" << dendl; - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); C_IO_OFT_Load *c = new C_IO_OFT_Load(this, idx, first); ObjectOperation op; if (first) @@ -867,7 +867,7 @@ void OpenFileTable::_load_finish(int op_r, int header_r, int values_r, C_GatherBuilder gather(g_ceph_context, new C_OnFinisher(new C_IO_OFT_Recover(this), mds->finisher)); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); SnapContext snapc; for (unsigned omap_idx = 0; omap_idx < loaded_journals.size(); omap_idx++) { @@ -1113,7 +1113,7 @@ void OpenFileTable::_prefetch_inodes() int64_t pool; if (prefetch_state == DIR_INODES) - pool = mds->mdsmap->get_metadata_pool(); + pool = mds->get_metadata_pool(); else if (prefetch_state == FILE_INODES) pool = mds->mdsmap->get_first_data_pool(); else diff --git a/src/mds/Server.cc b/src/mds/Server.cc index cc93b10d9ba..123e1941b38 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2459,7 +2459,7 @@ void Server::handle_osd_map() * using osdmap_full_flag(), because we want to know "is the flag set" * rather than "does the flag apply to us?" */ mds->objecter->with_osdmap([this](const OSDMap& o) { - auto pi = o.get_pg_pool(mds->mdsmap->get_metadata_pool()); + auto pi = o.get_pg_pool(mds->get_metadata_pool()); is_full = pi && pi->has_flag(pg_pool_t::FLAG_FULL); dout(7) << __func__ << ": full = " << is_full << " epoch = " << o.get_epoch() << dendl; @@ -4035,7 +4035,7 @@ void Server::_lookup_snap_ino(MDRequestRef& mdr) if (parent_ino) { diri = mdcache->get_inode(parent_ino); if (!diri) { - mdcache->open_ino(parent_ino, mds->mdsmap->get_metadata_pool(), new C_MDS_LookupIno2(this, mdr)); + mdcache->open_ino(parent_ino, mds->get_metadata_pool(), new C_MDS_LookupIno2(this, mdr)); return; } @@ -4067,7 +4067,7 @@ void Server::_lookup_snap_ino(MDRequestRef& mdr) respond_to_request(mdr, -CEPHFS_ESTALE); } else { - mdcache->open_ino(vino.ino, mds->mdsmap->get_metadata_pool(), new C_MDS_LookupIno2(this, mdr), false); + mdcache->open_ino(vino.ino, mds->get_metadata_pool(), new C_MDS_LookupIno2(this, mdr), false); } } diff --git a/src/mds/SessionMap.cc b/src/mds/SessionMap.cc index 354461bf50a..b96fc37d607 100644 --- a/src/mds/SessionMap.cc +++ b/src/mds/SessionMap.cc @@ -237,7 +237,7 @@ void SessionMap::_load_finish( dout(10) << __func__ << ": continue omap load from '" << last_key << "'" << dendl; object_t oid = get_object_name(); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); C_IO_SM_Load *c = new C_IO_SM_Load(this, false); ObjectOperation op; op.omap_get_vals(last_key, "", g_conf()->mds_sessionmap_keys_per_op, @@ -279,7 +279,7 @@ void SessionMap::load(MDSContext *onload) C_IO_SM_Load *c = new C_IO_SM_Load(this, true); object_t oid = get_object_name(); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); ObjectOperation op; op.omap_get_header(&c->header_bl, &c->header_r); @@ -316,7 +316,7 @@ void SessionMap::load_legacy() C_IO_SM_LoadLegacy *c = new C_IO_SM_LoadLegacy(this); object_t oid = get_object_name(); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); mds->objecter->read_full(oid, oloc, CEPH_NOSNAP, &c->bl, 0, new C_OnFinisher(c, mds->finisher)); @@ -388,7 +388,7 @@ void SessionMap::save(MDSContext *onsave, version_t needv) committing = version; SnapContext snapc; object_t oid = get_object_name(); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); ObjectOperation op; @@ -885,7 +885,7 @@ void SessionMap::save_if_dirty(const std::set &tgt_sessions, SnapContext snapc; object_t oid = get_object_name(); - object_locator_t oloc(mds->mdsmap->get_metadata_pool()); + object_locator_t oloc(mds->get_metadata_pool()); MDSContext *on_safe = gather_bld->new_sub(); mds->objecter->mutate(oid, oloc, op, snapc, ceph::real_clock::now(), 0, -- 2.39.5