From: Venky Shankar Date: Fri, 2 Feb 2024 04:50:47 +0000 (-0500) Subject: mds: batch backtrace updates by pool-id when expiring a log segment X-Git-Tag: v19.2.3~454^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=94c8d024f853cf779508cea21070a7d339d21004;p=ceph.git mds: batch backtrace updates by pool-id when expiring a log segment LogSegment::try_to_expire() batches backtrace updates for inodes in the dirty_parent_inodes list. If a backtrace update operation fails for one inode due to a missing (removed) data pool, the failure is specially handled by treating the operation as a success; however, the errno (-ENOENT) is still stored by the gather context and passed on as the return value to subsequent operations (even for successful backtrace update operations in the same gather context). This change batches the updates by pool-id so that each pool's batch is dispatched with its own gather sub-context. Fixes: http://tracker.ceph.com/issues/63259 Signed-off-by: Venky Shankar (cherry picked from commit d1d3a8c7c80534cba26d1daf2f60f4c7f03c8339) --- diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 861a6def09f..ff54e8ed1e9 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1372,7 +1372,7 @@ void CInode::_commit_ops(int r, C_GatherBuilder &gather_bld, } void CInode::_store_backtrace(std::vector &ops_vec, - inode_backtrace_t &bt, int op_prio) + inode_backtrace_t &bt, int op_prio, bool ignore_old_pools) { dout(10) << __func__ << " on " << *this << dendl; ceph_assert(is_dirty_parent()); @@ -1393,8 +1393,8 @@ void CInode::_store_backtrace(std::vector &ops_vec, ops_vec.emplace_back(op_prio, pool, get_inode()->layout, mdcache->mds->mdsmap->get_up_features(), slink); - if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty()) { - dout(20) << __func__ << ": no dirtypool or no old pools" << dendl; + if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty() || ignore_old_pools) { + dout(20) << __func__ << ": no dirtypool or no old pools or ignore_old_pools" << dendl; return; } @@ -1417,7 +1417,7 @@ void CInode::store_backtrace(MDSContext
*fin, int op_prio) inode_backtrace_t bt; auto version = get_inode()->backtrace_version; - _store_backtrace(ops_vec, bt, op_prio); + _store_backtrace(ops_vec, bt, op_prio, false); C_GatherBuilder gather(g_ceph_context, new C_OnFinisher( @@ -1428,12 +1428,14 @@ void CInode::store_backtrace(MDSContext *fin, int op_prio) gather.activate(); } -void CInode::store_backtrace(CInodeCommitOperations &op, int op_prio) +void CInode::store_backtrace(CInodeCommitOperations &op, int op_prio, + bool ignore_old_pools) { op.version = get_inode()->backtrace_version; op.in = this; - _store_backtrace(op.ops_vec, op.bt, op_prio); + // update backtraces in old pools + _store_backtrace(op.ops_vec, op.bt, op_prio, ignore_old_pools); } void CInode::_stored_backtrace(int r, version_t v, Context *fin) diff --git a/src/mds/CInode.h b/src/mds/CInode.h index b12d50181a4..5e8a06eedd5 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -748,8 +748,9 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter &ops_vec, - inode_backtrace_t &bt, int op_prio); - void store_backtrace(CInodeCommitOperations &op, int op_prio); + inode_backtrace_t &bt, int op_prio, bool ignore_old_pools); + void store_backtrace(CInodeCommitOperations &op, int op_prio, + bool ignore_old_pools=false); void store_backtrace(MDSContext *fin, int op_prio=-1); void _stored_backtrace(int r, version_t v, Context *fin); void fetch_backtrace(Context *fin, ceph::buffer::list *backtrace); @@ -1127,6 +1128,14 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter=0 + */ + int64_t get_backtrace_pool() const; + protected: ceph_lock_state_t *get_fcntl_lock_state() { if (!fcntl_locks) @@ -1177,14 +1186,6 @@ protected: clear_flock_lock_state(); } - /** - * Return the pool ID where we currently write backtraces for - * this inode (in addition to inode.old_pools) - * - * @returns a pool ID >=0 - */ - int64_t get_backtrace_pool() const; - // parent dentries in cache CDentry *parent = nullptr; // 
primary link mempool::mds_co::compact_set remote_parents; // if hard linked diff --git a/src/mds/journal.cc b/src/mds/journal.cc index e080b117610..40400ff4054 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -237,27 +237,53 @@ void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int o ceph_assert(g_conf()->mds_kill_journal_expire_at != 3); - size_t count = 0; - for (elist<CInode*>::iterator it = dirty_parent_inodes.begin(); !it.end(); ++it) - count++; - - std::vector<CInodeCommitOperations> ops_vec; - ops_vec.reserve(count); + std::map<int64_t, std::vector<CInodeCommitOperations>> ops_vec_map; // backtraces to be stored/updated for (elist<CInode*>::iterator p = dirty_parent_inodes.begin(); !p.end(); ++p) { CInode *in = *p; ceph_assert(in->is_auth()); if (in->can_auth_pin()) { dout(15) << "try_to_expire waiting for storing backtrace on " << *in << dendl; - ops_vec.resize(ops_vec.size() + 1); - in->store_backtrace(ops_vec.back(), op_prio); + auto pool_id = in->get_backtrace_pool(); + + // this is for the default data pool + dout(20) << __func__ << ": updating pool=" << pool_id << dendl; + ops_vec_map[pool_id].push_back(CInodeCommitOperations()); + in->store_backtrace(ops_vec_map[pool_id].back(), op_prio, true); + + + if (!in->state_test(CInode::STATE_DIRTYPOOL)) { + dout(20) << __func__ << ": no dirtypool" << dendl; + continue; + } + + // dispatch separate ops for backtrace updates for old pools + for (auto _pool_id : in->get_inode()->old_pools) { + if (_pool_id == pool_id) { + continue; + } + + in->auth_pin(in); // CInode::_stored_backtrace() does auth_unpin() + dout(20) << __func__ << ": updating old_pool=" << _pool_id << dendl; + + auto cco = CInodeCommitOperations(); + cco.in = in; + // use backtrace from the main pool so as to pickup the main + // pool-id for old pool updates.
+ cco.bt = ops_vec_map[pool_id].back().bt; + cco.ops_vec.emplace_back(op_prio, _pool_id); + cco.version = in->get_inode()->backtrace_version; + ops_vec_map[_pool_id].push_back(cco); + } } else { dout(15) << "try_to_expire waiting for unfreeze on " << *in << dendl; in->add_waiter(CInode::WAIT_UNFREEZE, gather_bld.new_sub()); } } - if (!ops_vec.empty()) + + for (auto& [pool_id, ops_vec] : ops_vec_map) { mds->finisher->queue(new BatchCommitBacktrace(mds, gather_bld.new_sub(), std::move(ops_vec))); + } ceph_assert(g_conf()->mds_kill_journal_expire_at != 4);