LogSegment::try_to_expire() batches backtrace updates for inodes in the
dirty_parent_inodes list. If a backtrace update operation fails for
one inode due to a missing (removed) data pool, the failure is specially
handled by treating the operation as a success. However, the errno
(-ENOENT) is stored by the gather context and passed on as the return
value to subsequent operations (even for successful backtrace update
operations in the same gather context).
Fixes: http://tracker.ceph.com/issues/63259
Signed-off-by: Venky Shankar <vshankar@redhat.com>
(cherry picked from commit d1d3a8c7c80534cba26d1daf2f60f4c7f03c8339)
}
void CInode::_store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
- inode_backtrace_t &bt, int op_prio)
+ inode_backtrace_t &bt, int op_prio, bool ignore_old_pools)
{
dout(10) << __func__ << " on " << *this << dendl;
ceph_assert(is_dirty_parent());
ops_vec.emplace_back(op_prio, pool, get_inode()->layout,
mdcache->mds->mdsmap->get_up_features(), slink);
- if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty()) {
- dout(20) << __func__ << ": no dirtypool or no old pools" << dendl;
+ if (!state_test(STATE_DIRTYPOOL) || get_inode()->old_pools.empty() || ignore_old_pools) {
+ dout(20) << __func__ << ": no dirtypool or no old pools or ignore_old_pools" << dendl;
return;
}
inode_backtrace_t bt;
auto version = get_inode()->backtrace_version;
- _store_backtrace(ops_vec, bt, op_prio);
+ _store_backtrace(ops_vec, bt, op_prio, false);
C_GatherBuilder gather(g_ceph_context,
new C_OnFinisher(
gather.activate();
}
-void CInode::store_backtrace(CInodeCommitOperations &op, int op_prio)
+void CInode::store_backtrace(CInodeCommitOperations &op, int op_prio,
+ bool ignore_old_pools)
{
op.version = get_inode()->backtrace_version;
op.in = this;
- _store_backtrace(op.ops_vec, op.bt, op_prio);
+ // update backtraces in old pools
+ _store_backtrace(op.ops_vec, op.bt, op_prio, ignore_old_pools);
}
void CInode::_stored_backtrace(int r, version_t v, Context *fin)
inode_backtrace_t &bt);
void build_backtrace(int64_t pool, inode_backtrace_t& bt);
void _store_backtrace(std::vector<CInodeCommitOperation> &ops_vec,
- inode_backtrace_t &bt, int op_prio);
- void store_backtrace(CInodeCommitOperations &op, int op_prio);
+ inode_backtrace_t &bt, int op_prio, bool ignore_old_pools);
+ void store_backtrace(CInodeCommitOperations &op, int op_prio,
+ bool ignore_old_pools=false);
void store_backtrace(MDSContext *fin, int op_prio=-1);
void _stored_backtrace(int r, version_t v, Context *fin);
void fetch_backtrace(Context *fin, ceph::buffer::list *backtrace);
// client caps
client_t loner_cap = -1, want_loner_cap = -1;
+ /**
+ * Return the pool ID where we currently write backtraces for
+ * this inode (in addition to inode.old_pools)
+ *
+ * @returns a pool ID >=0
+ */
+ int64_t get_backtrace_pool() const;
+
protected:
ceph_lock_state_t *get_fcntl_lock_state() {
if (!fcntl_locks)
clear_flock_lock_state();
}
- /**
- * Return the pool ID where we currently write backtraces for
- * this inode (in addition to inode.old_pools)
- *
- * @returns a pool ID >=0
- */
- int64_t get_backtrace_pool() const;
-
// parent dentries in cache
CDentry *parent = nullptr; // primary link
mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked
ceph_assert(g_conf()->mds_kill_journal_expire_at != 3);
- size_t count = 0;
- for (elist<CInode*>::iterator it = dirty_parent_inodes.begin(); !it.end(); ++it)
- count++;
-
- std::vector<CInodeCommitOperations> ops_vec;
- ops_vec.reserve(count);
+ std::map<int64_t, std::vector<CInodeCommitOperations>> ops_vec_map;
// backtraces to be stored/updated
for (elist<CInode*>::iterator p = dirty_parent_inodes.begin(); !p.end(); ++p) {
CInode *in = *p;
ceph_assert(in->is_auth());
if (in->can_auth_pin()) {
dout(15) << "try_to_expire waiting for storing backtrace on " << *in << dendl;
- ops_vec.resize(ops_vec.size() + 1);
- in->store_backtrace(ops_vec.back(), op_prio);
+ auto pool_id = in->get_backtrace_pool();
+
+ // this is for the default data pool
+ dout(20) << __func__ << ": updating pool=" << pool_id << dendl;
+ ops_vec_map[pool_id].push_back(CInodeCommitOperations());
+ in->store_backtrace(ops_vec_map[pool_id].back(), op_prio, true);
+
+
+ if (!in->state_test(CInode::STATE_DIRTYPOOL)) {
+ dout(20) << __func__ << ": no dirtypool" << dendl;
+ continue;
+ }
+
+ // dispatch separate ops for backtrace updates for old pools
+ for (auto _pool_id : in->get_inode()->old_pools) {
+ if (_pool_id == pool_id) {
+ continue;
+ }
+
+ in->auth_pin(in); // CInode::_stored_backtrace() does auth_unpin()
+ dout(20) << __func__ << ": updating old_pool=" << _pool_id << dendl;
+
+ auto cco = CInodeCommitOperations();
+ cco.in = in;
+ // use backtrace from the main pool so as to pickup the main
+ // pool-id for old pool updates.
+ cco.bt = ops_vec_map[pool_id].back().bt;
+ cco.ops_vec.emplace_back(op_prio, _pool_id);
+ cco.version = in->get_inode()->backtrace_version;
+ ops_vec_map[_pool_id].push_back(cco);
+ }
} else {
dout(15) << "try_to_expire waiting for unfreeze on " << *in << dendl;
in->add_waiter(CInode::WAIT_UNFREEZE, gather_bld.new_sub());
}
}
- if (!ops_vec.empty())
+
+ for (auto& [pool_id, ops_vec] : ops_vec_map) {
mds->finisher->queue(new BatchCommitBacktrace(mds, gather_bld.new_sub(), std::move(ops_vec)));
+ }
ceph_assert(g_conf()->mds_kill_journal_expire_at != 4);