From: John Spray Date: Wed, 24 Jun 2015 22:58:36 +0000 (+0100) Subject: mds: store layout on header object X-Git-Tag: v9.0.3~105^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8d9c95fa42dd1c31fcb65918fb2180281380269d;p=ceph.git mds: store layout on header object This is surprisingly simple because we were already redundantly calling store_backtrace whenever the layout changed! That was a side effect of the way add_old_pool is handled: the backtrace version is bumped to latest even if the "old" pool is the current one. The upshot is that if we accept the existing behaviour of also unnecessarily updating the 'parent' xattr, keeping the new 'layout' xattr update requires no new dirty flags. This behaviour is twitchy enough that new tests are needed to guard against regressions, though. Fixes: #4161 Signed-off-by: John Spray --- diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index a9366265e763..d798448e6b7c 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1124,20 +1124,25 @@ void CInode::store_backtrace(MDSInternalContextBase *fin, int op_prio) auth_pin(this); int64_t pool; - if (is_dir()) + if (is_dir()) { pool = mdcache->mds->mdsmap->get_metadata_pool(); - else + } else { pool = inode.layout.fl_pg_pool; + } inode_backtrace_t bt; build_backtrace(pool, bt); - bufferlist bl; - ::encode(bt, bl); + bufferlist parent_bl; + ::encode(bt, parent_bl); ObjectOperation op; op.priority = op_prio; op.create(false); - op.setxattr("parent", bl); + op.setxattr("parent", parent_bl); + + bufferlist layout_bl; + ::encode(inode.layout, layout_bl); + op.setxattr("layout", layout_bl); SnapContext snapc; object_t oid = get_object_name(ino(), frag_t(), ""); @@ -1147,6 +1152,7 @@ void CInode::store_backtrace(MDSInternalContextBase *fin, int op_prio) &mdcache->mds->finisher); if (!state_test(STATE_DIRTYPOOL) || inode.old_pools.empty()) { + dout(20) << __func__ << ": no dirtypool or no old pools" << dendl; mdcache->mds->objecter->mutate(oid, 
oloc, op, snapc, ceph_clock_now(g_ceph_context), 0, NULL, fin2); return; @@ -1156,16 +1162,21 @@ void CInode::store_backtrace(MDSInternalContextBase *fin, int op_prio) mdcache->mds->objecter->mutate(oid, oloc, op, snapc, ceph_clock_now(g_ceph_context), 0, NULL, gather.new_sub()); + // In the case where DIRTYPOOL is set, we update all old pools' backtraces + // such that anyone reading them will see the new pool ID in + // inode_backtrace_t::pool and go read everything else from there. for (compact_set::iterator p = inode.old_pools.begin(); p != inode.old_pools.end(); ++p) { if (*p == pool) continue; + dout(20) << __func__ << ": updating old pool " << *p << dendl; + ObjectOperation op; op.priority = op_prio; op.create(false); - op.setxattr("parent", bl); + op.setxattr("parent", parent_bl); object_locator_t oloc(*p); mdcache->mds->objecter->mutate(oid, oloc, op, snapc, ceph_clock_now(g_ceph_context),