From a5f9a36c98243735d46aa93db9add8f4bee26103 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 4 Jan 2011 14:39:58 -0800 Subject: [PATCH] mds: make resolve adjust dir fragmentation as needed During resolve, adjust dir fragmentation as needed based on the subtrees the sender explicitly claims. The given fragmentation on the root is always valid. Their bounds may not be; only split our frags as needed if they happen to be partially in and partially out of the sender's bounding fragset. Signed-off-by: Sage Weil --- src/mds/CInode.h | 2 +- src/mds/MDCache.cc | 112 +++++++++++++++++++++++++++++++++++++++------ src/mds/MDCache.h | 12 +++++ 3 files changed, 111 insertions(+), 15 deletions(-) diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 8426478221284..66d008157fc5e 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -362,7 +362,7 @@ public: bool has_dirfrags() { return !dirfrags.empty(); } CDir* get_dirfrag(frag_t fg) { if (dirfrags.count(fg)) { - assert(g_conf.debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME + //assert(g_conf.debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME return dirfrags[fg]; } else return 0; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 3ac7be40cae3c..5bf8fe4a01686 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -974,6 +974,49 @@ void MDCache::adjust_bounded_subtree_auth(CDir *dir, set& bounds, pair& dfs, set& bounds) +{ + dout(10) << "get_force_dirfrag_bound_set " << dfs << dendl; + + // sort by ino + map byino; + for (vector::iterator p = dfs.begin(); p != dfs.end(); ++p) + byino[p->ino].insert(p->frag); + dout(10) << " by ino: " << byino << dendl; + + for (map::iterator p = byino.begin(); p != byino.end(); ++p) { + CInode *diri = get_inode(p->first); + if (!diri) + continue; + dout(10) << " checking fragset " << p->second.get() << " on " << *diri << dendl; + for (set::iterator q = p->second.begin(); q != p->second.end(); ++q) { + frag_t fg = *q; + list u; + diri->get_dirfrags_under(fg, u); + dout(10) << " frag " << fg << " contains " << u << dendl; + if (!u.empty()) + bounds.insert(u.begin(), u.end()); + frag_t t = fg; + while (t != frag_t()) { + t = t.parent(); + CDir *dir = diri->get_dirfrag(t); + if (dir) { + // ugh, we found a containing parent + dout(10) << " ugh, splitting parent frag " << t << " " << *dir << dendl; + force_dir_fragment(diri, fg); + break; + } + } + } + } +} + + void MDCache::adjust_bounded_subtree_auth(CDir *dir, vector& bound_dfs, pair auth) { dout(7) << "adjust_bounded_subtree_auth " << dir->get_dir_auth() << " -> " << auth @@ -2518,19 +2561,15 @@ void MDCache::handle_resolve(MMDSResolve *m) for (map >::iterator pi = m->subtrees.begin(); pi != m->subtrees.end(); ++pi) { - CInode *diri = get_inode(pi->first.ino); - if (!diri) continue; - bool forced = diri->dirfragtree.force_to_leaf(pi->first.frag); - if (forced) { - dout(10) << " forced frag " << pi->first.frag << " to leaf in " - << diri->dirfragtree - << " on " << pi->first << dendl; - } - - CDir *dir = diri->get_dirfrag(pi->first.frag); - if (!dir) continue; - - adjust_bounded_subtree_auth(dir, pi->second, from); + dout(10) << "peer claims " << pi->first << " bounds " << pi->second << dendl; + CDir *dir = get_force_dirfrag(pi->first); + if (!dir) + continue; + + set bounds; + get_force_dirfrag_bound_set(pi->second, bounds); + + adjust_bounded_subtree_auth(dir, bounds, from); try_subtree_merge(dir); } @@ -8633,7 +8672,52 @@ void MDCache::adjust_dir_fragments(CInode *diri, frag_t basefrag, int bits, list srcfrags; diri->get_dirfrags_under(basefrag, srcfrags); - adjust_dir_fragments(diri, srcfrags, basefrag, bits, resultfrags, waiters, replay); + if (!srcfrags.empty()) + adjust_dir_fragments(diri, srcfrags, basefrag, bits, resultfrags, waiters, replay); +} + +CDir *MDCache::force_dir_fragment(CInode *diri, frag_t fg) +{ + CDir *dir = diri->get_dirfrag(fg); + if (dir) + return dir; + + dout(10) << "force_dir_fragment " << fg << " on " << *diri << dendl; + + list src, result; + list waiters; + + // split a parent? + frag_t parent = diri->dirfragtree.get_branch_or_leaf(fg); + while (1) { + CDir *pdir = diri->get_dirfrag(parent); + if (pdir) { + int split = fg.bits() - parent.bits(); + dout(10) << " splitting parent by " << split << " " << *pdir << dendl; + src.push_back(pdir); + adjust_dir_fragments(diri, src, parent, split, result, waiters, true); + dir = diri->get_dirfrag(fg); + dout(10) << "force_dir_fragment result " << *dir << dendl; + return dir; + } + if (parent == frag_t()) + break; + frag_t last = parent; + parent = parent.parent(); + dout(10) << " " << last << " parent is " << parent << dendl; + } + + // hoover up things under fg? + diri->get_dirfrags_under(fg, src); + if (src.empty()) { + dout(10) << "force_dir_fragment no frags under " << fg << dendl; + return NULL; + } + dout(10) << " will combine frags under " << fg << ": " << src << dendl; + adjust_dir_fragments(diri, src, fg, 0, result, waiters, true); + dir = result.front(); + dout(10) << "force_dir_fragment result " << *dir << dendl; + return dir; } void MDCache::adjust_dir_fragments(CInode *diri, diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 7b2b94998e5c3..6c2fc069c0bd9 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -888,6 +888,15 @@ public: if (!have_inode(df.ino)) return NULL; return get_inode(df.ino)->get_dirfrag(df.frag); } + CDir* get_force_dirfrag(dirfrag_t df) { + CInode *diri = get_inode(df.ino); + if (!diri) + return NULL; + CDir *dir = force_dir_fragment(diri, df.frag); + if (!dir) + dir = diri->get_dirfrag(df.frag); + return dir; + } MDSCacheObject *get_object(MDSCacheObjectInfo &info); @@ -1096,6 +1105,9 @@ private: list& resultfrags, list& waiters, bool replay); + CDir *force_dir_fragment(CInode *diri, frag_t fg); + void get_force_dirfrag_bound_set(vector& dfs, set& bounds); + friend class EFragment; -- 2.39.5