From: Sage Weil Date: Tue, 4 Jan 2011 22:39:58 +0000 (-0800) Subject: mds: make resolve adjust dir fragmentation as needed X-Git-Tag: v0.25~390 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a5f9a36c98243735d46aa93db9add8f4bee26103;p=ceph.git mds: make resolve adjust dir fragmentation as needed During resolve, adjust dir fragmentation as needed based on the subtrees the sender explicitly claims. The given fragmentation on the root is always valid. Their bounds may not be; only split our frags as needed if they happen to be partially in and partially out of the sender's bounding fragset. Signed-off-by: Sage Weil --- diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 842647822128..66d008157fc5 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -362,7 +362,7 @@ public: bool has_dirfrags() { return !dirfrags.empty(); } CDir* get_dirfrag(frag_t fg) { if (dirfrags.count(fg)) { - assert(g_conf.debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME + //assert(g_conf.debug_mds < 2 || dirfragtree.is_leaf(fg)); // performance hack FIXME return dirfrags[fg]; } else return 0; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 3ac7be40cae3..5bf8fe4a0168 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -974,6 +974,49 @@ void MDCache::adjust_bounded_subtree_auth(CDir *dir, set& bounds, pair& dfs, set& bounds) +{ + dout(10) << "get_force_dirfrag_bound_set " << dfs << dendl; + + // sort by ino + map byino; + for (vector::iterator p = dfs.begin(); p != dfs.end(); ++p) + byino[p->ino].insert(p->frag); + dout(10) << " by ino: " << byino << dendl; + + for (map::iterator p = byino.begin(); p != byino.end(); ++p) { + CInode *diri = get_inode(p->first); + if (!diri) + continue; + dout(10) << " checking fragset " << p->second.get() << " on " << *diri << dendl; + for (set::iterator q = p->second.begin(); q != p->second.end(); ++q) { + frag_t fg = *q; + list u; + diri->get_dirfrags_under(fg, u); + dout(10) << " frag " << fg << " contains " << u << dendl; + if (!u.empty()) + bounds.insert(u.begin(), u.end()); + frag_t t = fg; + while (t != frag_t()) { + t = t.parent(); + CDir *dir = diri->get_dirfrag(t); + if (dir) { + // ugh, we found a containing parent + dout(10) << " ugh, splitting parent frag " << t << " " << *dir << dendl; + force_dir_fragment(diri, fg); + break; + } + } + } + } +} + + void MDCache::adjust_bounded_subtree_auth(CDir *dir, vector& bound_dfs, pair auth) { dout(7) << "adjust_bounded_subtree_auth " << dir->get_dir_auth() << " -> " << auth @@ -2518,19 +2561,15 @@ void MDCache::handle_resolve(MMDSResolve *m) for (map >::iterator pi = m->subtrees.begin(); pi != m->subtrees.end(); ++pi) { - CInode *diri = get_inode(pi->first.ino); - if (!diri) continue; - bool forced = diri->dirfragtree.force_to_leaf(pi->first.frag); - if (forced) { - dout(10) << " forced frag " << pi->first.frag << " to leaf in " - << diri->dirfragtree - << " on " << pi->first << dendl; - } - - CDir *dir = diri->get_dirfrag(pi->first.frag); - if (!dir) continue; - - adjust_bounded_subtree_auth(dir, pi->second, from); + dout(10) << "peer claims " << pi->first << " bounds " << pi->second << dendl; + CDir *dir = get_force_dirfrag(pi->first); + if (!dir) + continue; + + set bounds; + get_force_dirfrag_bound_set(pi->second, bounds); + + adjust_bounded_subtree_auth(dir, bounds, from); try_subtree_merge(dir); } @@ -8633,7 +8672,52 @@ void MDCache::adjust_dir_fragments(CInode *diri, frag_t basefrag, int bits, list srcfrags; diri->get_dirfrags_under(basefrag, srcfrags); - adjust_dir_fragments(diri, srcfrags, basefrag, bits, resultfrags, waiters, replay); + if (!srcfrags.empty()) + adjust_dir_fragments(diri, srcfrags, basefrag, bits, resultfrags, waiters, replay); +} + +CDir *MDCache::force_dir_fragment(CInode *diri, frag_t fg) +{ + CDir *dir = diri->get_dirfrag(fg); + if (dir) + return dir; + + dout(10) << "force_dir_fragment " << fg << " on " << *diri << dendl; + + list src, result; + list waiters; + + // split a parent? + frag_t parent = diri->dirfragtree.get_branch_or_leaf(fg); + while (1) { + CDir *pdir = diri->get_dirfrag(parent); + if (pdir) { + int split = fg.bits() - parent.bits(); + dout(10) << " splitting parent by " << split << " " << *pdir << dendl; + src.push_back(pdir); + adjust_dir_fragments(diri, src, parent, split, result, waiters, true); + dir = diri->get_dirfrag(fg); + dout(10) << "force_dir_fragment result " << *dir << dendl; + return dir; + } + if (parent == frag_t()) + break; + frag_t last = parent; + parent = parent.parent(); + dout(10) << " " << last << " parent is " << parent << dendl; + } + + // hoover up things under fg? + diri->get_dirfrags_under(fg, src); + if (src.empty()) { + dout(10) << "force_dir_fragment no frags under " << fg << dendl; + return NULL; + } + dout(10) << " will combine frags under " << fg << ": " << src << dendl; + adjust_dir_fragments(diri, src, fg, 0, result, waiters, true); + dir = result.front(); + dout(10) << "force_dir_fragment result " << *dir << dendl; + return dir; } void MDCache::adjust_dir_fragments(CInode *diri, diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 7b2b94998e5c..6c2fc069c0bd 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -888,6 +888,15 @@ public: if (!have_inode(df.ino)) return NULL; return get_inode(df.ino)->get_dirfrag(df.frag); } + CDir* get_force_dirfrag(dirfrag_t df) { + CInode *diri = get_inode(df.ino); + if (!diri) + return NULL; + CDir *dir = force_dir_fragment(diri, df.frag); + if (!dir) + dir = diri->get_dirfrag(df.frag); + return dir; + } MDSCacheObject *get_object(MDSCacheObjectInfo &info); @@ -1096,6 +1105,9 @@ private: list& resultfrags, list& waiters, bool replay); + CDir *force_dir_fragment(CInode *diri, frag_t fg); + void get_force_dirfrag_bound_set(vector& dfs, set& bounds); + friend class EFragment;