From: Sage Weil Date: Fri, 15 Apr 2011 22:51:50 +0000 (-0700) Subject: mds: keep import/export subtree_map state in sync with journal X-Git-Tag: v0.27~38 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=f85515141a17936eef1e5c83700e0062239407e1;p=ceph.git mds: keep import/export subtree_map state in sync with journal We were being sloppy before with the ESubtreeMap vs import/export events. Fix that by doing a few things: - add an ambig flag to the subtree map items, and set it for in-progress imports. That means an ESubtreeMap followed by EImportFinish will do the right thing now. - adjust the dir_auth on EExport journaling (handle_export_dir_ack) so that our journaled subtree_map state is always in sync with what we see during replay. Also document clearly what the dir_auth variations actually mean. Signed-off-by: Sage Weil --- diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index d5e474ffc49f7..bae2a70ae8dfd 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2198,9 +2198,25 @@ ESubtreeMap *MDCache::create_subtree_map() p != subtrees.end(); ++p) { CDir *dir = p->first; - if (!dir->is_auth()) continue; - dout(15) << " subtree " << *dir << dendl; + // journal subtree as "ours" if we are + // me, -2 + // me, me + // me, !me (may be importing and ambiguous!) 
+ + // so not + // !me, * + if (dir->get_dir_auth().first != mds->whoami) + continue; + + if (dir->is_ambiguous_dir_auth() && + migrator->is_importing(dir->dirfrag())) { + dout(15) << " ambig subtree " << *dir << dendl; + le->ambiguous_subtrees.insert(dir->dirfrag()); + } else { + dout(15) << " subtree " << *dir << dendl; + } + le->subtrees[dir->dirfrag()].clear(); le->metablob.add_dir_context(dir, EMetaBlob::TO_ROOT); le->metablob.add_dir(dir, false); @@ -2479,7 +2495,8 @@ void MDCache::handle_mds_recovery(int who) ++p) { CDir *dir = p->first; - if (dir->authority().first != who) + if (dir->authority().first != who || + dir->authority().second == mds->whoami) continue; assert(!dir->is_auth()); diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 25349ae93b4f1..67603e9faf1d4 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -53,8 +53,25 @@ #include "messages/MExportCapsAck.h" - - +/* + * this is what the dir->dir_auth values look like + * + * dir_auth authbits + * export + * me me - before + * me, me me - still me, but preparing for export + * me, them me - send MExportDir (peer is preparing) + * them, me me - journaled EExport + * them them - done + * + * import: + * them them - before + * me, them me - journaled EImportStart + * me me - done + * + * which implies: + * - auth bit is set if i am listed as first _or_ second dir_auth. + */ #include "common/config.h" @@ -1217,6 +1234,11 @@ void Migrator::handle_export_ack(MExportDirAck *m) set<CDir*> bounds; cache->get_subtree_bounds(dir, bounds); + // list us second, them first. + // this keeps authority().first in sync with subtree auth state in the journal. + int target = export_peer[dir]; + cache->adjust_subtree_auth(dir, target, mds->get_nodeid()); + // log completion. // include export bounds, to ensure they're in the journal. 
EExport *le = new EExport(mds->mdlog, dir); diff --git a/src/mds/events/ESubtreeMap.h b/src/mds/events/ESubtreeMap.h index f7f4090fd063e..0230de1a59ef8 100644 --- a/src/mds/events/ESubtreeMap.h +++ b/src/mds/events/ESubtreeMap.h @@ -22,21 +22,24 @@ class ESubtreeMap : public LogEvent { public: EMetaBlob metablob; map<dirfrag_t, vector<dirfrag_t> > subtrees; + set<dirfrag_t> ambiguous_subtrees; uint64_t expire_pos; ESubtreeMap() : LogEvent(EVENT_SUBTREEMAP), expire_pos(0) { } void print(ostream& out) { - out << "subtree_map " << subtrees.size() << " subtrees " + out << "ESubtreeMap " << subtrees.size() << " subtrees " + << ", " << ambiguous_subtrees.size() << " ambiguous " << metablob; } void encode(bufferlist& bl) const { - __u8 struct_v = 3; + __u8 struct_v = 4; ::encode(struct_v, bl); ::encode(stamp, bl); ::encode(metablob, bl); ::encode(subtrees, bl); + ::encode(ambiguous_subtrees, bl); ::encode(expire_pos, bl); } void decode(bufferlist::iterator &bl) { @@ -46,6 +49,8 @@ public: ::decode(stamp, bl); ::decode(metablob, bl); ::decode(subtrees, bl); + if (struct_v >= 4) + ::decode(ambiguous_subtrees, bl); if (struct_v >= 3) ::decode(expire_pos, bl); } diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 7e77443f9b5a6..814fd8d2cad48 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -1067,7 +1067,15 @@ void ESubtreeMap::replay(MDS *mds) p != subtrees.end(); ++p) { CDir *dir = mds->mdcache->get_dirfrag(p->first); - mds->mdcache->adjust_bounded_subtree_auth(dir, p->second, mds->get_nodeid()); + if (ambiguous_subtrees.count(p->first)) { + // ambiguous! 
+ mds->mdcache->add_ambiguous_import(p->first, p->second); + mds->mdcache->adjust_bounded_subtree_auth(dir, p->second, + pair<int,int>(mds->get_nodeid(), mds->get_nodeid())); + } else { + // not ambiguous + mds->mdcache->adjust_bounded_subtree_auth(dir, p->second, mds->get_nodeid()); + } } mds->mdcache->show_subtrees(); @@ -1201,9 +1209,9 @@ void EImportFinish::replay(MDS *mds) } } else { dout(10) << "EImportFinish.replay " << base << " success=" << success - << ", predates my subtree_map start point, ignoring" + << " on subtree not marked as ambiguous" << dendl; - // verify that? + assert(0 == "this shouldn't happen unless this is an old journal"); } }