From: Yan, Zheng Date: Fri, 28 Mar 2014 05:30:07 +0000 (+0800) Subject: mds: maintain auth bits during replay X-Git-Tag: v0.81~47^2~19 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7d1fd6692c873746c3ef277456622be032e60373;p=ceph.git mds: maintain auth bits during replay Objects' STATE_AUTH bits are set when replaying an EImportStart event. MDCache::trim_non_auth_subtree() clears objects' STATE_AUTH bits when replaying an EExport event. Signed-off-by: Yan, Zheng --- diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 19f16014138a..325203774047 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -572,8 +572,8 @@ CDir *CInode::get_or_open_dirfrag(MDCache *mdcache, frag_t fg) CDir *dir = get_dirfrag(fg); if (!dir) { // create it. - assert(is_auth()); - dir = new CDir(this, fg, mdcache, true); + assert(is_auth() || mdcache->mds->is_any_replay()); + dir = new CDir(this, fg, mdcache, is_auth()); add_dirfrag(dir); } return dir; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index ba57b0e4bdd5..cc2f2be3bbb9 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -3045,7 +3045,7 @@ void MDCache::maybe_resolve_finish() finish_committed_masters(); if (mds->is_resolve()) { trim_unlinked_inodes(); - recalc_auth_bits(); + recalc_auth_bits(false); mds->resolve_done(); } else { maybe_send_pending_rejoins(); @@ -3409,15 +3409,19 @@ void MDCache::trim_unlinked_inodes() * once subtree auth is disambiguated, we need to adjust all the * auth and dirty bits in our cache before moving on. */ -void MDCache::recalc_auth_bits() +void MDCache::recalc_auth_bits(bool replay) { - dout(7) << "recalc_auth_bits" << dendl; + dout(7) << "recalc_auth_bits " << (replay ? 
"(replay)" : "") << dendl; if (root) { root->inode_auth.first = mds->mdsmap->get_root(); - if (mds->whoami != root->inode_auth.first) { + bool auth = mds->whoami == root->inode_auth.first; + if (auth) { + root->state_set(CInode::STATE_AUTH); + } else { root->state_clear(CInode::STATE_AUTH); - root->state_set(CInode::STATE_REJOINING); + if (!replay) + root->state_set(CInode::STATE_REJOINING); } } @@ -3432,11 +3436,16 @@ void MDCache::recalc_auth_bits() for (map >::iterator p = subtrees.begin(); p != subtrees.end(); ++p) { - - CInode *inode = p->first->get_inode(); - if (inode->is_mdsdir() && inode->ino() != MDS_INO_MDSDIR(mds->get_nodeid())) { - inode->state_clear(CInode::STATE_AUTH); - inode->state_set(CInode::STATE_REJOINING); + if (p->first->inode->is_mdsdir()) { + CInode *in = p->first->inode; + bool auth = in->ino() == MDS_INO_MDSDIR(mds->get_nodeid()); + if (auth) { + in->state_set(CInode::STATE_AUTH); + } else { + in->state_clear(CInode::STATE_AUTH); + if (!replay) + in->state_set(CInode::STATE_REJOINING); + } } list dfq; // dirfrag queue @@ -3453,16 +3462,18 @@ void MDCache::recalc_auth_bits() if (auth) { dir->state_set(CDir::STATE_AUTH); } else { - // close empty non-auth dirfrag - if (!dir->is_subtree_root() && dir->get_num_any() == 0) { - dir->inode->close_dirfrag(dir->get_frag()); - continue; - } - dir->state_set(CDir::STATE_REJOINING); dir->state_clear(CDir::STATE_AUTH); - dir->state_clear(CDir::STATE_COMPLETE); - if (dir->is_dirty()) - dir->mark_clean(); + if (!replay) { + // close empty non-auth dirfrag + if (!dir->is_subtree_root() && dir->get_num_any() == 0) { + dir->inode->close_dirfrag(dir->get_frag()); + continue; + } + dir->state_set(CDir::STATE_REJOINING); + dir->state_clear(CDir::STATE_COMPLETE); + if (dir->is_dirty()) + dir->mark_clean(); + } } // dentries in this dir @@ -3472,34 +3483,38 @@ void MDCache::recalc_auth_bits() // dn CDentry *dn = q->second; CDentry::linkage_t *dnl = dn->get_linkage(); - if (auth) + if (auth) { 
dn->state_set(CDentry::STATE_AUTH); - else { - dn->state_set(CDentry::STATE_REJOINING); + } else { dn->state_clear(CDentry::STATE_AUTH); - if (dn->is_dirty()) - dn->mark_clean(); + if (!replay) { + dn->state_set(CDentry::STATE_REJOINING); + if (dn->is_dirty()) + dn->mark_clean(); + } } if (dnl->is_primary()) { // inode - if (auth) - dnl->get_inode()->state_set(CInode::STATE_AUTH); - else { - dnl->get_inode()->state_set(CInode::STATE_REJOINING); - dnl->get_inode()->state_clear(CInode::STATE_AUTH); - if (dnl->get_inode()->is_dirty()) - dnl->get_inode()->mark_clean(); - if (dnl->get_inode()->is_dirty_parent()) - dnl->get_inode()->clear_dirty_parent(); - // avoid touching scatterlocks for our subtree roots! - if (subtree_inodes.count(dnl->get_inode()) == 0) - dnl->get_inode()->clear_scatter_dirty(); + CInode *in = dnl->get_inode(); + if (auth) { + in->state_set(CInode::STATE_AUTH); + } else { + in->state_clear(CInode::STATE_AUTH); + if (!replay) { + in->state_set(CInode::STATE_REJOINING); + if (in->is_dirty()) + in->mark_clean(); + if (in->is_dirty_parent()) + in->clear_dirty_parent(); + // avoid touching scatterlocks for our subtree roots! + if (subtree_inodes.count(in) == 0) + in->clear_scatter_dirty(); + } } - // recurse? - if (dnl->get_inode()->is_dir()) - dnl->get_inode()->get_nested_dirfrags(dfq); + if (in->is_dir()) + in->get_nested_dirfrags(dfq); } } } @@ -6738,6 +6753,8 @@ bool MDCache::trim_non_auth_subtree(CDir *dir) dir->remove_dentry(dn); } else { dout(20) << "trim_non_auth_subtree(" << dir << ") keeping inode " << in << " with dentry " << dn <state_clear(CDentry::STATE_AUTH); + in->state_clear(CInode::STATE_AUTH); } } else if (keep_dir && dnl->is_null()) { // keep null dentry for slave rollback dout(20) << "trim_non_auth_subtree(" << dir << ") keeping dentry " << dn <remove_dentry(dn); } } + dir->state_clear(CDir::STATE_AUTH); /** * We've now checked all our children and deleted those that need it. 
* Now return to caller, and tell them if *we're* a keeper. diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 8edeaa37d15c..c72c7ab15b46 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -347,12 +347,12 @@ protected: void discard_delayed_resolve(int who); void maybe_resolve_finish(); void disambiguate_imports(); - void recalc_auth_bits(); void trim_unlinked_inodes(); void add_uncommitted_slave_update(metareqid_t reqid, int master, MDSlaveUpdate*); void finish_uncommitted_slave_update(metareqid_t reqid, int master); MDSlaveUpdate* get_uncommitted_slave_update(metareqid_t reqid, int master); public: + void recalc_auth_bits(bool replay); void remove_inode_recursive(CInode *in); bool is_ambiguous_slave_update(metareqid_t reqid, int master) { diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index cde0f704c924..323a16afb103 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -2225,11 +2225,15 @@ void Migrator::handle_export_dir(MExportDir *m) // include bounds in EImportStart set import_bounds; - cache->get_subtree_bounds(dir, import_bounds); - for (set::iterator it = import_bounds.begin(); - it != import_bounds.end(); - ++it) - le->metablob.add_dir(*it, false); // note that parent metadata is already in the event + for (vector::iterator p = m->bounds.begin(); + p != m->bounds.end(); + ++p) { + CDir *bd = cache->get_dirfrag(*p); + assert(bd); + le->metablob.add_dir(bd, false); // note that parent metadata is already in the event + import_bounds.insert(bd); + } + cache->verify_subtree_bounds(dir, import_bounds); // adjust popularity mds->balancer->add_import(dir, now); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index c8a3ebc50cfe..bbfdc50d9c97 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -7016,8 +7016,10 @@ void Server::_rename_rollback_finish(MutationRef& mut, MDRequestRef& mdr, CDentr mut->apply(); - if (srcdn) { + if (srcdn && srcdn->get_linkage()->is_primary()) { CInode *in = srcdn->get_linkage()->get_inode(); + if 
(srcdn->authority().first == mds->get_nodeid()) + in->state_set(CInode::STATE_AUTH); // update subtree map? if (in && in->is_dir()) { assert(destdn); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index aad0e0e4fa3a..323d52699d3d 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -947,7 +947,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) CInode *in = mds->mdcache->get_inode((*p)->inode.ino); bool isnew = in ? false:true; if (!in) - in = new CInode(mds->mdcache, true); + in = new CInode(mds->mdcache, false); (*p)->update_inode(mds, in); if (isnew) @@ -993,8 +993,10 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) // hmm. do i have the inode? CInode *diri = mds->mdcache->get_inode((*lp).ino); if (!diri) { - if (MDS_INO_IS_BASE(lp->ino)) { + if (MDS_INO_IS_MDSDIR(lp->ino)) { + assert(MDS_INO_MDSDIR(mds->get_nodeid()) != lp->ino); diri = mds->mdcache->create_system_inode(lp->ino, S_IFDIR|0755); + diri->state_clear(CInode::STATE_AUTH); dout(10) << "EMetaBlob.replay created base " << *diri << dendl; } else { dout(0) << "EMetaBlob.replay missing dir ino " << (*lp).ino << dendl; @@ -1006,13 +1008,17 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) dir = diri->get_or_open_dirfrag(mds->mdcache, (*lp).frag); if (MDS_INO_IS_BASE(lp->ino)) - mds->mdcache->adjust_subtree_auth(dir, CDIR_AUTH_UNKNOWN); + mds->mdcache->adjust_subtree_auth(dir, CDIR_AUTH_UNDEF); dout(10) << "EMetaBlob.replay added dir " << *dir << dendl; } dir->set_version( lump.fnode.version ); dir->fnode = lump.fnode; + if (lump.is_importing()) { + dir->state_set(CDir::STATE_AUTH); + dir->state_clear(CDir::STATE_COMPLETE); + } if (lump.is_dirty()) { dir->_mark_dirty(logseg); @@ -1041,8 +1047,6 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) dir->mark_new(logseg); if (lump.is_complete()) dir->mark_complete(); - else if (lump.is_importing()) - 
dir->state_clear(CDir::STATE_COMPLETE); dout(10) << "EMetaBlob.replay updated dir " << *dir << dendl; @@ -1067,10 +1071,12 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) dn->first = p->dnfirst; assert(dn->last == p->dnlast); } + if (lump.is_importing()) + dn->state_set(CDentry::STATE_AUTH); CInode *in = mds->mdcache->get_inode(p->inode.ino, p->dnlast); if (!in) { - in = new CInode(mds->mdcache, true, p->dnfirst, p->dnlast); + in = new CInode(mds->mdcache, dn->is_auth(), p->dnfirst, p->dnlast); p->update_inode(mds, in); mds->mdcache->add_inode(in); if (!dn->get_linkage()->is_null()) { @@ -1087,7 +1093,6 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) if (unlinked.count(in)) linked.insert(in); dir->link_primary_inode(dn, in); - if (p->is_dirty()) in->_mark_dirty(logseg); dout(10) << "EMetaBlob.replay added " << *in << dendl; } else { p->update_inode(mds, in); @@ -1115,12 +1120,17 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) } else { dout(10) << "EMetaBlob.replay for [" << p->dnfirst << "," << p->dnlast << "] had " << *in << dendl; } - if (p->is_dirty()) in->_mark_dirty(logseg); assert(in->first == p->dnfirst || (in->is_multiversion() && in->first > p->dnfirst)); } + if (p->is_dirty()) + in->_mark_dirty(logseg); if (p->is_dirty_parent()) in->_mark_dirty_parent(logseg, p->is_dirty_pool()); + if (dn->is_auth()) + in->state_set(CInode::STATE_AUTH); + else + in->state_clear(CInode::STATE_AUTH); assert(g_conf->mds_kill_journal_replay_at != 2); } @@ -1153,6 +1163,8 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) dn->first = p->dnfirst; assert(dn->last == p->dnlast); } + if (lump.is_importing()) + dn->state_set(CDentry::STATE_AUTH); } // null dentries @@ -1185,6 +1197,8 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) assert(dn->last == p->dnlast); } olddir = dir; + if (lump.is_importing()) + 
dn->state_set(CDentry::STATE_AUTH); } } @@ -1210,9 +1224,11 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) for (list::iterator p = leaves.begin(); p != leaves.end(); ++p) { CDir *dir = renamed_diri->get_dirfrag(*p); assert(dir); - // preserve subtree bound until slave commit if (dir->get_dir_auth() == CDIR_AUTH_UNDEF) + // preserve subtree bound until slave commit slaveup->olddirs.insert(dir->inode); + else + dir->state_set(CDir::STATE_AUTH); } } @@ -1240,6 +1256,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) } dir = renamed_diri->get_or_open_dirfrag(mds->mdcache, *p); dout(10) << " creating new rename import bound " << *dir << dendl; + dir->state_clear(CDir::STATE_AUTH); mds->mdcache->adjust_subtree_auth(dir, CDIR_AUTH_UNDEF, false); } } @@ -2405,7 +2422,9 @@ void ESubtreeMap::replay(MDS *mds) mds->mdcache->adjust_bounded_subtree_auth(dir, p->second, mds->get_nodeid()); } } - + + mds->mdcache->recalc_auth_bits(true); + mds->mdcache->show_subtrees(); } @@ -2623,7 +2642,20 @@ void EImportStart::replay(MDS *mds) // set auth partially to us so we don't trim it CDir *dir = mds->mdcache->get_dirfrag(base); assert(dir); - mds->mdcache->adjust_bounded_subtree_auth(dir, bounds, pair(mds->get_nodeid(), mds->get_nodeid())); + + set realbounds; + for (vector::iterator p = bounds.begin(); + p != bounds.end(); + ++p) { + CDir *bd = mds->mdcache->get_dirfrag(*p); + assert(bd); + if (!bd->is_subtree_root()) + bd->state_clear(CDir::STATE_AUTH); + realbounds.insert(bd); + } + + mds->mdcache->adjust_bounded_subtree_auth(dir, realbounds, + pair(mds->get_nodeid(), mds->get_nodeid())); // open client sessions? if (mds->sessionmap.version >= cmapv) { @@ -2781,6 +2813,8 @@ void EResetJournal::replay(MDS *mds) CDir *mydir = mds->mdcache->get_myin()->get_or_open_dirfrag(mds->mdcache, frag_t()); mds->mdcache->adjust_subtree_auth(mydir, mds->whoami); + mds->mdcache->recalc_auth_bits(true); + mds->mdcache->show_subtrees(); }