From: Yan, Zheng Date: Sat, 18 Jan 2014 12:31:36 +0000 (+0800) Subject: mds: journal dirfragtree change X-Git-Tag: v0.78~165^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f3666ededcf52ef4a124aca768bd993cace4fdf6;p=ceph.git mds: journal dirfragtree change Introduce a new flag, DIRTYDFT, for CDir and EMetaBlob::dirlump. The new flag indicates that the dirfrag is newly fragmented and the corresponding dirfragtree change hasn't been propagated to the directory inode. After fragmenting subtree dirfrags, make sure the DIRTYDFT flag is set on each EMetaBlob::dirlump that corresponds to the resulting dirfrags. Journal replay code uses the DIRTYDFT flag to decide if the dirfragtree is scattered dirty. Signed-off-by: Yan, Zheng --- diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 8761876d6ed5..e244175f59b3 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -2173,10 +2173,24 @@ void CDir::decode_import(bufferlist::iterator& blp, utime_t now, LogSegment *ls) // did we import some dirty scatterlock data? 
if (dirty_old_rstat.size() || - !(fnode.rstat == fnode.accounted_rstat)) + !(fnode.rstat == fnode.accounted_rstat)) { cache->mds->locker->mark_updated_scatterlock(&inode->nestlock); - if (!(fnode.fragstat == fnode.accounted_fragstat)) + ls->dirty_dirfrag_nest.push_back(&inode->item_dirty_dirfrag_nest); + } + if (!(fnode.fragstat == fnode.accounted_fragstat)) { cache->mds->locker->mark_updated_scatterlock(&inode->filelock); + ls->dirty_dirfrag_dir.push_back(&inode->item_dirty_dirfrag_dir); + } + if (is_dirty_dft()) { + if (inode->dirfragtreelock.get_state() != LOCK_MIX && + inode->dirfragtreelock.is_stable()) { + // clear stale dirtydft + state_clear(STATE_DIRTYDFT); + } else { + cache->mds->locker->mark_updated_scatterlock(&inode->dirfragtreelock); + ls->dirty_dirfrag_dirfragtree.push_back(&inode->item_dirty_dirfrag_dirfragtree); + } + } } diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 02e76d8c87df..955a04e9e107 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -107,6 +107,7 @@ public: static const unsigned STATE_STICKY = (1<<15); // sticky pin due to inode stickydirs static const unsigned STATE_DNPINNEDFRAG = (1<<16); // dir is refragmenting static const unsigned STATE_ASSIMRSTAT = (1<<17); // assimilating inode->frag rstats + static const unsigned STATE_DIRTYDFT = (1<<18); // dirty dirfragtree // common states static const unsigned STATE_CLEAN = 0; @@ -115,7 +116,7 @@ public: // these state bits are preserved by an import/export // ...except if the directory is hashed, in which case none of them are! 
static const unsigned MASK_STATE_EXPORTED = - (STATE_COMPLETE|STATE_DIRTY); + (STATE_COMPLETE|STATE_DIRTY|STATE_DIRTYDFT); static const unsigned MASK_STATE_IMPORT_KEPT = ( STATE_IMPORTING @@ -472,6 +473,7 @@ private: bool is_complete() { return state & STATE_COMPLETE; } bool is_exporting() { return state & STATE_EXPORTING; } bool is_importing() { return state & STATE_IMPORTING; } + bool is_dirty_dft() { return state & STATE_DIRTYDFT; } int get_dir_rep() { return dir_rep; } bool is_rep() { diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index b7c9772c9f0f..2e2529851393 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1400,11 +1400,14 @@ void CInode::decode_lock_state(int type, bufferlist& bl) dirfragtree.swap(temp); for (map::iterator p = dirfrags.begin(); p != dirfrags.end(); - ++p) + ++p) { if (!dirfragtree.is_leaf(p->first)) { dout(10) << " forcing open dirfrag " << p->first << " to leaf (racing with split|merge)" << dendl; dirfragtree.force_to_leaf(g_ceph_context, p->first); } + if (p->second->is_auth()) + p->second->state_clear(CDir::STATE_DIRTYDFT); + } } if (g_conf->mds_debug_frag) verify_dirfrags(); @@ -1646,6 +1649,10 @@ void CInode::start_scatter(ScatterLock *lock) case CEPH_LOCK_INEST: finish_scatter_update(lock, dir, pi->rstat.version, pf->accounted_rstat.version); break; + + case CEPH_LOCK_IDFT: + dir->state_clear(CDir::STATE_DIRTYDFT); + break; } } } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 22ddb143e193..42575679d5b8 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -11451,13 +11451,25 @@ void MDCache::dispatch_fragment_dir(MDRequest *mdr) for (list::iterator p = info.resultfrags.begin(); p != info.resultfrags.end(); ++p) { - le->metablob.add_dir(*p, false); + if (diri->is_auth()) { + le->metablob.add_fragmented_dir(*p, false); + } else { + (*p)->state_set(CDir::STATE_DIRTYDFT); + le->metablob.add_fragmented_dir(*p, true); + } } // dft lock - mds->locker->mark_updated_scatterlock(&diri->dirfragtreelock); - 
mdr->ls->dirty_dirfrag_dirfragtree.push_back(&diri->item_dirty_dirfrag_dirfragtree); - mdr->add_updated_lock(&diri->dirfragtreelock); + if (diri->is_auth()) { + // journal dirfragtree + inode_t *pi = diri->project_inode(); + pi->version = diri->pre_dirty(); + journal_dirty_inode(mdr, &le->metablob, diri); + } else { + mds->locker->mark_updated_scatterlock(&diri->dirfragtreelock); + mdr->ls->dirty_dirfrag_dirfragtree.push_back(&diri->item_dirty_dirfrag_dirfragtree); + mdr->add_updated_lock(&diri->dirfragtreelock); + } /* // filelock @@ -11487,6 +11499,11 @@ void MDCache::_fragment_logged(MDRequest *mdr) dout(10) << "fragment_logged " << basedirfrag << " bits " << info.bits << " on " << *diri << dendl; + if (diri->is_auth()) + diri->pop_and_dirty_projected_inode(mdr->ls); + + mdr->apply(); // mark scatterlock + // store resulting frags C_GatherBuilder gather(g_ceph_context, new C_MDC_FragmentStore(this, mdr)); @@ -11537,7 +11554,6 @@ void MDCache::_fragment_stored(MDRequest *mdr) mds->send_message_mds(notify, p->first); } - mdr->apply(); // mark scatterlock mds->locker->drop_locks(mdr); // unfreeze resulting frags diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 50d9931a368c..036d9be1da3a 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -2711,6 +2711,11 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist::iterator& blp, int o mds->locker->mark_updated_scatterlock(&in->filelock); } + if (in->dirfragtreelock.is_dirty()) { + updated_scatterlocks.push_back(&in->dirfragtreelock); + mds->locker->mark_updated_scatterlock(&in->dirfragtreelock); + } + // adjust replica list //assert(!in->is_replica(oldauth)); // not true on failed export in->add_replica(oldauth, CInode::EXPORT_NONCE); diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 5123fac2f2ef..14c640f0b460 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -225,6 +225,7 @@ public: static const int STATE_DIRTY = (1<<2); // dirty due to 
THIS journal item, that is! static const int STATE_NEW = (1<<3); // new directory static const int STATE_IMPORTING = (1<<4); // importing directory + static const int STATE_DIRTYDFT = (1<<5); // dirty dirfragtree //version_t dirv; fnode_t fnode; @@ -249,6 +250,8 @@ public: void mark_new() { state |= STATE_NEW; } bool is_importing() { return state & STATE_IMPORTING; } void mark_importing() { state |= STATE_IMPORTING; } + bool is_dirty_dft() { return state & STATE_DIRTYDFT; } + void mark_dirty_dft() { state |= STATE_DIRTYDFT; } list > &get_dfull() { return dfull; } list &get_dremote() { return dremote; } @@ -535,10 +538,15 @@ private: dirlump& add_import_dir(CDir *dir) { // dirty=false would be okay in some cases return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(), - dir->is_dirty(), dir->is_complete(), false, true); + dir->is_dirty(), dir->is_complete(), false, true, dir->is_dirty_dft()); + } + dirlump& add_fragmented_dir(CDir *dir, bool dirtydft) { + return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(), + false, false, false, false, dirtydft); } dirlump& add_dir(dirfrag_t df, fnode_t *pf, version_t pv, bool dirty, - bool complete=false, bool isnew=false, bool importing=false) { + bool complete=false, bool isnew=false, + bool importing=false, bool dirty_dft=false) { if (lump_map.count(df) == 0) lump_order.push_back(df); @@ -549,6 +557,7 @@ private: if (dirty) l.mark_dirty(); if (isnew) l.mark_new(); if (importing) l.mark_importing(); + if (dirty_dft) l.mark_dirty_dft(); return l; } diff --git a/src/mds/journal.cc b/src/mds/journal.cc index e6d530fc1010..c7e707785066 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -1019,6 +1019,12 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) dout(10) << "EMetaBlob.replay clean fragstat on " << *dir << dendl; } } + if (lump.is_dirty_dft()) { + dout(10) << "EMetaBlob.replay dirty dirfragtree on " << *dir << dendl; + 
dir->state_set(CDir::STATE_DIRTYDFT); + mds->locker->mark_updated_scatterlock(&dir->inode->dirfragtreelock); + logseg->dirty_dirfrag_dirfragtree.push_back(&dir->inode->item_dirty_dirfrag_dirfragtree); + } if (lump.is_new()) dir->mark_new(logseg); if (lump.is_complete())