From ee059881d73f2b108716ccce37091f6ec0a67c7a Mon Sep 17 00:00:00 2001 From: sageweil Date: Wed, 25 Jul 2007 22:04:05 +0000 Subject: [PATCH] journal old subtree bound ino on merge git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1550 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/mds/TODO | 21 ++++--------- branches/sage/mds/mds/CDir.cc | 2 +- branches/sage/mds/mds/CInode.cc | 29 ++++++++++++++++-- branches/sage/mds/mds/CInode.h | 2 +- branches/sage/mds/mds/Locker.cc | 13 ++++++-- branches/sage/mds/mds/MDCache.cc | 52 ++++++++++++++++++++++++++++++-- branches/sage/mds/mds/MDCache.h | 1 + 7 files changed, 95 insertions(+), 25 deletions(-) diff --git a/branches/sage/mds/TODO b/branches/sage/mds/TODO index 429fd072aafbe..ccbe35c73b642 100644 --- a/branches/sage/mds/TODO +++ b/branches/sage/mds/TODO @@ -68,25 +68,16 @@ sage mds / - STICKY dir state and pin? make sure it's kept across import/export/fragment / - pull _bound maps out of Migrator; they are redundant (trust the subtree map!) - - handle_resolve needs to infer splits/merges +/ - handle_resolve needs to infer splits/merges - rejoin, too! - - auth journals and applies update in the request update pipeline +/ - auth journals and applies update in the request update pipeline - - dirfragtree is lazily consistent. no lock. bcast by primary when it updates. - --> this makes it tricky to properly journal dirfragtree on the auth inode. what about a scatterlock? - +/ - dirfragtree is lazily consistent. no lock. bcast by primary when it updates. +/ - bcast to dir replicas - - bcast to dir replicas - - inode auth will journal inode update separately/lazily - - also on handle_resolve(), if there is a mismatch. - - do i need a fragtrace_t something to tell me where the splits for a given frag occurred? - - or something like a fragtree_t simplify()? - - is there any reason to freeze the dir? - - CDentry objects will be moved to the new frag(s) - - Server etc. must take care not to carry CDir pointers around; they're unstable! - - - what about flushing the old dirfrag storage off disk...? +/ - inode auth will journal inode update separately/lazily +/ - via subtree_merge_at - journal epoch, or something similar diff --git a/branches/sage/mds/mds/CDir.cc b/branches/sage/mds/mds/CDir.cc index 4847cd89d1235..c4c7f373405e5 100644 --- a/branches/sage/mds/mds/CDir.cc +++ b/branches/sage/mds/mds/CDir.cc @@ -448,7 +448,7 @@ void CDir::steal_dentry(CDentry *dn) if (dn->dir->items.empty()) dn->dir->put(PIN_CHILD); - if (nitems == 0) + if (nnull + nitems == 0) get(PIN_CHILD); if (dn->is_null()) nnull++; diff --git a/branches/sage/mds/mds/CInode.cc b/branches/sage/mds/mds/CInode.cc index b71fecb71432e..53011a5030c8d 100644 --- a/branches/sage/mds/mds/CInode.cc +++ b/branches/sage/mds/mds/CInode.cc @@ -439,7 +439,19 @@ void CInode::encode_lock_state(int type, bufferlist& bl) break; case LOCK_OTYPE_IDIRFRAGTREE: - dirfragtree._encode(bl); + { + // encode the raw tree + dirfragtree._encode(bl); + + // also specify which frags are mine + set myfrags; + list dfls; + get_dirfrags(dfls); + for (list::iterator p = dfls.begin(); p != dfls.end(); ++p) + if ((*p)->is_auth()) + myfrags.insert((*p)->get_frag()); + _encode(myfrags, bl); + } break; case LOCK_OTYPE_IFILE: @@ -488,7 +500,20 @@ void CInode::decode_lock_state(int type, bufferlist& bl) break; case LOCK_OTYPE_IDIRFRAGTREE: - dirfragtree._decode(bl, off); + { + fragtree_t temp; + temp._decode(bl, off); + set authfrags; + _decode(authfrags, bl, off); + if (is_auth()) { + // auth. believe replica's auth frags only. + for (set::iterator p = authfrags.begin(); p != authfrags.end(); ++p) + dirfragtree.force_to_leaf(*p); + } else { + // replica. just take the tree. + dirfragtree.swap(temp); + } + } break; case LOCK_OTYPE_IFILE: diff --git a/branches/sage/mds/mds/CInode.h b/branches/sage/mds/mds/CInode.h index 979fbd0d99a48..fdaf4ebec8c5b 100644 --- a/branches/sage/mds/mds/CInode.h +++ b/branches/sage/mds/mds/CInode.h @@ -271,7 +271,7 @@ public: LocalLock versionlock; SimpleLock authlock; SimpleLock linklock; - SimpleLock dirfragtreelock; + ScatterLock dirfragtreelock; FileLock filelock; ScatterLock dirlock; diff --git a/branches/sage/mds/mds/Locker.cc b/branches/sage/mds/mds/Locker.cc index 190bd5618faf8..708ce9f97d084 100644 --- a/branches/sage/mds/mds/Locker.cc +++ b/branches/sage/mds/mds/Locker.cc @@ -337,6 +337,7 @@ bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mdr) switch (lock->get_type()) { case LOCK_OTYPE_IFILE: return file_rdlock_start((FileLock*)lock, mdr); + case LOCK_OTYPE_IDIRFRAGTREE: case LOCK_OTYPE_IDIR: return scatter_rdlock_start((ScatterLock*)lock, mdr); default: @@ -349,6 +350,7 @@ void Locker::rdlock_finish(SimpleLock *lock, MDRequest *mdr) switch (lock->get_type()) { case LOCK_OTYPE_IFILE: return file_rdlock_finish((FileLock*)lock, mdr); + case LOCK_OTYPE_IDIRFRAGTREE: case LOCK_OTYPE_IDIR: return scatter_rdlock_finish((ScatterLock*)lock, mdr); default: @@ -359,6 +361,7 @@ void Locker::rdlock_finish(SimpleLock *lock, MDRequest *mdr) bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mdr) { switch (lock->get_type()) { + case LOCK_OTYPE_IDIRFRAGTREE: case LOCK_OTYPE_IDIR: return scatter_wrlock_start((ScatterLock*)lock, mdr); case LOCK_OTYPE_IVERSION: @@ -371,6 +374,7 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mdr) void Locker::wrlock_finish(SimpleLock *lock, MDRequest *mdr) { switch (lock->get_type()) { + case LOCK_OTYPE_IDIRFRAGTREE: case LOCK_OTYPE_IDIR: return scatter_wrlock_finish((ScatterLock*)lock, mdr); case LOCK_OTYPE_IVERSION: @@ -387,6 +391,7 @@ bool Locker::xlock_start(SimpleLock *lock, MDRequest *mdr) return file_xlock_start((FileLock*)lock, mdr); case LOCK_OTYPE_IVERSION: return local_xlock_start((LocalLock*)lock, mdr); + case LOCK_OTYPE_IDIRFRAGTREE: case LOCK_OTYPE_IDIR: assert(0); default: @@ -401,6 +406,7 @@ void Locker::xlock_finish(SimpleLock *lock, MDRequest *mdr) return file_xlock_finish((FileLock*)lock, mdr); case LOCK_OTYPE_IVERSION: return local_xlock_finish((LocalLock*)lock, mdr); + case LOCK_OTYPE_IDIRFRAGTREE: case LOCK_OTYPE_IDIR: assert(0); default: @@ -811,7 +817,6 @@ void Locker::handle_lock(MLock *m) case LOCK_OTYPE_DN: case LOCK_OTYPE_IAUTH: case LOCK_OTYPE_ILINK: - case LOCK_OTYPE_IDIRFRAGTREE: handle_simple_lock(lock, m); break; @@ -819,6 +824,7 @@ void Locker::handle_lock(MLock *m) handle_file_lock((FileLock*)lock, m); break; + case LOCK_OTYPE_IDIRFRAGTREE: case LOCK_OTYPE_IDIR: handle_scatter_lock((ScatterLock*)lock, m); break; @@ -1330,7 +1336,8 @@ bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr) } // wait for write. - lock->add_waiter(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE, + new C_MDS_RetryRequest(mdcache, mdr)); // initiate scatter or lock? if (lock->is_stable()) { @@ -1526,7 +1533,7 @@ void Locker::scatter_writebehind(ScatterLock *lock) inode_t *pi = in->project_inode(); pi->version = in->pre_dirty(); - EUpdate *le = new EUpdate("dir.mtime writebehind"); + EUpdate *le = new EUpdate("scatter writebehind"); le->metablob.add_dir_context(in->get_parent_dn()->get_dir()); le->metablob.add_primary_dentry(in->get_parent_dn(), true, 0, pi); diff --git a/branches/sage/mds/mds/MDCache.cc b/branches/sage/mds/mds/MDCache.cc index 610c1f54b640c..10c5271b65c41 100644 --- a/branches/sage/mds/mds/MDCache.cc +++ b/branches/sage/mds/mds/MDCache.cc @@ -468,6 +468,16 @@ void MDCache::try_subtree_merge(CDir *dir) try_subtree_merge_at(*p); } +class C_MDC_SubtreeMergeWB : public Context { + MDCache *mdcache; + CInode *in; +public: + C_MDC_SubtreeMergeWB(MDCache *mdc, CInode *i) : mdcache(mdc), in(i) {} + void finish(int r) { + mdcache->subtree_merge_writebehind_finish(in); + } +}; + void MDCache::try_subtree_merge_at(CDir *dir) { dout(10) << "try_subtree_merge_at " << *dir << endl; @@ -497,11 +507,40 @@ void MDCache::try_subtree_merge_at(CDir *dir) subtrees[parent].erase(dir); eval_subtree_root(dir); + + // journal inode? + // (this is a large hammer to ensure that dirfragtree updates will + // hit the disk before the relevant dirfrags ever close) + if (dir->inode->is_auth() && + dir->inode->can_auth_pin()) { + CInode *in = dir->inode; + dout(10) << "try_subtree_merge_at journaling merged bound " << *in << endl; + + in->auth_pin(); + + // journal write-behind. + inode_t *pi = in->project_inode(); + pi->version = in->pre_dirty(); + + EUpdate *le = new EUpdate("subtree merge writebehind"); + le->metablob.add_dir_context(in->get_parent_dn()->get_dir()); + le->metablob.add_primary_dentry(in->get_parent_dn(), true, 0, pi); + + mds->mdlog->submit_entry(le); + mds->mdlog->wait_for_sync(new C_MDC_SubtreeMergeWB(this, in)); + } } show_subtrees(15); } +void MDCache::subtree_merge_writebehind_finish(CInode *in) +{ + dout(10) << "subtree_merge_writebehind_finish on " << in << endl; + in->pop_and_dirty_projected_inode(); + in->auth_unpin(); +} + void MDCache::eval_subtree_root(CDir *dir) { // evaluate subtree inode dirlock? @@ -513,7 +552,8 @@ void MDCache::eval_subtree_root(CDir *dir) mds->locker->scatter_eval(&dir->inode->dirlock); else mds->locker->try_scatter_eval(&dir->inode->dirlock); // ** may or may not be auth_pinned ** - } + } + } @@ -1288,7 +1328,12 @@ void MDCache::handle_resolve(MMDSResolve *m) ++pi) { CInode *diri = get_inode(pi->first.ino); if (!diri) continue; - diri->dirfragtree.force_to_leaf(pi->first.frag); + bool forced = diri->dirfragtree.force_to_leaf(pi->first.frag); + if (forced) { + dout(10) << " forced frag " << pi->first.frag << " to leaf in " + << diri->dirfragtree + << " on " << pi->first << endl; + } CDir *dir = diri->get_dirfrag(pi->first.frag); if (!dir) continue; adjust_bounded_subtree_auth(dir, pi->second, from); @@ -5480,7 +5525,8 @@ void MDCache::split_dir(CDir *dir, int bits) void MDCache::fragment_freeze(CInode *diri, list& frags, frag_t basefrag, int bits) { C_Gather *gather = new C_Gather(new C_MDC_FragmentGo(this, diri, frags, basefrag, bits)); - + + // freeze the dirs for (list::iterator p = frags.begin(); p != frags.end(); ++p) { diff --git a/branches/sage/mds/mds/MDCache.h b/branches/sage/mds/mds/MDCache.h index d4099185dba3b..e9a30c6b1b26f 100644 --- a/branches/sage/mds/mds/MDCache.h +++ b/branches/sage/mds/mds/MDCache.h @@ -249,6 +249,7 @@ public: void adjust_export_state(CDir *dir); void try_subtree_merge(CDir *root); void try_subtree_merge_at(CDir *root); + void subtree_merge_writebehind_finish(CInode *in); void eval_subtree_root(CDir *dir); CDir *get_subtree_root(CDir *dir); void remove_subtree(CDir *dir); -- 2.39.5