From: Sage Weil Date: Fri, 23 May 2008 21:40:46 +0000 (-0700) Subject: mds: mknod/mkdir behave X-Git-Tag: v0.3~170^2~82 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b44c7c9c62d22e329a09dbc3f615a53d5cec5ae0;p=ceph.git mds: mknod/mkdir behave --- diff --git a/src/TODO b/src/TODO index 849057d4cc9c..6c7883571103 100644 --- a/src/TODO +++ b/src/TODO @@ -66,6 +66,9 @@ mon - some sort of tester for PaxosService... - osdmon needs to lower-bound old osdmap versions it keeps around? +mds nested +- fix rejoin vs updated dirfrag nested/dirlocks + mds mustfix - replay of dir fragmentation (dont want dir frozen, pins, etc.?) - fix rm -r vs mds exports diff --git a/src/include/types.h b/src/include/types.h index d4c6705e5837..807e8fefd0e4 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -203,15 +203,18 @@ struct nested_info_t { utime_t rctime; // \max_{children}(ctime, nested_ctime) __u64 rbytes; __u64 rfiles; + __u64 rsubdirs; void encode(bufferlist &bl) const { ::encode(rbytes, bl); ::encode(rfiles, bl); + ::encode(rsubdirs, bl); ::encode(rctime, bl); } void decode(bufferlist::iterator &bl) { ::decode(rbytes, bl); ::decode(rfiles, bl); + ::decode(rsubdirs, bl); ::decode(rctime, bl); } }; @@ -300,33 +303,44 @@ static inline void decode(inode_t &i, bufferlist::iterator &p) { /* * like an inode, but for a dir frag */ -struct fnode_t { - version_t version; +struct frag_info_t { utime_t mtime; - __u64 size; // files + dirs - __u64 nprimary, nremote; __u64 nfiles; // files __u64 nsubdirs; // subdirs - nested_info_t nested; // nested summation - nested_info_t accounted_nested; // nested summation + __u64 size() { return nfiles + nsubdirs; } + void encode(bufferlist &bl) const { - ::encode(version, bl); - ::encode(size, bl); - ::encode(nprimary, bl); - ::encode(nremote, bl); + ::encode(mtime, bl); + //::encode(size, bl); ::encode(nfiles, bl); ::encode(nsubdirs, bl); + } + void decode(bufferlist::iterator &bl) { + ::decode(mtime, bl); + 
//::decode(size, bl); + ::decode(nfiles, bl); + ::decode(nsubdirs, bl); + } +}; +WRITE_CLASS_ENCODER(frag_info_t) + +struct fnode_t { + version_t version; + frag_info_t fraginfo, accounted_fraginfo; // this dir + nested_info_t nested, accounted_nested; // this dir + sum over children. + + void encode(bufferlist &bl) const { + ::encode(version, bl); + ::encode(fraginfo, bl); + ::encode(accounted_fraginfo, bl); ::encode(nested, bl); ::encode(accounted_nested, bl); } void decode(bufferlist::iterator &bl) { ::decode(version, bl); - ::decode(size, bl); - ::decode(nprimary, bl); - ::decode(nremote, bl); - ::decode(nfiles, bl); - ::decode(nsubdirs, bl); + ::decode(fraginfo, bl); + ::decode(accounted_fraginfo, bl); ::decode(nested, bl); ::decode(accounted_nested, bl); } diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 07b16134fb4d..6c987fcf2d63 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -86,9 +86,12 @@ ostream& operator<<(ostream& out, CDir& dir) if (dir.state_test(CDir::STATE_EXPORTBOUND)) out << "|exportbound"; if (dir.state_test(CDir::STATE_IMPORTBOUND)) out << "|importbound"; - out << " s=" << dir.fnode.size; - out << " rb=" << dir.fnode.nested.rbytes; - out << " rf=" << dir.fnode.nested.rfiles; + out << " s=" << dir.fnode.fraginfo.size() + << "=" << dir.fnode.fraginfo.nfiles + << "+" << dir.fnode.fraginfo.nsubdirs; + out << " rb=" << dir.fnode.nested.rbytes << "/" << dir.fnode.accounted_nested.rbytes; + out << " rf=" << dir.fnode.nested.rfiles << "/" << dir.fnode.accounted_nested.rfiles; + out << " rd=" << dir.fnode.nested.rsubdirs << "/" << dir.fnode.accounted_nested.rsubdirs; out << " sz=" << dir.get_nitems() << "+" << dir.get_nnull(); if (dir.get_num_dirty()) @@ -490,13 +493,18 @@ void CDir::steal_dentry(CDentry *dn) nnull++; else { nitems++; - fnode.size++; if (dn->is_primary()) { - fnode.nprimary++; fnode.nested.rbytes += dn->get_inode()->inode.accounted_nested.rbytes; fnode.nested.rfiles += dn->get_inode()->inode.accounted_nested.rfiles; - } else 
{ - fnode.nremote++; + if (dn->get_inode()->is_dir()) + fnode.fraginfo.nsubdirs++; + else + fnode.fraginfo.nfiles++; + } else if (dn->is_remote()) { + if (dn->get_remote_d_type() == (S_IFDIR >> 12)) + fnode.fraginfo.nsubdirs++; + else + fnode.fraginfo.nfiles++; } } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 7d74fb29e1f8..c8465c3358ac 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -73,6 +73,7 @@ ostream& operator<<(ostream& out, CInode& in) out << " s=" << in.inode.size; out << " rb=" << in.inode.nested.rbytes << "/" << in.inode.accounted_nested.rbytes; out << " rf=" << in.inode.nested.rfiles << "/" << in.inode.accounted_nested.rfiles; + out << " rd=" << in.inode.nested.rsubdirs << "/" << in.inode.accounted_nested.rsubdirs; // locks out << " " << in.authlock; @@ -656,11 +657,11 @@ void CInode::clear_dirty_scattered(int type) dout(10) << "clear_dirty_scattered " << type << " on " << *this << dendl; switch (type) { case CEPH_LOCK_IDIR: - xlist_dirty_inode_mtime.remove_myself(); + xlist_dirty_dirfrag_dir.remove_myself(); break; case CEPH_LOCK_INESTED: - assert(0); // hmm! 
+ xlist_dirty_dirfrag_nested.remove_myself(); break; default: diff --git a/src/mds/CInode.h b/src/mds/CInode.h index e855e92f6591..ae93cea69814 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -211,7 +211,8 @@ protected: xlist::item xlist_dirty; public: xlist::item xlist_open_file; - xlist::item xlist_dirty_inode_mtime; + xlist::item xlist_dirty_dirfrag_dir; + xlist::item xlist_dirty_dirfrag_nested; xlist::item xlist_purging_inode; private: @@ -244,7 +245,9 @@ public: inode_auth(CDIR_AUTH_DEFAULT), replica_caps_wanted(0), xlist_dirty(this), xlist_open_file(this), - xlist_dirty_inode_mtime(this), xlist_purging_inode(this), + xlist_dirty_dirfrag_dir(this), + xlist_dirty_dirfrag_nested(this), + xlist_purging_inode(this), auth_pins(0), nested_auth_pins(0), versionlock(this, CEPH_LOCK_IVERSION, WAIT_VERSIONLOCK_OFFSET), authlock(this, CEPH_LOCK_IAUTH, WAIT_AUTHLOCK_OFFSET), diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index c8922f02f602..e1b69d23ece4 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -402,6 +402,8 @@ void Locker::wrlock_finish(SimpleLock *lock, Mutation *mut) return scatter_wrlock_finish((ScatterLock*)lock, mut); case CEPH_LOCK_IVERSION: return local_wrlock_finish((LocalLock*)lock, mut); + case CEPH_LOCK_IFILE: + return file_wrlock_finish((FileLock*)lock, mut); default: assert(0); } @@ -1153,6 +1155,7 @@ int Locker::issue_client_lease(CInode *in, int client, if (in->filelock.can_lease()) mask |= CEPH_LOCK_IFILE; } if (in->xattrlock.can_lease()) mask |= CEPH_LOCK_IXATTR; + //if (in->nestedlock.can_lease()) mask |= CEPH_LOCK_INESTED; _issue_client_lease(in, mask, pool, client, bl, now, session); return mask; @@ -1218,27 +1221,20 @@ void Locker::revoke_client_leases(SimpleLock *lock) // nested --------------------------------------------------------------- -void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, bool parent_mtime) +void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, + bool 
do_parent, int dfiles, int dsubdirs) { + dout(10) << "predirty_nested " + << do_parent << "/" << dfiles << "/" << dsubdirs + << " " << *in << dendl; + CDir *parent = in->get_projected_parent_dn()->get_dir(); - // initial diff from *in inode_t *curi = in->get_projected_inode(); - __u64 drbytes; - __u64 drfiles; - utime_t rctime; - if (in->is_dir()) { - drbytes = curi->nested.rbytes - curi->accounted_nested.rbytes; - drfiles = curi->nested.rfiles - curi->accounted_nested.rfiles; - rctime = MAX(curi->ctime, curi->nested.rctime); - } else { - drbytes = curi->size - curi->accounted_nested.rbytes; - drfiles = 1 - curi->accounted_nested.rfiles; - rctime = curi->ctime; + if (curi->is_file()) { + curi->nested.rbytes = curi->size; } - - dout(10) << "predirty_nested delta " << drbytes << " bytes / " << drfiles << " files from " << *in << dendl; - + // build list of inodes to wrlock, dirty, and update list lsi; CInode *cur = in; @@ -1249,35 +1245,56 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, bool pa // opportunistically adjust parent dirfrag CInode *pin = parent->get_inode(); - dout(10) << "predirty_nested delta " << drbytes << " bytes / " << drfiles << " files for " << *pin << dendl; - if (pin->is_base()) - break; + if (do_parent) { + assert(mut->wrlocks.count(&pin->dirlock)); + assert(mut->wrlocks.count(&pin->nestedlock)); + } - if (mut->wrlocks.count(&pin->dirlock) == 0 && - !scatter_wrlock_try(&pin->dirlock, mut)) { - dout(10) << "predirty_nested can't wrlock " << pin->dirlock << " on " << *pin << dendl; + if (mut->wrlocks.count(&pin->nestedlock) == 0 && + !scatter_wrlock_try(&pin->nestedlock, mut)) { + dout(10) << "predirty_nested can't wrlock " << pin->nestedlock << " on " << *pin << dendl; break; } // inode -> dirfrag + __u64 drbytes = curi->nested.rbytes - curi->accounted_nested.rbytes; + __u64 drfiles = curi->nested.rfiles - curi->accounted_nested.rfiles; + __u64 drsubdirs = curi->nested.rsubdirs - curi->accounted_nested.rsubdirs; + 
utime_t rctime = MAX(curi->ctime, curi->nested.rctime); + mut->add_projected_fnode(parent); fnode_t *pf = parent->project_fnode(); pf->version = parent->pre_dirty(); - if (parent_mtime) { - dout(10) << "predirty_nested updating mtime on " << *parent << dendl; - pf->mtime = rctime = mut->now; - } + if (do_parent) { + dout(10) << "predirty_nested updating mtime/size on " << *parent << dendl; + pf->fraginfo.mtime = mut->now; + pf->fraginfo.nfiles += dfiles; + pf->fraginfo.nsubdirs += dsubdirs; + //pf->nested.rfiles += dfiles; + //pf->nested.rsubdirs += dsubdirs; + } + dout(10) << "predirty_nested delta " + << drbytes << " bytes / " << drfiles << " files / " << drsubdirs << " subdirs for " + << *parent << dendl; pf->nested.rbytes += drbytes; pf->nested.rfiles += drfiles; + pf->nested.rsubdirs += drsubdirs; pf->nested.rctime = rctime; - + curi->accounted_nested.rbytes += drbytes; curi->accounted_nested.rfiles += drfiles; + curi->accounted_nested.rsubdirs += drsubdirs; curi->accounted_nested.rctime = rctime; - if (!pin->is_auth()) + if (pin->is_base()) break; + if (!pin->is_auth()) { + if (do_parent) + mut->ls->dirty_dirfrag_dir.push_back(&pin->xlist_dirty_dirfrag_dir); + mut->ls->dirty_dirfrag_nested.push_back(&pin->xlist_dirty_dirfrag_nested); + break; + } // dirfrag -> diri mut->add_projected_inode(pin); @@ -1286,24 +1303,30 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, bool pa version_t ppv = pin->pre_dirty(); inode_t *pi = pin->project_inode(); pi->version = ppv; - if (pf->mtime > pi->mtime) - pi->mtime = pf->mtime; + if (do_parent) { + dout(10) << "predirty_nested updating size/mtime on " << *pin << dendl; + if (pf->fraginfo.mtime > pi->mtime) + pi->mtime = pf->fraginfo.mtime; + pi->size += pf->fraginfo.size() - pf->accounted_fraginfo.size(); + pf->accounted_fraginfo = pf->fraginfo; + } + drbytes = pf->nested.rbytes - pf->accounted_nested.rbytes; + drfiles = pf->nested.rfiles - pf->accounted_nested.rfiles; + drsubdirs = 
pf->nested.rsubdirs - pf->accounted_nested.rsubdirs; + dout(10) << "predirty_nested delta " + << drbytes << " bytes / " << drfiles << " files / " << drsubdirs << " subdirs for " + << *pin << dendl; pi->nested.rbytes += drbytes; pi->nested.rfiles += drfiles; - pi->nested.rctime = rctime; + pi->nested.rsubdirs += drsubdirs; + pi->nested.rctime = MAX(pf->fraginfo.mtime, pf->nested.rctime); + pf->accounted_nested = pf->nested; - pf->accounted_nested.rbytes += drbytes; - pf->accounted_nested.rfiles += drfiles; - pf->accounted_nested.rctime = rctime; - // next parent! cur = pin; curi = pi; parent = cur->get_projected_parent_dn()->get_dir(); - - drbytes = curi->nested.rbytes - curi->accounted_nested.rbytes; - drfiles = curi->nested.rfiles - curi->accounted_nested.rfiles; - rctime = MAX(curi->ctime, curi->nested.rctime); + do_parent = false; } // now, stick it in the blob diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 666e9ed7badc..876f41b8e00c 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -156,7 +156,7 @@ protected: void scatter_writebehind_finish(ScatterLock *lock, LogSegment *ls); public: - void predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, bool parent_mtime); + void predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in, bool do_parent, int dfiles=0, int dsubdirs=0); // local protected: diff --git a/src/mds/LogSegment.h b/src/mds/LogSegment.h index 4723960c7936..75d5f6a4f0fe 100644 --- a/src/mds/LogSegment.h +++ b/src/mds/LogSegment.h @@ -39,7 +39,8 @@ class LogSegment { xlist dirty_dentries; xlist open_files; - xlist dirty_inode_mtimes; + xlist dirty_dirfrag_dir; + xlist dirty_dirfrag_nested; xlist slave_updates; diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 089e4fdc2d07..1cd08e9d6aee 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1820,7 +1820,7 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) rejoin->add_weak_primary_dentry(dir->dirfrag(), p->first, dn->get_inode()->ino()); 
dn->get_inode()->get_nested_dirfrags(nested); - if (dn->get_inode()->dirlock.is_updated()) { + if (dn->get_inode()->dirlock.is_updated()) { // ******* FIXME ********* // include full inode to shed any dirtyscattered state rejoin->add_full_inode(dn->get_inode()->inode, dn->get_inode()->symlink, diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 81aea634efc1..4270af8164e1 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1420,6 +1420,7 @@ CDentry* Server::rdlock_path_xlock_dentry(MDRequest *mdr, bool okexist, bool mus else rdlocks.insert(&dn->lock); // existing dn, rdlock wrlocks.insert(&dn->dir->inode->dirlock); // also, wrlock on dir mtime + wrlocks.insert(&dn->dir->inode->nestedlock); // also, wrlock on dir mtime if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return 0; @@ -1512,9 +1513,9 @@ version_t Server::predirty_dn_diri(MDRequest *mdr, CDentry *dn, EMetaBlob *blob, dout(10) << "predirty_dn_diri (non-auth) ctime/mtime " << mdr->now << " on " << *diri << dendl; - blob->add_dirtied_inode_mtime(diri->ino(), mdr->now); + //blob->add_dirtied_inode_mtime(diri->ino(), mdr->now); assert(mdr->ls); - mdr->ls->dirty_inode_mtimes.push_back(&diri->xlist_dirty_inode_mtime); + mdr->ls->dirty_dirfrag_dir.push_back(&diri->xlist_dirty_dirfrag_dir); } return dirpv; @@ -2042,6 +2043,7 @@ void Server::handle_client_mknod(MDRequest *mdr) if ((newi->inode.mode & S_IFMT) == 0) newi->inode.mode |= S_IFREG; newi->inode.version = dn->pre_dirty() - 1; + newi->inode.nested.rfiles = 1; dout(10) << "mknod mode " << newi->inode.mode << " rdev " << newi->inode.rdev << dendl; @@ -2051,7 +2053,7 @@ void Server::handle_client_mknod(MDRequest *mdr) le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); - mds->locker->predirty_nested(mdr, &le->metablob, newi, true); + mds->locker->predirty_nested(mdr, &le->metablob, newi, true, 1, 0); //version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // 
dir mtime too le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); @@ -2083,6 +2085,7 @@ void Server::handle_client_mkdir(MDRequest *mdr) newi->inode.mode |= S_IFDIR; newi->inode.layout = g_default_mds_dir_layout; newi->inode.version = dn->pre_dirty() - 1; + newi->inode.nested.rsubdirs = 1; // ...and that new dir is empty. CDir *newdir = newi->get_or_open_dirfrag(mds->mdcache, frag_t()); @@ -2096,7 +2099,7 @@ void Server::handle_client_mkdir(MDRequest *mdr) EUpdate *le = new EUpdate(mdlog, "mkdir"); le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); - mds->locker->predirty_nested(mdr, &le->metablob, newi, true); + mds->locker->predirty_nested(mdr, &le->metablob, newi, true, 0, 1); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); le->metablob.add_dir(newdir, true, true); // dirty AND complete @@ -2141,13 +2144,14 @@ void Server::handle_client_symlink(MDRequest *mdr) newi->symlink = req->get_path2(); newi->inode.size = newi->symlink.length(); newi->inode.version = dn->pre_dirty() - 1; - + newi->inode.nested.rfiles = 1; + // prepare finisher mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "symlink"); le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); - mds->locker->predirty_nested(mdr, &le->metablob, newi, true); + mds->locker->predirty_nested(mdr, &le->metablob, newi, true, 1, 0); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); // log + wait diff --git a/src/mds/SimpleLock.h b/src/mds/SimpleLock.h index 6b59852df8d1..bd2fc0061955 100644 --- a/src/mds/SimpleLock.h +++ b/src/mds/SimpleLock.h @@ -29,6 +29,7 @@ inline const char *get_lock_type_name(int t) { case CEPH_LOCK_IDFT: return "idft"; case CEPH_LOCK_IDIR: return "idir"; case CEPH_LOCK_IXATTR: return "ixattr"; + case CEPH_LOCK_INESTED: return "inested"; case CEPH_LOCK_INO: return "ino"; default: assert(0); return 
0; } diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 62c9aaa59c91..1930d01e9a9c 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -161,8 +161,8 @@ public: */ public: struct dirlump { - static const int STATE_COMPLETE = (1<<1); - static const int STATE_DIRTY = (1<<2); // dirty due to THIS journal item, that is! + static const int STATE_COMPLETE = (1<<1); + static const int STATE_DIRTY = (1<<2); // dirty due to THIS journal item, that is! //version_t dirv; fnode_t fnode; @@ -246,7 +246,7 @@ private: list atids; // inode dirlocks (scatterlocks) i've touched. - map<inodeno_t,utime_t> dirty_inode_mtimes; + //map<inodeno_t,utime_t> dirty_inode_mtimes; // ino's i've allocated list<inodeno_t> allocated_inos; @@ -263,7 +263,7 @@ private: ::encode(lump_order, bl); ::encode(lump_map, bl); ::encode(atids, bl); - ::encode(dirty_inode_mtimes, bl); + //::encode(dirty_inode_mtimes, bl); ::encode(allocated_inos, bl); if (!allocated_inos.empty()) ::encode(alloc_tablev, bl); @@ -274,7 +274,7 @@ private: ::decode(lump_order, bl); ::decode(lump_map, bl); ::decode(atids, bl); - ::decode(dirty_inode_mtimes, bl); + //::decode(dirty_inode_mtimes, bl); ::decode(allocated_inos, bl); if (!allocated_inos.empty()) ::decode(alloc_tablev, bl); @@ -309,9 +309,11 @@ private: atids.push_back(atid); } + /* void add_dirtied_inode_mtime(inodeno_t ino, utime_t ctime) { dirty_inode_mtimes[ino] = ctime; } + */ void add_allocated_ino(inodeno_t ino, version_t tablev) { allocated_inos.push_back(ino); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 7d5494e443c9..4bcfa32a2707 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -106,9 +106,9 @@ C_Gather *LogSegment::try_to_expire(MDS *mds) } // dirty non-auth mtimes - for (xlist<CInode*>::iterator p = dirty_inode_mtimes.begin(); !p.end(); ++p) { + for (xlist<CInode*>::iterator p = dirty_dirfrag_dir.begin(); !p.end(); ++p) { CInode *in = *p; - dout(10) << "try_to_expire waiting for dirlock mtime flush on " << *in << dendl; + dout(10) << "try_to_expire
waiting for dirlock flush on " << *in << dendl; if (!gather) gather = new C_Gather; if (in->is_ambiguous_auth()) { @@ -125,6 +125,25 @@ C_Gather *LogSegment::try_to_expire(MDS *mds) } //(*p)->dirlock.add_waiter(SimpleLock::WAIT_STABLE, gather->new_sub()); } + for (xlist<CInode*>::iterator p = dirty_dirfrag_nested.begin(); !p.end(); ++p) { + CInode *in = *p; + dout(10) << "try_to_expire waiting for nestedlock flush on " << *in << dendl; + if (!gather) gather = new C_Gather; + + if (in->is_ambiguous_auth()) { + dout(10) << " waiting for single auth on " << *in << dendl; + in->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, gather->new_sub()); + } else if (in->is_auth()) { + dout(10) << " i'm auth, unscattering nestedlock on " << *in << dendl; + assert(in->is_replicated()); // hrm! + mds->locker->scatter_lock(&in->nestedlock); + in->nestedlock.add_waiter(SimpleLock::WAIT_STABLE, gather->new_sub()); + } else { + dout(10) << " i'm a replica, requesting nestedlock unscatter of " << *in << dendl; + mds->locker->scatter_try_unscatter(&in->nestedlock, gather->new_sub()); + } + //(*p)->nestedlock.add_waiter(SimpleLock::WAIT_STABLE, gather->new_sub()); + } // open files if (!open_files.empty()) { @@ -294,8 +313,12 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg) dout(10) << "EMetaBlob.replay added dir " << *dir << dendl; } dir->set_version( lump.fnode.version ); - if (lump.is_dirty()) + if (lump.is_dirty()) { dir->_mark_dirty(logseg); + dir->get_inode()->dirlock.set_updated(); + dir->get_inode()->nestedlock.set_updated(); + } + if (lump.is_complete()) dir->mark_complete(); @@ -409,7 +432,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg) mds->anchorclient->got_journaled_agree(*p, logseg); } - // dirtied inode mtimes + /*// dirtied inode mtimes if (!dirty_inode_mtimes.empty()) for (map<inodeno_t,utime_t>::iterator p = dirty_inode_mtimes.begin(); p != dirty_inode_mtimes.end(); @@ -419,6 +442,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg) in->dirlock.set_updated();
logseg->dirty_inode_mtimes.push_back(&in->xlist_dirty_inode_mtime); } + */ // allocated_inos if (!allocated_inos.empty()) {