From: Sage Weil Date: Fri, 23 May 2008 17:43:00 +0000 (-0700) Subject: mds: unmarry cdir projected_version vs project_fnode; pull Mutation out of MDRequest X-Git-Tag: v0.3~170^2~85 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9379c9545f338516f19b005355c5a28f61693d6e;p=ceph.git mds: unmarry cdir projected_version vs project_fnode; pull Mutation out of MDRequest --- diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 664335432278..07b16134fb4d 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -86,6 +86,10 @@ ostream& operator<<(ostream& out, CDir& dir) if (dir.state_test(CDir::STATE_EXPORTBOUND)) out << "|exportbound"; if (dir.state_test(CDir::STATE_IMPORTBOUND)) out << "|importbound"; + out << " s=" << dir.fnode.size; + out << " rb=" << dir.fnode.nested.rbytes; + out << " rf=" << dir.fnode.nested.rfiles; + out << " sz=" << dir.get_nitems() << "+" << dir.get_nnull(); if (dir.get_num_dirty()) out << " dirty=" << dir.get_num_dirty(); @@ -557,7 +561,7 @@ void CDir::split(int bits, list& subs, list& waiters) f->replica_map = replica_map; f->dir_auth = dir_auth; f->init_fragment_pins(); - f->fnode.version = get_version(); + f->set_version(get_version()); f->pop_me = pop_me; f->pop_me *= fac; @@ -776,22 +780,6 @@ fnode_t *CDir::project_fnode() return p; } -version_t CDir::pre_dirty(version_t min) -{ - fnode_t *pf = project_fnode(); - if (min > pf->version) - pf->version = min; - ++pf->version; - dout(10) << "pre_dirty " << pf->version << dendl; - return pf->version; -} - -void CDir::mark_dirty(version_t pv, LogSegment *ls) -{ - assert(get_version() < pv); - pop_and_dirty_projected_fnode(ls); -} - void CDir::pop_and_dirty_projected_fnode(LogSegment *ls) { assert(!projected_fnode.empty()); @@ -803,6 +791,24 @@ void CDir::pop_and_dirty_projected_fnode(LogSegment *ls) projected_fnode.pop_front(); } + +version_t CDir::pre_dirty(version_t min) +{ + if (min > projected_version) + projected_version = min; + ++projected_version; + dout(10) 
<< "pre_dirty " << projected_version << dendl; + return projected_version; +} + +void CDir::mark_dirty(version_t pv, LogSegment *ls) +{ + assert(get_version() < pv); + assert(pv <= projected_version); + fnode.version = pv; + _mark_dirty(ls); +} + void CDir::_mark_dirty(LogSegment *ls) { if (!state_test(STATE_DIRTY)) { @@ -1063,7 +1069,7 @@ void CDir::_fetched(bufferlist &bl) assert(!is_projected()); assert(!state_test(STATE_COMMITTING)); fnode = got_fnode; - committing_version = committed_version = got_fnode.version; + projected_version = committing_version = committed_version = got_fnode.version; } //cache->mds->logger->inc("newin", num_new_inodes_loaded); diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 034d122d2751..4109113ea7c0 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -153,29 +153,33 @@ class CDir : public MDSCacheObject { return dirfrag() < ((const CDir*)r)->dirfrag(); } - //int hack_num_accessed; + fnode_t fnode; protected: - fnode_t fnode; + version_t projected_version; list projected_fnode; + xlist::item xlist_dirty; + public: version_t get_version() { return fnode.version; } void set_version(version_t v) { assert(projected_fnode.empty()); - fnode.version = v; + projected_version = fnode.version = v; } + version_t get_projected_version() { return projected_version; } + fnode_t *get_projected_fnode() { if (projected_fnode.empty()) return &fnode; else return projected_fnode.back(); } - version_t get_projected_version() { return get_projected_fnode()->version; } fnode_t *project_fnode(); + void pop_and_dirty_projected_fnode(LogSegment *ls); - bool is_projected() { return !projected_fnode.empty(); } + bool is_projected() { return get_projected_version() > get_version(); } version_t pre_dirty(version_t min=0); void _mark_dirty(LogSegment *ls); void mark_dirty(version_t pv, LogSegment *ls); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index d8a38e9be07c..7d74fb29e1f8 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -72,6 +72,7 @@ 
ostream& operator<<(ostream& out, CInode& in) out << " s=" << in.inode.size; out << " rb=" << in.inode.nested.rbytes << "/" << in.inode.accounted_nested.rbytes; + out << " rf=" << in.inode.nested.rfiles << "/" << in.inode.accounted_nested.rfiles; // locks out << " " << in.authlock; diff --git a/src/mds/FileLock.h b/src/mds/FileLock.h index aa0b3f0c1627..ad0d7570eddc 100644 --- a/src/mds/FileLock.h +++ b/src/mds/FileLock.h @@ -92,7 +92,7 @@ any + statlite(mtime) // -- lock... hard or file -class MDRequest; +class Mutation; class FileLock : public SimpleLock { int num_wrlock; @@ -136,7 +136,7 @@ class FileLock : public SimpleLock { } // read/write access - bool can_rdlock(MDRequest *mdr) { + bool can_rdlock(Mutation *mdr) { if (!parent->is_auth()) return (state == LOCK_SYNC); //if (state == LOCK_LOCK && mdr && xlock_by == mdr) return true; if (state == LOCK_LOCK && !xlock_by) return true; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index f887f88735f7..1a426cbaad2b 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -320,15 +320,15 @@ bool Locker::acquire_locks(MDRequest *mdr, } -void Locker::drop_locks(MDRequest *mdr) +void Locker::drop_locks(Mutation *mut) { // leftover locks - while (!mdr->xlocks.empty()) - xlock_finish(*mdr->xlocks.begin(), mdr); - while (!mdr->rdlocks.empty()) - rdlock_finish(*mdr->rdlocks.begin(), mdr); - while (!mdr->wrlocks.empty()) - wrlock_finish(*mdr->wrlocks.begin(), mdr); + while (!mut->xlocks.empty()) + xlock_finish(*mut->xlocks.begin(), mut); + while (!mut->rdlocks.empty()) + rdlock_finish(*mut->rdlocks.begin(), mut); + while (!mut->wrlocks.empty()) + wrlock_finish(*mut->wrlocks.begin(), mut); } @@ -348,94 +348,94 @@ void Locker::eval_gather(SimpleLock *lock) } } -bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mdr) +bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mut) { switch (lock->get_type()) { case CEPH_LOCK_IFILE: - return file_rdlock_start((FileLock*)lock, mdr); + return 
file_rdlock_start((FileLock*)lock, mut); case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: case CEPH_LOCK_INESTED: - return scatter_rdlock_start((ScatterLock*)lock, mdr); + return scatter_rdlock_start((ScatterLock*)lock, mut); default: - return simple_rdlock_start(lock, mdr); + return simple_rdlock_start(lock, mut); } } -void Locker::rdlock_finish(SimpleLock *lock, MDRequest *mdr) +void Locker::rdlock_finish(SimpleLock *lock, Mutation *mut) { switch (lock->get_type()) { case CEPH_LOCK_IFILE: - return file_rdlock_finish((FileLock*)lock, mdr); + return file_rdlock_finish((FileLock*)lock, mut); case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: case CEPH_LOCK_INESTED: - return scatter_rdlock_finish((ScatterLock*)lock, mdr); + return scatter_rdlock_finish((ScatterLock*)lock, mut); default: - return simple_rdlock_finish(lock, mdr); + return simple_rdlock_finish(lock, mut); } } -bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mdr) +bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mut) { switch (lock->get_type()) { case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: case CEPH_LOCK_INESTED: - return scatter_wrlock_start((ScatterLock*)lock, mdr); + return scatter_wrlock_start((ScatterLock*)lock, mut); case CEPH_LOCK_IVERSION: - return local_wrlock_start((LocalLock*)lock, mdr); + return local_wrlock_start((LocalLock*)lock, mut); //case CEPH_LOCK_IFILE: - //return file_wrlock_start((ScatterLock*)lock, mdr); + //return file_wrlock_start((ScatterLock*)lock, mut); default: assert(0); return false; } } -void Locker::wrlock_finish(SimpleLock *lock, MDRequest *mdr) +void Locker::wrlock_finish(SimpleLock *lock, Mutation *mut) { switch (lock->get_type()) { case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: case CEPH_LOCK_INESTED: - return scatter_wrlock_finish((ScatterLock*)lock, mdr); + return scatter_wrlock_finish((ScatterLock*)lock, mut); case CEPH_LOCK_IVERSION: - return local_wrlock_finish((LocalLock*)lock, mdr); + return local_wrlock_finish((LocalLock*)lock, mut); default: assert(0); } } -bool 
Locker::xlock_start(SimpleLock *lock, MDRequest *mdr) +bool Locker::xlock_start(SimpleLock *lock, MDRequest *mut) { switch (lock->get_type()) { case CEPH_LOCK_IFILE: - return file_xlock_start((FileLock*)lock, mdr); + return file_xlock_start((FileLock*)lock, mut); case CEPH_LOCK_IVERSION: - return local_xlock_start((LocalLock*)lock, mdr); + return local_xlock_start((LocalLock*)lock, mut); case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: case CEPH_LOCK_INESTED: assert(0); default: - return simple_xlock_start(lock, mdr); + return simple_xlock_start(lock, mut); } } -void Locker::xlock_finish(SimpleLock *lock, MDRequest *mdr) +void Locker::xlock_finish(SimpleLock *lock, Mutation *mut) { switch (lock->get_type()) { case CEPH_LOCK_IFILE: - return file_xlock_finish((FileLock*)lock, mdr); + return file_xlock_finish((FileLock*)lock, mut); case CEPH_LOCK_IVERSION: - return local_xlock_finish((LocalLock*)lock, mdr); + return local_xlock_finish((LocalLock*)lock, mut); case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: case CEPH_LOCK_INESTED: assert(0); default: - return simple_xlock_finish(lock, mdr); + return simple_xlock_finish(lock, mut); } } @@ -471,33 +471,27 @@ version_t Locker::issue_file_data_version(CInode *in) struct C_Locker_FileUpdate_finish : public Context { Locker *locker; CInode *in; - list nest_updates; - LogSegment *ls; + Mutation *mut; bool share; - C_Locker_FileUpdate_finish(Locker *l, CInode *i, LogSegment *s, list &ls, bool e=false) : - locker(l), in(i), ls(s), share(e) { - nest_updates.swap(ls); + C_Locker_FileUpdate_finish(Locker *l, CInode *i, Mutation *m, bool e=false) : + locker(l), in(i), mut(m), share(e) { in->get(CInode::PIN_PTRWAITER); } void finish(int r) { - locker->file_update_finish(in, ls, nest_updates, share); + locker->file_update_finish(in, mut, share); } }; -void Locker::file_update_finish(CInode *in, LogSegment *ls, list &nest_updates, bool share) +void Locker::file_update_finish(CInode *in, Mutation *mut, bool share) { dout(10) << "file_update_finish 
on " << *in << dendl; - in->pop_and_dirty_projected_inode(ls); + in->pop_and_dirty_projected_inode(mut->ls); in->put(CInode::PIN_PTRWAITER); - for (list::iterator p = nest_updates.begin(); - p != nest_updates.end(); - p++) { - (*p)->pop_and_dirty_projected_inode(ls); - scatter_wrlock_finish(&(*p)->dirlock, 0); - } - - file_wrlock_finish(&in->filelock); + mut->pop_and_dirty_projected_inodes(); + mut->pop_and_dirty_projected_fnodes(); + drop_locks(mut); + if (share && in->is_auth() && in->filelock.is_stable()) share_inode_max_size(in); } @@ -846,20 +840,21 @@ bool Locker::check_inode_max_size(CInode *in, bool forcewrlock) dout(10) << "check_inode_max_size " << latest->max_size << " -> " << new_max << " on " << *in << dendl; + + Mutation *mut = new Mutation; + mut->ls = mds->mdlog->get_current_segment(); inode_t *pi = in->project_inode(); pi->version = in->pre_dirty(); pi->max_size = new_max; EOpen *le = new EOpen(mds->mdlog); + predirty_nested(mut, &le->metablob, in); le->metablob.add_dir_context(in->get_parent_dir()); - list nest_updates; - predirty_nested(&le->metablob, in, nest_updates); le->metablob.add_primary_dentry(in->parent, true, 0, pi); - LogSegment *ls = mds->mdlog->get_current_segment(); le->add_ino(in->ino()); - ls->open_files.push_back(&in->xlist_open_file); - mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, nest_updates, true)); - file_wrlock_start(&in->filelock, forcewrlock); // wrlock for duration of journal + mut->ls->open_files.push_back(&in->xlist_open_file); + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, mut, true)); + file_wrlock_start(&in->filelock, mut, forcewrlock); // wrlock for duration of journal return true; } @@ -1047,13 +1042,13 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) << " for " << *in << dendl; pi->time_warp_seq = m->get_time_warp_seq(); } + Mutation *mut = new Mutation; + mut->ls = mds->mdlog->get_current_segment(); + file_wrlock_start(&in->filelock, mut); // 
wrlock for duration of journal + predirty_nested(mut, &le->metablob, in); le->metablob.add_dir_context(in->get_parent_dir()); - list nest_updates; - predirty_nested(&le->metablob, in, nest_updates); le->metablob.add_primary_dentry(in->parent, true, 0, pi); - LogSegment *ls = mds->mdlog->get_current_segment(); - mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, ls, nest_updates, change_max)); - file_wrlock_start(&in->filelock); // wrlock for duration of journal + mds->mdlog->submit_entry(le, new C_Locker_FileUpdate_finish(this, in, mut, change_max)); } // reevaluate, waiters @@ -1223,12 +1218,9 @@ void Locker::revoke_client_leases(SimpleLock *lock) // nested --------------------------------------------------------------- -void Locker::predirty_nested(EMetaBlob *blob, CInode *in, list &ls) +void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in) { - assert(ls.empty()); - CDir *parent = in->get_projected_parent_dn()->get_dir(); - blob->add_dir_context(parent); // initial diff from *in inode_t *curi = in->get_projected_inode(); @@ -1248,6 +1240,7 @@ void Locker::predirty_nested(EMetaBlob *blob, CInode *in, list &ls) dout(10) << "predirty_nested delta " << drbytes << " bytes / " << drfiles << " files from " << *in << dendl; // build list of inodes to wrlock, dirty, and update + list lsi; CInode *cur = in; while (parent) { assert(cur->is_auth()); @@ -1260,12 +1253,24 @@ void Locker::predirty_nested(EMetaBlob *blob, CInode *in, list &ls) if (pin->is_base()) break; - if (!scatter_wrlock_try(&pin->dirlock)) { + if (mut->wrlocks.count(&pin->dirlock) == 0 && + !scatter_wrlock_try(&pin->dirlock, mut)) { dout(10) << "predirty_nested can't wrlock " << pin->dirlock << " on " << *pin << dendl; break; } - ls.push_back(pin); + // inode -> dirfrag + mut->add_projected_fnode(parent); + + fnode_t *pf = parent->project_fnode(); + pf->version = parent->pre_dirty(); + pf->nested.rbytes += drbytes; + pf->nested.rfiles += drfiles; + pf->nested.rctime = 
rctime; + + curi->accounted_nested.rbytes += drbytes; + curi->accounted_nested.rfiles += drfiles; + curi->accounted_nested.rctime = rctime; // FIXME if (!pin->is_auth()) { @@ -1273,7 +1278,10 @@ void Locker::predirty_nested(EMetaBlob *blob, CInode *in, list &ls) break; } - // project update + // dirfrag -> diri + mut->add_projected_inode(pin); + lsi.push_back(pin); + version_t ppv = pin->pre_dirty(); inode_t *pi = pin->project_inode(); pi->version = ppv; @@ -1281,15 +1289,6 @@ void Locker::predirty_nested(EMetaBlob *blob, CInode *in, list &ls) pi->nested.rfiles += drfiles; pi->nested.rctime = rctime; - frag_t fg = parent->dirfrag().frag; - pin->dirfrag_nested[fg].rbytes += drbytes; - pin->dirfrag_nested[fg].rfiles += drfiles; - pin->dirfrag_nested[fg].rctime = rctime; - - curi->accounted_nested.rbytes += drbytes; - curi->accounted_nested.rfiles += drfiles; - curi->accounted_nested.rctime = rctime; - cur = pin; curi = pi; parent = cur->get_projected_parent_dn()->get_dir(); @@ -1300,8 +1299,9 @@ void Locker::predirty_nested(EMetaBlob *blob, CInode *in, list &ls) } // now, stick it in the blob - for (list::iterator p = ls.begin(); - p != ls.end(); + blob->add_dir_context(parent); + for (list::iterator p = lsi.begin(); + p != lsi.end(); p++) { CInode *cur = *p; inode_t *pi = cur->get_projected_inode(); @@ -1653,31 +1653,31 @@ bool Locker::simple_rdlock_try(SimpleLock *lock, Context *con) return false; } -bool Locker::simple_rdlock_start(SimpleLock *lock, MDRequest *mdr) +bool Locker::simple_rdlock_start(SimpleLock *lock, MDRequest *mut) { dout(7) << "simple_rdlock_start on " << *lock << " on " << *lock->get_parent() << dendl; // can read? grab ref. - if (lock->can_rdlock(mdr)) { + if (lock->can_rdlock(mut)) { lock->get_rdlock(); - mdr->rdlocks.insert(lock); - mdr->locks.insert(lock); + mut->rdlocks.insert(lock); + mut->locks.insert(lock); return true; } // wait! 
dout(7) << "simple_rdlock_start waiting on " << *lock << " on " << *lock->get_parent() << dendl; - lock->add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mut)); return false; } -void Locker::simple_rdlock_finish(SimpleLock *lock, MDRequest *mdr) +void Locker::simple_rdlock_finish(SimpleLock *lock, Mutation *mut) { // drop ref lock->put_rdlock(); - if (mdr) { - mdr->rdlocks.erase(lock); - mdr->locks.erase(lock); + if (mut) { + mut->rdlocks.erase(lock); + mut->locks.erase(lock); } dout(7) << "simple_rdlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; @@ -1687,13 +1687,13 @@ void Locker::simple_rdlock_finish(SimpleLock *lock, MDRequest *mdr) simple_eval_gather(lock); } -bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr) +bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mut) { dout(7) << "simple_xlock_start on " << *lock << " on " << *lock->get_parent() << dendl; // xlock by me? if (lock->is_xlocked() && - lock->get_xlocked_by() == mdr) + lock->get_xlocked_by() == mut) return true; // auth? @@ -1708,57 +1708,57 @@ bool Locker::simple_xlock_start(SimpleLock *lock, MDRequest *mdr) if (lock->get_state() == LOCK_LOCK) { if (lock->is_xlocked()) { // by someone else. - lock->add_waiter(SimpleLock::WAIT_WR, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_WR, new C_MDS_RetryRequest(mdcache, mut)); return false; } // xlock. - lock->get_xlock(mdr); - mdr->xlocks.insert(lock); - mdr->locks.insert(lock); + lock->get_xlock(mut); + mut->xlocks.insert(lock); + mut->locks.insert(lock); return true; } else { // wait for lock - lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut)); return false; } } else { // replica // this had better not be a remote xlock attempt! 
- assert(!mdr->slave_request); + assert(!mut->slave_request); // wait for single auth if (lock->get_parent()->is_ambiguous_auth()) { lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, - new C_MDS_RetryRequest(mdcache, mdr)); + new C_MDS_RetryRequest(mdcache, mut)); return false; } // send lock request int auth = lock->get_parent()->authority().first; - mdr->more()->slaves.insert(auth); - MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_XLOCK); + mut->more()->slaves.insert(auth); + MMDSSlaveRequest *r = new MMDSSlaveRequest(mut->reqid, MMDSSlaveRequest::OP_XLOCK); r->set_lock_type(lock->get_type()); lock->get_parent()->set_object_info(r->get_object_info()); mds->send_message_mds(r, auth); // wait - lock->add_waiter(SimpleLock::WAIT_REMOTEXLOCK, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_REMOTEXLOCK, new C_MDS_RetryRequest(mdcache, mut)); return false; } } -void Locker::simple_xlock_finish(SimpleLock *lock, MDRequest *mdr) +void Locker::simple_xlock_finish(SimpleLock *lock, Mutation *mut) { dout(7) << "simple_xlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; // drop ref - assert(lock->can_xlock(mdr)); + assert(lock->can_xlock(mut)); lock->put_xlock(); - assert(mdr); - mdr->xlocks.erase(lock); - mdr->locks.erase(lock); + assert(mut); + mut->xlocks.erase(lock); + mut->locks.erase(lock); // remote xlock? 
if (!lock->get_parent()->is_auth()) { @@ -1766,7 +1766,7 @@ void Locker::simple_xlock_finish(SimpleLock *lock, MDRequest *mdr) dout(7) << "simple_xlock_finish releasing remote xlock on " << *lock->get_parent() << dendl; int auth = lock->get_parent()->authority().first; if (mds->mdsmap->get_state(auth) >= MDSMap::STATE_REJOIN) { - MMDSSlaveRequest *slavereq = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_UNXLOCK); + MMDSSlaveRequest *slavereq = new MMDSSlaveRequest(mut->reqid, MMDSSlaveRequest::OP_UNXLOCK); slavereq->set_lock_type(lock->get_type()); lock->get_parent()->set_object_info(slavereq->get_object_info()); mds->send_message_mds(slavereq, auth); @@ -1834,7 +1834,7 @@ void Locker::dentry_anon_rdlock_trace_finish(vector& trace) // ========================================================================== // scatter lock -bool Locker::scatter_rdlock_start(ScatterLock *lock, MDRequest *mdr) +bool Locker::scatter_rdlock_start(ScatterLock *lock, MDRequest *mut) { dout(7) << "scatter_rdlock_start on " << *lock << " on " << *lock->get_parent() << dendl; @@ -1843,7 +1843,7 @@ bool Locker::scatter_rdlock_start(ScatterLock *lock, MDRequest *mdr) if (lock->get_state() == LOCK_SCATTER && !lock->get_parent()->is_auth()) { dout(7) << "scatter_rdlock_start scatterlock read on a stable scattered replica, fw to auth" << dendl; - mdcache->request_forward(mdr, lock->get_parent()->authority().first); + mdcache->request_forward(mut, lock->get_parent()->authority().first); return false; } @@ -1855,15 +1855,15 @@ bool Locker::scatter_rdlock_start(ScatterLock *lock, MDRequest *mdr) scatter_sync(lock); // can rdlock? - if (lock->can_rdlock(mdr)) { + if (lock->can_rdlock(mut)) { lock->get_rdlock(); - mdr->rdlocks.insert(lock); - mdr->locks.insert(lock); + mut->rdlocks.insert(lock); + mut->locks.insert(lock); return true; } // wait for read. 
- lock->add_waiter(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_RD|SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut)); // initiate sync or tempsync? if (lock->is_stable() && @@ -1877,21 +1877,21 @@ bool Locker::scatter_rdlock_start(ScatterLock *lock, MDRequest *mdr) return false; } -void Locker::scatter_rdlock_finish(ScatterLock *lock, MDRequest *mdr) +void Locker::scatter_rdlock_finish(ScatterLock *lock, MDRequest *mut) { dout(7) << "scatter_rdlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; lock->put_rdlock(); - if (mdr) { - mdr->rdlocks.erase(lock); - mdr->locks.erase(lock); + if (mut) { + mut->rdlocks.erase(lock); + mut->locks.erase(lock); } scatter_eval_gather(lock); } -bool Locker::scatter_wrlock_try(ScatterLock *lock) +bool Locker::scatter_wrlock_try(ScatterLock *lock, Mutation *mut) { // pre-twiddle? if (lock->get_parent()->is_auth() && @@ -1906,26 +1906,25 @@ bool Locker::scatter_wrlock_try(ScatterLock *lock) // can wrlock? if (lock->can_wrlock()) { lock->get_wrlock(); + mut->wrlocks.insert(lock); + mut->locks.insert(lock); return true; } return false; } -bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr) +bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mut) { dout(7) << "scatter_wrlock_start on " << *lock << " on " << *lock->get_parent() << dendl; - if (scatter_wrlock_try(lock)) { - mdr->wrlocks.insert(lock); - mdr->locks.insert(lock); + if (scatter_wrlock_try(lock, mut)) return true; - } // wait for write. lock->add_waiter(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE, - new C_MDS_RetryRequest(mdcache, mdr)); + new C_MDS_RetryRequest(mdcache, mut)); // initiate scatter or lock? 
if (lock->is_stable()) { @@ -1948,14 +1947,14 @@ bool Locker::scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr) return false; } -void Locker::scatter_wrlock_finish(ScatterLock *lock, MDRequest *mdr) +void Locker::scatter_wrlock_finish(ScatterLock *lock, Mutation *mut) { dout(7) << "scatter_wrlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; lock->put_wrlock(); - if (mdr) { - mdr->wrlocks.erase(lock); - mdr->locks.erase(lock); + if (mut) { + mut->wrlocks.erase(lock); + mut->locks.erase(lock); } scatter_eval_gather(lock); @@ -2577,54 +2576,54 @@ void Locker::scatter_unscatter_autoscattered() // local lock -bool Locker::local_wrlock_start(LocalLock *lock, MDRequest *mdr) +bool Locker::local_wrlock_start(LocalLock *lock, MDRequest *mut) { dout(7) << "local_wrlock_start on " << *lock << " on " << *lock->get_parent() << dendl; if (lock->can_wrlock()) { lock->get_wrlock(); - mdr->wrlocks.insert(lock); - mdr->locks.insert(lock); + mut->wrlocks.insert(lock); + mut->locks.insert(lock); return true; } else { - lock->add_waiter(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut)); return false; } } -void Locker::local_wrlock_finish(LocalLock *lock, MDRequest *mdr) +void Locker::local_wrlock_finish(LocalLock *lock, MDRequest *mut) { dout(7) << "local_wrlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; lock->put_wrlock(); - mdr->wrlocks.erase(lock); - mdr->locks.erase(lock); + mut->wrlocks.erase(lock); + mut->locks.erase(lock); } -bool Locker::local_xlock_start(LocalLock *lock, MDRequest *mdr) +bool Locker::local_xlock_start(LocalLock *lock, MDRequest *mut) { dout(7) << "local_xlock_start on " << *lock << " on " << *lock->get_parent() << dendl; - if (lock->is_xlocked_by_other(mdr)) { - lock->add_waiter(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr)); + if 
(lock->is_xlocked_by_other(mut)) { + lock->add_waiter(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut)); return false; } - lock->get_xlock(mdr); - mdr->xlocks.insert(lock); - mdr->locks.insert(lock); + lock->get_xlock(mut); + mut->xlocks.insert(lock); + mut->locks.insert(lock); return true; } -void Locker::local_xlock_finish(LocalLock *lock, MDRequest *mdr) +void Locker::local_xlock_finish(LocalLock *lock, MDRequest *mut) { dout(7) << "local_xlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; lock->put_xlock(); - mdr->xlocks.erase(lock); - mdr->locks.erase(lock); + mut->xlocks.erase(lock); + mut->locks.erase(lock); lock->finish_waiters(SimpleLock::WAIT_STABLE | SimpleLock::WAIT_WR | @@ -2637,15 +2636,15 @@ void Locker::local_xlock_finish(LocalLock *lock, MDRequest *mdr) // file lock -bool Locker::file_rdlock_start(FileLock *lock, MDRequest *mdr) +bool Locker::file_rdlock_start(FileLock *lock, MDRequest *mut) { dout(7) << "file_rdlock_start " << *lock << " on " << *lock->get_parent() << dendl; // can read? grab ref. - if (lock->can_rdlock(mdr)) { + if (lock->can_rdlock(mut)) { lock->get_rdlock(); - mdr->rdlocks.insert(lock); - mdr->locks.insert(lock); + mut->rdlocks.insert(lock); + mut->locks.insert(lock); return true; } @@ -2662,17 +2661,17 @@ bool Locker::file_rdlock_start(FileLock *lock, MDRequest *mdr) if (lock->is_stable()) { file_lock(lock); // lock, bc easiest to back off ... 
FIXME - if (lock->can_rdlock(mdr)) { + if (lock->can_rdlock(mut)) { lock->get_rdlock(); - mdr->rdlocks.insert(lock); - mdr->locks.insert(lock); + mut->rdlocks.insert(lock); + mut->locks.insert(lock); lock->finish_waiters(SimpleLock::WAIT_STABLE); return true; } } else { dout(7) << "file_rdlock_start waiting until stable on " << *lock << " on " << *lock->get_parent() << dendl; - lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut)); return false; } } else { @@ -2684,13 +2683,13 @@ bool Locker::file_rdlock_start(FileLock *lock, MDRequest *mdr) int auth = in->authority().first; dout(7) << "file_rdlock_start " << *lock << " on " << *lock->get_parent() << " on replica and async, fw to auth " << auth << dendl; assert(auth != mds->get_nodeid()); - mdcache->request_forward(mdr, auth); + mdcache->request_forward(mut, auth); return false; } else { // wait until stable dout(7) << "inode_file_rdlock_start waiting until stable on " << *lock << " on " << *lock->get_parent() << dendl; - lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut)); return false; } } @@ -2698,21 +2697,21 @@ bool Locker::file_rdlock_start(FileLock *lock, MDRequest *mdr) // wait dout(7) << "file_rdlock_start waiting on " << *lock << " on " << *lock->get_parent() << dendl; - lock->add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_RD, new C_MDS_RetryRequest(mdcache, mut)); return false; } -void Locker::file_rdlock_finish(FileLock *lock, MDRequest *mdr) +void Locker::file_rdlock_finish(FileLock *lock, MDRequest *mut) { dout(7) << "rdlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; // drop ref lock->put_rdlock(); - mdr->rdlocks.erase(lock); - mdr->locks.erase(lock); + mut->rdlocks.erase(lock); + mut->locks.erase(lock); if 
(!lock->is_rdlocked()) { if (!lock->is_stable()) @@ -2722,54 +2721,60 @@ void Locker::file_rdlock_finish(FileLock *lock, MDRequest *mdr) } } -bool Locker::file_wrlock_start(FileLock *lock, bool force) +bool Locker::file_wrlock_start(FileLock *lock, MDRequest *mut, bool force) { dout(7) << "file_wrlock_start on " << *lock << " on " << *lock->get_parent() << dendl; assert(force || lock->can_wrlock()); lock->get_wrlock(); + mut->wrlocks.insert(lock); + mut->locks.insert(lock); return true; /* if (lock->can_wrlock()) { lock->get_wrlock(); - mdr->wrlocks.insert(lock); - mdr->locks.insert(lock); + mut->wrlocks.insert(lock); + mut->locks.insert(lock); return true; } else { - lock->add_waiter(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_WR|SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut)); return false; }*/ } -void Locker::file_wrlock_finish(FileLock *lock) +void Locker::file_wrlock_finish(FileLock *lock, Mutation *mut) { dout(7) << "wrlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; lock->put_wrlock(); - + if (mut) { + mut->wrlocks.erase(lock); + mut->locks.erase(lock); + } + if (!lock->is_wrlocked()) file_eval_gather(lock); } -bool Locker::file_xlock_start(FileLock *lock, MDRequest *mdr) +bool Locker::file_xlock_start(FileLock *lock, MDRequest *mut) { dout(7) << "file_xlock_start on " << *lock << " on " << *lock->get_parent() << dendl; assert(lock->get_parent()->is_auth()); // remote file xlock not implemented // already xlocked by me? - if (lock->get_xlocked_by() == mdr) + if (lock->get_xlocked_by() == mut) return true; // can't write? 
- if (!lock->can_xlock(mdr)) { + if (!lock->can_xlock(mut)) { // auth if (!lock->can_xlock_soon()) { if (!lock->is_stable()) { dout(7) << "file_xlock_start on auth, waiting for stable on " << *lock << " on " << *lock->get_parent() << dendl; - lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_STABLE, new C_MDS_RetryRequest(mdcache, mut)); return false; } @@ -2781,29 +2786,29 @@ bool Locker::file_xlock_start(FileLock *lock, MDRequest *mdr) } // check again - if (lock->can_xlock(mdr)) { + if (lock->can_xlock(mut)) { assert(lock->get_parent()->is_auth()); - lock->get_xlock(mdr); - mdr->locks.insert(lock); - mdr->xlocks.insert(lock); + lock->get_xlock(mut); + mut->locks.insert(lock); + mut->xlocks.insert(lock); return true; } else { dout(7) << "file_xlock_start on auth, waiting for write on " << *lock << " on " << *lock->get_parent() << dendl; - lock->add_waiter(SimpleLock::WAIT_WR, new C_MDS_RetryRequest(mdcache, mdr)); + lock->add_waiter(SimpleLock::WAIT_WR, new C_MDS_RetryRequest(mdcache, mut)); return false; } } -void Locker::file_xlock_finish(FileLock *lock, MDRequest *mdr) +void Locker::file_xlock_finish(FileLock *lock, MDRequest *mut) { dout(7) << "file_xlock_finish on " << *lock << " on " << *lock->get_parent() << dendl; // drop ref - assert(lock->can_xlock(mdr)); + assert(lock->can_xlock(mut)); lock->put_xlock(); - mdr->locks.erase(lock); - mdr->xlocks.erase(lock); + mut->locks.erase(lock); + mut->xlocks.erase(lock); assert(lock->get_parent()->is_auth()); // or implement remote xlocks diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 0c09a348d5da..666e030127cb 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -29,6 +29,9 @@ class Session; class CDir; class CInode; class CDentry; +class Mutation; +class MDRequest; +class EMetaBlob; class Message; @@ -76,18 +79,18 @@ public: set &wrlocks, set &xlocks); - void drop_locks(MDRequest *mdr); + void drop_locks(Mutation *mut); void 
eval_gather(SimpleLock *lock); protected: - bool rdlock_start(SimpleLock *lock, MDRequest *mdr); - void rdlock_finish(SimpleLock *lock, MDRequest *mdr); - bool xlock_start(SimpleLock *lock, MDRequest *mdr); + bool rdlock_start(SimpleLock *lock, MDRequest *mut); + void rdlock_finish(SimpleLock *lock, Mutation *mut); + bool xlock_start(SimpleLock *lock, MDRequest *mut); public: - void xlock_finish(SimpleLock *lock, MDRequest *mdr); // public for Server's slave UNXLOCK + void xlock_finish(SimpleLock *lock, Mutation *mut); // public for Server's slave UNXLOCK protected: - bool wrlock_start(SimpleLock *lock, MDRequest *mdr); - void wrlock_finish(SimpleLock *lock, MDRequest *mdr); + bool wrlock_start(SimpleLock *lock, MDRequest *mut); + void wrlock_finish(SimpleLock *lock, Mutation *mut); public: void rejoin_set_state(SimpleLock *lock, int s, list& waiters); @@ -102,10 +105,10 @@ protected: void handle_simple_lock(SimpleLock *lock, MLock *m); void simple_sync(SimpleLock *lock); void simple_lock(SimpleLock *lock); - bool simple_rdlock_start(SimpleLock *lock, MDRequest *mdr); - void simple_rdlock_finish(SimpleLock *lock, MDRequest *mdr); - bool simple_xlock_start(SimpleLock *lock, MDRequest *mdr); - void simple_xlock_finish(SimpleLock *lock, MDRequest *mdr); + bool simple_rdlock_start(SimpleLock *lock, MDRequest *mut); + void simple_rdlock_finish(SimpleLock *lock, Mutation *mut); + bool simple_xlock_start(SimpleLock *lock, MDRequest *mut); + void simple_xlock_finish(SimpleLock *lock, Mutation *mut); public: bool dentry_can_rdlock_trace(vector& trace); @@ -133,11 +136,11 @@ protected: void scatter_sync(ScatterLock *lock); void scatter_scatter(ScatterLock *lock); void scatter_tempsync(ScatterLock *lock); - bool scatter_rdlock_start(ScatterLock *lock, MDRequest *mdr); - void scatter_rdlock_finish(ScatterLock *lock, MDRequest *mdr); - bool scatter_wrlock_try(ScatterLock *lock); - bool scatter_wrlock_start(ScatterLock *lock, MDRequest *mdr); - void 
scatter_wrlock_finish(ScatterLock *lock, MDRequest *mdr); + bool scatter_rdlock_start(ScatterLock *lock, MDRequest *mut); + void scatter_rdlock_finish(ScatterLock *lock, Mutation *mut); + bool scatter_wrlock_try(ScatterLock *lock, Mutation *mut); + bool scatter_wrlock_start(ScatterLock *lock, MDRequest *mut); + void scatter_wrlock_finish(ScatterLock *lock, Mutation *mut); void scatter_writebehind(ScatterLock *lock); class C_Locker_ScatterWB : public Context { @@ -153,14 +156,14 @@ protected: void scatter_writebehind_finish(ScatterLock *lock, LogSegment *ls); public: - void predirty_nested(class EMetaBlob *blob, CInode *in, list &ls); + void predirty_nested(Mutation *mut, EMetaBlob *blob, CInode *in); // local protected: - bool local_wrlock_start(LocalLock *lock, MDRequest *mdr); - void local_wrlock_finish(LocalLock *lock, MDRequest *mdr); - bool local_xlock_start(LocalLock *lock, MDRequest *mdr); - void local_xlock_finish(LocalLock *lock, MDRequest *mdr); + bool local_wrlock_start(LocalLock *lock, MDRequest *mut); + void local_wrlock_finish(LocalLock *lock, Mutation *mut); + bool local_xlock_start(LocalLock *lock, MDRequest *mut); + void local_xlock_finish(LocalLock *lock, Mutation *mut); // file @@ -175,12 +178,12 @@ protected: void file_mixed(FileLock *lock); void file_loner(FileLock *lock); bool file_rdlock_try(FileLock *lock, Context *con); - bool file_rdlock_start(FileLock *lock, MDRequest *mdr); - void file_rdlock_finish(FileLock *lock, MDRequest *mdr); - bool file_wrlock_start(FileLock *lock, bool force=false); + bool file_rdlock_start(FileLock *lock, MDRequest *mut); + void file_rdlock_finish(FileLock *lock, Mutation *mut); + bool file_wrlock_start(FileLock *lock, MDRequest *mut, bool force=false); void file_wrlock_finish(FileLock *lock); - bool file_xlock_start(FileLock *lock, MDRequest *mdr); - void file_xlock_finish(FileLock *lock, MDRequest *mdr); + bool file_xlock_start(FileLock *lock, MDRequest *mut); + void file_xlock_finish(FileLock *lock, Mutation 
*mut); @@ -199,7 +202,7 @@ protected: void request_inode_file_caps(CInode *in); void handle_inode_file_caps(class MInodeFileCaps *m); - void file_update_finish(CInode *in, LogSegment *ls, list &nest_updates, bool share); + void file_update_finish(CInode *in, Mutation *mut, bool share); public: bool check_inode_max_size(CInode *in, bool forcewrlock=false); private: diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 95d5c472258a..a7a3b692c589 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -74,26 +74,7 @@ struct PVList { } }; -/** active_request_t - * state we track for requests we are currently processing. - * mostly information about locks held, so that we can drop them all - * the request is finished or forwarded. see request_*(). - */ -struct MDRequest { - metareqid_t reqid; - Session *session; - - // -- i am a client (master) request - MClientRequest *client_request; // client request (if any) - - vector trace; // original path traversal. - CInode *ref; // reference inode. if there is only one, and its path is pinned. - - // -- i am a slave request - MMDSSlaveRequest *slave_request; // slave request (if one is pending; implies slave == true) - int slave_to_mds; // this is a slave request if >= 0. 
- - // -- misc -- +struct Mutation { LogSegment *ls; // the log segment i'm committing to utime_t now; @@ -120,6 +101,104 @@ struct MDRequest { // for applying projected inode changes list projected_inodes; + list projected_fnodes; + + Mutation() : ls(0), + done_locking(false), committing(false), aborted(false) + {} + + // pin items in cache + void pin(MDSCacheObject *o) { + if (pins.count(o) == 0) { + o->get(MDSCacheObject::PIN_REQUEST); + pins.insert(o); + } + } + void set_stickydirs(CInode *in) { + if (stickydirs.count(in) == 0) { + in->get_stickydirs(); + stickydirs.insert(in); + } + } + + // auth pins + bool is_auth_pinned(MDSCacheObject *object) { + return auth_pins.count(object) || remote_auth_pins.count(object); + } + void auth_pin(MDSCacheObject *object) { + if (!is_auth_pinned(object)) { + object->auth_pin(); + auth_pins.insert(object); + } + } + void auth_unpin(MDSCacheObject *object) { + assert(is_auth_pinned(object)); + object->auth_unpin(); + auth_pins.erase(object); + } + void drop_local_auth_pins() { + for (set::iterator it = auth_pins.begin(); + it != auth_pins.end(); + it++) { + assert((*it)->is_auth()); + (*it)->auth_unpin(); + } + auth_pins.clear(); + } + + void add_projected_inode(CInode *in) { + projected_inodes.push_back(in); + } + void pop_and_dirty_projected_inodes() { + while (!projected_inodes.empty()) { + CInode *in = projected_inodes.front(); + projected_inodes.pop_front(); + in->pop_and_dirty_projected_inode(ls); + } + } + + void add_projected_fnode(CDir *dir) { + projected_fnodes.push_back(dir); + } + void pop_and_dirty_projected_fnodes() { + while (!projected_fnodes.empty()) { + CDir *dir = projected_fnodes.front(); + projected_fnodes.pop_front(); + dir->pop_and_dirty_projected_fnode(ls); + } + } + + virtual void print(ostream &out) { + out << "mutation(" << this << ")"; + } +}; + +inline ostream& operator<<(ostream& out, Mutation &mut) +{ + mut.print(out); + return out; +} + + +/** active_request_t + * state we track for requests we 
are currently processing. + * mostly information about locks held, so that we can drop them all + * the request is finished or forwarded. see request_*(). + */ +struct MDRequest : public Mutation { + metareqid_t reqid; + Session *session; + + // -- i am a client (master) request + MClientRequest *client_request; // client request (if any) + + vector trace; // original path traversal. + CInode *ref; // reference inode. if there is only one, and its path is pinned. + + // -- i am a slave request + MMDSSlaveRequest *slave_request; // slave request (if one is pending; implies slave == true) + int slave_to_mds; // this is a slave request if >= 0. + // break rarely-used fields into a separately allocated structure // to save memory for most ops @@ -157,20 +236,14 @@ struct MDRequest { MDRequest() : session(0), client_request(0), ref(0), slave_request(0), slave_to_mds(-1), - ls(0), - done_locking(false), committing(false), aborted(false), _more(0) {} MDRequest(metareqid_t ri, MClientRequest *req) : reqid(ri), session(0), client_request(req), ref(0), slave_request(0), slave_to_mds(-1), - ls(0), - done_locking(false), committing(false), aborted(false), _more(0) {} MDRequest(metareqid_t ri, int by) : reqid(ri), session(0), client_request(0), ref(0), slave_request(0), slave_to_mds(by), - ls(0), - done_locking(false), committing(false), aborted(false), _more(0) {} ~MDRequest() { delete _more; @@ -187,67 +260,16 @@ struct MDRequest { bool slave_did_prepare() { return more()->slave_commit; } - // pin items in cache - void pin(MDSCacheObject *o) { - if (pins.count(o) == 0) { - o->get(MDSCacheObject::PIN_REQUEST); - pins.insert(o); - } - } - void set_stickydirs(CInode *in) { - if (stickydirs.count(in) == 0) { - in->get_stickydirs(); - stickydirs.insert(in); - } - } - - // auth pins - bool is_auth_pinned(MDSCacheObject *object) { - return auth_pins.count(object) || remote_auth_pins.count(object); - } - void auth_pin(MDSCacheObject *object) { - if (!is_auth_pinned(object)) { - 
object->auth_pin(); - auth_pins.insert(object); - } - } - void auth_unpin(MDSCacheObject *object) { - assert(is_auth_pinned(object)); - object->auth_unpin(); - auth_pins.erase(object); - } - void drop_local_auth_pins() { - for (set::iterator it = auth_pins.begin(); - it != auth_pins.end(); - it++) { - assert((*it)->is_auth()); - (*it)->auth_unpin(); - } - auth_pins.clear(); - } - - void add_projected_inode(CInode *in) { - projected_inodes.push_back(in); - } - void pop_and_dirty_projected_inodes() { - while (!projected_inodes.empty()) { - CInode *in = projected_inodes.front(); - projected_inodes.pop_front(); - in->pop_and_dirty_projected_inode(ls); - } + void print(ostream &out) { + out << "request(" << reqid; + //if (request) out << " " << *request; + if (is_slave()) out << " slave_to mds" << slave_to_mds; + if (client_request) out << " cr=" << client_request; + if (slave_request) out << " sr=" << slave_request; + out << ")"; } }; -inline ostream& operator<<(ostream& out, MDRequest &mdr) -{ - out << "request(" << mdr.reqid; - //if (mdr.request) out << " " << *mdr.request; - if (mdr.is_slave()) out << " slave_to mds" << mdr.slave_to_mds; - if (mdr.client_request) out << " cr=" << mdr.client_request; - if (mdr.slave_request) out << " sr=" << mdr.slave_request; - out << ")"; - return out; -} struct MDSlaveUpdate { EMetaBlob commit; diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 8e8a025e06b1..f00b7665dd1b 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1542,26 +1542,6 @@ void Server::dirty_dn_diri(MDRequest *mdr, CDentry *dn, version_t dirpv) } } -void Server::predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in) -{ - list ls; - mds->locker->predirty_nested(blob, in, ls); - - for (list::iterator p = ls.begin(); - p != ls.end(); - p++) { - ScatterLock *lock = &(*p)->dirlock; - if (mdr->wrlocks.count(lock)) - lock->put_wrlock(); - else { - mdr->wrlocks.insert(lock); - mdr->locks.insert(lock); - } - mdr->add_projected_inode(*p); - } -} - - // 
=============================================================================== // STAT @@ -2037,6 +2017,7 @@ public: // dir inode's mtime mds->server->dirty_dn_diri(mdr, dn, dirpv); mdr->pop_and_dirty_projected_inodes(); + mdr->pop_and_dirty_projected_fnodes(); // hit pop mds->balancer->hit_inode(mdr->now, newi, META_POP_IWR); @@ -2075,9 +2056,9 @@ void Server::handle_client_mknod(MDRequest *mdr) EUpdate *le = new EUpdate(mdlog, "mknod"); le->metablob.add_client_req(req->get_reqid()); le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); + mds->locker->predirty_nested(mdr, &le->metablob, newi); version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // dir mtime too le->metablob.add_dir_context(dn->dir); - predirty_nested(mdr, &le->metablob, newi); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); // log + wait @@ -2122,7 +2103,7 @@ void Server::handle_client_mkdir(MDRequest *mdr) le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // dir mtime too le->metablob.add_dir_context(dn->dir); - predirty_nested(mdr, &le->metablob, newi); + mds->locker->predirty_nested(mdr, &le->metablob, newi); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); le->metablob.add_dir(newdir, true, true); // dirty AND complete @@ -2175,7 +2156,7 @@ void Server::handle_client_symlink(MDRequest *mdr) le->metablob.add_allocated_ino(newi->ino(), mds->idalloc->get_version()); version_t dirpv = predirty_dn_diri(mdr, dn, &le->metablob); // dir mtime too le->metablob.add_dir_context(dn->dir); - predirty_nested(mdr, &le->metablob, newi); + mds->locker->predirty_nested(mdr, &le->metablob, newi); le->metablob.add_primary_dentry(dn, true, newi, &newi->inode); // log + wait diff --git a/src/mds/Server.h b/src/mds/Server.h index 97b7f649eb21..599a06da0cf7 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -21,6 +21,7 @@ class Logger; class LogEvent; class 
C_MDS_rename_finish; class MDRequest; +class Mutation; class EMetaBlob; class EUpdate; class PVList; @@ -102,7 +103,6 @@ public: version_t predirty_dn_diri(MDRequest *mdr, CDentry *dn, class EMetaBlob *blob, int deltasize=0); void dirty_dn_diri(MDRequest *mdr, CDentry *dn, version_t dirpv); - void predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in); // requests on existing inodes. diff --git a/src/mds/SimpleLock.h b/src/mds/SimpleLock.h index ddb865b1ddd8..6b59852df8d1 100644 --- a/src/mds/SimpleLock.h +++ b/src/mds/SimpleLock.h @@ -71,7 +71,7 @@ inline const char *get_simplelock_state_name(int n) { */ -class MDRequest; +class Mutation; class SimpleLock { @@ -97,7 +97,7 @@ protected: // local state int num_rdlock; - MDRequest *xlock_by; + Mutation *xlock_by; public: @@ -193,7 +193,7 @@ public: } int get_num_rdlocks() { return num_rdlock; } - void get_xlock(MDRequest *who) { + void get_xlock(Mutation *who) { assert(xlock_by == 0); parent->get(MDSCacheObject::PIN_LOCK); xlock_by = who; @@ -204,10 +204,10 @@ public: xlock_by = 0; } bool is_xlocked() { return xlock_by ? true:false; } - bool is_xlocked_by_other(MDRequest *mdr) { + bool is_xlocked_by_other(Mutation *mdr) { return is_xlocked() && xlock_by != mdr; } - MDRequest *get_xlocked_by() { return xlock_by; } + Mutation *get_xlocked_by() { return xlock_by; } bool is_used() { return is_xlocked() || is_rdlocked() || num_client_lease; @@ -285,12 +285,12 @@ public: bool can_lease() { return state == LOCK_SYNC; } - bool can_rdlock(MDRequest *mdr) { + bool can_rdlock(Mutation *mdr) { //if (state == LOCK_LOCK && mdr && xlock_by == mdr) return true; // xlocked by me. (actually, is this right?) //if (state == LOCK_LOCK && !xlock_by && parent->is_auth()) return true; return (state == LOCK_SYNC); } - bool can_xlock(MDRequest *mdr) { + bool can_xlock(Mutation *mdr) { if (mdr && xlock_by == mdr) { assert(state == LOCK_LOCK); return true; // auth or replica! xlocked by me. 
diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index efa266fb1b6c..62c9aaa59c91 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -164,7 +164,8 @@ public: static const int STATE_COMPLETE = (1<<1); static const int STATE_DIRTY = (1<<2); // dirty due to THIS journal item, that is! - version_t dirv; + //version_t dirv; + fnode_t fnode; __u32 state; __u32 nfull, nremote, nnull; @@ -176,7 +177,7 @@ public: list dnull; public: - dirlump() : dirv(0), state(0), nfull(0), nremote(0), nnull(0), dn_decoded(true) { } + dirlump() : state(0), nfull(0), nremote(0), nnull(0), dn_decoded(true) { } bool is_complete() { return state & STATE_COMPLETE; } void mark_complete() { state |= STATE_COMPLETE; } @@ -188,7 +189,7 @@ public: list &get_dnull() { return dnull; } void print(dirfrag_t dirfrag, ostream& out) { - out << "dirlump " << dirfrag << " dirv " << dirv + out << "dirlump " << dirfrag << " v " << fnode.version << " state " << state << " num " << nfull << "/" << nremote << "/" << nnull << std::endl; @@ -216,7 +217,7 @@ public: } void encode(bufferlist& bl) const { - ::encode(dirv, bl); + ::encode(fnode, bl); ::encode(state, bl); ::encode(nfull, bl); ::encode(nremote, bl); @@ -225,7 +226,7 @@ public: ::encode(dnbl, bl); } void decode(bufferlist::iterator &bl) { - ::decode(dirv, bl); + ::decode(fnode, bl); ::decode(state, bl); ::decode(nfull, bl); ::decode(nremote, bl); @@ -415,12 +416,14 @@ private: dirlump& add_dir(CDir *dir, bool dirty, bool complete=false) { - return add_dir(dir->dirfrag(), dir->get_projected_version(), dirty, complete); + return add_dir(dir->dirfrag(), dir->get_projected_fnode(), dir->get_projected_version(), + dirty, complete); } - dirlump& add_dir(dirfrag_t df, version_t pv, bool dirty, bool complete=false) { + dirlump& add_dir(dirfrag_t df, fnode_t *pf, version_t pv, bool dirty, bool complete=false) { if (lump_map.count(df) == 0) { lump_order.push_back(df); - lump_map[df].dirv = pv; + lump_map[df].fnode = *pf; 
+ lump_map[df].fnode.version = pv; } dirlump& l = lump_map[df]; if (complete) l.mark_complete(); diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 38c52e4c5b5d..7d5494e443c9 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -293,7 +293,7 @@ void EMetaBlob::replay(MDS *mds, LogSegment *logseg) dout(10) << "EMetaBlob.replay added dir " << *dir << dendl; } - dir->set_version( lump.dirv ); + dir->set_version( lump.fnode.version ); if (lump.is_dirty()) dir->_mark_dirty(logseg); if (lump.is_complete())