From: Yan, Zheng Date: Sun, 12 Jan 2014 12:36:25 +0000 (+0800) Subject: mds: revert commit 15a5d37a X-Git-Tag: v0.78~165^2~27 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2c909cda0eb1bf0299b44687e0aba241b59b39f6;p=ceph.git mds: revert commit 15a5d37a commit 15a5d37a (mds: fix race between scatter gather and dirfrag export) is incomplete; it doesn't handle the race where no fragstat/neststat is gathered. The previous commit prevents scatter gather while exporting a dir, which eliminates races of this type. Signed-off-by: Yan, Zheng --- diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 3697929e8628..de29c6c312d5 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -154,7 +154,6 @@ ostream& CDir::print_db_line_prefix(ostream& out) // CDir CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) : - mseq(0), dirty_rstat_inodes(member_offset(CInode, dirty_rstat_item)), item_dirty(this), item_new(this), pop_me(ceph_clock_now(g_ceph_context)), @@ -2102,8 +2101,6 @@ void CDir::_committed(version_t v) void CDir::encode_export(bufferlist& bl) { assert(!is_projected()); - ceph_seq_t seq = mseq + 1; - ::encode(seq, bl); ::encode(first, bl); ::encode(fnode, bl); ::encode(dirty_old_rstat, bl); @@ -2133,7 +2130,6 @@ void CDir::finish_export(utime_t now) void CDir::decode_import(bufferlist::iterator& blp, utime_t now, LogSegment *ls) { - ::decode(mseq, blp); ::decode(first, blp); ::decode(fnode, blp); ::decode(dirty_old_rstat, blp); diff --git a/src/mds/CDir.h b/src/mds/CDir.h index db11c060623b..2c0b7e45a67c 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -170,7 +170,6 @@ public: fnode_t fnode; snapid_t first; - ceph_seq_t mseq; // migrate sequence map dirty_old_rstat; // [value.first,key] // my inodes with dirty rstat data @@ -555,7 +554,6 @@ public: void encode_export(bufferlist& bl); void finish_export(utime_t now); void abort_export() { - mseq += 2; put(PIN_TEMPEXPORTING); } void decode_import(bufferlist::iterator& blp, utime_t 
now, LogSegment *ls); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 71550d155322..48ee839da9eb 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1231,7 +1231,6 @@ void CInode::encode_lock_state(int type, bufferlist& bl) dout(20) << fg << " fragstat " << pf->fragstat << dendl; dout(20) << fg << " accounted_fragstat " << pf->accounted_fragstat << dendl; ::encode(fg, tmp); - ::encode(dir->mseq, tmp); ::encode(dir->first, tmp); ::encode(pf->fragstat, tmp); ::encode(pf->accounted_fragstat, tmp); @@ -1267,7 +1266,6 @@ void CInode::encode_lock_state(int type, bufferlist& bl) dout(10) << fg << " " << pf->rstat << dendl; dout(10) << fg << " " << dir->dirty_old_rstat << dendl; ::encode(fg, tmp); - ::encode(dir->mseq, tmp); ::encode(dir->first, tmp); ::encode(pf->rstat, tmp); ::encode(pf->accounted_rstat, tmp); @@ -1426,12 +1424,10 @@ void CInode::decode_lock_state(int type, bufferlist& bl) dout(10) << " ...got " << n << " fragstats on " << *this << dendl; while (n--) { frag_t fg; - ceph_seq_t mseq; snapid_t fgfirst; frag_info_t fragstat; frag_info_t accounted_fragstat; ::decode(fg, p); - ::decode(mseq, p); ::decode(fgfirst, p); ::decode(fragstat, p); ::decode(accounted_fragstat, p); @@ -1444,12 +1440,6 @@ void CInode::decode_lock_state(int type, bufferlist& bl) assert(dir); // i am auth; i had better have this dir open dout(10) << fg << " first " << dir->first << " -> " << fgfirst << " on " << *dir << dendl; - if (dir->fnode.fragstat.version == get_projected_inode()->dirstat.version && - ceph_seq_cmp(mseq, dir->mseq) < 0) { - dout(10) << " mseq " << mseq << " < " << dir->mseq << ", ignoring" << dendl; - continue; - } - dir->mseq = mseq; dir->first = fgfirst; dir->fnode.fragstat = fragstat; dir->fnode.accounted_fragstat = accounted_fragstat; @@ -1494,13 +1484,11 @@ void CInode::decode_lock_state(int type, bufferlist& bl) ::decode(n, p); while (n--) { frag_t fg; - ceph_seq_t mseq; snapid_t fgfirst; nest_info_t rstat; nest_info_t accounted_rstat; map 
dirty_old_rstat; ::decode(fg, p); - ::decode(mseq, p); ::decode(fgfirst, p); ::decode(rstat, p); ::decode(accounted_rstat, p); @@ -1515,12 +1503,6 @@ void CInode::decode_lock_state(int type, bufferlist& bl) assert(dir); // i am auth; i had better have this dir open dout(10) << fg << " first " << dir->first << " -> " << fgfirst << " on " << *dir << dendl; - if (dir->fnode.rstat.version == get_projected_inode()->rstat.version && - ceph_seq_cmp(mseq, dir->mseq) < 0) { - dout(10) << " mseq " << mseq << " < " << dir->mseq << ", ignoring" << dendl; - continue; - } - dir->mseq = mseq; dir->first = fgfirst; dir->fnode.rstat = rstat; dir->fnode.accounted_rstat = accounted_rstat; @@ -1650,36 +1632,6 @@ void CInode::start_scatter(ScatterLock *lock) } } -/* - * set dirfrag_version to inode_version - 1. so that we can use dirfrag version - * to check if we have gathered scatter state for a given dirfrag. - */ -void CInode::start_scatter_gather(ScatterLock *lock, int auth) -{ - assert(is_auth()); - inode_t *pi = get_projected_inode(); - - for (map::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - CDir *dir = p->second; - - if (dir->is_auth()) - continue; - if (auth >= 0 && dir->authority().first != auth) - continue; - - switch (lock->get_type()) { - case CEPH_LOCK_IFILE: - dir->fnode.fragstat.version = pi->dirstat.version - 1; - break; - case CEPH_LOCK_INEST: - dir->fnode.rstat.version = pi->rstat.version - 1; - break; - } - } -} - struct C_Inode_FragUpdate : public Context { CInode *in; CDir *dir; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index d10ba24940f3..3b50560fb549 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -654,7 +654,6 @@ public: void clear_scatter_dirty(); // on rejoin ack void start_scatter(ScatterLock *lock); - void start_scatter_gather(ScatterLock *lock, int auth=-1); void finish_scatter_update(ScatterLock *lock, CDir *dir, version_t inode_version, version_t dir_accounted_version); void finish_scatter_gather_update(int type); diff 
--git a/src/mds/Locker.cc b/src/mds/Locker.cc index 827bb9c26f24..1906c49bc4c1 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -777,9 +777,8 @@ void Locker::eval_gather(SimpleLock *lock, bool first, bool *pneed_issue, listget_parent()->is_replicated()) { dout(10) << " finished (local) gather for mix->lock, now gathering from replicas" << dendl; send_lock_message(lock, LOCK_AC_LOCK); - lock->set_state(LOCK_MIX_LOCK2); lock->init_gather(); - in->start_scatter_gather(static_cast(lock)); + lock->set_state(LOCK_MIX_LOCK2); return; } @@ -3521,7 +3520,7 @@ bool Locker::simple_sync(SimpleLock *lock, bool *need_issue) assert(lock->is_stable()); CInode *in = 0; - if (lock->get_type() != CEPH_LOCK_DN) + if (lock->get_cap_shift()) in = static_cast(lock->get_parent()); int old_state = lock->get_state(); @@ -3543,11 +3542,10 @@ bool Locker::simple_sync(SimpleLock *lock, bool *need_issue) if (lock->get_parent()->is_replicated() && old_state == LOCK_MIX) { send_lock_message(lock, LOCK_AC_SYNC); lock->init_gather(); - in->start_scatter_gather(static_cast(lock)); gather++; } - if (lock->get_cap_shift() && in->is_head()) { + if (in && in->is_head()) { if (in->issued_caps_need_gather(lock)) { if (need_issue) *need_issue = true; @@ -3658,7 +3656,7 @@ void Locker::simple_lock(SimpleLock *lock, bool *need_issue) assert(lock->get_state() != LOCK_LOCK); CInode *in = 0; - if (lock->get_type() != CEPH_LOCK_DN) + if (lock->get_cap_shift()) in = static_cast(lock->get_parent()); int old_state = lock->get_state(); @@ -3685,7 +3683,7 @@ void Locker::simple_lock(SimpleLock *lock, bool *need_issue) } if (lock->is_rdlocked()) gather++; - if (lock->get_cap_shift() && in->is_head()) { + if (in && in->is_head()) { if (in->issued_caps_need_gather(lock)) { if (need_issue) *need_issue = true; @@ -3718,8 +3716,6 @@ void Locker::simple_lock(SimpleLock *lock, bool *need_issue) gather++; send_lock_message(lock, LOCK_AC_LOCK); lock->init_gather(); - if (lock->get_state() == LOCK_MIX_LOCK2) - 
in->start_scatter_gather(static_cast(lock)); } } @@ -4125,9 +4121,8 @@ void Locker::scatter_tempsync(ScatterLock *lock, bool *need_issue) if (lock->get_state() == LOCK_MIX_TSYN && in->is_replicated()) { - send_lock_message(lock, LOCK_AC_LOCK); lock->init_gather(); - in->start_scatter_gather(static_cast(lock)); + send_lock_message(lock, LOCK_AC_LOCK); gather++; } @@ -4451,8 +4446,6 @@ void Locker::file_excl(ScatterLock *lock, bool *need_issue) lock->get_state() != LOCK_XSYN_EXCL) { // if we were lock, replicas are already lock. send_lock_message(lock, LOCK_AC_LOCK); lock->init_gather(); - if (lock->get_state() == LOCK_MIX_EXCL) - in->start_scatter_gather(static_cast(lock)); gather++; } if (lock->is_leased()) { diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 2e0f1c734238..0e2073223ab5 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -3812,7 +3812,6 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) dout(15) << " add_strong_dirfrag " << *dir << dendl; rejoin->add_strong_dirfrag(dir->dirfrag(), dir->get_replica_nonce(), dir->get_dir_rep()); dir->state_set(CDir::STATE_REJOINING); - dir->mseq = 0; for (CDir::map_t::iterator p = dir->items.begin(); p != dir->items.end(); @@ -3974,15 +3973,11 @@ void MDCache::handle_cache_rejoin_weak(MMDSCacheRejoin *weak) ++p) { CInode *in = get_inode(p->first); assert(in); - if (survivor) { - in->start_scatter_gather(&in->filelock, from); - in->start_scatter_gather(&in->nestlock, from); - } else { - rejoin_potential_updated_scatterlocks.insert(in); - } in->decode_lock_state(CEPH_LOCK_IFILE, p->second.file); in->decode_lock_state(CEPH_LOCK_INEST, p->second.nest); in->decode_lock_state(CEPH_LOCK_IDFT, p->second.dft); + if (!survivor) + rejoin_potential_updated_scatterlocks.insert(in); } // recovering peer may send incorrect dirfrags here. we need to