From 5bd96198d939e15b11cf7772569a469feb16c3d6 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 30 Jul 2008 11:57:03 -0700 Subject: [PATCH] mds: fix up scatter/gather, bug in project_rstat_to_inode --- src/mds/CInode.cc | 73 ++++++++++++++++++++++++++++------------------ src/mds/MDCache.cc | 22 +++++++++++--- 2 files changed, 62 insertions(+), 33 deletions(-) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 232c0d8c6f5c2..cb981edd755ca 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -586,7 +586,7 @@ void CInode::encode_lock_state(int type, bufferlist& bl) p != dirfrags.end(); ++p) { frag_t fg = p->first; - CDir *dir = dir; + CDir *dir = p->second; if (is_auth() || dir->is_auth()) { dout(15) << fg << " " << *dir << dendl; dout(20) << fg << " fragstat " << dir->fnode.fragstat << dendl; @@ -758,15 +758,16 @@ void CInode::decode_lock_state(int type, bufferlist& bl) dirlock.set_updated(); } } else { - if (dir && - dir->is_auth() && - !(dir->fnode.accounted_fragstat == fragstat)) { + if (dir && dir->is_auth()) { dout(10) << fg << " first " << dir->first << " -> " << fgfirst << " on " << *dir << dendl; dir->first = fgfirst; + dout(10) << fg << " setting accounted_fragstat and setting dirty bit" << dendl; fnode_t *pf = dir->get_projected_fnode(); pf->accounted_fragstat = fragstat; + pf->fragstat.version = fragstat.version; + assert(pf->fragstat == fragstat); dir->_set_dirty_flag(); // bit of a hack } } @@ -814,17 +815,17 @@ void CInode::decode_lock_state(int type, bufferlist& bl) nestlock.set_updated(); } } else { - if (dir && - dir->is_auth() && - !(dir->fnode.accounted_rstat == rstat)) { + if (dir && dir->is_auth()) { dout(10) << fg << " first " << dir->first << " -> " << fgfirst << " on " << *dir << dendl; dir->first = fgfirst; - dout(10) << fg << " setting accounted_rstat and setting dirty bit" << dendl; + dout(10) << fg << " resetting accounted_rstat and setting dirty bit" << dendl; fnode_t *pf = dir->get_projected_fnode(); pf->accounted_rstat = 
rstat; - dir->dirty_old_rstat.swap(dirty_old_rstat); + pf->rstat.version = rstat.version; + assert(rstat == pf->rstat); + dir->dirty_old_rstat.clear(); dir->_set_dirty_flag(); // bit of a hack, FIXME? } } @@ -878,27 +879,30 @@ void CInode::finish_scatter_gather_update(int type) // adjust summation assert(is_auth()); inode_t *pi = get_projected_inode(); + bool touched_mtime = false; - dout(20) << " orig dirstat " << pi->dirstat << dendl; + dout(20) << " orig dirstat " << pi->dirstat << dendl; for (map::iterator p = dirfrags.begin(); p != dirfrags.end(); p++) { - fnode_t *pf = p->second->get_projected_fnode(); - if (true) { // FIXME pf->accounted_fragstat.version == pi->dirstat.version) { - dout(20) << " frag " << p->first << " " << *p->second << dendl; - dout(20) << " fragstat " << pf->fragstat << dendl; - dout(20) << " accounted_fragstat " << pf->accounted_fragstat << dendl; - pi->dirstat.take_diff(pf->fragstat, - pf->accounted_fragstat, touched_mtime); + frag_t fg = p->first; + CDir *dir = p->second; + dout(20) << fg << " " << *dir << dendl; + fnode_t *pf = dir->get_projected_fnode(); + if (pf->accounted_fragstat.version == pi->dirstat.version) { + dout(20) << fg << " fragstat " << pf->fragstat << dendl; + dout(20) << fg << " accounted_fragstat " << pf->accounted_fragstat << dendl; + pi->dirstat.take_diff(pf->fragstat, pf->accounted_fragstat, touched_mtime); } else { - dout(20) << " frag " << p->first << " on " << *p->second << dendl; - dout(20) << " ignoring OLD accounted_fragstat " << pf->accounted_fragstat << dendl; + dout(20) << fg << " skipping OLD accounted_fragstat " << pf->accounted_fragstat << dendl; + pf->accounted_fragstat = pf->fragstat; } + pf->fragstat.version = pf->accounted_fragstat.version = pi->dirstat.version + 1; } if (touched_mtime) pi->mtime = pi->ctime = pi->dirstat.mtime; pi->dirstat.version++; - dout(20) << " final dirstat " << pi->dirstat << dendl; + dout(20) << " final dirstat " << pi->dirstat << dendl; assert(pi->dirstat.size() >= 0); 
assert(pi->dirstat.nfiles >= 0); assert(pi->dirstat.nsubdirs >= 0); @@ -910,23 +914,34 @@ void CInode::finish_scatter_gather_update(int type) // adjust summation assert(is_auth()); inode_t *pi = get_projected_inode(); - dout(20) << " orig rstat " << pi->rstat << dendl; + dout(20) << " orig rstat " << pi->rstat << dendl; for (map::iterator p = dirfrags.begin(); p != dirfrags.end(); p++) { + frag_t fg = p->first; CDir *dir = p->second; + dout(20) << fg << " " << *dir << dendl; fnode_t *pf = dir->get_projected_fnode(); - mdcache->project_rstat_frag_to_inode(pf->rstat, pf->accounted_rstat, - dir->first, CEPH_NOSNAP, this, true); - for (map::iterator q = dir->dirty_old_rstat.begin(); - q != dir->dirty_old_rstat.end(); - q++) - mdcache->project_rstat_frag_to_inode(q->second.rstat, q->second.accounted_rstat, - q->second.first, q->first, this, true); + if (pf->accounted_rstat.version == pi->rstat.version) { + dout(20) << fg << " rstat " << pf->rstat << dendl; + dout(20) << fg << " accounted_rstat " << pf->accounted_rstat << dendl; + dout(20) << fg << " dirty_old_rstat " << dir->dirty_old_rstat << dendl; + mdcache->project_rstat_frag_to_inode(pf->rstat, pf->accounted_rstat, + dir->first, CEPH_NOSNAP, this, true); + for (map::iterator q = dir->dirty_old_rstat.begin(); + q != dir->dirty_old_rstat.end(); + q++) + mdcache->project_rstat_frag_to_inode(q->second.rstat, q->second.accounted_rstat, + q->second.first, q->first, this, true); + } else { + dout(20) << fg << " skipping OLD accounted_rstat " << pf->accounted_rstat << dendl; + pf->accounted_rstat = pf->rstat; + } dir->dirty_old_rstat.clear(); + pf->rstat.version = pf->accounted_rstat.version = pi->rstat.version + 1; } pi->rstat.version++; - dout(20) << " final rstat " << pi->rstat << dendl; + dout(20) << " final rstat " << pi->rstat << dendl; assert(pi->rstat.rfiles >= 0); assert(pi->rstat.rsubdirs >= 0); } diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 8a3ad5d3f198d..32468610d27d3 100644 --- 
a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1268,15 +1268,22 @@ void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accou first = pin->first; pin->cow_old_inode(last, pi_to_cow); } else { + // our life is easier here because old_inodes is not sparse + // (although it may not begin at snapid 1) map::iterator p = pin->old_inodes.lower_bound(last); if (p == pin->old_inodes.end()) { dout(10) << " no old_inode <= " << last << ", done." << dendl; break; } first = p->second.first; + if (first > last) { + dout(10) << " oldest old_inode is [" << first << "," << p->first << "], done." << dendl; + assert(p == pin->old_inodes.begin()); + break; + } if (p->first > last) { dout(10) << " splitting right old_inode [" << first << "," << p->first << "] to [" - << (last+1) << "," << p->first << dendl; + << (last+1) << "," << p->first << "]" << dendl; pin->old_inodes[last] = p->second; p->second.first = last+1; pin->dirty_old_rstats.insert(p->first); @@ -1284,7 +1291,7 @@ void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accou } if (first < ofirst) { dout(10) << " splitting left old_inode [" << first << "," << last << "] to [" - << first << "," << ofirst-1 << dendl; + << first << "," << ofirst-1 << "]" << dendl; pin->old_inodes[ofirst-1] = pin->old_inodes[last]; pin->dirty_old_rstats.insert(ofirst-1); pin->old_inodes[last].first = first = ofirst; @@ -1293,7 +1300,6 @@ void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accou pin->dirty_old_rstats.insert(last); } dout(10) << " projecting to [" << first << "," << last << "] " << pi->rstat << dendl; - pi->rstat.version++; pi->rstat.add(delta); dout(15) << " result [" << first << "," << last << "] " << pi->rstat << dendl; @@ -1478,7 +1484,6 @@ void MDCache::predirty_journal_parents(Mutation *mut, EMetaBlob *blob, // dirstat if (do_parent_mtime || linkunlink) { - pi->dirstat.version++; dout(15) << "predirty_journal_parents take_diff " << pf->fragstat << dendl; 
dout(15) << "predirty_journal_parents - " << pf->accounted_fragstat << dendl; bool touched_mtime = false; @@ -2363,6 +2368,15 @@ void MDCache::recalc_auth_bits() * - any surviving replica in SCATTER state -> SCATTER. otherwise, SYNC. * - include base inode in ack for all inodes that saw scatterlock content * + * also, for scatter gather, + * + * - auth increments {frag,r}stat.version on completion of any gather. + * + * - auth incorporates changes in a gather _only_ if the version + * matches. + * + * - replica discards changes any time the scatterlock syncs, and + * after recovery. */ -- 2.39.5