From: Sage Weil Date: Mon, 12 May 2008 23:39:52 +0000 (-0700) Subject: mds: more nesting lock, data type work X-Git-Tag: v0.3~170^2~92 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=523d184b9df3a5b24aff7ebc9ac05bed6ce8845b;p=ceph.git mds: more nesting lock, data type work --- diff --git a/src/include/types.h b/src/include/types.h index aebe285ff5f7..a9272ba3c0ca 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -199,6 +199,21 @@ struct FileLayout { __u8 fl_pg_pool; /* implies crush ruleset AND object namespace */ }; +struct nested_info_t { + uint64_t nested_size; // \sum_{children}(size + nested_size) + utime_t nested_ctime; // \max_{children}(ctime, nested_ctime) + + void encode(bufferlist &bl) const { + ::encode(nested_size, bl); + ::encode(nested_ctime, bl); + } + void decode(bufferlist::iterator &bl) { + ::decode(nested_size, bl); + ::decode(nested_ctime, bl); + } +}; +WRITE_CLASS_ENCODER(nested_info_t) + struct inode_t { // base (immutable) inodeno_t ino; @@ -224,9 +239,8 @@ struct inode_t { utime_t atime; // file data access time. uint64_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes()) - // recursive accounting - uint64_t nested_size; // \sum_{children}(size + nested_size) - utime_t nested_ctime; // \max_{children}(ctime, nested_ctime) + // dirfrag, recursive accounting + nested_info_t nested; // inline summation // special stuff version_t version; // auth only @@ -252,7 +266,7 @@ static inline void encode(const inode_t &i, bufferlist &bl) { ::encode(i.max_size, bl); ::encode(i.mtime, bl); ::encode(i.atime, bl); - ::encode(i.rmtime, bl); + ::encode(i.nested, bl); ::encode(i.version, bl); ::encode(i.file_data_version, bl); } @@ -270,7 +284,7 @@ static inline void decode(inode_t &i, bufferlist::iterator &p) { ::decode(i.max_size, p); ::decode(i.mtime, p); ::decode(i.atime, p); - ::decode(i.rmtime, p); + ::decode(i.nested, p); ::decode(i.version, p); ::decode(i.file_data_version, p); } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 92d5004fcf5d..3f996892c81c 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -521,15 +521,16 @@ void CInode::encode_lock_state(int type, bufferlist& bl) break; case CEPH_LOCK_IDIR: - ::encode(inode.mtime, bl); - if (0) { + { + ::encode(inode.size, bl); + ::encode(inode.mtime, bl); map frag_sizes; for (map::iterator p = dirfrags.begin(); p != dirfrags.end(); ++p) if (p->second->is_auth()) { - //frag_t fg = (*p)->get_frag(); - //frag_sizes[f] = dirfrag_size[fg]; + frag_t fg = (*p)->get_frag(); + frag_sizes[f] = dirfrag_size[fg]; } ::encode(frag_sizes, bl); } @@ -540,8 +541,17 @@ void CInode::encode_lock_state(int type, bufferlist& bl) break; case CEPH_LOCK_INESTED: - //_encode(inode.nested_ctime, bl); - //_encode(inode.nested_size, bl); + { + map dfn; + for (map::iterator p = dirfrags.begin(); + p != dirfrags.end(); + ++p) + if (p->second->is_auth()) { + frag_t fg = (*p)->get_frag(); + dfn[fg] = dirfrag_nested[fg]; + } + ::encode(dfn, bl); + } break; default: @@ -552,7 +562,6 @@ void CInode::encode_lock_state(int type, bufferlist& bl) void CInode::decode_lock_state(int type, bufferlist& bl) { bufferlist::iterator p = bl.begin(); - utime_t tm; switch (type) { case CEPH_LOCK_IAUTH: @@ -594,21 +603,31 @@ void CInode::decode_lock_state(int type, bufferlist& bl) break; case CEPH_LOCK_IDIR: - //::_decode(inode.size, p); - ::decode(tm, p); - if (inode.mtime < tm) { - inode.mtime = tm; - if (is_auth()) { - dout(10) << "decode_lock_state auth got mtime " << tm << " > my " << inode.mtime - << ", setting dirlock updated flag on " << *this - << dendl; - dirlock.set_updated(); - } - } - if (0) { + { + utime_t tm; + uint64_t sz; map dfsz; + ::decode(sz, p); + ::decode(tm, p); ::decode(dfsz, p); - // hmm which to keep? + + if (is_auth()) { + if (tm > inode.mtime) { + dout(10) << "decode_lock_state auth got mtime " << tm << " > my " << inode.mtime + << ", setting dirlock updated flag on " << *this + << dendl; + inode.mtime = tm; + dirlock.set_updated(); + } + for (map::iterator p = dfsz.begin(); p != dfsz.end(); ++p) { + dirfragtree.force_to_leaf(p->first); + dirfrag_size[p->first] = p->second; + } + } else { + inode.mtime = tm; + inode.size = sz; + dirfrag_size.swap(dfsz); + } } break; @@ -617,7 +636,14 @@ void CInode::decode_lock_state(int type, bufferlist& bl) break; case CEPH_LOCK_INESTED: - // *** + { + map dfn; + ::decode(dfn, p); + for (map::iterator p = dfn.begin(); p != dfn.end(); ++p) { + dirfragtree.force_to_leaf(p->first); + dirfrag_nested[p->first] = p->second; + } + } break; default: @@ -632,6 +658,9 @@ void CInode::clear_dirty_scattered(int type) case CEPH_LOCK_IDIR: xlist_dirty_inode_mtime.remove_myself(); break; + case CEPH_LOCK_INESTED: + assert(0); // hmm! + break; default: assert(0); } diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 3fa0f183229c..df58af954132 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -136,6 +136,7 @@ class CInode : public MDSCacheObject { map xattrs; fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map. map dirfrag_size; // size of each dirfrag + map dirfrag_nested; off_t last_journaled; // log offset for the last time i was journaled off_t last_open_journaled; // log offset for the last journaled EOpen diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 0238c6f9555d..2a620f5c96a8 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -341,6 +341,7 @@ void Locker::eval_gather(SimpleLock *lock) return file_eval_gather((FileLock*)lock); case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: + case CEPH_LOCK_INESTED: return scatter_eval_gather((ScatterLock*)lock); default: return simple_eval_gather(lock); @@ -354,6 +355,7 @@ bool Locker::rdlock_start(SimpleLock *lock, MDRequest *mdr) return file_rdlock_start((FileLock*)lock, mdr); case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: + case CEPH_LOCK_INESTED: return scatter_rdlock_start((ScatterLock*)lock, mdr); default: return simple_rdlock_start(lock, mdr); @@ -367,6 +369,7 @@ void Locker::rdlock_finish(SimpleLock *lock, MDRequest *mdr) return file_rdlock_finish((FileLock*)lock, mdr); case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: + case CEPH_LOCK_INESTED: return scatter_rdlock_finish((ScatterLock*)lock, mdr); default: return simple_rdlock_finish(lock, mdr); @@ -378,6 +381,7 @@ bool Locker::wrlock_start(SimpleLock *lock, MDRequest *mdr) switch (lock->get_type()) { case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: + case CEPH_LOCK_INESTED: return scatter_wrlock_start((ScatterLock*)lock, mdr); case CEPH_LOCK_IVERSION: return local_wrlock_start((LocalLock*)lock, mdr); @@ -394,6 +398,7 @@ void Locker::wrlock_finish(SimpleLock *lock, MDRequest *mdr) switch (lock->get_type()) { case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: + case CEPH_LOCK_INESTED: return scatter_wrlock_finish((ScatterLock*)lock, mdr); case CEPH_LOCK_IVERSION: return local_wrlock_finish((LocalLock*)lock, mdr); @@ -411,6 +416,7 @@ bool Locker::xlock_start(SimpleLock *lock, MDRequest *mdr) return local_xlock_start((LocalLock*)lock, mdr); case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: + case CEPH_LOCK_INESTED: assert(0); default: return simple_xlock_start(lock, mdr); @@ -426,6 +432,7 @@ void Locker::xlock_finish(SimpleLock *lock, MDRequest *mdr) return local_xlock_finish((LocalLock*)lock, mdr); case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: + case CEPH_LOCK_INESTED: assert(0); default: return simple_xlock_finish(lock, mdr); @@ -1235,6 +1242,7 @@ SimpleLock *Locker::get_lock(int lock_type, MDSCacheObjectInfo &info) case CEPH_LOCK_IFILE: case CEPH_LOCK_IDIR: case CEPH_LOCK_IXATTR: + case CEPH_LOCK_INESTED: { CInode *in = mdcache->get_inode(info.ino); if (!in) { @@ -1248,6 +1256,7 @@ SimpleLock *Locker::get_lock(int lock_type, MDSCacheObjectInfo &info) case CEPH_LOCK_IFILE: return &in->filelock; case CEPH_LOCK_IDIR: return &in->dirlock; case CEPH_LOCK_IXATTR: return &in->xattrlock; + case CEPH_LOCK_INESTED: return &in->nestedlock; } } @@ -1286,6 +1295,7 @@ void Locker::handle_lock(MLock *m) case CEPH_LOCK_IDFT: case CEPH_LOCK_IDIR: + case CEPH_LOCK_INESTED: handle_scatter_lock((ScatterLock*)lock, m); break;