From: Sage Weil Date: Thu, 22 May 2008 23:46:01 +0000 (-0700) Subject: mds: added fnode_t. compiles, but untested. X-Git-Tag: v0.3~170^2~87 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=35988580c8131c11a60de6b61c8e324e9b27bf29;p=ceph.git mds: added fnode_t. compiles, but untested. --- diff --git a/src/include/types.h b/src/include/types.h index 86a81beef6e..d3c8121f506 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -297,7 +297,37 @@ static inline void decode(inode_t &i, bufferlist::iterator &p) { ::decode(i.file_data_version, p); } +/* + * like an inode, but for a dir frag + */ +struct fnode_t { + version_t version; + __u64 size; // files + dirs + __u64 nprimary, nremote; + __u64 nfiles; // files + __u64 nsubdirs; // subdirs + nested_info_t nested; // nested summation + void encode(bufferlist &bl) const { + ::encode(version, bl); + ::encode(size, bl); + ::encode(nprimary, bl); + ::encode(nremote, bl); + ::encode(nfiles, bl); + ::encode(nsubdirs, bl); + ::encode(nested, bl); + } + void decode(bufferlist::iterator &bl) { + ::decode(version, bl); + ::decode(size, bl); + ::decode(nprimary, bl); + ::decode(nremote, bl); + ::decode(nfiles, bl); + ::decode(nsubdirs, bl); + ::decode(nested, bl); + } +}; +WRITE_CLASS_ENCODER(fnode_t) diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index e635f8c84d3..d0243a77784 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -132,7 +132,8 @@ CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) : state = STATE_INITIAL; - projected_version = version = 0; + memset(&fnode, 0, sizeof(fnode)); + committing_version = 0; committed_version_equivalent = committed_version = 0; @@ -157,6 +158,12 @@ CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) : + + + + + + /*** * linking fun */ @@ -173,7 +180,7 @@ CDentry* CDir::add_null_dentry(const string& dname) cache->lru.lru_insert_mid(dn); dn->dir = this; - dn->version = projected_version; + dn->version = get_projected_version(); // add to dir assert(items.count(dn->name) == 0); @@ -205,7 +212,7 @@ CDentry* CDir::add_primary_dentry(const string& dname, CInode *in) cache->lru.lru_insert_mid(dn); dn->dir = this; - dn->version = projected_version; + dn->version = get_projected_version(); // add to dir assert(items.count(dn->name) == 0); @@ -236,7 +243,7 @@ CDentry* CDir::add_remote_dentry(const string& dname, inodeno_t ino, unsigned ch cache->lru.lru_insert_mid(dn); dn->dir = this; - dn->version = projected_version; + dn->version = get_projected_version(); // add to dir assert(items.count(dn->name) == 0); @@ -392,10 +399,10 @@ void CDir::try_remove_unlinked_dn(CDentry *dn) dn->mark_clean(); remove_dentry(dn); - if (version == projected_version && + if (!is_projected() && committing_version == committed_version && num_dirty == 0) { - dout(10) << "try_remove_unlinked_dn committed_equivalent now " << version + dout(10) << "try_remove_unlinked_dn committed_equivalent now " << get_version() << " vs committed " << committed_version << dendl; committed_version_equivalent = committed_version; @@ -477,8 +484,17 @@ void CDir::steal_dentry(CDentry *dn) get(PIN_CHILD); if (dn->is_null()) nnull++; - else + else { nitems++; + fnode.size++; + if (dn->is_primary()) { + fnode.nprimary++; + fnode.nested.rbytes += dn->get_inode()->inode.accounted_nested.rbytes; + fnode.nested.rfiles += dn->get_inode()->inode.accounted_nested.rfiles; + } else { + fnode.nremote++; + } + } nested_auth_pins += dn->auth_pins + dn->nested_auth_pins; if (dn->is_dirty()) @@ -541,8 +557,7 @@ void CDir::split(int bits, list& subs, list& waiters) f->replica_map = replica_map; f->dir_auth = dir_auth; f->init_fragment_pins(); - f->version = version; - f->projected_version = projected_version; + f->fnode.version = get_version(); f->pop_me = pop_me; f->pop_me *= fac; @@ -752,39 +767,59 @@ void CDir::finish_waiting(int mask, int result) // dirty/clean +fnode_t *CDir::project_fnode() +{ + fnode_t *p = new fnode_t; + *p = *get_projected_fnode(); + projected_fnode.push_back(p); + dout(10) << "project_fnode " << p << dendl; + return p; +} + version_t CDir::pre_dirty(version_t min) { - if (min > projected_version) - projected_version = min; - ++projected_version; - dout(10) << "pre_dirty " << projected_version << dendl; - return projected_version; + fnode_t *pf = project_fnode(); + if (min > pf->version) + pf->version = min; + ++pf->version; + dout(10) << "pre_dirty " << pf->version << dendl; + return pf->version; +} + +void CDir::mark_dirty(version_t pv, LogSegment *ls) +{ + assert(get_version() < pv); + pop_and_dirty_projected_fnode(ls); +} + +void CDir::pop_and_dirty_projected_fnode(LogSegment *ls) +{ + assert(!projected_fnode.empty()); + dout(15) << "pop_and_dirty_projected_fnode " << projected_fnode.front() + << " v" << projected_fnode.front()->version << dendl; + _mark_dirty(ls); + fnode = *projected_fnode.front(); + delete projected_fnode.front(); + projected_fnode.pop_front(); } void CDir::_mark_dirty(LogSegment *ls) { if (!state_test(STATE_DIRTY)) { state_set(STATE_DIRTY); - dout(10) << "mark_dirty (was clean) " << *this << " version " << version << dendl; + dout(10) << "mark_dirty (was clean) " << *this << " version " << get_version() << dendl; get(PIN_DIRTY); assert(ls); } else { - dout(10) << "mark_dirty (already dirty) " << *this << " version " << version << dendl; + dout(10) << "mark_dirty (already dirty) " << *this << " version " << get_version() << dendl; } if (ls) ls->dirty_dirfrags.push_back(&xlist_dirty); } -void CDir::mark_dirty(version_t pv, LogSegment *ls) -{ - assert(version < pv); - version = pv; - _mark_dirty(ls); -} - void CDir::mark_clean() { - dout(10) << "mark_clean " << *this << " version " << version << dendl; + dout(10) << "mark_clean " << *this << " version " << get_version() << dendl; if (state_test(STATE_DIRTY)) { state_clear(STATE_DIRTY); put(PIN_DIRTY); @@ -875,11 +910,11 @@ void CDir::_fetched(bufferlist &bl) // decode. int len = bl.length(); bufferlist::iterator p = bl.begin(); - version_t got_version; - - ::decode(got_version, p); - dout(10) << "_fetched version " << got_version + fnode_t got_fnode; + ::decode(got_fnode, p); + + dout(10) << "_fetched version " << got_fnode.version << ", " << len << " bytes" << dendl; @@ -1008,13 +1043,13 @@ void CDir::_fetched(bufferlist &bl) */ if (committed_version == 0 && dn && - dn->get_version() <= got_version && + dn->get_version() <= got_fnode.version && dn->is_dirty()) { dout(10) << "_fetched had underwater dentry " << *dn << ", marking clean" << dendl; dn->mark_clean(); if (dn->get_inode()) { - assert(dn->get_inode()->get_version() <= got_version); + assert(dn->get_inode()->get_version() <= got_fnode.version); dout(10) << "_fetched had underwater inode " << *dn->get_inode() << ", marking clean" << dendl; dn->get_inode()->mark_clean(); } @@ -1022,12 +1057,13 @@ void CDir::_fetched(bufferlist &bl) } //assert(off == len); FIXME no, directories may shrink. add this back in when we properly truncate objects on write. - // take the loaded version? + // take the loaded fnode? // only if we are a fresh CDir* with no prior state. - if (version == 0) { - assert(projected_version == 0); + if (get_version() == 0) { + assert(!is_projected()); assert(!state_test(STATE_COMMITTING)); - projected_version = version = committing_version = committed_version = got_version; + fnode = got_fnode; + committing_version = committed_version = got_fnode.version; } //cache->mds->logger->inc("newin", num_new_inodes_loaded); @@ -1056,10 +1092,10 @@ void CDir::_fetched(bufferlist &bl) void CDir::commit(version_t want, Context *c) { dout(10) << "commit want " << want << " on " << *this << dendl; - if (want == 0) want = version; + if (want == 0) want = get_version(); // preconditions - assert(want <= version || version == 0); // can't commit the future + assert(want <= get_version() || get_version() == 0); // can't commit the future assert(want > committed_version); // the caller is stupid assert(is_auth()); assert(can_auth_pin()); @@ -1106,7 +1142,7 @@ void CDir::_commit(version_t want) // we can't commit things in the future. // (even the projected future.) - assert(want <= version || version == 0); + assert(want <= get_version() || get_version() == 0); // check pre+postconditions. assert(is_auth()); @@ -1132,7 +1168,7 @@ void CDir::_commit(version_t want) } // commit. - committing_version = version; + committing_version = get_version(); // mark committing (if not already) if (!state_test(STATE_COMMITTING)) { @@ -1145,7 +1181,7 @@ void CDir::_commit(version_t want) // encode bufferlist bl; - ::encode(version, bl); + ::encode(fnode, bl); int32_t n = nitems; ::encode(n, bl); @@ -1200,7 +1236,7 @@ void CDir::_commit(version_t want) cache->mds->objecter->osdmap->file_to_object_layout( get_ondisk_object(), g_default_mds_dir_layout ), bl, 0, - NULL, new C_Dir_Committed(this, version) ); + NULL, new C_Dir_Committed(this, get_version()) ); } @@ -1224,7 +1260,7 @@ void CDir::_committed(version_t v) state_clear(CDir::STATE_COMMITTING); // dir clean? - if (committed_version == version) + if (committed_version == get_version()) mark_clean(); // dentries clean? @@ -1288,7 +1324,8 @@ void CDir::_committed(version_t v) void CDir::encode_export(bufferlist& bl) { - ::encode(version, bl); + assert(!is_projected()); + ::encode(fnode, bl); ::encode(committed_version, bl); ::encode(committed_version_equivalent, bl); @@ -1314,11 +1351,10 @@ void CDir::finish_export(utime_t now) void CDir::decode_import(bufferlist::iterator& blp) { - ::decode(version, blp); + ::decode(fnode, blp); ::decode(committed_version, blp); ::decode(committed_version_equivalent, blp); committing_version = committed_version; - projected_version = version; unsigned s; ::decode(s, blp); diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 1cac78f7004..9ede2cbed10 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -155,10 +155,36 @@ class CDir : public MDSCacheObject { //int hack_num_accessed; +protected: + fnode_t fnode; + list projected_fnode; + +public: + version_t get_version() { return fnode.version; } + void set_version(version_t v) { + assert(projected_fnode.empty()); + fnode.version = v; + } + fnode_t *get_projected_fnode() { + if (projected_fnode.empty()) + return &fnode; + else + return projected_fnode.back(); + } + version_t get_projected_version() { return get_projected_fnode()->version; } + fnode_t *project_fnode(); + void pop_and_dirty_projected_fnode(LogSegment *ls); + bool is_projected() { return !projected_fnode.empty(); } + version_t pre_dirty(version_t min=0); + void _mark_dirty(LogSegment *ls); + void mark_dirty(version_t pv, LogSegment *ls); + void mark_clean(); + public: //typedef hash_map map_t; // there is a bug somewhere, valgrind me. typedef map map_t; protected: + // contents map_t items; // non-null AND null unsigned nitems; // # non-null @@ -167,13 +193,10 @@ protected: int num_dirty; - // state - version_t version; version_t committing_version; version_t committed_version; version_t committed_version_equivalent; // in case of, e.g., temporary file - version_t projected_version; xlist::item xlist_dirty; @@ -348,18 +371,11 @@ private: void wait_for_commit(Context *c, version_t v=0); // -- dirtyness -- - version_t get_version() { return version; } - void set_version(version_t v) { projected_version = version = v; } - version_t get_projected_version() { return projected_version; } version_t get_committing_version() { return committing_version; } version_t get_committed_version() { return committed_version; } version_t get_committed_version_equivalent() { return committed_version_equivalent; } void set_committed_version(version_t v) { committed_version = v; } - version_t pre_dirty(version_t min=0); - void _mark_dirty(LogSegment *ls); - void mark_dirty(version_t pv, LogSegment *ls); - void mark_clean(); void mark_complete() { state_set(STATE_COMPLETE); } diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 0d826621646..d8a38e9be07 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -657,15 +657,41 @@ void CInode::clear_dirty_scattered(int type) case CEPH_LOCK_IDIR: xlist_dirty_inode_mtime.remove_myself(); break; + case CEPH_LOCK_INESTED: assert(0); // hmm! break; + default: assert(0); } } +void CInode::finish_scatter_gather_update(int type) +{ + dout(10) << "finish_scatter_gather_update " << type << " on " << *this << dendl; + switch (type) { + case CEPH_LOCK_IDIR: + { + inode_t *pi = get_projected_inode(); + pi->size = 0; + for (map::iterator p = dirfrag_size.begin(); p != dirfrag_size.end(); ++p) + pi->size += p->second; + } + break; + + case CEPH_LOCK_INESTED: + assert(0); // hmm! + break; + + default: + assert(0); + } + + +} + // waiting diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 5fe837c0a1a..e855e92f659 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -361,6 +361,7 @@ public: void decode_lock_state(int type, bufferlist& bl); void clear_dirty_scattered(int type); + void finish_scatter_gather_update(int type); // -- caps -- (new) // client caps diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 459118aedac..f887f88735f 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -2129,6 +2129,8 @@ void Locker::scatter_writebehind(ScatterLock *lock) inode_t *pi = in->project_inode(); pi->mtime = in->inode.mtime; // make sure an intermediate version isn't goofing us up pi->version = in->pre_dirty(); + + lock->get_parent()->finish_scatter_gather_update(lock->get_type()); EUpdate *le = new EUpdate(mds->mdlog, "scatter writebehind"); le->metablob.add_dir_context(in->get_parent_dn()->get_dir()); diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 33fde1bec8d..8438c2ba8f0 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1483,7 +1483,7 @@ CDir* Server::try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequest *mdr) * predirty the directory inode for a new dentry, if it is auth (and not root) * BUG: root inode doesn't get dirtied properly, currently. blech. */ -version_t Server::predirty_dn_diri(MDRequest *mdr, CDentry *dn, EMetaBlob *blob) +version_t Server::predirty_dn_diri(MDRequest *mdr, CDentry *dn, EMetaBlob *blob, int deltasize) { version_t dirpv = 0; CInode *diri = dn->dir->inode; @@ -1496,10 +1496,13 @@ version_t Server::predirty_dn_diri(MDRequest *mdr, CDentry *dn, EMetaBlob *blob) dirpv = diri->pre_dirty(); dout(10) << "predirty_dn_diri ctime/mtime " << mdr->now << " pv " << dirpv << " on " << *diri << dendl; + diri->dirfrag_size[dn->dir->dirfrag().frag] += deltasize; + // predirty+journal inode_t *pi = diri->project_inode(); if (dirpv) pi->version = dirpv; pi->ctime = pi->mtime = mdr->now; + pi->size += deltasize; blob->add_dir_context(diri->get_parent_dn()->get_dir()); blob->add_primary_dentry(diri->get_parent_dn(), true, 0, pi); } else { diff --git a/src/mds/Server.h b/src/mds/Server.h index 840492aae27..97b7f649eb2 100644 --- a/src/mds/Server.h +++ b/src/mds/Server.h @@ -100,7 +100,7 @@ public: CDir* try_open_auth_dirfrag(CInode *diri, frag_t fg, MDRequest *mdr); - version_t predirty_dn_diri(MDRequest *mdr, CDentry *dn, class EMetaBlob *blob); + version_t predirty_dn_diri(MDRequest *mdr, CDentry *dn, class EMetaBlob *blob, int deltasize=0); void dirty_dn_diri(MDRequest *mdr, CDentry *dn, version_t dirpv); void predirty_nested(MDRequest *mdr, EMetaBlob *blob, CInode *in); diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 2be5b361a28..9b342b6607f 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -768,6 +768,7 @@ protected: virtual bool is_lock_waiting(int type, int mask) { assert(0); return false; } virtual void clear_dirty_scattered(int type) { assert(0); } + virtual void finish_scatter_gather_update(int type) { } // --------------------------------------------- // ordering