From: Sage Weil Date: Wed, 28 May 2008 23:41:40 +0000 (-0700) Subject: mds: fixed various predirty_nested problems, rename, straydir locking. temp fragstat... X-Git-Tag: v0.3~170^2~31 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=05ab69bf500354d67539b9716074e8654bab2b9b;p=ceph.git mds: fixed various predirty_nested problems, rename, straydir locking. temp fragstat sanity checks. --- diff --git a/src/TODO b/src/TODO index 9e61c4836211..9aae898bf847 100644 --- a/src/TODO +++ b/src/TODO @@ -74,9 +74,6 @@ mds mustfix - anchor_destroy needs to xlock linklock.. which means it needs a Mutation wrapper? - ... when it gets a caller.. someday.. -- fix rm -r vs mds exports - - use new dir size values in dirstat - - replay of dir fragmentation (dont want dir frozen, pins, etc.?) - rename slave in-memory rollback on failure diff --git a/src/client/SyntheticClient.cc b/src/client/SyntheticClient.cc index 0af362c43d47..d4ec5c57f918 100644 --- a/src/client/SyntheticClient.cc +++ b/src/client/SyntheticClient.cc @@ -1489,8 +1489,9 @@ int SyntheticClient::full_walk(string& basedir) } } - if (actual.nsubdirs != expect.nsubdirs || - actual.nfiles != expect.nfiles) { + if (dir != "" && + (actual.nsubdirs != expect.nsubdirs || + actual.nfiles != expect.nfiles)) { dout(0) << dir << ": expected " << expect << dendl; dout(0) << dir << ": got " << actual << dendl; } @@ -2857,6 +2858,7 @@ int SyntheticClient::thrash_links(const char *basedir, int dirs, int files, int if (time_to_stop()) return 0; + srand(0); if (1) { for (int k=0; kadjust_nested_anchors(by); } +void CDir::verify_fragstat() +{ + assert(is_complete()); + if (inode->is_stray()) + return; + + frag_info_t c; + memset(&c, 0, sizeof(c)); + + for (map_t::iterator it = items.begin(); + it != items.end(); + it++) { + CDentry *dn = it->second; + if (dn->is_null()) + continue; + + dout(10) << " " << *dn << dendl; + if (dn->is_primary()) + dout(10) << " " << *dn->inode << dendl; + + if (dn->is_primary()) { + if (dn->inode->is_dir()) + c.nsubdirs++; + else + c.nfiles++; + } + if (dn->is_remote()) { + if (dn->get_remote_d_type() == (S_IFDIR >> 12)) + c.nsubdirs++; + else + c.nfiles++; + } + } + + if (c.nsubdirs != fnode.fragstat.nsubdirs || + c.nfiles != fnode.fragstat.nfiles) { + dout(0) << "verify_fragstat failed " << fnode.fragstat << " on " << *this << dendl; + dout(0) << " i count " << c << dendl; + assert(0); + } else { + dout(0) << "verify_fragstat ok " << fnode.fragstat << " on " << *this << dendl; + } +} /***************************************************************************** * FREEZING diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 5e87f83af261..9835409834cf 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -446,7 +446,9 @@ public: int get_nested_auth_pins() { return nested_auth_pins; } void auth_pin(); void auth_unpin(); + void adjust_nested_auth_pins(int inc); + void verify_fragstat(); int get_nested_anchors() { return nested_anchors; } void adjust_nested_anchors(int by); diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index c09caa0a6a90..c5c89032fedc 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1220,10 +1220,10 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, EMetaBlob *rollback) { bool primary_dn = flags & PREDIRTY_PRIMARY; - bool do_parent = flags & PREDIRTY_DIR; + bool do_parent_mtime = flags & PREDIRTY_DIR; dout(10) << "predirty_nested" - << (do_parent ? " do_parent_mtime":"") + << (do_parent_mtime ? " do_parent_mtime":"") << " linkunlink=" << linkunlink << (primary_dn ? " primary_dn":" remote_dn") << " " << *in << dendl; @@ -1233,8 +1233,8 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, parent = in->get_projected_parent_dn()->get_dir(); } - if (flags == 0) { - dout(10) << " no flags, just adding dir context to blob(s)" << dendl; + if (flags == 0 && linkunlink == 0) { + dout(10) << " no flags/linkunlink, just adding dir context to blob(s)" << dendl; blob->add_dir_context(parent); if (rollback) rollback->add_dir_context(parent); @@ -1256,8 +1256,9 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, // opportunistically adjust parent dirfrag CInode *pin = parent->get_inode(); - if (do_parent) { - assert(mut->wrlocks.count(&pin->dirlock)); + if (do_parent_mtime || linkunlink) { + assert(mut->wrlocks.count(&pin->dirlock) || + mut->is_slave()); // we are slave. master will have wrlocked the dir. } // inode -> dirfrag @@ -1266,17 +1267,18 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, fnode_t *pf = parent->project_fnode(); pf->version = parent->pre_dirty(); - if (do_parent) { - dout(10) << "predirty_nested updating mtime/size on " << *parent << dendl; + if (do_parent_mtime) { + dout(10) << "predirty_nested updating mtime on " << *parent << dendl; pf->fragstat.mtime = mut->now; if (mut->now > pf->fragstat.rctime) pf->fragstat.rctime = mut->now; - if (linkunlink) { - if (in->is_dir()) - pf->fragstat.nsubdirs += linkunlink; - else - pf->fragstat.nfiles += linkunlink; - } + } + if (linkunlink) { + dout(10) << "predirty_nested updating size on " << *parent << dendl; + if (in->is_dir()) + pf->fragstat.nsubdirs += linkunlink; + else + pf->fragstat.nfiles += linkunlink; } if (primary_dn) { if (linkunlink == 0) { @@ -1352,7 +1354,7 @@ void Locker::predirty_nested(Mutation *mut, EMetaBlob *blob, curi = pi; parent = cur->get_projected_parent_dn()->get_dir(); linkunlink = 0; - do_parent = false; + do_parent_mtime = false; primary_dn = true; } @@ -2028,8 +2030,9 @@ void Locker::scatter_wrlock_finish(ScatterLock *lock, Mutation *mut) mut->wrlocks.erase(lock); mut->locks.erase(lock); } - - scatter_eval_gather(lock); + + if (!lock->is_wrlocked()) + scatter_eval_gather(lock); } @@ -2395,17 +2398,17 @@ void Locker::scatter_sync(ScatterLock *lock) break; // do it. case LOCK_SCATTER: - // lock first. this is the slow way, incidentally. - if (lock->get_parent()->is_replicated()) { - send_lock_message(lock, LOCK_AC_LOCK); - lock->init_gather(); - } else { - if (!lock->is_wrlocked()) { - break; // do it now, we're fine - } - } + if (!lock->get_parent()->is_replicated() && + !lock->is_wrlocked()) + break; // do it now + lock->set_state(LOCK_GLOCKC); lock->get_parent()->auth_pin(); + + if (lock->get_parent()->is_replicated()) { + lock->init_gather(); + send_lock_message(lock, LOCK_AC_LOCK); + } return; default: @@ -2561,12 +2564,14 @@ void Locker::scatter_tempsync(ScatterLock *lock) break; // do it. } + lock->set_state(LOCK_GTEMPSYNCC); + lock->get_parent()->auth_pin(); + if (lock->get_parent()->is_replicated()) { - send_lock_message(lock, LOCK_AC_LOCK); lock->init_gather(); + send_lock_message(lock, LOCK_AC_LOCK); } - lock->set_state(LOCK_GTEMPSYNCC); - lock->get_parent()->auth_pin(); + return; case LOCK_TEMPSYNC: diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 87159039fe32..c5276b6c03ab 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -4017,6 +4017,10 @@ int MDCache::path_traverse(MDRequest *mdr, Message *req, // who } assert(curdir); + // HACK + if (curdir->is_complete()) + curdir->verify_fragstat(); + // frozen? /* if (curdir->is_frozen()) { diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index b442cef1b52e..a218618e7b2f 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -79,6 +79,9 @@ struct Mutation { LogSegment *ls; // the log segment i'm committing to utime_t now; + // flag mutation as slave + int slave_to_mds; // this is a slave request if >= 0. + // -- my pins and locks -- // cache pins (so things don't expire) set< MDSCacheObject* > pins; @@ -107,11 +110,16 @@ struct Mutation { Mutation() : ls(0), done_locking(false), committing(false), aborted(false) {} - Mutation(metareqid_t ri) : reqid(ri), - ls(0), - done_locking(false), committing(false), aborted(false) {} + Mutation(metareqid_t ri, int slave_to=-1) : + reqid(ri), + ls(0), + slave_to_mds(slave_to), + done_locking(false), committing(false), aborted(false) {} virtual ~Mutation() {} + bool is_master() { return slave_to_mds < 0; } + bool is_slave() { return slave_to_mds >= 0; } + // pin items in cache void pin(MDSCacheObject *o) { if (pins.count(o) == 0) { @@ -214,7 +222,6 @@ struct MDRequest : public Mutation { // -- i am a slave request MMDSSlaveRequest *slave_request; // slave request (if one is pending; implies slave == true) - int slave_to_mds; // this is a slave request if >= 0. // break rarely-used fields into a separately allocated structure @@ -252,25 +259,22 @@ struct MDRequest : public Mutation { // --------------------------------------------------- MDRequest() : session(0), client_request(0), ref(0), - slave_request(0), slave_to_mds(-1), + slave_request(0), _more(0) {} MDRequest(metareqid_t ri, MClientRequest *req) : Mutation(ri), session(0), client_request(req), ref(0), - slave_request(0), slave_to_mds(-1), + slave_request(0), _more(0) {} MDRequest(metareqid_t ri, int by) : - Mutation(ri), + Mutation(ri, by), session(0), client_request(0), ref(0), - slave_request(0), slave_to_mds(by), + slave_request(0), _more(0) {} ~MDRequest() { delete _more; } - bool is_master() { return slave_to_mds < 0; } - bool is_slave() { return slave_to_mds >= 0; } - More* more() { if (!_more) _more = new More(); return _more; diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index e9965486c42e..53eaaddf48b8 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -958,6 +958,9 @@ int Migrator::encode_export_dir(bufferlist& exportbl, assert(dir->get_projected_version() == dir->get_version()); + if (dir->is_complete()) + dir->verify_fragstat(); + // dir dirfrag_t df = dir->dirfrag(); ::encode(df, exportbl); @@ -2166,6 +2169,9 @@ int Migrator::decode_import_dir(bufferlist::iterator& blp, le->metablob.add_dentry(dn, dn->is_dirty()); } + if (dir->is_complete()) + dir->verify_fragstat(); + dout(7) << "decode_import_dir done " << *dir << dendl; return num_imported; } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 33e9daccb88c..11758cc6015d 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -629,6 +629,8 @@ void Server::set_trace_dist(Session *session, MClientReply *reply, CInode *in, C dout(20) << " trace added " << lmask << " " << *dn << dendl; // dir + if (dn->get_dir()->is_complete()) + dn->get_dir()->verify_fragstat(); DirStat::encode(bl, dn->get_dir(), whoami); dout(20) << " trace added " << *dn->get_dir() << dendl; @@ -1846,6 +1848,8 @@ void Server::handle_client_readdir(MDRequest *mdr) return; } + dir->verify_fragstat(); + mdr->now = g_clock.real_now(); // build dir contents @@ -3033,21 +3037,32 @@ void Server::handle_client_rename(MDRequest *mdr) dout(10) << " destdn " << *destdn << dendl; + bool linkmerge = (srcdn->inode == destdn->inode && + (srcdn->is_primary() || destdn->is_primary())); + if (linkmerge) + dout(10) << " this is a link merge" << dendl; + + // -- create stray dentry? -- + CDentry *straydn = 0; + if (destdn->is_primary() && !linkmerge) { + straydn = mdcache->get_or_create_stray_dentry(destdn->inode); + mdr->pin(straydn); + dout(10) << "straydn is " << *straydn << dendl; + } + // -- locks -- set rdlocks, wrlocks, xlocks; + // straydn? + if (straydn) + wrlocks.insert(&straydn->dir->inode->dirlock); + // rdlock sourcedir path, xlock src dentry for (int i=0; i<(int)srctrace.size()-1; i++) rdlocks.insert(&srctrace[i]->lock); xlocks.insert(&srcdn->lock); wrlocks.insert(&srcdn->dir->inode->dirlock); - /* - * no, this causes problems if the dftlock is scattered... - * and what was i thinking anyway? - * rdlocks.insert(&srcdn->dir->inode->dirfragtreelock); // rd lock on srci dirfragtree. - */ - // rdlock destdir path, xlock dest dentry for (int i=0; i<(int)desttrace.size(); i++) rdlocks.insert(&desttrace[i]->lock); @@ -3111,17 +3126,6 @@ void Server::handle_client_rename(MDRequest *mdr) if (mdr->now == utime_t()) mdr->now = g_clock.real_now(); - bool linkmerge = (srcdn->inode == destdn->inode && - (srcdn->is_primary() || destdn->is_primary())); - - // -- create stray dentry? -- - CDentry *straydn = 0; - if (destdn->is_primary() && !linkmerge) { - straydn = mdcache->get_or_create_stray_dentry(destdn->inode); - mdr->pin(straydn); - dout(10) << "straydn is " << *straydn << dendl; - } - // -- prepare witnesses -- /* * NOTE: we use _all_ replicas as witnesses. @@ -3404,22 +3408,21 @@ void Server::_rename_prepare(MDRequest *mdr, } // prepare nesting, mtime updates - if (mdr->is_master()) { - int predirty_dir = silent ? 0:PREDIRTY_DIR; - - // sub off target - if (!destdn->is_null()) - mds->locker->predirty_nested(mdr, metablob, destdn->inode, destdn->dir, - (destdn->is_primary() ? PREDIRTY_PRIMARY:0)|predirty_dir, -1, - rollback); - - // move srcdn - int predirty_primary = (srcdn->is_primary() && srcdn->dir != destdn->dir) ? PREDIRTY_PRIMARY:0; - int flags = predirty_dir | predirty_primary; - if (srcdn->is_auth()) - mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, flags, -1, rollback); + int predirty_dir = silent ? 0:PREDIRTY_DIR; + + // sub off target + if (destdn->is_auth() && !destdn->is_null()) + mds->locker->predirty_nested(mdr, metablob, destdn->inode, destdn->dir, + (destdn->is_primary() ? PREDIRTY_PRIMARY:0)|predirty_dir, -1, + rollback); + + // move srcdn + int predirty_primary = (srcdn->is_primary() && srcdn->dir != destdn->dir) ? PREDIRTY_PRIMARY:0; + int flags = predirty_dir | predirty_primary; + if (srcdn->is_auth()) + mds->locker->predirty_nested(mdr, metablob, srcdn->inode, srcdn->dir, flags, -1, rollback); + if (destdn->is_auth()) mds->locker->predirty_nested(mdr, metablob, srcdn->inode, destdn->dir, flags, 1, rollback); - } // add it all to the metablob // target inode