From 23ade8a7a8046066203d4e1ef6298b9e596e1601 Mon Sep 17 00:00:00 2001 From: sageweil Date: Thu, 8 Mar 2007 01:58:41 +0000 Subject: [PATCH] more dirfrag prep work. cleaned out MDStore. some CInode and CDir constant cleanup. git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1180 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/TODO | 10 +- branches/sage/cephmds2/include/types.h | 4 +- branches/sage/cephmds2/mds/CDir.cc | 31 +- branches/sage/cephmds2/mds/CDir.h | 100 +-- branches/sage/cephmds2/mds/CInode.cc | 28 +- branches/sage/cephmds2/mds/CInode.h | 86 +- branches/sage/cephmds2/mds/Locker.cc | 82 +- branches/sage/cephmds2/mds/MDCache.cc | 170 +--- branches/sage/cephmds2/mds/MDCache.h | 11 +- branches/sage/cephmds2/mds/MDS.cc | 11 +- branches/sage/cephmds2/mds/MDStore.cc | 752 ------------------ branches/sage/cephmds2/mds/MDStore.h | 75 -- branches/sage/cephmds2/mds/Migrator.cc | 24 +- branches/sage/cephmds2/mds/Renamer.cc | 14 +- branches/sage/cephmds2/mds/Server.cc | 32 +- branches/sage/cephmds2/mds/events/EExport.h | 16 +- branches/sage/cephmds2/mds/journal.cc | 19 +- branches/sage/cephmds2/mds/mdstypes.h | 1 + .../sage/cephmds2/script/check_cache_dumps.pl | 56 ++ 19 files changed, 318 insertions(+), 1204 deletions(-) delete mode 100644 branches/sage/cephmds2/mds/MDStore.cc delete mode 100644 branches/sage/cephmds2/mds/MDStore.h create mode 100755 branches/sage/cephmds2/script/check_cache_dumps.pl diff --git a/branches/sage/cephmds2/TODO b/branches/sage/cephmds2/TODO index 54bc43aa1e916..79577772c1fdc 100644 --- a/branches/sage/cephmds2/TODO +++ b/branches/sage/cephmds2/TODO @@ -12,7 +12,7 @@ doc mds -- bystanders should avoid contacting auth when it is ambiguous. +- bystanders should avoid contacting auth when it is ambiguous? - CDIR_WAIT_UNAMBIGUOUS? - during export prep phase, - if importer is trying to open dirs on a node that fails, what happens? @@ -20,15 +20,17 @@ mds 2- could force a CDir replica, since it'll get rejoined into cache later. 
- is this related to solving the larger problem of discover vs mds failure? - locker vs node failure -- do i need openingdir pins? won't the DIR waiter on the inode be sufficient? - does inode need it's own replica list? no? - osd needs a set_floor_and_read op for safe failover/STOGITH-like semantics. - failures during recovery stages (resolve, rejoin)... make sure rejoin still works! - fix mds initial osdmap weirdness (which will currently screw up on standby -> almost anything) - incremental mdsmaps - client failure -- EMetablob should return 'expired' if they have - higher versions (and are thus described by a newer journal entry) + +- EMetablob should return 'expired' if they have higher versions (and are thus described by a newer journal entry) +- dir version/committed/etc versus migration, log expires. + - DOCUMENT. + - mds failure vs clients - clean up client op redirection - idempotent ops diff --git a/branches/sage/cephmds2/include/types.h b/branches/sage/cephmds2/include/types.h index 294c2876017d5..096f13e2f3fd4 100644 --- a/branches/sage/cephmds2/include/types.h +++ b/branches/sage/cephmds2/include/types.h @@ -223,7 +223,7 @@ namespace __gnu_cxx { #define INODE_MASK_PERM 2 // uid, gid, mode #define INODE_MASK_SIZE 4 // size, blksize, blocks #define INODE_MASK_CTIME 8 // ctime -#define INODE_MASK_MTIME 16 // mtime +#define INODE_MASK_MTIME 16 // mtime #define INODE_MASK_ATIME 32 // atime #define INODE_MASK_ALL_STAT (INODE_MASK_BASE|INODE_MASK_PERM|INODE_MASK_SIZE|INODE_MASK_MTIME) @@ -231,7 +231,7 @@ namespace __gnu_cxx { struct inode_t { // base (immutable) - inodeno_t ino; // NOTE: ino _must_ come first for MDStore.cc to behave!! + inodeno_t ino; // other. FileLayout layout; // ?immutable? 
diff --git a/branches/sage/cephmds2/mds/CDir.cc b/branches/sage/cephmds2/mds/CDir.cc index 4a95ff6f7f567..77a431649f548 100644 --- a/branches/sage/cephmds2/mds/CDir.cc +++ b/branches/sage/cephmds2/mds/CDir.cc @@ -396,7 +396,7 @@ void CDir::add_waiter(int tag, void CDir::add_waiter(int tag, Context *c) { // hierarchical? - if (tag & CDIR_WAIT_ATFREEZEROOT && (is_freezing() || is_frozen())) { + if (tag & WAIT_ATFREEZEROOT && (is_freezing() || is_frozen())) { if (is_freezing_tree_root() || is_frozen_tree_root() || is_freezing_dir() || is_frozen_dir()) { // it's us, pin here. (fall thru) @@ -578,7 +578,7 @@ void CDir::fetch(Context *c) { dout(10) << "fetch on " << *this << endl; - if (c) add_waiter(CDIR_WAIT_COMPLETE, c); + if (c) add_waiter(WAIT_COMPLETE, c); // alrady fetching? if (state_test(CDir::STATE_FETCHING)) { @@ -611,7 +611,7 @@ void CDir::_fetch_dir_read(off_t read_off, bufferlist &bl) //ondisk_size = 0; // kick waiters? - finish_waiting(CDIR_WAIT_COMPLETE, -1); + finish_waiting(WAIT_COMPLETE, -1); return; } @@ -743,10 +743,10 @@ void CDir::_fetch_dir_read(off_t read_off, bufferlist &bl) state_clear(STATE_FETCHING); // kick waiters - finish_waiting(CDIR_WAIT_COMPLETE, 0); + finish_waiting(WAIT_COMPLETE, 0); /* list waiters; - take_waiting(CDIR_WAIT_COMPLETE, waiters); + take_waiting(WAIT_COMPLETE, waiters); cache->mds->queue_finished(waiters); */ } @@ -816,7 +816,7 @@ void CDir::commit(version_t want, Context *c) // authpinnable? if (!can_auth_pin()) { dout(7) << "can't auth_pin, waiting" << endl; - add_waiter(CDIR_WAIT_AUTHPINNABLE, + add_waiter(WAIT_AUTHPINNABLE, new C_Dir_RetryCommit(this, want)); return; } @@ -966,9 +966,6 @@ void CDir::_committed(version_t v) waiting_for_commit.erase(p); p = n; } - - // finish (FIXME) - finish_waiting(CDIR_WAIT_COMMITTED, 0); } @@ -1186,7 +1183,7 @@ void CDir::on_freezeable() particularly graceful, and might cause problems if the first one needs to know about other waiters.... FIXME? 
*/ - finish_waiting(CDIR_WAIT_FREEZEABLE); + finish_waiting(WAIT_FREEZEABLE); } // FREEZE TREE @@ -1217,7 +1214,7 @@ void CDir::freeze_tree(Context *c) dout(10) << "freeze_tree + wait " << *this << endl; // need to wait for auth pins to expire - add_waiter(CDIR_WAIT_FREEZEABLE, new C_MDS_FreezeTree(this, c)); + add_waiter(WAIT_FREEZEABLE, new C_MDS_FreezeTree(this, c)); } } @@ -1228,7 +1225,7 @@ void CDir::freeze_tree_finish(Context *c) // wait again! dout(10) << "freeze_tree_finish still waiting " << *this << endl; state_set(STATE_FREEZINGTREE); - add_waiter(CDIR_WAIT_FREEZEABLE, new C_MDS_FreezeTree(this, c)); + add_waiter(WAIT_FREEZEABLE, new C_MDS_FreezeTree(this, c)); return; } @@ -1271,14 +1268,14 @@ void CDir::unfreeze_tree() inode->auth_unpin(); // waiters? - finish_waiting(CDIR_WAIT_UNFREEZE); + finish_waiting(WAIT_UNFREEZE); } else { // freezing. stop it. assert(state_test(STATE_FREEZINGTREE)); state_clear(STATE_FREEZINGTREE); // cancel freeze waiters - finish_waiting(CDIR_WAIT_FREEZEABLE, -1); + finish_waiting(WAIT_FREEZEABLE, -1); } } @@ -1352,7 +1349,7 @@ void CDir::freeze_dir(Context *c) dout(10) << "freeze_dir + wait " << *this << endl; // need to wait for auth pins to expire - add_waiter(CDIR_WAIT_FREEZEABLE, new C_MDS_FreezeDir(this, c)); + add_waiter(WAIT_FREEZEABLE, new C_MDS_FreezeDir(this, c)); } } @@ -1381,7 +1378,7 @@ void CDir::freeze_dir_finish(Context *c) // wait again! dout(10) << "freeze_dir_finish still waiting " << *this << endl; state_set(STATE_FREEZINGDIR); - add_waiter(CDIR_WAIT_FREEZEABLE, new C_MDS_FreezeDir(this, c)); + add_waiter(WAIT_FREEZEABLE, new C_MDS_FreezeDir(this, c)); } } @@ -1395,7 +1392,7 @@ void CDir::unfreeze_dir() inode->auth_unpin(); // waiters? 
- finish_waiting(CDIR_WAIT_UNFREEZE); + finish_waiting(WAIT_UNFREEZE); } diff --git a/branches/sage/cephmds2/mds/CDir.h b/branches/sage/cephmds2/mds/CDir.h index ea07f373978aa..2f3b49d2c6bc5 100644 --- a/branches/sage/cephmds2/mds/CDir.h +++ b/branches/sage/cephmds2/mds/CDir.h @@ -43,71 +43,13 @@ class MDCluster; class Context; +// -- authority delegation -- // directory authority types // >= 0 is the auth mds #define CDIR_AUTH_PARENT -1 // default #define CDIR_AUTH_UNKNOWN -2 -#define CDIR_AUTH_DEFAULT pair(CDIR_AUTH_PARENT,CDIR_AUTH_UNKNOWN) -#define CDIR_AUTH_UNDEF pair(CDIR_AUTH_UNKNOWN,CDIR_AUTH_UNKNOWN) - -#define CDIR_NONCE_EXPORT 1 - - - - - - - - - - -// wait reasons -#define CDIR_WAIT_DENTRY 1 // wait for item to be in cache - // waiters: path_traverse - // trigger: handle_discover, fetch_dir_2 -#define CDIR_WAIT_COMPLETE 2 // wait for complete dir contents - // waiters: fetch_dir, commit_dir - // trigger: fetch_dir_2 -#define CDIR_WAIT_FREEZEABLE 4 // hard_pins removed - // waiters: freeze, freeze_finish - // trigger: auth_unpin, adjust_nested_auth_pins -#define CDIR_WAIT_UNFREEZE 8 // unfreeze - // waiters: path_traverse, handle_discover, handle_inode_update, - // export_dir_frozen (mdcache) - // handle_client_readdir (mds) - // trigger: unfreeze -#define CDIR_WAIT_AUTHPINNABLE CDIR_WAIT_UNFREEZE - // waiters: commit_dir (mdstore) - // trigger: (see CDIR_WAIT_UNFREEZE) -#define CDIR_WAIT_COMMITTED 32 // did commit (who uses this?**) - // waiters: commit_dir (if already committing) - // trigger: commit_dir_2 -#define CDIR_WAIT_IMPORTED 64 // import finish - // waiters: import_dir_block - // triggers: handle_export_dir_finish - -#define CDIR_WAIT_EXPORTWARNING 8192 // on bystander. 
- // watiers: handle_export_dir_notify - // triggers: handle_export_dir_warning -#define CDIR_WAIT_EXPORTPREPACK 16384 - // waiter export_dir - // trigger handel_export_dir_prep_ack - -#define CDIR_WAIT_HASHED (1<<17) // hash finish -#define CDIR_WAIT_THISHASHEDREADDIR (1<<18) // current readdir lock -#define CDIR_WAIT_NEXTHASHEDREADDIR (1<<19) // after current readdir lock finishes - -#define CDIR_WAIT_DNREAD (1<<20) -#define CDIR_WAIT_DNLOCK (1<<21) -#define CDIR_WAIT_DNUNPINNED (1<<22) -#define CDIR_WAIT_DNPINNABLE (CDIR_WAIT_DNREAD|CDIR_WAIT_DNUNPINNED) - -#define CDIR_WAIT_DNREQXLOCK (1<<23) - -#define CDIR_WAIT_ANY (0xffffffff) - -#define CDIR_WAIT_ATFREEZEROOT (CDIR_WAIT_AUTHPINNABLE|\ - CDIR_WAIT_UNFREEZE) // hmm, same same +#define CDIR_AUTH_DEFAULT pair(-1, -2) +#define CDIR_AUTH_UNDEF pair(-2, -2) ostream& operator<<(ostream& out, class CDir& dir); @@ -212,6 +154,38 @@ class CDir : public MDSCacheObject { static const int REP_LIST = 2; + static const int NONCE_EXPORT = 1; + + + // -- wait masks -- + static const int WAIT_DENTRY = (1<<0); // wait for item to be in cache + // waiters: path_traverse + // trigger: handle_discover, fetch_dir_2 + static const int WAIT_COMPLETE = (1<<1); // wait for complete dir contents + // waiters: fetch_dir, commit_dir + // trigger: fetch_dir_2 + static const int WAIT_FREEZEABLE = (1<<2); // hard_pins removed + // waiters: freeze, freeze_finish + // trigger: auth_unpin, adjust_nested_auth_pins + static const int WAIT_UNFREEZE = (1<<3); // unfreeze + // waiters: path_traverse, handle_discover, handle_inode_update, + // export_dir_frozen (mdcache) + // handle_client_readdir (mds) + // trigger: unfreeze + static const int WAIT_AUTHPINNABLE = WAIT_UNFREEZE; + static const int WAIT_IMPORTED = (1<<4); // import finish + static const int WAIT_DNREAD = (1<<20); + static const int WAIT_DNLOCK = (1<<21); + static const int WAIT_DNUNPINNED = (1<<22); + static const int WAIT_DNPINNABLE = (WAIT_DNREAD|WAIT_DNUNPINNED); + static const 
int WAIT_DNREQXLOCK = (1<<23); + + static const int WAIT_ANY = (0xffffffff); + static const int WAIT_ATFREEZEROOT = (WAIT_AUTHPINNABLE|\ + WAIT_UNFREEZE); // hmm, same same + + + public: // context @@ -277,7 +251,9 @@ class CDir : public MDSCacheObject { // -- accessors -- - inodeno_t ino() { return inode->ino(); } + inodeno_t ino() { return inode->ino(); } + dirfrag_t dirfrag() { return dirfrag_t(inode->ino(), frag); } + CInode *get_inode() { return inode; } CDir *get_parent_dir() { return inode->get_parent_dir(); } diff --git a/branches/sage/cephmds2/mds/CInode.cc b/branches/sage/cephmds2/mds/CInode.cc index 1af674653951b..f0cc483d730e1 100644 --- a/branches/sage/cephmds2/mds/CInode.cc +++ b/branches/sage/cephmds2/mds/CInode.cc @@ -107,6 +107,24 @@ CInode::~CInode() { // dirfrags +// new interface for old way +void CInode::get_dirfrags(list& ls) +{ + if (dir) + ls.push_back(dir); +} +void CInode::get_nested_dirfrags(list& ls) +{ + if (dir && !dir->is_subtree_root()) + ls.push_back(dir); +} +void CInode::get_subtree_dirfrags(list& ls) +{ + if (dir && dir->is_subtree_root()) + ls.push_back(dir); +} + +/* new void CInode::get_dirfrags(list& ls) { for (map::iterator p = dirfrags.begin(); @@ -130,7 +148,7 @@ void CInode::get_subtree_dirfrags(list& ls) if (p->second->is_subtree_root()) ls.push_back(p->second); } - +*/ // pins @@ -406,8 +424,12 @@ bool CInode::waiting_for(int tag) void CInode::add_waiter(int tag, Context *c) { // waiting on hierarchy? 
- if (tag & CDIR_WAIT_ATFREEZEROOT && (is_freezing() || is_frozen())) { - parent->dir->add_waiter(tag, c); + if (tag & WAIT_AUTHPINNABLE) { + assert(tag == WAIT_AUTHPINNABLE); + assert(is_freezing() || is_frozen()); + + // wait on the directory + parent->dir->add_waiter(CDir::WAIT_AUTHPINNABLE, c); return; } diff --git a/branches/sage/cephmds2/mds/CInode.h b/branches/sage/cephmds2/mds/CInode.h index 422ef93a158b7..223524004b77f 100644 --- a/branches/sage/cephmds2/mds/CInode.h +++ b/branches/sage/cephmds2/mds/CInode.h @@ -36,50 +36,6 @@ using namespace std; -// wait reasons -#define CINODE_WAIT_AUTHPINNABLE CDIR_WAIT_UNFREEZE - // waiters: write_hard_start, read_file_start, write_file_start (mdcache) - // handle_client_chmod, handle_client_touch (mds) - // trigger: (see CDIR_WAIT_UNFREEZE) -#define CINODE_WAIT_GETREPLICA (1<<11) // update/replicate individual inode - // waiters: import_dentry_inode - // trigger: handle_inode_replicate_ack - -#define CINODE_WAIT_DIR (1<<13) - // waiters: traverse_path - // triggers: handle_disocver_reply - -#define CINODE_WAIT_LINK (1<<14) // as in remotely nlink++ -#define CINODE_WAIT_ANCHORED (1<<15) -#define CINODE_WAIT_UNLINK (1<<16) // as in remotely nlink-- - -#define CINODE_WAIT_HARDR (1<<17) // 131072 -#define CINODE_WAIT_HARDW (1<<18) // 262... 
-#define CINODE_WAIT_HARDB (1<<19) -#define CINODE_WAIT_HARDRWB (CINODE_WAIT_HARDR|CINODE_WAIT_HARDW|CINODE_WAIT_HARDB) -#define CINODE_WAIT_HARDSTABLE (1<<20) -#define CINODE_WAIT_HARDNORD (1<<21) -#define CINODE_WAIT_FILER (1<<22) -#define CINODE_WAIT_FILEW (1<<23) -#define CINODE_WAIT_FILEB (1<<24) -#define CINODE_WAIT_FILERWB (CINODE_WAIT_FILER|CINODE_WAIT_FILEW|CINODE_WAIT_FILEB) -#define CINODE_WAIT_FILESTABLE (1<<25) -#define CINODE_WAIT_FILENORD (1<<26) -#define CINODE_WAIT_FILENOWR (1<<27) - -#define CINODE_WAIT_RENAMEACK (1<<28) -#define CINODE_WAIT_RENAMENOTIFYACK (1<<29) - -#define CINODE_WAIT_CAPS (1<<30) - -#define CINODE_WAIT_ANY 0xffffffff - - - -// misc -#define CINODE_EXPORT_NONCE 1 // nonce given to replicas created by export -#define CINODE_HASHREPLICA_NONCE 1 // hashed inodes that are duped ???FIXME??? - class Context; class CDentry; class CDir; @@ -131,7 +87,7 @@ class CInode : public MDSCacheObject { } } - // state + // -- state -- static const int STATE_AUTH = (1<<0); static const int STATE_ROOT = (1<<1); static const int STATE_DIRTY = (1<<2); @@ -145,6 +101,37 @@ class CInode : public MDSCacheObject { //static const int STATE_RENAMING = (1<<8); // moving me //static const int STATE_RENAMINGTO = (1<<9); // rename target (will be unlinked) + // -- waiters -- + static const int WAIT_AUTHPINNABLE = (1<<10); + // waiters: write_hard_start, read_file_start, write_file_start (mdcache) + // handle_client_chmod, handle_client_touch (mds) + // trigger: (see CDIR_WAIT_UNFREEZE) + static const int WAIT_DIR = (1<<13); + // waiters: traverse_path + // triggers: handle_disocver_reply + static const int WAIT_LINK = (1<<14); // as in remotely nlink++ + static const int WAIT_ANCHORED = (1<<15); + static const int WAIT_UNLINK = (1<<16); // as in remotely nlink-- + static const int WAIT_HARDR = (1<<17); // 131072 + static const int WAIT_HARDW = (1<<18); // 262... 
+ static const int WAIT_HARDB = (1<<19); + static const int WAIT_HARDRWB = (WAIT_HARDR|WAIT_HARDW|WAIT_HARDB); + static const int WAIT_HARDSTABLE = (1<<20); + static const int WAIT_HARDNORD = (1<<21); + static const int WAIT_FILER = (1<<22); + static const int WAIT_FILEW = (1<<23); + static const int WAIT_FILEB = (1<<24); + static const int WAIT_FILERWB = (WAIT_FILER|WAIT_FILEW|WAIT_FILEB); + static const int WAIT_FILESTABLE = (1<<25); + static const int WAIT_FILENORD = (1<<26); + static const int WAIT_FILENOWR = (1<<27); + static const int WAIT_RENAMEACK =(1<<28); + static const int WAIT_RENAMENOTIFYACK =(1<<29); + static const int WAIT_CAPS =(1<<30); + static const int WAIT_ANY = 0xffffffff; + + // misc + static const int EXPORT_NONCE = 1; // nonce given to replicas created by export @@ -163,6 +150,11 @@ class CInode : public MDSCacheObject { // new way map dirfrags; // cached dir fragments + CDir* get_dirfrag(frag_t fg) { + // old way + assert(fg == 0); + return dir; + } void get_dirfrags(list& ls); void get_nested_dirfrags(list& ls); void get_subtree_dirfrags(list& ls); @@ -297,10 +289,10 @@ protected: bool is_hardlock_write_wanted() { - return waiting_for(CINODE_WAIT_HARDW); + return waiting_for(WAIT_HARDW); } bool is_filelock_write_wanted() { - return waiting_for(CINODE_WAIT_FILEW); + return waiting_for(WAIT_FILEW); } // -- caps -- (new) diff --git a/branches/sage/cephmds2/mds/Locker.cc b/branches/sage/cephmds2/mds/Locker.cc index a99a4672448d8..69f1b79f2b7be 100644 --- a/branches/sage/cephmds2/mds/Locker.cc +++ b/branches/sage/cephmds2/mds/Locker.cc @@ -413,7 +413,7 @@ void Locker::handle_client_file_caps(MClientFileCaps *m) // reevaluate, waiters inode_file_eval(in); - in->finish_waiting(CINODE_WAIT_CAPS, 0); + in->finish_waiting(CInode::WAIT_CAPS, 0); delete m; } @@ -539,7 +539,7 @@ bool Locker::inode_hard_read_try(CInode *in, Context *con) // wait! 
dout(7) << "inode_hard_read_try waiting on " << *in << endl; - in->add_waiter(CINODE_WAIT_HARDR, con); + in->add_waiter(CInode::WAIT_HARDR, con); return false; } @@ -558,7 +558,7 @@ bool Locker::inode_hard_read_start(CInode *in, MClientRequest *m) // wait! dout(7) << "inode_hard_read_start waiting on " << *in << endl; - in->add_waiter(CINODE_WAIT_HARDR, new C_MDS_RetryRequest(mds, m, in)); + in->add_waiter(CInode::WAIT_HARDR, new C_MDS_RetryRequest(mds, m, in)); return false; } @@ -571,7 +571,7 @@ void Locker::inode_hard_read_finish(CInode *in) dout(7) << "inode_hard_read_finish on " << *in << endl; - //if (in->hardlock.get_nread() == 0) in->finish_waiting(CINODE_WAIT_HARDNORD); + //if (in->hardlock.get_nread() == 0) in->finish_waiting(CInode::WAIT_HARDNORD); } @@ -590,7 +590,7 @@ bool Locker::inode_hard_write_start(CInode *in, MClientRequest *m) assert(in->is_auth()); if (!in->can_auth_pin()) { dout(7) << "inode_hard_write_start waiting for authpinnable on " << *in << endl; - in->add_waiter(CINODE_WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, m, in)); + in->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, m, in)); return false; } @@ -610,7 +610,7 @@ bool Locker::inode_hard_write_start(CInode *in, MClientRequest *m) } dout(7) << "inode_hard_write_start waiting on " << *in << endl; - in->add_waiter(CINODE_WAIT_HARDW, new C_MDS_RetryRequest(mds, m, in)); + in->add_waiter(CInode::WAIT_HARDW, new C_MDS_RetryRequest(mds, m, in)); return false; } else { @@ -636,7 +636,7 @@ void Locker::inode_hard_write_finish(CInode *in) // others waiting? if (in->is_hardlock_write_wanted()) { // wake 'em up - in->take_waiting(CINODE_WAIT_HARDW, mds->finished_queue); + in->take_waiting(CInode::WAIT_HARDW, mds->finished_queue); } else { // auto-sync if alone. 
if (in->is_auth() && @@ -662,7 +662,7 @@ void Locker::inode_hard_eval(CInode *in) // waiters //in->hardlock.get_write(); - in->finish_waiting(CINODE_WAIT_HARDRWB|CINODE_WAIT_HARDSTABLE); + in->finish_waiting(CInode::WAIT_HARDRWB|CInode::WAIT_HARDSTABLE); //in->hardlock.put_write(); break; @@ -713,7 +713,7 @@ void Locker::inode_hard_sync(CInode *in) in->hardlock.set_state(LOCK_SYNC); // waiters? - in->finish_waiting(CINODE_WAIT_HARDSTABLE); + in->finish_waiting(CInode::WAIT_HARDSTABLE); } void Locker::inode_hard_lock(CInode *in) @@ -804,7 +804,7 @@ void Locker::handle_lock_inode_hard(MLock *m) // no need to reply // waiters - in->finish_waiting(CINODE_WAIT_HARDR|CINODE_WAIT_HARDSTABLE); + in->finish_waiting(CInode::WAIT_HARDR|CInode::WAIT_HARDSTABLE); break; case LOCK_AC_LOCK: @@ -815,7 +815,7 @@ void Locker::handle_lock_inode_hard(MLock *m) if (lock->get_nread() > 0) { dout(7) << "handle_lock_inode_hard readers, waiting before ack on " << *in << endl; lock->set_state(LOCK_GLOCKR); - in->add_waiter(CINODE_WAIT_HARDNORD, + in->add_waiter(CInode::WAIT_HARDNORD, new C_MDS_RetryMessage(mds,m)); assert(0); // does this ever happen? (if so, fix hard_read_finish, and CInodeExport.update_inode!) 
return; @@ -883,13 +883,13 @@ bool Locker::inode_file_read_start(CInode *in, MClientRequest *m) in->filelock.get_read(); //in->filelock.get_write(); - in->finish_waiting(CINODE_WAIT_FILERWB|CINODE_WAIT_FILESTABLE); + in->finish_waiting(CInode::WAIT_FILERWB|CInode::WAIT_FILESTABLE); //in->filelock.put_write(); return true; } } else { dout(7) << "inode_file_read_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl; - in->add_waiter(CINODE_WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, in)); + in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, in)); return false; } } else { @@ -906,7 +906,7 @@ bool Locker::inode_file_read_start(CInode *in, MClientRequest *m) } else { // wait until stable dout(7) << "inode_file_read_start waiting until stable on " << *in << ", filelock=" << in->filelock << endl; - in->add_waiter(CINODE_WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, in)); + in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, in)); return false; } } @@ -914,7 +914,7 @@ bool Locker::inode_file_read_start(CInode *in, MClientRequest *m) // wait dout(7) << "inode_file_read_start waiting on " << *in << ", filelock=" << in->filelock << endl; - in->add_waiter(CINODE_WAIT_FILER, new C_MDS_RetryRequest(mds, m, in)); + in->add_waiter(CInode::WAIT_FILER, new C_MDS_RetryRequest(mds, m, in)); return false; } @@ -929,7 +929,7 @@ void Locker::inode_file_read_finish(CInode *in) dout(7) << "inode_file_read_finish on " << *in << ", filelock=" << in->filelock << endl; if (in->filelock.get_nread() == 0) { - in->finish_waiting(CINODE_WAIT_FILENORD); + in->finish_waiting(CInode::WAIT_FILENORD); inode_file_eval(in); } } @@ -946,7 +946,7 @@ bool Locker::inode_file_write_start(CInode *in, MClientRequest *m) if (!in->filelock.can_write_soon(in->is_auth())) { if (!in->filelock.is_stable()) { dout(7) << "inode_file_write_start on auth, waiting for stable on " << *in << endl; - in->add_waiter(CINODE_WAIT_FILESTABLE, new 
C_MDS_RetryRequest(mds, m, in)); + in->add_waiter(CInode::WAIT_FILESTABLE, new C_MDS_RetryRequest(mds, m, in)); return false; } @@ -972,7 +972,7 @@ bool Locker::inode_file_write_start(CInode *in, MClientRequest *m) assert(in->is_auth()); if (!in->can_auth_pin()) { dout(7) << "inode_file_write_start waiting for authpinnable on " << *in << endl; - in->add_waiter(CINODE_WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, m, in)); + in->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, m, in)); return false; } @@ -981,7 +981,7 @@ bool Locker::inode_file_write_start(CInode *in, MClientRequest *m) return true; } else { dout(7) << "inode_file_write_start on auth, waiting for write on " << *in << endl; - in->add_waiter(CINODE_WAIT_FILEW, new C_MDS_RetryRequest(mds, m, in)); + in->add_waiter(CInode::WAIT_FILEW, new C_MDS_RetryRequest(mds, m, in)); return false; } } @@ -996,7 +996,7 @@ void Locker::inode_file_write_finish(CInode *in) // drop lock? if (!in->is_filelock_write_wanted()) { - in->finish_waiting(CINODE_WAIT_FILENOWR); + in->finish_waiting(CInode::WAIT_FILENOWR); inode_file_eval(in); } } @@ -1031,7 +1031,7 @@ void Locker::inode_file_eval(CInode *in) // waiters in->filelock.get_read(); //in->filelock.get_write(); - in->finish_waiting(CINODE_WAIT_FILERWB|CINODE_WAIT_FILESTABLE); + in->finish_waiting(CInode::WAIT_FILERWB|CInode::WAIT_FILESTABLE); in->filelock.put_read(); //in->filelock.put_write(); } @@ -1041,7 +1041,7 @@ void Locker::inode_file_eval(CInode *in) case LOCK_GMIXEDR: if ((issued & ~(CAP_FILE_RD)) == 0) { in->filelock.set_state(LOCK_MIXED); - in->finish_waiting(CINODE_WAIT_FILESTABLE); + in->finish_waiting(CInode::WAIT_FILESTABLE); } break; @@ -1058,7 +1058,7 @@ void Locker::inode_file_eval(CInode *in) send_lock_message(in, LOCK_AC_MIXED, LOCK_OTYPE_IFILE, softdata); } - in->finish_waiting(CINODE_WAIT_FILESTABLE); + in->finish_waiting(CInode::WAIT_FILESTABLE); } break; @@ -1066,14 +1066,14 @@ void Locker::inode_file_eval(CInode *in) case 
LOCK_GLONERR: if (issued == 0) { in->filelock.set_state(LOCK_LONER); - in->finish_waiting(CINODE_WAIT_FILESTABLE); + in->finish_waiting(CInode::WAIT_FILESTABLE); } break; case LOCK_GLONERM: if ((issued & ~CAP_FILE_WR) == 0) { in->filelock.set_state(LOCK_LONER); - in->finish_waiting(CINODE_WAIT_FILESTABLE); + in->finish_waiting(CInode::WAIT_FILESTABLE); } break; @@ -1092,7 +1092,7 @@ void Locker::inode_file_eval(CInode *in) // waiters in->filelock.get_read(); - in->finish_waiting(CINODE_WAIT_FILER|CINODE_WAIT_FILESTABLE); + in->finish_waiting(CInode::WAIT_FILER|CInode::WAIT_FILESTABLE); in->filelock.put_read(); } break; @@ -1551,7 +1551,7 @@ void Locker::handle_lock_inode_file(MLock *m) // waiters in->filelock.get_read(); - in->finish_waiting(CINODE_WAIT_FILER|CINODE_WAIT_FILESTABLE); + in->finish_waiting(CInode::WAIT_FILER|CInode::WAIT_FILESTABLE); in->filelock.put_read(); inode_file_eval(in); break; @@ -1567,7 +1567,7 @@ void Locker::handle_lock_inode_file(MLock *m) } if (lock->get_nread() > 0) { dout(7) << "handle_lock_inode_file readers, waiting before ack on " << *in << endl; - in->add_waiter(CINODE_WAIT_FILENORD, + in->add_waiter(CInode::WAIT_FILENORD, new C_MDS_RetryMessage(mds,m)); lock->set_state(LOCK_GLOCKR); assert(0);// i am broken.. why retry message when state captures all the info i need? 
@@ -1619,7 +1619,7 @@ void Locker::handle_lock_inode_file(MLock *m) // waiters //in->filelock.get_write(); - in->finish_waiting(CINODE_WAIT_FILEW|CINODE_WAIT_FILESTABLE); + in->finish_waiting(CInode::WAIT_FILEW|CInode::WAIT_FILESTABLE); //in->filelock.put_write(); inode_file_eval(in); break; @@ -1718,7 +1718,7 @@ bool Locker::dentry_xlock_start(CDentry *dn, Message *m, CInode *ref) // not by me, wait dout(7) << "dentry " << *dn << " xlock by someone else" << endl; - dn->dir->add_waiter(CDIR_WAIT_DNREAD, dn->name, + dn->dir->add_waiter(CDir::WAIT_DNREAD, dn->name, new C_MDS_RetryRequest(mds,m,ref)); return false; } @@ -1727,11 +1727,11 @@ bool Locker::dentry_xlock_start(CDentry *dn, Message *m, CInode *ref) if (dn->lockstate == DN_LOCK_PREXLOCK) { if (dn->xlockedby == m) { dout(7) << "dentry " << *dn << " prexlock by me" << endl; - dn->dir->add_waiter(CDIR_WAIT_DNLOCK, dn->name, + dn->dir->add_waiter(CDir::WAIT_DNLOCK, dn->name, new C_MDS_RetryRequest(mds,m,ref)); } else { dout(7) << "dentry " << *dn << " prexlock by someone else" << endl; - dn->dir->add_waiter(CDIR_WAIT_DNREAD, dn->name, + dn->dir->add_waiter(CDir::WAIT_DNREAD, dn->name, new C_MDS_RetryRequest(mds,m,ref)); } return false; @@ -1745,7 +1745,7 @@ bool Locker::dentry_xlock_start(CDentry *dn, Message *m, CInode *ref) // dir auth pinnable? 
if (!dn->dir->can_auth_pin()) { dout(7) << "dentry " << *dn << " dir not pinnable, waiting" << endl; - dn->dir->add_waiter(CDIR_WAIT_AUTHPINNABLE, + dn->dir->add_waiter(CDir::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds,m,ref)); return false; } @@ -1754,7 +1754,7 @@ bool Locker::dentry_xlock_start(CDentry *dn, Message *m, CInode *ref) if (dn->is_pinned()) { dout(7) << "dentry " << *dn << " pinned, waiting" << endl; dn->lockstate = DN_LOCK_UNPINNING; - dn->dir->add_waiter(CDIR_WAIT_DNUNPINNED, + dn->dir->add_waiter(CDir::WAIT_DNUNPINNED, dn->name, new C_MDS_RetryRequest(mds,m,ref)); return false; @@ -1813,7 +1813,7 @@ bool Locker::dentry_xlock_start(CDentry *dn, Message *m, CInode *ref) // wait dout(7) << "dentry_xlock_start locking, waiting for replicas " << endl; - dn->dir->add_waiter(CDIR_WAIT_DNLOCK, dn->name, + dn->dir->add_waiter(CDir::WAIT_DNLOCK, dn->name, new C_MDS_RetryRequest(mds, m, ref)); return false; } else { @@ -1921,7 +1921,7 @@ void Locker::dentry_xlock_request(CDir *dir, string& dname, bool create, mds->send_message_mds(m, dauth, MDS_PORT_LOCKER); // add waiter - dir->add_waiter(CDIR_WAIT_DNREQXLOCK, dname, + dir->add_waiter(CDir::WAIT_DNREQXLOCK, dname, new C_MDC_XlockRequest(this, dir, dname, req, onfinish)); @@ -2090,7 +2090,7 @@ void Locker::handle_lock_dn(MLock *m) // wait dout(7) << "dn pinned, waiting " << *dn << endl; - dn->dir->add_waiter(CDIR_WAIT_DNUNPINNED, + dn->dir->add_waiter(CDir::WAIT_DNUNPINNED, dn->name, new C_MDS_RetryMessage(mds, m)); return; @@ -2105,7 +2105,7 @@ void Locker::handle_lock_dn(MLock *m) } // wake up waiters - dir->finish_waiting(CDIR_WAIT_DNLOCK, dname); // ? will this happen on replica ? + dir->finish_waiting(CDir::WAIT_DNLOCK, dname); // ? will this happen on replica ? break; case LOCK_AC_SYNC: @@ -2120,7 +2120,7 @@ void Locker::handle_lock_dn(MLock *m) } // wake up waiters - dir->finish_waiting(CDIR_WAIT_DNREAD, dname); // will this happen either? 
YES: if a rename lock backs out + dir->finish_waiting(CDir::WAIT_DNREAD, dname); // will this happen either? YES: if a rename lock backs out break; case LOCK_AC_REQXLOCKACK: @@ -2128,7 +2128,7 @@ void Locker::handle_lock_dn(MLock *m) { dout(10) << "handle_lock_dn got ack/nak on a reqxlock for " << *dn << endl; list finished; - dir->take_waiting(CDIR_WAIT_DNREQXLOCK, m->get_dn(), finished, 1); // TAKE ONE ONLY! + dir->take_waiting(CDir::WAIT_DNREQXLOCK, m->get_dn(), finished, 1); // TAKE ONE ONLY! finish_contexts(finished, (m->get_action() == LOCK_AC_REQXLOCKACK) ? 1:-1); } @@ -2144,7 +2144,7 @@ void Locker::handle_lock_dn(MLock *m) dout(7) << "handle_lock_dn finish gather, now xlock on " << *dn << endl; dn->lockstate = DN_LOCK_XLOCK; mdcache->active_requests[dn->xlockedby].xlocks.insert(dn); - dir->finish_waiting(CDIR_WAIT_DNLOCK, dname); + dir->finish_waiting(CDir::WAIT_DNLOCK, dname); } break; diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index ec1efdd004519..0a6ededd23fdc 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -898,7 +898,7 @@ void MDCache::handle_mds_failure(int who) // take waiters list waiters; - in->take_waiting(CINODE_WAIT_DIR, waiters); + in->take_waiting(CInode::WAIT_DIR, waiters); mds->queue_finished(waiters); dout(10) << "kicking WAIT_DIR on " << *in << endl; @@ -939,7 +939,7 @@ void MDCache::handle_mds_recovery(int who) while (!q.empty()) { CDir *d = q.front(); q.pop_front(); - d->take_waiting(CDIR_WAIT_ANY, waiters); + d->take_waiting(CDir::WAIT_ANY, waiters); // inode waiters too for (CDir_map_t::iterator p = d->items.begin(); @@ -947,7 +947,7 @@ void MDCache::handle_mds_recovery(int who) ++p) { CDentry *dn = p->second; if (dn->is_primary()) { - dn->get_inode()->take_waiting(CINODE_WAIT_ANY, waiters); + dn->get_inode()->take_waiting(CInode::WAIT_ANY, waiters); // recurse? 
if (dn->get_inode()->dir && @@ -2329,7 +2329,7 @@ int MDCache::path_traverse(filepath& origpath, // parent dir frozen_dir? if (cur->is_frozen_dir()) { dout(7) << "traverse: " << *cur->get_parent_dir() << " is frozen_dir, waiting" << endl; - cur->get_parent_dir()->add_waiter(CDIR_WAIT_UNFREEZE, ondelay); + cur->get_parent_dir()->add_waiter(CDir::WAIT_UNFREEZE, ondelay); if (onfinish) delete onfinish; return 1; } @@ -2339,7 +2339,7 @@ int MDCache::path_traverse(filepath& origpath, } else { // discover dir from/via inode auth assert(!cur->is_auth()); - if (cur->waiting_for(CINODE_WAIT_DIR)) { + if (cur->waiting_for(CInode::WAIT_DIR)) { dout(10) << "traverse: need dir for " << *cur << ", already doing discover" << endl; } else { filepath want = path.postfixpath(depth); @@ -2351,7 +2351,7 @@ int MDCache::path_traverse(filepath& origpath, cur->authority().first, MDS_PORT_CACHE); dir_discovers[cur->ino()].insert(cur->authority().first); } - cur->add_waiter(CINODE_WAIT_DIR, ondelay); + cur->add_waiter(CInode::WAIT_DIR, ondelay); if (onfinish) delete onfinish; return 1; } @@ -2363,7 +2363,7 @@ int MDCache::path_traverse(filepath& origpath, // doh! // FIXME: traverse is allowed? dout(7) << "traverse: " << *cur->dir << " is frozen, waiting" << endl; - cur->dir->add_waiter(CDIR_WAIT_UNFREEZE, ondelay); + cur->dir->add_waiter(CDir::WAIT_UNFREEZE, ondelay); if (onfinish) delete onfinish; return 1; } @@ -2404,7 +2404,7 @@ int MDCache::path_traverse(filepath& origpath, // dentry exists. xlocked? 
if (!noperm && dn->is_xlockedbyother(req)) { dout(10) << "traverse: xlocked dentry at " << *dn << endl; - cur->dir->add_waiter(CDIR_WAIT_DNREAD, + cur->dir->add_waiter(CDir::WAIT_DNREAD, path[depth], ondelay); if (onfinish) delete onfinish; @@ -2548,7 +2548,7 @@ int MDCache::path_traverse(filepath& origpath, // discover filepath want = path.postfixpath(depth); - if (cur->dir->waiting_for(CDIR_WAIT_DENTRY, path[depth])) { + if (cur->dir->waiting_for(CDir::WAIT_DENTRY, path[depth])) { dout(7) << "traverse: already waiting for discover on " << *cur << " for " << want.get_path() << " to mds" << dauth << endl; } else { dout(7) << "traverse: discover on " << *cur << " for " << want.get_path() << " to mds" << dauth << endl; @@ -2573,7 +2573,7 @@ int MDCache::path_traverse(filepath& origpath, // delay processing of current request. // delay finish vs ondelay until result of traverse, so that ENOENT can be // passed to onfinish if necessary - cur->dir->add_waiter(CDIR_WAIT_DENTRY, + cur->dir->add_waiter(CDir::WAIT_DENTRY, path[depth], new C_MDC_TraverseDiscover(onfinish, ondelay)); @@ -2639,7 +2639,7 @@ void MDCache::open_remote_dir(CInode *diri, true), // need the dir open diri->authority().first, MDS_PORT_CACHE); dir_discovers[diri->ino()].insert(diri->authority().first); - diri->add_waiter(CINODE_WAIT_DIR, fin); + diri->add_waiter(CInode::WAIT_DIR, fin); } @@ -2721,7 +2721,7 @@ bool MDCache::path_pin(vector& trace, // wait if (c) { dout(10) << "path_pin can't pin " << *dn << ", waiting" << endl; - dn->dir->add_waiter(CDIR_WAIT_DNPINNABLE, + dn->dir->add_waiter(CDir::WAIT_DNPINNABLE, dn->name, c); } else { @@ -2760,7 +2760,7 @@ void MDCache::path_unpin(vector& trace, dn->lockstate = DN_LOCK_SYNC; // run finisher right now to give them a fair shot. 
- dn->dir->finish_waiting(CDIR_WAIT_DNUNPINNED, dn->name); + dn->dir->finish_waiting(CDir::WAIT_DNUNPINNED, dn->name); } } } @@ -2840,7 +2840,7 @@ void MDCache::request_cleanup(Message *req) mds->locker->dentry_xlock_finish(dn); // queue finishers - dn->dir->take_waiting(CDIR_WAIT_ANY, dn->name, mds->finished_queue); + dn->dir->take_waiting(CDir::WAIT_ANY, dn->name, mds->finished_queue); // remove clean, null dentry? (from a failed rename or whatever) if (dn->is_null() && dn->is_sync() && !dn->is_dirty()) { @@ -2906,7 +2906,7 @@ void MDCache::request_cleanup(Message *req) if (g_conf.log_pins) { // pin /* -for (int i=0; ilogger2) mds->logger2->set(cinode_pin_names[i], cinode_pins[i]); } @@ -2989,7 +2989,7 @@ public: } // trigger - in->finish_waiting(CINODE_WAIT_ANCHORED, r); + in->finish_waiting(CInode::WAIT_ANCHORED, r); } }; @@ -3002,7 +3002,7 @@ void MDCache::anchor_inode(CInode *in, Context *onfinish) dout(7) << "anchor_inode already anchoring " << *in << endl; // wait - in->add_waiter(CINODE_WAIT_ANCHORED, + in->add_waiter(CInode::WAIT_ANCHORED, onfinish); } else { @@ -3013,7 +3013,7 @@ void MDCache::anchor_inode(CInode *in, Context *onfinish) in->get(CInode::PIN_ANCHORING); // wait - in->add_waiter(CINODE_WAIT_ANCHORED, + in->add_waiter(CInode::WAIT_ANCHORED, onfinish); // make trace @@ -3066,7 +3066,7 @@ void MDCache::handle_inode_link_ack(MInodeLinkAck *m) assert(in); dout(7) << "handle_inode_link_ack success = " << m->is_success() << " on " << *in << endl; - in->finish_waiting(CINODE_WAIT_LINK, + in->finish_waiting(CInode::WAIT_LINK, m->is_success() ? 1:-1); } @@ -3132,7 +3132,7 @@ void MDCache::handle_discover(MDiscover *dis) // frozen_dir? 
if (!cur->dir && cur->is_frozen_dir()) { dout(7) << "is frozen_dir, waiting" << endl; - cur->get_parent_dir()->add_waiter(CDIR_WAIT_UNFREEZE, + cur->get_parent_dir()->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryMessage(mds, dis)); return; } @@ -3209,7 +3209,7 @@ void MDCache::handle_discover(MDiscover *dis) /* if (dn && !dn->can_read()) { // xlocked? dout(7) << "waiting on " << *dn << endl; - cur->dir->add_waiter(CDIR_WAIT_DNREAD, + cur->dir->add_waiter(CDir::WAIT_DNREAD, dn->name, new C_MDS_RetryMessage(mds, dis)); return; @@ -3292,7 +3292,7 @@ void MDCache::handle_discover(MDiscover *dis) // wait for frozen dir? if (cur->dir->is_frozen()) { dout(7) << "waiting for frozen " << *cur->dir << endl; - cur->dir->add_waiter(CDIR_WAIT_UNFREEZE, new C_MDS_RetryMessage(mds, dis)); + cur->dir->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryMessage(mds, dis)); return; } else { dout(7) << "i'm not auth, dropping request (+this empty reply)." << endl; @@ -3386,7 +3386,7 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) dout(7) << "added " << *cur->dir << " nonce " << cur->dir->replica_nonce << endl; // get waiters - cur->take_waiting(CINODE_WAIT_DIR, finished); + cur->take_waiting(CInode::WAIT_DIR, finished); dir_discovers.erase(cur->ino()); } } @@ -3398,12 +3398,12 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) assert(cur->is_dir()); if (cur->dir) { dout(7) << " flag_error on dentry " << m->get_error_dentry() << ", triggering dentry?" << endl; - cur->dir->take_waiting(CDIR_WAIT_DENTRY, + cur->dir->take_waiting(CDir::WAIT_DENTRY, m->get_error_dentry(), error); } else { dout(7) << " flag_error on dentry " << m->get_error_dentry() << ", triggering dir?" 
<< endl; - cur->take_waiting(CINODE_WAIT_DIR, error); + cur->take_waiting(CInode::WAIT_DIR, error); dir_discovers.erase(cur->ino()); } break; @@ -3428,7 +3428,7 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) dout(7) << "added " << *dn << endl; } - cur->dir->take_waiting(CDIR_WAIT_DENTRY, + cur->dir->take_waiting(CDir::WAIT_DENTRY, m->get_dentry(i).get_dname(), finished); } @@ -3495,7 +3495,7 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) // dir error at the end there? dout(7) << " flag_error on dir " << *cur << endl; assert(!cur->is_dir()); - cur->take_waiting(CINODE_WAIT_DIR, error); + cur->take_waiting(CInode::WAIT_DIR, error); dir_discovers.erase(cur->ino()); } @@ -3773,7 +3773,7 @@ void MDCache::dentry_unlink(CDentry *dn, Context *c) dn->_mark_dirty(); // fixme // add waiter - in->add_waiter(CINODE_WAIT_UNLINK, c); + in->add_waiter(CInode::WAIT_UNLINK, c); return; } } @@ -3802,7 +3802,7 @@ void MDCache::dentry_unlink_finish(CDentry *dn, CDir *dir, Context *c) migrator->export_empty_import(dir); // wake up any waiters - dir->take_waiting(CDIR_WAIT_ANY, dname, mds->finished_queue); + dir->take_waiting(CDir::WAIT_ANY, dname, mds->finished_queue); c->finish(0); } @@ -3838,8 +3838,8 @@ void MDCache::handle_dentry_unlink(MDentryUnlink *m) dn->dir->remove_dentry(dn); // wake up - //dir->finish_waiting(CDIR_WAIT_DNREAD, dname); - dir->take_waiting(CDIR_WAIT_DNREAD, dname, mds->finished_queue); + //dir->finish_waiting(CDir::WAIT_DNREAD, dname); + dir->take_waiting(CDir::WAIT_DNREAD, dname, mds->finished_queue); } } @@ -3903,7 +3903,7 @@ void MDCache::handle_inode_unlink_ack(MInodeUnlinkAck *m) assert(in); dout(7) << "handle_inode_unlink_ack on " << *in << endl; - in->finish_waiting(CINODE_WAIT_UNLINK, 0); + in->finish_waiting(CInode::WAIT_UNLINK, 0); } @@ -3915,112 +3915,6 @@ void MDCache::handle_inode_unlink_ack(MInodeUnlinkAck *m) -/* - * some import/export helpers - */ - -/** con = get_auth_container(dir) - * Returns the directory in which 
authority is delegated for *dir. - * This may be because a directory is an import, or because it is hashed - * and we are nested underneath an inode in that dir (that hashes to us). - * Thus do not assume result->is_auth()! It is_auth() || is_hashed(). - */ - -/* -CDir *MDCache::get_auth_container(CDir *dir) -{ - CDir *imp = dir; // might be *dir - - // find the underlying import or hash that delegates dir - while (true) { - if (imp->is_import()) break; // import - imp = imp->get_parent_dir(); - if (!imp) break; // none - if (imp->is_hashed()) break; // hash - } - - return imp; -} - -CDir *MDCache::get_export_container(CDir *dir) -{ - CDir *ex = dir; // might be *dir - assert(!ex->is_auth()); - - // find the underlying import or hash that delegates dir away - while (true) { - if (ex->is_export()) break; // import - ex = ex->get_parent_dir(); - assert(ex); - if (ex->is_hashed()) break; // hash - } - - return ex; -} - - -void MDCache::find_nested_exports(CDir *dir, set& s) -{ - CDir *import = get_auth_container(dir); - find_nested_exports_under(import, dir, s); -} - -void MDCache::find_nested_exports_under(CDir *import, CDir *dir, set& s) -{ - dout(10) << "find_nested_exports for " << *dir << endl; - dout(10) << "find_nested_exports_under import " << *import << endl; - - if (import == dir) { - // yay, my job is easy! - for (set::iterator p = nested_exports[import].begin(); - p != nested_exports[import].end(); - p++) { - CDir *nested = *p; - s.insert(nested); - dout(10) << "find_nested_exports " << *dir << " " << *nested << endl; - } - return; - } - - // ok, my job is annoying. 
- for (set::iterator p = nested_exports[import].begin(); - p != nested_exports[import].end(); - p++) { - CDir *nested = *p; - - dout(12) << "find_nested_exports checking " << *nested << endl; - - // trace back to import, or dir - CDir *cur = nested->get_parent_dir(); - while (!cur->is_import() || cur == dir) { - if (cur == dir) { - s.insert(nested); - dout(10) << "find_nested_exports " << *dir << " " << *nested << endl; - break; - } else { - cur = cur->get_parent_dir(); - } - } - } -} - - -*/ - - - - - - - - - - - - - - - // ============================================================== // debug crap diff --git a/branches/sage/cephmds2/mds/MDCache.h b/branches/sage/cephmds2/mds/MDCache.h index b7a8851a1265d..99dbbdec71799 100644 --- a/branches/sage/cephmds2/mds/MDCache.h +++ b/branches/sage/cephmds2/mds/MDCache.h @@ -259,13 +259,14 @@ public: return inode_map[ino]; return NULL; } - CDir* get_dir(inodeno_t dirino) { - if (have_inode(dirino)) - return inode_map[dirino]->dir; - return NULL; + CDir* get_dir(inodeno_t dirino) { // deprecated + return get_dirfrag(dirfrag_t(dirino, frag_t())); + } + CDir* get_dirfrag(dirfrag_t df) { + if (!have_inode(df.ino)) return NULL; + return inode_map[df.ino]->get_dirfrag(df.frag); } - int hash_dentry(inodeno_t ino, const string& s) { return 0; // fixme } diff --git a/branches/sage/cephmds2/mds/MDS.cc b/branches/sage/cephmds2/mds/MDS.cc index a022705b91cfe..b0ae8ab2b2d91 100644 --- a/branches/sage/cephmds2/mds/MDS.cc +++ b/branches/sage/cephmds2/mds/MDS.cc @@ -687,7 +687,7 @@ void MDS::boot_create() assert(root); // force empty root dir - CDir *dir = root->dir; + CDir *dir = root->get_dirfrag(frag_t()); dir->mark_complete(); dir->mark_dirty(dir->pre_dirty()); @@ -1004,8 +1004,10 @@ void MDS::my_dispatch(Message *m) int n = rand() % mdcache->inode_map.size(); hash_map::iterator p = mdcache->inode_map.begin(); while (n--) p++; - - CDir *dir = p->second->dir; + + list ls; + p->second->get_dirfrags(ls); + CDir *dir = ls.front(); if 
(!dir) continue; // must be a dir. if (!dir->get_parent_dir()) continue; // must be linked. if (!dir->is_auth()) continue; // must be auth. @@ -1022,6 +1024,7 @@ void MDS::my_dispatch(Message *m) // hack: force hash root? + /* if (false && mdcache->get_root() && mdcache->get_root()->dir && @@ -1030,7 +1033,7 @@ void MDS::my_dispatch(Message *m) dout(0) << "hashing root" << endl; mdcache->migrator->hash_dir(mdcache->get_root()->dir); } - + */ diff --git a/branches/sage/cephmds2/mds/MDStore.cc b/branches/sage/cephmds2/mds/MDStore.cc deleted file mode 100644 index 232de9e7c292e..0000000000000 --- a/branches/sage/cephmds2/mds/MDStore.cc +++ /dev/null @@ -1,752 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2004-2006 Sage Weil - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - - - -#include "MDStore.h" -#include "MDS.h" -#include "MDCache.h" -#include "CInode.h" -#include "CDir.h" -#include "CDentry.h" -#include "MDSMap.h" - -#include "osd/OSDMap.h" -#include "osdc/Filer.h" - -#include "msg/Message.h" - -#include -#include -using namespace std; - - -#include "config.h" -#undef dout -#define dout(l) if (l<=g_conf.debug || l<=g_conf.debug_mds) cout << g_clock.now() << " mds" << mds->get_nodeid() << ".store " - - -/* - * separate hashed dir slices into "regions" - */ -size_t get_hash_offset(int hashcode) { - if (hashcode < 0) - return 0; // not hashed - else - return (size_t)(1<<30) * (size_t)(1+hashcode); -} - - - - -// ========================================================================== -// FETCH - - -class C_MDS_Fetch : public Context { - protected: - MDStore *ms; - inodeno_t ino; - - public: - C_MDS_Fetch(MDStore *ms, inodeno_t ino) : Context() { - this->ms = ms; - this->ino = ino; - } - - void finish(int result) { - ms->fetch_dir_2( result, ino ); - } -}; - -/** fetch_dir(dir, context) - * public call to fetch a dir. - */ -void MDStore::fetch_dir( CDir *dir, - Context *c ) -{ - dout(7) << "fetch_dir " << *dir << " context is " << c << endl; - assert(dir->is_auth() || - dir->is_hashed()); - - // wait - if (c) dir->add_waiter(CDIR_WAIT_COMPLETE, c); - - // already fetching? - if (dir->state_test(CDir::STATE_FETCHING)) { - dout(7) << "already fetching " << *dir << "; waiting" << endl; - return; - } - - // state - dir->state_set(CDir::STATE_FETCHING); - - // stats - if (mds->logger) mds->logger->inc("fdir"); - - // create return context - Context *fin = new C_MDS_Fetch( this, dir->ino() ); - if (dir->is_hashed()) - fetch_dir_hash( dir, fin, mds->get_nodeid()); // hashed - else - fetch_dir_hash( dir, fin ); // normal -} - -/* - * called by low level fn when it's fetched. - * fix up dir state. 
- */ -void MDStore::fetch_dir_2( int result, - inodeno_t ino) -{ - CInode *idir = mds->mdcache->get_inode(ino); - - if (!idir || result < 0) return; // hmm! nevermind i guess. - - assert(idir); - CDir *dir = idir->dir; - assert(dir); - - // dir is now complete - dir->state_set(CDir::STATE_COMPLETE); - dir->state_clear(CDir::STATE_FETCHING); - - // finish - list finished; - dir->take_waiting(CDIR_WAIT_COMPLETE|CDIR_WAIT_DENTRY, finished); - finish_contexts(finished, result); -} - - -/** low level methods **/ - -class C_MDS_FetchHash : public Context { -protected: - MDS *mds; - inode_t inode; - int hashcode; - Context *context; - -public: - bufferlist bl; - bufferlist bl2; - - C_MDS_FetchHash(MDS *mds, inode_t inode, Context *c, int hashcode) : Context() { - this->mds = mds; - this->inode = inode; - this->hashcode = hashcode; - this->context = c; - } - - void finish(int result) { - assert(result>0); - - // combine bufferlists bl + bl2 -> bl - bl.claim_append(bl2); - - // did i get the whole thing? - size_t size; - bl.copy(0, sizeof(size_t), (char*)&size); - size_t got = bl.length() - sizeof(size); - size_t left = size - got; - size_t from = bl.length(); - - // what part of dir are we getting? - from += get_hash_offset(hashcode); - - if (got >= size) { - // done. - mds->mdstore->fetch_dir_hash_2( bl, inode, context, hashcode ); - } - else { - // read the rest! - dout(12) << "fetch_dir_hash_2 dir size is " << size << ", got " << got << ", reading remaniing " << left << " from off " << from << endl; - - // create return context - C_MDS_FetchHash *fin = new C_MDS_FetchHash( mds, inode, context, hashcode ); - fin->bl.claim( bl ); - mds->filer->read(inode, - from, left, - &fin->bl2, - fin ); - return; - } - } -}; - -/** fetch_dir_hash - * low level method. - * fetch part of a dir. either the whole thing if hashcode is -1, or a specific - * hash segment. 
- */ -void MDStore::fetch_dir_hash( CDir *dir, - Context *c, - int hashcode) -{ - dout(11) << "fetch_dir_hash hashcode " << hashcode << " " << *dir << endl; - - // create return context - C_MDS_FetchHash *fin = new C_MDS_FetchHash( mds, dir->get_inode()->inode, c, hashcode ); - - // grab first stripe bit (which had better be more than 16 bytes!) - assert(dir->get_inode()->inode.layout.stripe_size >= 16); - mds->filer->read(dir->get_inode()->inode, - get_hash_offset(hashcode), dir->get_inode()->inode.layout.stripe_size, - &fin->bl, - fin ); -} - -void MDStore::fetch_dir_hash_2( bufferlist& bl, - inode_t& inode, - Context *c, - int hashcode) -{ - CInode *idir = mds->mdcache->get_inode(inode.ino); - if (!idir) { - dout(7) << "fetch_dir_hash_2 on ino " << inode.ino << " but no longer in our cache!" << endl; - c->finish(-1); - delete c; - return; - } - - if (!idir->dir_is_auth() || - !idir->dir) { - dout(7) << "fetch_dir_hash_2 on " << *idir << ", but i'm not auth, or dir not open" << endl; - c->finish(-1); - delete c; - return; - } - - // make sure we have a CDir - CDir *dir = idir->get_or_open_dir(mds->mdcache); - - // do it - dout(7) << "fetch_dir_hash_2 hashcode " << hashcode << " dir " << *dir << endl; - - // parse buffer contents into cache - dout(15) << "bl is " << bl << endl; - - int off = 0; - size_t size; - __uint32_t num; - version_t got_version; - int got_hashcode; - bl.copy(off, sizeof(size), (char*)&size); - off += sizeof(size); - assert(bl.length() >= size + sizeof(size)); - bl.copy(off, sizeof(num), (char*)&num); - off += sizeof(num); - bl.copy(off, sizeof(got_version), (char*)&got_version); - off += sizeof(got_version); - bl.copy(off, sizeof(got_hashcode), (char*)&got_hashcode); - off += sizeof(got_hashcode); - - assert(got_hashcode == hashcode); - - int buflen = bl.length(); - - dout(10) << " " << num << " items in " << size << " bytes" << endl; - - unsigned parsed = 0; - while (parsed < num) { - assert(off < buflen && num > 0); - parsed++; - - 
dout(24) << " " << parsed << "/" << num << " pos " << off << endl; - - // dentry - string dname; - ::_decode(dname, bl, off); - dout(24) << "parse filename '" << dname << "'" << endl; - - CDentry *dn = dir->lookup(dname); // existing dentry? - - char type = bl[off]; - ++off; - if (type == 'L') { - // hard link - inodeno_t ino; - bl.copy(off, sizeof(ino), (char*)&ino); - off += sizeof(ino); - - // what to do? - if (hashcode >= 0) { - int dentryhashcode = mds->mdcache->hash_dentry( dir->ino(), dname ); - assert(dentryhashcode == hashcode); - } - - if (dn) { - if (dn->get_inode() == 0) { - // negative dentry? - dout(12) << "readdir had NEG dentry " << dname << endl; - } else { - // had dentry - dout(12) << "readdir had dentry " << dname << endl; - } - continue; - } - - // (remote) link - CDentry *dn = dir->add_dentry( dname, ino ); - - // link to inode? - CInode *in = mds->mdcache->get_inode(ino); // we may or may not have it. - if (in) { - dn->link_remote(in); - dout(12) << "readdir got remote link " << ino << " which we have " << *in << endl; - } else { - dout(12) << "readdir got remote link " << ino << " (dont' have it)" << endl; - } - } - else if (type == 'I') { - // inode - - // parse out inode - inode_t inode; - bl.copy(off, sizeof(inode), (char*)&inode); - off += sizeof(inode); - - string symlink; - if (inode.is_symlink()) - ::_decode(symlink, bl, off); - - // what to do? - if (hashcode >= 0) { - int dentryhashcode = mds->mdcache->hash_dentry( dir->ino(), dname ); - assert(dentryhashcode == hashcode); - } - - if (dn) { - if (dn->get_inode() == 0) { - // negative dentry? - dout(12) << "readdir had NEG dentry " << dname << endl; - } else { - // had dentry - dout(12) << "readdir had dentry " << dname << endl; - - // under water? 
- if (dn->get_version() <= got_version) { - assert(dn->get_inode()->get_version() <= got_version); - dout(10) << "readdir had underwater dentry " << dname << " and inode, marking clean" << endl; - dn->mark_clean(); - dn->get_inode()->mark_clean(); - } - } - continue; - } - - // add inode - CInode *in = 0; - if (mds->mdcache->have_inode(inode.ino)) { - in = mds->mdcache->get_inode(inode.ino); - dout(12) << "readdir got (but i already had) " << *in - << " mode " << in->inode.mode - << " mtime " << in->inode.mtime << endl; - } else { - // inode - in = new CInode(mds->mdcache); - in->inode = inode; - - // symlink? - if (in->is_symlink()) { - in->symlink = symlink; - } - - // add - mds->mdcache->add_inode( in ); - } - - // link - dir->add_dentry( dname, in ); - dout(12) << "readdir got " << *in << " mode " << in->inode.mode << " mtime " << in->inode.mtime << endl; - } - else { - dout(1) << "corrupt directory, i got tag char '" << type << "' val " << (int)(type) - << " at pos " << off << endl; - assert(0); - } - } - dout(15) << "parsed " << parsed << endl; - - if (c) { - c->finish(0); - delete c; - } -} - - - - -// ================================================================== -// COMMIT - -class C_MDS_CommitDirVerify : public Context { -public: - MDS *mds; - inodeno_t ino; - version_t version; - Context *c; - - C_MDS_CommitDirVerify( MDS *mds, - inodeno_t ino, - version_t version, - Context *c) { - this->mds = mds; - this->c = c; - this->version = version; - this->ino = ino; - } - - virtual void finish(int r) { - - if (r >= 0) { - CInode *in = mds->mdcache->get_inode(ino); - assert(in && in->dir); - if (in && in->dir && in->dir->is_auth()) { - dout(7) << "CommitDirVerify: current = " << in->dir->get_version() - << ", committed = " << in->dir->get_committed_version() - << ", required = " << version << endl; - - if (in->dir->get_committed_version() >= version) { - dout(7) << "my required version is safe, done." 
<< endl; - if (c) { - c->finish(0); - delete c; - } - } else { - dout(7) << "my required version is still not safe, committing again." << endl; - - // what was requested isn't committed yet. - mds->mdstore->commit_dir(in->dir, - version, - c); - } - return; - } - } - - // must have exported ors omethign! - dout(7) << "can't retry commit dir on " << ino << ", must have exported?" << endl; - - // finish. - if (c) { - c->finish(-1); - delete c; - } - } -}; - -class C_MDS_CommitDirFinish : public Context { - protected: - MDStore *ms; - CDir *dir; - version_t version; - - public: - - C_MDS_CommitDirFinish(MDStore *ms, CDir *dir) : Context() { - this->ms = ms; - this->dir = dir; - this->version = dir->get_version(); // just for sanity check later - } - - void finish(int result) { - ms->commit_dir_2( result, dir, version ); - } -}; - - -void MDStore::commit_dir( CDir *dir, - Context *c ) -{ - assert(dir->is_dirty()); - - // commit thru current version - commit_dir(dir, dir->get_version(), c); -} - -void MDStore::commit_dir( CDir *dir, - version_t version, - Context *c ) -{ - assert(dir->is_auth() || - dir->is_hashed()); - - // already committing? - if (dir->state_test(CDir::STATE_COMMITTING)) { - // already mid-commit! - dout(7) << "commit_dir " << *dir << " mid-commit of " << dir->get_committing_version() << endl; - dout(7) << " current version = " << dir->get_version() << endl; - dout(7) << "requested version = " << version << endl; - - assert(version >= dir->get_committed_version()); // why would we request _old_ one? - - dir->add_waiter(CDIR_WAIT_COMMITTED, - new C_MDS_CommitDirVerify(mds, dir->ino(), version, c) ); - return; - } - - if (!dir->can_auth_pin()) { - // something must be frozen up the hiearchy! - dout(7) << "commit_dir " << *dir << " can't auth_pin, waiting" << endl; - dir->add_waiter(CDIR_WAIT_AUTHPINNABLE, - new C_MDS_CommitDirVerify(mds, dir->ino(), version, c) ); - return; - } - - - // is it complete? 
- if (!dir->is_complete()) { - dout(7) << "commit_dir " << *dir << " not complete, fetching first" << endl; - // fetch dir first - fetch_dir(dir, - new C_MDS_CommitDirVerify(mds, dir->ino(), version, c) ); - return; - } - - - // ok go - dout(7) << "commit_dir " << *dir << " version " << dir->get_version() << endl; - - // add waiter - if (c) dir->add_waiter(CDIR_WAIT_COMMITTED, c); - - // get continuation ready - Context *fin = new C_MDS_CommitDirFinish(this, dir); - - // state - dir->state_set(CDir::STATE_COMMITTING); - dir->set_committing_version(); - - // stats - if (mds->logger) mds->logger->inc("cdir"); - - if (dir->is_hashed()) { - // hashed - commit_dir_slice( dir, fin, mds->get_nodeid() ); - } else { - // non-hashed - commit_dir_slice( dir, fin ); - } -} - -void MDStore::commit_dir_2( int result, - CDir *dir, - version_t committed_version) -{ - dout(5) << "commit_dir_2 " << *dir << " committed " << committed_version << ", current version " << dir->get_version() << endl; - assert(committed_version == dir->get_committing_version()); - - // remember which version is now safe - dir->set_committed_version(committed_version); - - // is the dir now clean? 
- if (committed_version == dir->get_version()) - dir->mark_clean(); - - dir->state_clear(CDir::STATE_COMMITTING); - - // finish - dir->finish_waiting(CDIR_WAIT_COMMITTED); -} - - - - -// low-level committer (hashed or normal) - -class C_MDS_CommitSlice : public Context { - protected: - MDStore *ms; - CDir *dir; - Context *c; - int hashcode; - version_t version; - -public: - bufferlist bl; - - C_MDS_CommitSlice(MDStore *ms, CDir *dir, Context *c, int w) : Context() { - this->ms = ms; - this->dir = dir; - this->c = c; - this->hashcode = w; - version = dir->get_version(); - } - - void finish(int result) { - ms->commit_dir_slice_2( result, dir, c, version, hashcode ); - } -}; - - -void MDStore::commit_dir_slice( CDir *dir, - Context *c, - int hashcode) -{ - if (hashcode >= 0) { - assert(dir->is_hashed()); - dout(10) << "commit_dir_slice hashcode " << hashcode << " " << *dir << " version " << dir->get_version() << endl; - } else { - assert(dir->is_auth()); - dout(10) << "commit_dir_slice (whole dir) " << *dir << " version " << dir->get_version() << endl; - } - - // get continuation ready - C_MDS_CommitSlice *fin = new C_MDS_CommitSlice(this, dir, c, hashcode); - - // fill buffer - __uint32_t num = 0; - - bufferlist dirdata; - - version_t v = dir->get_version(); - dirdata.append((char*)&v, sizeof(v)); - dirdata.append((char*)&hashcode, sizeof(hashcode)); - - for (CDir_map_t::iterator it = dir->begin(); - it != dir->end(); - it++) { - CDentry *dn = it->second; - - if (hashcode >= 0) { - int dentryhashcode = mds->mdcache->hash_dentry( dir->ino(), it->first ); - if (dentryhashcode != hashcode) continue; - } - - if (dn->is_null()) continue; // skipping negative entry - - // primary or remote? 
- if (dn->is_remote()) { - - inodeno_t ino = dn->get_remote_ino(); - dout(14) << " pos " << dirdata.length() << " dn '" << it->first << "' remote ino " << ino << endl; - - // name, marker, ion - dirdata.append( it->first.c_str(), it->first.length() + 1); - dirdata.append( "L", 1 ); // remote link - dirdata.append((char*)&ino, sizeof(ino)); - - } else { - // primary link - CInode *in = dn->get_inode(); - assert(in); - - dout(14) << " pos " << dirdata.length() << " dn '" << it->first << "' inode " << *in << endl; - - // name, marker, inode, [symlink string] - dirdata.append( it->first.c_str(), it->first.length() + 1); - dirdata.append( "I", 1 ); // inode - dirdata.append( (char*) &in->inode, sizeof(inode_t)); - - if (in->is_symlink()) { - // include symlink destination! - dout(18) << " inlcuding symlink ptr " << in->symlink << endl; - dirdata.append( (char*) in->symlink.c_str(), in->symlink.length() + 1); - } - } - - num++; - } - dout(14) << "num " << num << endl; - - // put count in buffer - //bufferlist bl; - size_t size = sizeof(num) + dirdata.length(); - fin->bl.append((char*)&size, sizeof(size)); - fin->bl.append((char*)&num, sizeof(num)); - fin->bl.claim_append(dirdata); //.c_str(), dirdata.length()); - assert(fin->bl.length() == size + sizeof(size)); - - // pin inode - dir->auth_pin(); - - // submit to osd - mds->filer->write( dir->get_inode()->inode, - 0, fin->bl.length(), - fin->bl, - 0, //OSD_OP_FLAGS_TRUNCATE, // truncate file/object after end of this write - NULL, fin ); // on safe -} - - -void MDStore::commit_dir_slice_2( int result, - CDir *dir, - Context *c, - version_t committed_version, - int hashcode ) -{ - dout(11) << "commit_dir_slice_2 hashcode " << hashcode << " " << *dir << " v " << committed_version << endl; - - // mark inodes and dentries clean too (if we committed them!) 
- list null_clean; - for (CDir_map_t::iterator it = dir->begin(); - it != dir->end(); ) { - CDentry *dn = it->second; - it++; - - if (hashcode >= 0) { - int dentryhashcode = mds->mdcache->hash_dentry( dir->ino(), dn->get_name() ); - if (dentryhashcode != hashcode) continue; - } - - // dentry - if (committed_version >= dn->get_version()) { - if (dn->is_dirty()) { - dout(15) << " dir " << committed_version << " >= dn " << dn->get_version() << " now clean " << *dn << endl; - dn->mark_clean(); - } - } else { - dout(15) << " dir " << committed_version << " < dn " << dn->get_version() << " still dirty " << *dn << endl; - } - - // only do primary... - if (!dn->is_primary()) - continue; - - CInode *in = dn->get_inode(); - assert(in); - assert(in->is_auth()); - - if (committed_version >= in->get_version()) { - if (in->is_dirty()) { - dout(15) << " dir " << committed_version << " >= inode " << in->get_version() << " now clean " << *in << endl; - in->mark_clean(); - } - } else { - dout(15) << " dir " << committed_version << " < inode " << in->get_version() << " still dirty " << *in << endl; - assert(in->is_dirty()); - } - } - - // unpin - dir->auth_unpin(); - - // finish - if (c) { - c->finish(0); - delete c; - } -} - - - - - - - - - - - - diff --git a/branches/sage/cephmds2/mds/MDStore.h b/branches/sage/cephmds2/mds/MDStore.h deleted file mode 100644 index fe7553608a975..0000000000000 --- a/branches/sage/cephmds2/mds/MDStore.h +++ /dev/null @@ -1,75 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2004-2006 Sage Weil - * - * This is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. 
- * - */ - - - -#ifndef __MDSTORE_H -#define __MDSTORE_H - -#include "include/types.h" -#include "include/buffer.h" - -class MDS; -class CDir; -class Context; - -class MDStore { - protected: - MDS *mds; - - - public: - MDStore(MDS *m) { - mds = m; - } - - - // fetch - public: - void fetch_dir( CDir *dir, Context *c ); - protected: - void fetch_dir_2( int result, inodeno_t ino ); - - void fetch_dir_hash( CDir *dir, - Context *c, - int hashcode = -1); - void fetch_dir_hash_2( bufferlist &bl, - inode_t& inode, - Context *c, - int which); - friend class C_MDS_Fetch; - friend class C_MDS_FetchHash; - - // commit - public: - void commit_dir( CDir *dir, Context *c ); // commit current dir version to disk. - void commit_dir( CDir *dir, __uint64_t version, Context *c ); // commit specified version to disk - protected: - void commit_dir_2( int result, CDir *dir, __uint64_t committed_version ); - - // low level committers - void commit_dir_slice( CDir *dir, - Context *c, - int hashcode = -1); - void commit_dir_slice_2( int result, - CDir *dir, - Context *c, - __uint64_t version, - int hashcode ); - - friend class C_MDS_CommitDirFinish; - friend class C_MDS_CommitSlice; -}; - - -#endif diff --git a/branches/sage/cephmds2/mds/Migrator.cc b/branches/sage/cephmds2/mds/Migrator.cc index 4cceb95be5245..361ff2fcfb8c5 100644 --- a/branches/sage/cephmds2/mds/Migrator.cc +++ b/branches/sage/cephmds2/mds/Migrator.cc @@ -746,7 +746,7 @@ void Migrator::export_go(CDir *dir) mds->send_message_mds(req, dest, MDS_PORT_MIGRATOR); // queue up the finisher - dir->add_waiter( CDIR_WAIT_UNFREEZE, fin ); + dir->add_waiter( CDir::WAIT_UNFREEZE, fin ); // stats if (mds->logger) mds->logger->inc("ex"); @@ -823,7 +823,7 @@ void Migrator::encode_export_inode(CInode *in, bufferlist& enc_state, int new_au // mark auth assert(in->is_auth()); in->set_auth(false); - in->replica_nonce = CINODE_EXPORT_NONCE; + in->replica_nonce = CInode::EXPORT_NONCE; // *** other state too? 
@@ -857,7 +857,7 @@ int Migrator::encode_export_dir(list& dirstatelist, // mark assert(dir->is_auth()); dir->state_clear(CDir::STATE_AUTH); - dir->replica_nonce = CDIR_NONCE_EXPORT; + dir->replica_nonce = CDir::NONCE_EXPORT; list subdirs; @@ -875,7 +875,7 @@ int Migrator::encode_export_dir(list& dirstatelist, // suck up all waiters list waiting; - dir->take_waiting(CDIR_WAIT_ANY, waiting); // all dir waiters + dir->take_waiting(CDir::WAIT_ANY, waiting); // all dir waiters fin->take(waiting); // inodes @@ -936,7 +936,7 @@ int Migrator::encode_export_dir(list& dirstatelist, // waiters list waiters; - in->take_waiting(CINODE_WAIT_ANY, waiters); + in->take_waiting(CInode::WAIT_ANY, waiters); fin->take(waiters); } } @@ -1306,7 +1306,7 @@ void Migrator::handle_export_discover_2(MExportDirDiscover *m, CInode *in, int r /* if (in->is_frozen()) { dout(7) << "frozen, waiting." << endl; - in->add_waiter(CINODE_WAIT_AUTHPINNABLE, + in->add_waiter(CInode::WAIT_AUTHPINNABLE, new C_MDS_RetryMessage(mds,m)); return; } @@ -1357,7 +1357,7 @@ void Migrator::handle_export_prep(MExportDirPrep *m) dout(7) << "handle_export_prep on " << *dir << " (opening dir)" << endl; - diri->take_waiting(CINODE_WAIT_DIR, finished); + diri->take_waiting(CInode::WAIT_DIR, finished); } assert(dir->is_auth() == false); @@ -1412,7 +1412,7 @@ void Migrator::handle_export_prep(MExportDirPrep *m) in->set_dir( new CDir(in, mds->mdcache, false) ); m->get_dir(in->ino())->update_dir(in->dir); dout(7) << " added " << *in->dir << endl; - in->take_waiting(CINODE_WAIT_DIR, finished); + in->take_waiting(CInode::WAIT_DIR, finished); } } } @@ -1785,7 +1785,7 @@ void Migrator::import_finish(CDir *dir, bool now) // ok now finish contexts dout(5) << "finishing any waiters on imported data" << endl; - dir->finish_waiting(CDIR_WAIT_IMPORTED); + dir->finish_waiting(CDir::WAIT_IMPORTED); // log it if (mds->logger) { @@ -1839,7 +1839,7 @@ void Migrator::decode_import_inode(CDentry *dn, bufferlist& bl, int& off, int ol // adjust 
replica list //assert(!in->is_replica(oldauth)); // not true on failed export - in->add_replica( oldauth, CINODE_EXPORT_NONCE ); + in->add_replica( oldauth, CInode::EXPORT_NONCE ); if (in->is_replica(mds->get_nodeid())) in->remove_replica(mds->get_nodeid()); @@ -1929,11 +1929,11 @@ int Migrator::decode_import_dir(bufferlist& bl, // a replica's presense in my cache implies/forces it's presense in authority's. list waiters; - dir->take_waiting(CDIR_WAIT_ANY, waiters); + dir->take_waiting(CDir::WAIT_ANY, waiters); for (list::iterator it = waiters.begin(); it != waiters.end(); it++) - import_root->add_waiter(CDIR_WAIT_IMPORTED, *it); + import_root->add_waiter(CDir::WAIT_IMPORTED, *it); dout(15) << "doing contents" << endl; diff --git a/branches/sage/cephmds2/mds/Renamer.cc b/branches/sage/cephmds2/mds/Renamer.cc index 15a81bda7524d..9abbb422a7856 100644 --- a/branches/sage/cephmds2/mds/Renamer.cc +++ b/branches/sage/cephmds2/mds/Renamer.cc @@ -380,7 +380,7 @@ void Renamer::file_rename(CDentry *srcdn, CDentry *destdn, Context *onfinish) assert(0); // set waiter on the inode (is this the best place?) - in->add_waiter(CINODE_WAIT_RENAMEACK, + in->add_waiter(CInode::WAIT_RENAMEACK, new C_MDC_RenameAck(this, srcdir, in, onfinish)); return; @@ -423,11 +423,11 @@ void Renamer::file_rename(CDentry *srcdn, CDentry *destdn, Context *onfinish) file_rename_notify(in, srcdir, srcname, destdir, destname, notify, mds->get_nodeid()); // wait for MRenameNotifyAck's - in->add_waiter(CINODE_WAIT_RENAMENOTIFYACK, + in->add_waiter(CInode::WAIT_RENAMENOTIFYACK, new C_MDC_RenameNotifyAck(this, in, mds->get_nodeid())); // i am initiator // wait for finish - in->add_waiter(CINODE_WAIT_RENAMEACK, + in->add_waiter(CInode::WAIT_RENAMEACK, new C_MDC_RenameAck(this, srcdir, in, onfinish)); } else { // sweet, no notify necessary, we're done! @@ -443,7 +443,7 @@ void Renamer::handle_rename_ack(MRenameAck *m) dout(7) << "handle_rename_ack on " << *in << endl; // all done! 
- in->finish_waiting(CINODE_WAIT_RENAMEACK); + in->finish_waiting(CInode::WAIT_RENAMEACK); delete m; } @@ -560,7 +560,7 @@ void Renamer::file_rename_foreign_src(CDentry *srcdn, // wait for MRenameNotifyAck's - in->add_waiter(CINODE_WAIT_RENAMENOTIFYACK, + in->add_waiter(CInode::WAIT_RENAMENOTIFYACK, new C_MDC_RenameNotifyAck(this, in, initiator)); } @@ -591,7 +591,7 @@ void Renamer::handle_rename_notify_ack(MRenameNotifyAck *m) if (rename_waiting_for_ack[in->ino()].empty()) { // last one! rename_waiting_for_ack.erase(in->ino()); - in->finish_waiting(CINODE_WAIT_RENAMENOTIFYACK, 0); + in->finish_waiting(CInode::WAIT_RENAMENOTIFYACK, 0); } else { dout(7) << "still waiting for " << rename_waiting_for_ack[in->ino()] << endl; } @@ -613,7 +613,7 @@ void Renamer::file_rename_ack(CInode *in, int initiator) if (initiator == mds->get_nodeid()) { // it's me, finish dout(7) << "file_rename_ack i am initiator, finishing" << endl; - in->finish_waiting(CINODE_WAIT_RENAMEACK); + in->finish_waiting(CInode::WAIT_RENAMEACK); } else { // send ack dout(7) << "file_rename_ack sending MRenameAck to initiator " << initiator << endl; diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index cb530d16a0c72..fa73eef2b83d9 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -543,7 +543,7 @@ bool Server::try_open_dir(CInode *in, MClientRequest *req) // doh! 
dout(10) << " dir inode is frozen, can't open dir, waiting " << *in << endl; assert(in->get_parent_dir()); - in->get_parent_dir()->add_waiter(CDIR_WAIT_UNFREEZE, + in->get_parent_dir()->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryRequest(mds, req, in)); return false; } @@ -861,6 +861,7 @@ void Server::handle_hash_readdir(MHashReaddir *m) void Server::handle_hash_readdir_reply(MHashReaddirReply *m) { + /* CInode *cur = mdcache->get_inode(m->get_ino()); assert(cur); @@ -894,7 +895,7 @@ void Server::handle_hash_readdir_reply(MHashReaddirReply *m) // do these finishers. they'll copy the results. list finished; - dir->take_waiting(CDIR_WAIT_THISHASHEDREADDIR, finished); + dir->take_waiting(CDir::WAIT_THISHASHEDREADDIR, finished); finish_contexts(finished); // now discard these results @@ -912,7 +913,8 @@ void Server::handle_hash_readdir_reply(MHashReaddirReply *m) dir->auth_unpin(); // trigger any waiters for next hashed readdir cycle - dir->take_waiting(CDIR_WAIT_NEXTHASHEDREADDIR, mds->finished_queue); + dir->take_waiting(CDir::WAIT_NEXTHASHEDREADDIR, mds->finished_queue); + */ } @@ -985,7 +987,7 @@ void Server::handle_client_readdir(MClientRequest *req, if (cur->dir->is_hashed() && cur->dir->is_unhashing()) { dout(10) << "unhashing, waiting" << endl; - cur->dir->add_waiter(CDIR_WAIT_UNFREEZE, + cur->dir->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryRequest(mds, req, cur)); return; } @@ -1007,16 +1009,17 @@ void Server::handle_client_readdir(MClientRequest *req, if (dir->is_hashed()) { // HASHED + /* dout(7) << "hashed dir" << endl; if (!dir->can_auth_pin()) { dout(7) << "can't auth_pin dir " << *dir << " waiting" << endl; - dir->add_waiter(CDIR_WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, req, cur)); + dir->add_waiter(CDir::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, req, cur)); return; } if (!dir->hashed_readdir.empty()) { dout(7) << "another readdir gather in progres, waiting" << endl; - dir->add_waiter(CDIR_WAIT_NEXTHASHEDREADDIR, new 
C_MDS_RetryRequest(mds, req, cur)); + dir->add_waiter(CDir::WAIT_NEXTHASHEDREADDIR, new C_MDS_RetryRequest(mds, req, cur)); return; } @@ -1038,8 +1041,9 @@ void Server::handle_client_readdir(MClientRequest *req, } // wait - dir->add_waiter(CDIR_WAIT_THISHASHEDREADDIR, + dir->add_waiter(CDir::WAIT_THISHASHEDREADDIR, new C_MDS_HashReaddir(this, req, dir)); + */ } else { // NON-HASHED // build dir contents @@ -1173,7 +1177,7 @@ CDir *Server::validate_new_dentry_dir(MClientRequest *req, CInode *diri, string& // dir auth pinnable? if (!dir->can_auth_pin()) { dout(7) << "validate_new_dentry_dir: dir " << *dir << " not pinnable, waiting" << endl; - dir->add_waiter(CDIR_WAIT_AUTHPINNABLE, + dir->add_waiter(CDir::WAIT_AUTHPINNABLE, new C_MDS_RetryRequest(mds, req, diri)); return false; } @@ -1181,7 +1185,7 @@ CDir *Server::validate_new_dentry_dir(MClientRequest *req, CInode *diri, string& // frozen? if (dir->is_frozen()) { dout(7) << "dir is frozen " << *dir << endl; - dir->add_waiter(CDIR_WAIT_UNFREEZE, + dir->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryRequest(mds, req, diri)); return false; } @@ -1218,7 +1222,7 @@ int Server::prepare_mknod(MClientRequest *req, CInode *diri, if (*pdn) { if (!(*pdn)->can_read(req)) { dout(10) << "waiting on (existing!) dentry " << **pdn << endl; - dir->add_waiter(CDIR_WAIT_DNREAD, name, new C_MDS_RetryRequest(mds, req, diri)); + dir->add_waiter(CDir::WAIT_DNREAD, name, new C_MDS_RetryRequest(mds, req, diri)); return 0; } @@ -1507,7 +1511,7 @@ void Server::handle_client_link_2(int r, MClientRequest *req, CInode *diri, vect mds->send_message_mds(new MInodeLink(targeti->ino(), mds->get_nodeid()), targeti->authority().first, MDS_PORT_CACHE); // wait - targeti->add_waiter(CINODE_WAIT_LINK, + targeti->add_waiter(CInode::WAIT_LINK, new C_MDS_RemoteLink(this, req, diri, dn, targeti)); return; } @@ -1584,7 +1588,7 @@ void Server::handle_client_unlink(MClientRequest *req, // have it. locked? 
if (!dn->can_read(req)) { dout(10) << " waiting on " << *dn << endl; - dir->add_waiter(CDIR_WAIT_DNREAD, + dir->add_waiter(CDir::WAIT_DNREAD, name, new C_MDS_RetryRequest(mds, req, diri)); return; @@ -1653,7 +1657,7 @@ void Server::handle_client_unlink(MClientRequest *req, // i should be exporting this now/soon, since the dir is empty. dout(7) << "handle_client_rmdir dir is auth, but not inode." << endl; mdcache->migrator->export_empty_import(in->dir); - in->dir->add_waiter(CDIR_WAIT_UNFREEZE, + in->dir->add_waiter(CDir::WAIT_UNFREEZE, new C_MDS_RetryRequest(mds, req, diri)); return; } @@ -1861,7 +1865,7 @@ void Server::handle_client_rename(MClientRequest *req, // xlocked? if (srcdn && !srcdn->can_read(req)) { dout(10) << " waiting on " << *srcdn << endl; - srcdir->add_waiter(CDIR_WAIT_DNREAD, + srcdir->add_waiter(CDir::WAIT_DNREAD, srcname, new C_MDS_RetryRequest(mds, req, srcdiri)); return; diff --git a/branches/sage/cephmds2/mds/events/EExport.h b/branches/sage/cephmds2/mds/events/EExport.h index 69cfbb31aa009..eaa6d8e4bcce7 100644 --- a/branches/sage/cephmds2/mds/events/EExport.h +++ b/branches/sage/cephmds2/mds/events/EExport.h @@ -26,31 +26,31 @@ class EExport : public LogEvent { public: EMetaBlob metablob; // exported dir protected: - inodeno_t dirino; - set bounds; + dirfrag_t base; + set bounds; public: EExport(CDir *dir) : LogEvent(EVENT_EXPORT), - dirino(dir->ino()) { + base(dir->dirfrag()) { metablob.add_dir_context(dir); } EExport() : LogEvent(EVENT_EXPORT) { } - set &get_bounds() { return bounds; } + set &get_bounds() { return bounds; } void print(ostream& out) { - out << "export " << dirino << " " << metablob; + out << "export " << base << " " << metablob; } virtual void encode_payload(bufferlist& bl) { metablob._encode(bl); - bl.append((char*)&dirino, sizeof(dirino)); + bl.append((char*)&base, sizeof(base)); ::_encode(bounds, bl); } void decode_payload(bufferlist& bl, int& off) { metablob._decode(bl, off); - bl.copy(off, sizeof(dirino), 
(char*)&dirino); - off += sizeof(dirino); + bl.copy(off, sizeof(base), (char*)&base); + off += sizeof(base); ::_decode(bounds, bl, off); } diff --git a/branches/sage/cephmds2/mds/journal.cc b/branches/sage/cephmds2/mds/journal.cc index f4d73fad1785d..e43699ea74766 100644 --- a/branches/sage/cephmds2/mds/journal.cc +++ b/branches/sage/cephmds2/mds/journal.cc @@ -555,9 +555,7 @@ void EPurgeFinish::replay(MDS *mds) bool EExport::has_expired(MDS *mds) { - CInode *diri = mds->mdcache->get_inode(dirino); - if (!diri) return true; - CDir *dir = diri->dir; + CDir *dir = mds->mdcache->get_dirfrag(base); if (!dir) return true; if (!mds->mdcache->migrator->is_exporting(dir)) return true; @@ -567,9 +565,7 @@ bool EExport::has_expired(MDS *mds) void EExport::expire(MDS *mds, Context *c) { - CInode *diri = mds->mdcache->get_inode(dirino); - assert(diri); - CDir *dir = diri->dir; + CDir *dir = mds->mdcache->get_dirfrag(base); assert(dir); assert(mds->mdcache->migrator->is_exporting(dir)); @@ -579,20 +575,17 @@ void EExport::expire(MDS *mds, Context *c) void EExport::replay(MDS *mds) { - dout(10) << "EExport.replay " << dirino << endl; + dout(10) << "EExport.replay " << base << endl; metablob.replay(mds); - CInode *diri = mds->mdcache->get_inode(dirino); - assert(diri); - CDir *dir = diri->dir; + CDir *dir = mds->mdcache->get_dirfrag(base); assert(dir); set realbounds; - for (set::iterator p = bounds.begin(); + for (set::iterator p = bounds.begin(); p != bounds.end(); ++p) { - CInode *bdi = mds->mdcache->get_inode(*p); - CDir *bd = bdi->dir; + CDir *bd = mds->mdcache->get_dirfrag(*p); assert(bd); realbounds.insert(bd); } diff --git a/branches/sage/cephmds2/mds/mdstypes.h b/branches/sage/cephmds2/mds/mdstypes.h index 73bb0f0d68b16..ac0e0db41eb7e 100644 --- a/branches/sage/cephmds2/mds/mdstypes.h +++ b/branches/sage/cephmds2/mds/mdstypes.h @@ -52,6 +52,7 @@ struct dirfrag_t { frag_t frag; dirfrag_t() { } + //dirfrag_t(inodeno_t i) : ino(i) { } dirfrag_t(inodeno_t i, frag_t f) : 
ino(i), frag(f) { } };
diff --git a/branches/sage/cephmds2/script/check_cache_dumps.pl b/branches/sage/cephmds2/script/check_cache_dumps.pl
new file mode 100755
index 0000000000000..95bd28a474991
--- /dev/null
+++ b/branches/sage/cephmds2/script/check_cache_dumps.pl
@@ -0,0 +1,56 @@
+#!/usr/bin/perl
+# Cross-check MDS cache dumps: compare each replica's nonce with the nonce its auth recorded.
+my $epoch = shift || die "specify epoch";
+# Input: cachedump.$epoch.mds0, cachedump.$epoch.mds1, ... (stops at the first missing file).
+my %auth; # mds -> id -> replica -> nonce
+my %replica; # mds -> id -> auth -> nonce
+
+print "reading\n";
+for (my $i=0; -e "cachedump.$epoch.mds$i"; $i++) {
+  open(my $fh, '<', "cachedump.$epoch.mds$i") or die "can't open cachedump.$epoch.mds$i: $!";
+  while (<$fh>) {  # one dump line per pass; $fh is closed when it goes out of scope
+    my ($name,$s);
+    ($name,$s) = /^\[(inode \d+) \S+ (\S+)/;  # name = "inode <n>", s = 2nd token after it
+    ($name,$s) = /^\[(dir \d+) \S+ (\S+)/ unless $name;  # same layout for "dir <n>"
+    ($name,$s) = /^\[dentry (\S+) (\S+)/ unless $name;  # name = token after "dentry", s = next
+    if ($name) {
+      if ($s =~ /^auth/) {
+        $auth{$i}->{$name} = {};
+        my ($rl) = $s =~ /\{(.*)\}/;  # replica list: comma-separated who=nonce inside {...}
+        for my $r (split(/,/,$rl)) {
+          my ($who,$nonce) = $r =~ /(\d+)\=(\d+)/;
+          $auth{$i}->{$name}->{$who} = $nonce;
+          #print "auth $name rep by $who $nonce $s\n";
+        }
+      }
+      else {
+        my ($first,$second,$n) = $s =~ /rep@(\d+)\,([\-\d]+)\.(\d+)/;  # rep@<mds>,<mds or negative>.<nonce>
+        die $_ unless defined $first;  # no match leaves undef, and undef >= 0 is true, so test definedness
+        $replica{$i}->{$name}->{$first} = $n;
+        if ($second >= 0) {  # a negative second id is not recorded
+          $replica{$i}->{$name}->{$second} = $n;
+        }
+      }
+    }
+  }
+}
+
+print "verifying replicas\n";
+for my $mds (keys %replica) {
+  for my $name (keys %{$replica{$mds}}) {
+    for my $auth (keys %{$replica{$mds}->{$name}}) {
+      if ($auth{$auth}->{$name}->{$mds}) {
+        if ($auth{$auth}->{$name}->{$mds} < $replica{$mds}->{$name}->{$auth}) {  # auth's nonce is lower than the replica's
+          print "problem: mds$mds has $name from mds$auth nonce $replica{$mds}->{$name}->{$auth}, auth has nonce $auth{$auth}->{$name}->{$mds}\n";
+        } else {
+          print "ok: mds$mds has $name from mds$auth nonce $replica{$mds}->{$name}->{$auth}, auth has nonce $auth{$auth}->{$name}->{$mds}\n";
+        }
+      } else {
+        print "??: mds$mds has $name from mds$auth nonce $replica{$mds}->{$name}->{$auth}, auth has no nonce\n";
+      }
+
+    }
+  }
+}
+
+
-- 
2.39.5