From f2bedc9225864585b044b4c17e542ee80eea21ca Mon Sep 17 00:00:00 2001 From: sageweil Date: Mon, 12 Mar 2007 18:40:27 +0000 Subject: [PATCH] - CDentry wasn't setting auth bit on import - discover_reply bugfix - improved MDCache.trim() and friends.. proper CEx logic. - cleaned up MDCache.shutdown_pass - Migrator::audit() will verify import/export_state is clean - importing, exporting pins on CDir - fixed bug with EMetaBlob expire logic (on import) git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1214 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/cephmds2/mds/CDentry.h | 1 + branches/sage/cephmds2/mds/CDir.h | 28 +- branches/sage/cephmds2/mds/CInode.h | 7 - branches/sage/cephmds2/mds/MDCache.cc | 243 ++++++++++-------- branches/sage/cephmds2/mds/MDCache.h | 5 +- branches/sage/cephmds2/mds/Migrator.cc | 58 ++++- branches/sage/cephmds2/mds/Migrator.h | 1 + branches/sage/cephmds2/mds/Server.cc | 5 - branches/sage/cephmds2/mds/journal.cc | 30 ++- .../sage/cephmds2/messages/MDiscoverReply.h | 36 +-- branches/sage/cephmds2/msg/FakeMessenger.cc | 11 +- 11 files changed, 257 insertions(+), 168 deletions(-) diff --git a/branches/sage/cephmds2/mds/CDentry.h b/branches/sage/cephmds2/mds/CDentry.h index 49ed6dc9cfb53..35fa87c79495a 100644 --- a/branches/sage/cephmds2/mds/CDentry.h +++ b/branches/sage/cephmds2/mds/CDentry.h @@ -227,6 +227,7 @@ class CDentry : public MDSCacheObject, public LRUObject { add_replica(from, EXPORT_NONCE); if (is_replica(to)) remove_replica(to); + state_set(CDentry::STATE_AUTH); } // -- locking diff --git a/branches/sage/cephmds2/mds/CDir.h b/branches/sage/cephmds2/mds/CDir.h index 511854a26557f..9f4c21ab06afa 100644 --- a/branches/sage/cephmds2/mds/CDir.h +++ b/branches/sage/cephmds2/mds/CDir.h @@ -79,8 +79,6 @@ class CDir : public MDSCacheObject { static const int PIN_EXPORTING = 10; static const int PIN_IMPORTBOUND = 11; static const int PIN_EXPORTBOUND = 12; - static const int PIN_HASHED = 13; - static const int PIN_HASHING = 14; static const int PIN_DIRTY = 15; static const int PIN_REQUEST = 16; static const int PIN_LOGGINGEXPORTFINISH = 17; @@ -99,8 +97,6 @@ class CDir : public MDSCacheObject { // case PIN_FREEZELEAF: return "freezeleaf"; case PIN_PROXY: return "proxy"; case PIN_AUTHPIN: return "authpin"; - case PIN_HASHED: return "hashed"; - case PIN_HASHING: return "hashing"; case PIN_DIRTY: return "dirty"; case PIN_REQUEST: return "request"; case PIN_LOGGINGEXPORTFINISH: return "loggingexportfinish"; @@ -121,12 +117,11 @@ class CDir : public MDSCacheObject { static const unsigned STATE_FETCHING = (1<< 9); // currenting fetching static const unsigned STATE_DELETED = (1<<10); //static const unsigned STATE_IMPORT = (1<<11); // flag set if this is an import. - static const unsigned STATE_EXPORT = (1<<12); + static const unsigned STATE_EXPORT = (1<<12); static const unsigned STATE_IMPORTBOUND = (1<<13); static const unsigned STATE_EXPORTBOUND = (1<<14); - static const unsigned STATE_HASHED = (1<<15); // if hashed - static const unsigned STATE_HASHING = (1<<16); - static const unsigned STATE_UNHASHING = (1<<17); + static const unsigned STATE_EXPORTING = (1<<15); + static const unsigned STATE_IMPORTING = (1<<16); // common states static const unsigned STATE_CLEAN = 0; @@ -139,10 +134,11 @@ class CDir : public MDSCacheObject { static const unsigned MASK_STATE_IMPORT_KEPT = //STATE_IMPORT| STATE_EXPORT + |STATE_IMPORTING |STATE_IMPORTBOUND|STATE_EXPORTBOUND |STATE_FROZENTREE|STATE_PROXY; static const unsigned MASK_STATE_EXPORT_KEPT = - STATE_HASHED + STATE_EXPORTING |STATE_IMPORTBOUND|STATE_EXPORTBOUND |STATE_FROZENTREE |STATE_FROZENDIR @@ -338,10 +334,11 @@ class CDir : public MDSCacheObject { bool is_proxy() { return state & STATE_PROXY; } //bool is_import() { return state & STATE_IMPORT; } //bool is_export() { return state & STATE_EXPORT; } + bool is_exporting() { return state & STATE_EXPORTING; } + bool is_importing() { return state & STATE_IMPORTING; } - bool is_hashed() { return state & STATE_HASHED; } - bool is_hashing() { return state & STATE_HASHING; } - bool is_unhashing() { return state & STATE_UNHASHING; } + bool is_hashed() { return false; } + bool is_hashing() { return false; } bool is_rep() { if (dir_rep == REP_NONE) return false; @@ -589,11 +586,8 @@ class CDirExport { dir->projected_version = dir->version = st.version; // twiddle state - if (dir->state & CDir::STATE_HASHED) - dir->state_set( CDir::STATE_AUTH ); // just inherit auth flag when hashed - else - dir->state = (dir->state & CDir::MASK_STATE_IMPORT_KEPT) | // remember import flag, etc. - (st.state & CDir::MASK_STATE_EXPORTED); + dir->state = (dir->state & CDir::MASK_STATE_IMPORT_KEPT) | // remember import flag, etc. + (st.state & CDir::MASK_STATE_EXPORTED); dir->dir_rep = st.dir_rep; dir->popularity[MDS_POP_JUSTME] += st.popularity_justme; diff --git a/branches/sage/cephmds2/mds/CInode.h b/branches/sage/cephmds2/mds/CInode.h index c2bec5b4104d9..78422921a27ab 100644 --- a/branches/sage/cephmds2/mds/CInode.h +++ b/branches/sage/cephmds2/mds/CInode.h @@ -177,8 +177,6 @@ class CInode : public MDSCacheObject { // distributed caching (old) pair dangling_auth; // explicit auth, when dangling. - //int num_request_pins; - // waiters multimap waiting; @@ -432,15 +430,10 @@ protected: linked to an active_request, so they're automatically cleaned up when a request is finished. pin at will! */ void request_pin_get() { - //if (num_request_pins == 0) get(PIN_REQUEST); - //num_request_pins++; } void request_pin_put() { - //num_request_pins--; - //if (num_request_pins == 0) put(PIN_REQUEST); - //assert(num_request_pins >= 0); } void bad_put(int by) { diff --git a/branches/sage/cephmds2/mds/MDCache.cc b/branches/sage/cephmds2/mds/MDCache.cc index ce35ab97d63ed..8b3dbe0ed6d6d 100644 --- a/branches/sage/cephmds2/mds/MDCache.cc +++ b/branches/sage/cephmds2/mds/MDCache.cc @@ -96,7 +96,6 @@ MDCache::MDCache(MDS *m) lru.lru_set_max(g_conf.mds_cache_size); lru.lru_set_midpoint(g_conf.mds_cache_mid); - did_shutdown_exports = false; did_shutdown_log_cap = false; shutdown_commits = 0; } @@ -343,16 +342,15 @@ void MDCache::adjust_subtree_auth(CDir *dir, pair auth) * but the inode is auth. * * import points don't need to be pinned the same way simply because the - * exporter is pinned and thus always open. + * exporting mds is pinning the exprot (as above) thus the dir is + * always open on the importer. */ void MDCache::adjust_export_state(CDir *dir) { - //if (!dir->is_auth() && dir->inode->is_auth()) { - // be auth bit agnostic, so that we work during recovery // (before recalc_auth_bits) - if (!dir->authority().first == mds->get_nodeid() && - dir->inode->authority().first != mds->get_nodeid()) { + if (dir->authority().first != mds->get_nodeid() && + dir->inode->authority().first == mds->get_nodeid()) { // export. if (!dir->state_test(CDir::STATE_EXPORT)) { dout(10) << "adjust_export_state pinning new export " << *dir << endl; @@ -1610,9 +1608,14 @@ bool MDCache::trim(int max) if (!dn->is_auth()) { pair auth = dn->authority(); - for (int a=auth.first; - a != auth.second && auth.second >= 0 && auth.second != mds->get_nodeid(); - a=auth.second) { + for (int p=0; p<2; p++) { + int a = auth.first; + if (p) a = auth.second; + if (a < 0 || (p == 1 && auth.second == auth.first)) break; + if (mds->get_nodeid() == auth.second && + con->is_importing()) break; // don't send any expire while importing. + if (a == mds->get_nodeid()) continue; // on export, ignore myself. + dout(12) << " sending expire to mds" << a << " on " << *dn << endl; assert(a != mds->get_nodeid()); if (expiremap.count(a) == 0) @@ -1630,7 +1633,7 @@ bool MDCache::trim(int max) // expire the inode, too. CInode *in = dn->get_inode(); assert(in); - trim_inode(dn, in, con->dirfrag(), expiremap); + trim_inode(dn, in, con, expiremap); } else { assert(dn->is_null()); @@ -1658,11 +1661,11 @@ bool MDCache::trim(int max) p != ls.end(); ++p) if ((*p)->get_num_ref() == 0) - trim_dirfrag(*p, (*p)->dirfrag(), expiremap); + trim_dirfrag(*p, *p, expiremap); // root inode? if (root->get_num_ref() == 0) - trim_inode(0, root, dirfrag_t(1,frag_t()), expiremap); // hrm, FIXME + trim_inode(0, root, 0, expiremap); // hrm, FIXME } // send expires @@ -1676,31 +1679,38 @@ bool MDCache::trim(int max) return true; } -void MDCache::trim_dirfrag(CDir *dir, dirfrag_t condf, map& expiremap) +void MDCache::trim_dirfrag(CDir *dir, CDir *con, map& expiremap) { assert(dir->get_num_ref() == 0); CInode *in = dir->get_inode(); if (!dir->is_auth()) { - pair dirauth = dir->authority(); - assert(dirauth.second < 100); // hack die bug die + pair auth = dir->authority(); // was this an auth delegation? (if so, slightly modified container) - dirfrag_t dcondf = condf; + dirfrag_t condf; if (dir->is_subtree_root()) { - dout(12) << " this is a subtree, removing from map, container is " << *dir << endl; - dcondf = dir->dirfrag(); + dout(12) << " subtree root, container is " << *dir << endl; + con = dir; + condf = dir->dirfrag(); + } else { + condf = con->dirfrag(); } - for (int a=dirauth.first; - a != dirauth.second && dirauth.second >= 0 && dirauth.second != mds->get_nodeid(); - a=dirauth.second) { + for (int p=0; p<2; p++) { + int a = auth.first; + if (p) a = auth.second; + if (a < 0 || (p == 1 && auth.second == auth.first)) break; + if (mds->get_nodeid() == auth.second && + con->is_importing()) break; // don't send any expire while importing. + if (a == mds->get_nodeid()) continue; // on export, ignore myself. + dout(12) << " sending expire to mds" << a << " on " << *in->dir << endl; assert(a != mds->get_nodeid()); if (expiremap.count(a) == 0) expiremap[a] = new MCacheExpire(mds->get_nodeid()); - expiremap[a]->add_dir(dcondf, dir->dirfrag(), dir->replica_nonce); + expiremap[a]->add_dir(condf, dir->dirfrag(), dir->replica_nonce); } } @@ -1709,7 +1719,7 @@ void MDCache::trim_dirfrag(CDir *dir, dirfrag_t condf, map& in->close_dirfrag(dir->dirfrag().frag); } -void MDCache::trim_inode(CDentry *dn, CInode *in, dirfrag_t condf, map& expiremap) +void MDCache::trim_inode(CDentry *dn, CInode *in, CDir *con, map& expiremap) { dout(15) << "trim_inode " << *in << endl; assert(in->get_num_ref() == 0); @@ -1720,20 +1730,31 @@ void MDCache::trim_inode(CDentry *dn, CInode *in, dirfrag_t condf, map::iterator p = dfls.begin(); p != dfls.end(); ++p) - trim_dirfrag(*p, condf, expiremap); + trim_dirfrag(*p, con ? con:*p, expiremap); // if no container (e.g. root dirfrag), use *p // INODE if (!in->is_auth()) { pair auth = in->authority(); - for (int a=auth.first; - a != auth.second && auth.second >= 0 && auth.second != mds->get_nodeid(); - a=auth.second) { + dirfrag_t df; + if (con) + df = con->dirfrag(); + else + df = dirfrag_t(1,frag_t()); + + for (int p=0; p<2; p++) { + int a = auth.first; + if (p) a = auth.second; + if (a < 0 || (p == 1 && auth.second == auth.first)) break; + if (con && mds->get_nodeid() == auth.second && + con->is_importing()) break; // don't send any expire while importing. + if (a == mds->get_nodeid()) continue; // on export, ignore myself. + dout(12) << " sending expire to mds" << a << " on " << *in << endl; assert(a != mds->get_nodeid()); if (expiremap.count(a) == 0) expiremap[a] = new MCacheExpire(mds->get_nodeid()); - expiremap[a]->add_inode(condf, in->ino(), in->get_replica_nonce()); + expiremap[a]->add_inode(df, in->ino(), in->get_replica_nonce()); } } @@ -2051,7 +2072,7 @@ void MDCache::shutdown_start() bool MDCache::shutdown_pass() { dout(7) << "shutdown_pass" << endl; - //assert(mds->is_shutting_down()); + if (mds->is_out()) { dout(7) << " already shut down" << endl; show_cache(); @@ -2059,24 +2080,6 @@ bool MDCache::shutdown_pass() return true; } - // unhash dirs? - /* - if (!hashdirs.empty()) { - // unhash any of my dirs? - for (set::iterator it = hashdirs.begin(); - it != hashdirs.end(); - it++) { - CDir *dir = *it; - if (!dir->is_auth()) continue; - if (dir->is_unhashing()) continue; - //migrator->unhash_dir(dir); - } - - dout(7) << "waiting for dirs to unhash" << endl; - return false; - } - */ - // commit dirs? if (g_conf.mds_commit_on_shutdown) { @@ -2108,58 +2111,55 @@ bool MDCache::shutdown_pass() trim(0); dout(5) << "lru size now " << lru.lru_get_size() << endl; - mds->mdlog->trim(0); - - // (wait for) flush log? - if (g_conf.mds_log_flush_on_shutdown) { - if (mds->mdlog->get_non_importmap_events()) { - dout(7) << "waiting for log to flush .. " << mds->mdlog->get_num_events() - << " (" << mds->mdlog->get_non_importmap_events() << ")" << endl; - return false; - } - } - + // SUBTREES // send all imports back to 0. - if (mds->get_nodeid() != 0 && + if (!subtrees.empty() && + mds->get_nodeid() != 0 && !migrator->is_exporting() && !migrator->is_importing()) { - // flush what i can from the cache first.. - trim(0); - // export to root + dout(7) << "looking for subtrees to export to mds0" << endl; + list ls; for (map >::iterator it = subtrees.begin(); it != subtrees.end(); it++) { CDir *dir = it->first; - if (dir->inode->is_root()) continue; if (dir->is_frozen() || dir->is_freezing()) continue; if (!dir->is_fullauth()) continue; - + ls.push_back(dir); + } + for (list::iterator p = ls.begin(); p != ls.end(); ++p) { + CDir *dir = *p; dout(7) << "sending " << *dir << " back to mds0" << endl; migrator->export_dir(dir, 0); } - did_shutdown_exports = true; - } - - // close root? - if (lru.lru_get_size() == 0 && - root && - root->is_pinned_by(CInode::PIN_DIRTY)) { - dout(7) << "clearing root inode dirty flag" << endl; - root->put(CInode::PIN_DIRTY); } - + // subtrees map not empty yet? if (!subtrees.empty()) { dout(7) << "still have " << num_subtrees() << " subtrees" << endl; - show_cache(); + show_subtrees(); + //show_cache(); return false; } assert(subtrees.empty()); assert(!migrator->is_exporting()); assert(!migrator->is_importing()); - + + + // LOG + mds->mdlog->trim(0); + + // (wait for) flush log? + if (g_conf.mds_log_flush_on_shutdown) { + if (mds->mdlog->get_non_importmap_events()) { + dout(7) << "waiting for log to flush .. " << mds->mdlog->get_num_events() + << " (" << mds->mdlog->get_non_importmap_events() << ")" << endl; + return false; + } + } + // cap log? if (g_conf.mds_log_flush_on_shutdown) { @@ -3340,46 +3340,50 @@ void MDCache::handle_discover(MDiscover *dis) void MDCache::handle_discover_reply(MDiscoverReply *m) { // starting point - CInode *cur; list finished, error; - if (m->has_root()) { - // nowhere! - dout(7) << "discover_reply root + " << m->get_path() << " " << m->get_num_inodes() << " inodes" << endl; + // grab base inode + CInode *cur = get_inode(m->get_base_ino()); + + if (cur) { + dout(7) << "discover_reply " << *cur << " + " << m->get_path() << ", have " << m->get_num_inodes() << " inodes" << endl; + } else { + if (!m->has_root()) { + dout(7) << "discover_reply don't have base ino " << m->get_base_ino() << ", dropping" << endl; + delete m; + return; + } + + // it's the root inode. assert(!root); - assert(m->get_base_ino() == 0); + assert(m->get_base_ino() == 1); assert(!m->has_base_dentry()); assert(!m->has_base_dir()); + dout(7) << "discover_reply root + " << m->get_path() << " " << m->get_num_inodes() << " inodes" << endl; + // add in root cur = new CInode(this, false); - - m->get_inode(0).update_inode(cur); + m->get_inode(0).update_inode(cur); // that thar 0 is an array index (the 0th inode in the reply). // root set_root( cur ); add_inode( cur ); - dout(7) << " got root: " << *cur << endl; - - // take waiters - finished.swap(waiting_for_root); - } else { - // grab inode - cur = get_inode(m->get_base_ino()); - - if (!cur) { - dout(7) << "discover_reply don't have base ino " << m->get_base_ino() << ", dropping" << endl; - delete m; - return; - } + dout(7) << "discover_reply got root " << *cur << endl; - dout(7) << "discover_reply " << *cur << " + " << m->get_path() << ", have " << m->get_num_inodes() << " inodes" << endl; + // take root waiters + finished.swap(waiting_for_root); } // fyi if (m->is_flag_error_dir()) dout(7) << " flag error, dir" << endl; if (m->is_flag_error_dn()) dout(7) << " flag error, dentry = " << m->get_error_dentry() << endl; - dout(10) << "depth is " << m->get_depth() << ", has_root = " << m->has_root() << endl; + dout(10) << "depth = " << m->get_depth() + << ", has base_dir/base_dn/root = " + << m->has_base_dir() << " / " << m->has_base_dentry() << " / " << m->has_root() + << ", num dirs/dentries/inodes = " + << m->get_num_dirs() << " / " << m->get_num_dentries() << " / " << m->get_num_inodes() + << endl; // loop over discover results. // indexese follow each ([[dir] dentry] inode) @@ -3445,7 +3449,7 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) break; } - if (i >= m->get_num_dentries()) break; + if (i >= m->get_last_dentry()) break; // dentry dout(7) << "i = " << i << " dentry is " << m->get_dentry(i).get_dname() << endl; @@ -3475,7 +3479,7 @@ void MDCache::handle_discover_reply(MDiscoverReply *m) finished); } - if (i >= m->get_num_inodes()) break; + if (i >= m->get_last_inode()) break; // inode dout(7) << "i = " << i << " ino is " << m->get_ino(i) << endl; @@ -3969,32 +3973,61 @@ void MDCache::handle_inode_unlink_ack(MInodeUnlinkAck *m) void MDCache::show_subtrees(int dbl) { //dout(10) << "show_subtrees" << endl; - + + if (dbl > g_conf.debug && dbl > g_conf.debug_mds) + return; // i won't print anything. + list > q; string indent; + set seen; + + // calc depth if (root && root->dir) q.push_back(pair(root->dir, 0)); - set seen; - + int depth = 0; while (!q.empty()) { CDir *dir = q.front().first; int d = q.front().second; q.pop_front(); + if (d > depth) depth = d; + // sanity check if (seen.count(dir)) dout(0) << "aah, already seen " << *dir << endl; assert(seen.count(dir) == 0); seen.insert(dir); + // nested items? + if (!subtrees[dir].empty()) { + for (set::iterator p = subtrees[dir].begin(); + p != subtrees[dir].end(); + ++p) + q.push_front(pair(*p, d+1)); + } + } + + + // print tree + if (root && root->dir) + q.push_back(pair(root->dir, 0)); + + while (!q.empty()) { + CDir *dir = q.front().first; + int d = q.front().second; + q.pop_front(); + // adjust indenter while ((unsigned)d < indent.size()) indent.resize(d); // pad - string pad = "__________________________________"; - pad.resize(12-indent.size()); + string pad = "______________________________________"; + pad.resize(depth*2+1-indent.size()); + if (!subtrees[dir].empty()) + pad[0] = '.'; // parent + string auth; if (dir->is_auth()) diff --git a/branches/sage/cephmds2/mds/MDCache.h b/branches/sage/cephmds2/mds/MDCache.h index 9b18aa09f4374..d58387dadc404 100644 --- a/branches/sage/cephmds2/mds/MDCache.h +++ b/branches/sage/cephmds2/mds/MDCache.h @@ -161,7 +161,6 @@ protected: // shutdown crap int shutdown_commits; - bool did_shutdown_exports; bool did_shutdown_log_cap; friend class C_MDC_ShutdownCommit; @@ -242,9 +241,9 @@ public: void set_cache_size(size_t max) { lru.lru_set_max(max); } size_t get_cache_size() { return lru.lru_get_size(); } bool trim(int max = -1); // trim cache - void trim_dirfrag(CDir *dir, dirfrag_t condf, + void trim_dirfrag(CDir *dir, CDir *con, map& expiremap); - void trim_inode(CDentry *dn, CInode *in, dirfrag_t condf, + void trim_inode(CDentry *dn, CInode *in, CDir *con, map& expiremap); void trim_non_auth(); // trim out trimmable non-auth items diff --git a/branches/sage/cephmds2/mds/Migrator.cc b/branches/sage/cephmds2/mds/Migrator.cc index 61dec66b704b0..02fde9f0f4166 100644 --- a/branches/sage/cephmds2/mds/Migrator.cc +++ b/branches/sage/cephmds2/mds/Migrator.cc @@ -188,12 +188,14 @@ void Migrator::handle_mds_failure(int who) dir->unfreeze_tree(); // cancel the freeze dir->auth_unpin(); // remove the auth_pin (that was holding up the freeze) export_state.erase(dir); // clean up + dir->state_clear(CDir::STATE_EXPORTING); break; case EXPORT_FREEZING: dout(10) << "export state=freezing : canceling freeze" << endl; dir->unfreeze_tree(); // cancel the freeze export_state.erase(dir); // clean up + dir->state_clear(CDir::STATE_EXPORTING); break; // NOTE: state order reversal, warning comes after loggingstart+prepping @@ -219,12 +221,14 @@ void Migrator::handle_mds_failure(int who) cache->adjust_subtree_auth(dir, mds->get_nodeid()); cache->try_subtree_merge(dir); export_state.erase(dir); // clean up + dir->state_clear(CDir::STATE_EXPORTING); break; case EXPORT_EXPORTING: dout(10) << "export state=exporting : reversing, and unfreezing" << endl; export_reverse(dir); export_state.erase(dir); // clean up + dir->state_clear(CDir::STATE_EXPORTING); break; case EXPORT_LOGGINGFINISH: @@ -361,6 +365,47 @@ void Migrator::handle_mds_failure(int who) +void Migrator::audit() +{ + if (g_conf.debug_mds < 5) return; // hrm. + + // import_state + for (map::iterator p = import_state.begin(); + p != import_state.end(); + p++) { + if (p->second == IMPORT_DISCOVERED) { + CInode *in = cache->get_inode(p->first.ino); + assert(in); + continue; + } + CDir *dir = cache->get_dirfrag(p->first); + assert(dir); + if (p->second == IMPORT_PREPPING) continue; + assert(dir->auth_is_ambiguous()); + assert(dir->authority().first == mds->get_nodeid() || + dir->authority().second == mds->get_nodeid()); + } + + // export_state + for (map::iterator p = export_state.begin(); + p != export_state.end(); + p++) { + CDir *dir = p->first; + if (p->second == EXPORT_DISCOVERING || + p->second == EXPORT_FREEZING) continue; + assert(dir->auth_is_ambiguous()); + assert(dir->authority().first == mds->get_nodeid() || + dir->authority().second == mds->get_nodeid()); + } + + // ambiguous+me subtrees should be importing|exporting + + // write me +} + + + + // ========================================================== // EXPORT @@ -429,6 +474,9 @@ void Migrator::export_dir(CDir *dir, export_state[dir] = EXPORT_DISCOVERING; export_peer[dir] = dest; + assert(!dir->state_test(CDir::STATE_EXPORTING)); + dir->state_set(CDir::STATE_EXPORTING); + // send ExportDirDiscover (ask target) mds->send_message_mds(new MExportDirDiscover(dir), dest, MDS_PORT_MIGRATOR); dir->auth_pin(); // pin dir, to hang up our freeze (unpin on discover ack) @@ -1132,11 +1180,12 @@ void Migrator::export_finish(CDir *dir) cache->discard_delayed_expire(dir); // remove from exporting list, clean up state + dir->state_clear(CDir::STATE_EXPORTING); export_state.erase(dir); export_peer.erase(dir); export_bounds.erase(dir); export_notify_ack_waiting.erase(dir); - + // queue finishers mds->queue_finished(export_finish_waiters[dir]); export_finish_waiters.erase(dir); @@ -1145,6 +1194,7 @@ void Migrator::export_finish(CDir *dir) //if (mds->logger) mds->logger->set("nex", cache->exports.size()); cache->show_subtrees(); + audit(); // send pending import_maps? mds->mdcache->send_pending_import_maps(); @@ -1266,6 +1316,7 @@ void Migrator::handle_export_prep(MExportDirPrep *m) // move pin to dir diri->put(CInode::PIN_IMPORTING); dir->get(CDir::PIN_IMPORTING); + dir->state_set(CDir::STATE_IMPORTING); // change import state import_state[dir->dirfrag()] = IMPORT_PREPPING; @@ -1586,6 +1637,7 @@ void Migrator::import_reverse_unpin(CDir *dir) // remove importing pin dir->put(CDir::PIN_IMPORTING); + dir->state_clear(CDir::STATE_IMPORTING); // remove bound pins for (set::iterator it = import_bounds[dir].begin(); @@ -1604,7 +1656,7 @@ void Migrator::import_reverse_unpin(CDir *dir) import_bystanders.erase(dir); cache->show_subtrees(); - cache->show_cache(); + audit(); } @@ -1642,6 +1694,7 @@ void Migrator::import_finish(CDir *dir, bool now) // remove pins dir->put(CDir::PIN_IMPORTING); + dir->state_clear(CDir::STATE_IMPORTING); for (set::iterator it = import_bounds[dir].begin(); it != import_bounds[dir].end(); @@ -1681,6 +1734,7 @@ void Migrator::import_finish(CDir *dir, bool now) //mds->logger->set("nim", cache->imports.size()); } cache->show_subtrees(); + audit(); // is it empty? if (dir->get_size() == 0 && diff --git a/branches/sage/cephmds2/mds/Migrator.h b/branches/sage/cephmds2/mds/Migrator.h index 5ddef2517b38a..7a67f5dff1f38 100644 --- a/branches/sage/cephmds2/mds/Migrator.h +++ b/branches/sage/cephmds2/mds/Migrator.h @@ -154,6 +154,7 @@ public: // -- misc -- void handle_mds_failure(int who); + void audit(); // -- import/export -- // exporter diff --git a/branches/sage/cephmds2/mds/Server.cc b/branches/sage/cephmds2/mds/Server.cc index 48411e0c2500a..1199ed0af79cc 100644 --- a/branches/sage/cephmds2/mds/Server.cc +++ b/branches/sage/cephmds2/mds/Server.cc @@ -827,11 +827,6 @@ int Server::encode_dir_contents(CDir *dir, it++) { CDentry *dn = it->second; - // hashed? - if (dir->is_hashed() && - mds->get_nodeid() != mds->mdcache->hash_dentry( dir->ino(), it->first )) - continue; - if (dn->is_null()) continue; CInode *in = dn->inode; diff --git a/branches/sage/cephmds2/mds/journal.cc b/branches/sage/cephmds2/mds/journal.cc index 13b9b2843e037..efc55532c670e 100644 --- a/branches/sage/cephmds2/mds/journal.cc +++ b/branches/sage/cephmds2/mds/journal.cc @@ -97,9 +97,17 @@ bool EMetaBlob::has_expired(MDS *mds) } if (dir->auth_is_ambiguous()) { - dout(10) << "EMetaBlob.has_expired ambiguous auth on " - << *dir << endl; - return false; // not committed. + CDir *ex = mds->mdcache->get_subtree_root(dir); + if (ex->is_exporting()) { + // wait until export is acked (logged on remote) and committed (logged locally) + dout(10) << "EMetaBlob.has_expired ambiguous auth for " << *dir + << ", exporting (not yet safe) on " << *ex << endl; + return false; + } else { + dout(10) << "EMetaBlob.has_expired ambiguous auth for " << *dir + << ", _importing_ (safe) on " << *ex << endl; + return true; + } } if (dir->get_committed_version() < lp->second.dirv) { @@ -144,12 +152,18 @@ void EMetaBlob::expire(MDS *mds, Context *c) } if (dir->auth_is_ambiguous()) { - // wait until export is acked (logged on remote) and committed (logged locally) CDir *ex = mds->mdcache->get_subtree_root(dir); - dout(10) << "EMetaBlob.expire ambiguous auth for " << *dir - << ", waiting for export finish on " << *ex << endl; - waitfor_export.push_back(ex); - continue; + if (ex->is_exporting()) { + // wait until export is acked (logged on remote) and committed (logged locally) + dout(10) << "EMetaBlob.expire ambiguous auth for " << *dir + << ", waiting for export finish on " << *ex << endl; + waitfor_export.push_back(ex); + continue; + } else { + dout(10) << "EMetaBlob.expire ambiguous auth for " << *dir + << ", but _importing_, we're safe on " << *ex << endl; + continue; + } } if (dir->get_committed_version() < lp->second.dirv) { dout(10) << "EMetaBlob.expire need dirv " << lp->second.dirv diff --git a/branches/sage/cephmds2/messages/MDiscoverReply.h b/branches/sage/cephmds2/messages/MDiscoverReply.h index c1b3f75d9d4dd..b8cb8a2b4ffa3 100644 --- a/branches/sage/cephmds2/messages/MDiscoverReply.h +++ b/branches/sage/cephmds2/messages/MDiscoverReply.h @@ -50,18 +50,20 @@ using namespace std; * * see MDCache::handle_discover, handle_discover_reply. * - - old crap, maybe not accurate: - - // dir [ + ... ] : discover want_base_dir=true - - // dentry [ + inode [ + ... ] ] : discover want_base_dir=false - // no_base_dir=true - // -> we only exclude inode if dentry is null+xlock - - // inode [ + ... ], base_ino = 0 : discover base_ino=0, start w/ root ino, - // no_base_dir=no_base_dentry=true - + * + * so basically, we get + * + * dir den ino i + * x 0 + * x x x 1 + * or + * x x 0 + * x x x 1 + * or + * x x x 0 + * x x x 1 + * ...and trail off however we want. + * * */ @@ -87,6 +89,10 @@ class MDiscoverReply : public Message { int get_num_dentries() { return dentries.size(); } int get_num_dirs() { return dirs.size(); } + int get_last_inode() { return inodes.size(); } + int get_last_dentry() { return dentries.size() + no_base_dentry; } + int get_last_dir() { return dirs.size() + no_base_dir; } + int get_depth() { // return depth of deepest object (in dir/dentry/inode units) return max( inodes.size(), // at least this many max( no_base_dentry + dentries.size() + flag_error_dn, // inode start + path + possible error @@ -96,7 +102,7 @@ class MDiscoverReply : public Message { bool has_base_dir() { return !no_base_dir && dirs.size(); } bool has_base_dentry() { return !no_base_dentry && dentries.size(); } bool has_root() { - if (base_ino == 0) { + if (base_ino == 1) { assert(no_base_dir && no_base_dentry); return true; } @@ -112,9 +118,9 @@ class MDiscoverReply : public Message { int get_dir_auth_hint() { return dir_auth_hint; } // these index _arguments_ are aligned to each ([[dir, ] dentry, ] inode) set. - CDirDiscover& get_dir(int n) { return *(dirs[n - no_base_dir]); } - CDentryDiscover& get_dentry(int n) { return *(dentries[n - no_base_dentry]); } CInodeDiscover& get_inode(int n) { return *(inodes[n]); } + CDentryDiscover& get_dentry(int n) { return *(dentries[n - no_base_dentry]); } + CDirDiscover& get_dir(int n) { return *(dirs[n - no_base_dir]); } inodeno_t get_ino(int n) { return inodes[n]->get_ino(); } // cons diff --git a/branches/sage/cephmds2/msg/FakeMessenger.cc b/branches/sage/cephmds2/msg/FakeMessenger.cc index 7bf68c767e621..6c563cf58078f 100644 --- a/branches/sage/cephmds2/msg/FakeMessenger.cc +++ b/branches/sage/cephmds2/msg/FakeMessenger.cc @@ -157,7 +157,7 @@ int fakemessenger_do_loop_2() dout(1) << "---- " << m->get_dest() << " <- " << m->get_source() << " ---- " << *m - << " (" << m << ")" + << " ---- " << m << endl; if (g_conf.fakemessenger_serialize) { @@ -311,14 +311,13 @@ int FakeMessenger::send_message(Message *m, entity_inst_t inst, int port, int fr // queue if (directory.count(inst.addr)) { - dout(1) << "--> " << get_myname() << " -> " << inst.name - << " " << *m - << " (" << m << ")" + dout(1) << "--> " << get_myname() << " -> " << inst.name << " " << *m << " -- " << m << endl; directory[inst.addr]->queue_incoming(m); } else { - dout(0) << "--> " << get_myname() << " -> " << inst.name << " " << *m - << " *** destination DNE ***" << endl; + dout(0) << "--> " << get_myname() << " -> " << inst.name << " " << *m << " -- " << m + << " *** destination DNE ***" + << endl; for (map::iterator p = directory.begin(); p != directory.end(); ++p) { -- 2.39.5