// (this is a large hammer to ensure that dirfragtree updates will
// hit the disk before the relevant dirfrags ever close)
if (dir->inode->is_auth() &&
- dir->inode->can_auth_pin()) {
+ dir->inode->can_auth_pin() &&
+ (mds->is_active() || mds->is_stopping())) {
CInode *in = dir->inode;
dout(10) << "try_subtree_merge_at journaling merged bound " << *in << endl;
mds->send_message_mds(ack, from, MDS_PORT_CACHE);
}
+ // am i a surviving ambiguous importer?
+ if (mds->is_active() || mds->is_stopping()) {
+ // check for any import success/failure (from this node)
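+ //  (these are imports that were still waiting for an ack when the
+ //   sender failed; the sender's resolve tells us how each turned out)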
+ map<dirfrag_t, list<dirfrag_t> >::iterator p = my_ambiguous_imports.begin();
+ while (p != my_ambiguous_imports.end()) {
+ map<dirfrag_t, list<dirfrag_t> >::iterator next = p;
+ next++;
+ CDir *dir = get_dirfrag(p->first);
+ assert(dir);
+ dout(10) << "checking ambiguous import " << *dir << endl;
+ if (migrator->is_importing(dir->dirfrag()) &&
+ migrator->get_import_peer(dir->dirfrag()) == from) {
+ assert(migrator->get_import_state(dir->dirfrag()) == Migrator::IMPORT_ACKING);
+
+ // check if sender claims the subtree
+ bool claimed_by_sender = false;
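+ // the sender claims dir iff some subtree base in its resolve contains
+ // dir and none of that subtree's bounds lies between the base and dir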
+ for (map<dirfrag_t, list<dirfrag_t> >::iterator q = m->subtrees.begin();
+ q != m->subtrees.end();
+ ++q) {
+ CDir *base = get_dirfrag(q->first);
+ if (!base || !base->contains(dir))
+ continue; // base is neither dir nor an ancestor of dir, so it clearly doesn't claim dir.
+
+ bool inside = true;
+ for (list<dirfrag_t>::iterator r = q->second.begin();
+ r != q->second.end();
+ ++r) {
+ CDir *bound = get_dirfrag(*r);
+ if (bound && bound->contains(dir)) {
+ inside = false; // nope, bound is dir or parent of dir, not inside.
+ break;
+ }
+ }
+ if (inside)
+ claimed_by_sender = true;
+ }
+
+ if (claimed_by_sender) {
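+ // the sender still claims the subtree, so our import evidently
+ // never took effect on its side; undo it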
+ dout(7) << "ambiguous import failed on " << *dir << endl;
+ migrator->import_reverse(dir);
+ } else {
+ dout(7) << "ambiguous import succeeded on " << *dir << endl;
+ migrator->import_finish(dir);
+ }
+ }
+ p = next;
+ }
+ }
+
// update my dir_auth values
for (map<dirfrag_t, list<dirfrag_t> >::iterator pi = m->subtrees.begin();
pi != m->subtrees.end();
try_subtree_merge(dir);
}
- // am i a surviving ambiguous importer?
- /*
- * note: it would be cleaner to do this check before updating our own
- * subtree map.. then the import_finish or _reverse could operate on an
- * un-munged subtree map. but... checking for import completion against
- * the provided resolve isn't easy. so, we skip audit checks in these
- * functions.
- */
- if (mds->is_active() || mds->is_stopping()) {
- // check for any import success/failure (from this node)
- map<dirfrag_t, list<dirfrag_t> >::iterator p = my_ambiguous_imports.begin();
- while (p != my_ambiguous_imports.end()) {
- map<dirfrag_t, list<dirfrag_t> >::iterator n = p;
- n++;
- CDir *dir = get_dirfrag(p->first);
- assert(dir);
- dout(10) << "checking ambiguous import " << *dir << endl;
- if (migrator->is_importing(dir->dirfrag())) {
- assert(migrator->get_import_state(dir->dirfrag()) == Migrator::IMPORT_ACKING);
- if (migrator->get_import_peer(dir->dirfrag()) == from) {
- if (dir->is_ambiguous_dir_auth()) {
- dout(7) << "ambiguous import succeeded on " << *dir << endl;
- migrator->import_finish(dir, true); // don't wait for log flush
- } else {
- dout(7) << "ambiguous import failed on " << *dir << endl;
- migrator->import_reverse(dir, false); // don't adjust dir_auth.
- }
- my_ambiguous_imports.erase(p);
- }
- }
- p = n;
- }
- }
-
show_subtrees();
void MDCache::finish_ambiguous_import(dirfrag_t df)
{
assert(my_ambiguous_imports.count(df));
- list<dirfrag_t> bound_inos;
- bound_inos.swap(my_ambiguous_imports[df]);
+ list<dirfrag_t> bounds;
+ bounds.swap(my_ambiguous_imports[df]);
my_ambiguous_imports.erase(df);
dout(10) << "finish_ambiguous_import " << df
- << " bounds " << bound_inos
+ << " bounds " << bounds
<< endl;
CDir *dir = get_dirfrag(df);
assert(dir);
// adjust dir_auth, import maps
- adjust_bounded_subtree_auth(dir, bound_inos, mds->get_nodeid());
+ adjust_bounded_subtree_auth(dir, bounds, mds->get_nodeid());
try_subtree_merge(dir);
}
dout(10) << "import state=prepping : unpinning base+bounds " << *dir << endl;
}
assert(dir);
- import_reverse_unpin(dir); // unpin
+ {
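+ // the bounds aren't reflected in the subtree map at this stage, so
+ // rebuild them from the bound list stashed during prep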
+ set<CDir*> bounds;
+ cache->map_dirfrag_set(import_bound_ls[dir], bounds);
+ import_remove_pins(dir, bounds);
+ import_reverse_final(dir);
+ }
break;
case IMPORT_PREPPED:
- dout(10) << "import state=prepping : unpinning base+bounds, unfreezing " << *dir << endl;
+ dout(10) << "import state=prepped : unpinning base+bounds, unfreezing " << *dir << endl;
assert(dir);
-
- // adjust auth back to me
- cache->adjust_subtree_auth(dir, import_peer[df]);
- cache->try_subtree_merge(dir);
-
- // bystanders?
- if (import_bystanders[dir].empty()) {
- import_reverse_unfreeze(dir);
- } else {
- // notify them; wait in aborting state
- import_notify_abort(dir);
- import_state[df] = IMPORT_ABORTING;
+ {
+ set<CDir*> bounds;
+ cache->get_subtree_bounds(dir, bounds);
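+ // (grab the bounds now; the auth adjustment below may merge this
+ //  subtree away)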
+ import_remove_pins(dir, bounds);
+
+ // adjust auth back to me
+ cache->adjust_subtree_auth(dir, import_peer[df]);
+ cache->try_subtree_merge(dir);
+
+ // bystanders?
+ if (import_bystanders[dir].empty()) {
+ import_reverse_unfreeze(dir);
+ } else {
+ // notify them; wait in aborting state
+ import_notify_abort(dir, bounds);
+ import_state[df] = IMPORT_ABORTING;
+ }
}
break;
cache->add_ambiguous_import(dir, bounds);
}
break;
-
+
case IMPORT_ABORTING:
dout(10) << "import state=aborting : ignoring repeat failure " << *dir << endl;
break;
}
+ } else {
+ if (q->second == IMPORT_ABORTING &&
+ import_bystanders[dir].count(who)) {
+ dout(10) << "faking export_notify_ack from mds" << who
+ << " on aborting import " << *dir << " from mds" << import_peer[df]
+ << endl;
+ import_bystanders[dir].erase(who);
+ if (import_bystanders[dir].empty()) {
+ import_bystanders.erase(dir);
+ import_reverse_unfreeze(dir);
+ }
+ }
}
// next!
}
}
- // include spanning tree for all nested exports.
- // these need to be on the destination _before_ the final export so that
- // dir_auth updates on any nested exports are properly absorbed.
- // this includes inodes and dirfrags included in the subtree, but
- // only the inodes at the bounds.
+ /* include spanning tree for all nested exports.
+ * these need to be on the destination _before_ the final export so that
+ * dir_auth updates on any nested exports are properly absorbed.
+ * this includes both the inodes and dirfrags within the subtree, but
+ * only the inodes at the bounds.
+ */
set<inodeno_t> inodes_added;
// include base dirfrag
export_state[dir] = EXPORT_EXPORTING;
assert(export_data.count(dir) == 0);
-
assert(dir->get_cum_auth_pins() == 0);
// set ambiguous auth
// send the export data!
MExportDir *req = new MExportDir(dir->dirfrag());
-
- // export state
- req->set_dirstate( export_data[dir] );
+ req->set_dirstate(export_data[dir]);
// add bounds to message
set<CDir*> bounds;
assert(export_state[dir] == EXPORT_EXPORTING);
assert(export_data.count(dir));
+ set<CDir*> bounds;
+ cache->get_subtree_bounds(dir, bounds);
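+ // (grab the bounds before the subtree merge below can fold this
+ //  subtree into its parent)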
+
// adjust auth, with possible subtree merge.
cache->adjust_subtree_auth(dir, mds->get_nodeid());
cache->try_subtree_merge(dir);
// unpin bounds
- set<CDir*> bounds;
- cache->get_subtree_bounds(dir, bounds);
for (set<CDir*>::iterator p = bounds.begin();
p != bounds.end();
++p) {
cache->show_subtrees();
- // build bound map
- map<inodeno_t, fragset_t> bound_dirfragset;
+ // build import bound map
+ map<inodeno_t, fragset_t> import_bound_fragset;
for (list<dirfrag_t>::iterator p = m->get_bounds().begin();
p != m->get_bounds().end();
++p) {
dout(10) << " bound " << *p << endl;
- bound_dirfragset[p->ino].insert(p->frag);
+ import_bound_fragset[p->ino].insert(p->frag);
}
// assimilate contents?
// change import state
import_state[dir->dirfrag()] = IMPORT_PREPPING;
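+ // stash the bound list so a failure during prep can still locate and
+ // unpin the bounds (they aren't in the subtree map yet)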
+ import_bound_ls[dir] = m->get_bounds();
// bystander list
import_bystanders[dir] = m->get_bystanders();
}
// make bound sticky
- for (map<inodeno_t,fragset_t>::iterator p = bound_dirfragset.begin();
- p != bound_dirfragset.end();
+ for (map<inodeno_t,fragset_t>::iterator p = import_bound_fragset.begin();
+ p != import_bound_fragset.end();
++p) {
CInode *in = cache->get_inode(p->first);
assert(in);
// open all bounds
set<CDir*> import_bounds;
- for (map<inodeno_t,fragset_t>::iterator p = bound_dirfragset.begin();
- p != bound_dirfragset.end();
+ for (map<inodeno_t,fragset_t>::iterator p = import_bound_fragset.begin();
+ p != import_bound_fragset.end();
++p) {
CInode *in = cache->get_inode(p->first);
assert(in);
cache->get_subtree_bounds(dir, import_bounds);
for (set<CDir*>::iterator it = import_bounds.begin();
it != import_bounds.end();
- it++) {
- CDir *bd = *it;
- // include bounding dirs in EImportStart
- // (now that the interior metadata is already in the event)
- le->metablob.add_dir(bd, false);
- }
+ it++)
+ le->metablob.add_dir(*it, false); // note that parent metadata is already in the event
// adjust popularity
mds->balancer->add_import(dir);
}
+/*
+ * this is an import helper
+ * called by import_finish, import_reverse, and friends.
+ */
+void Migrator::import_remove_pins(CDir *dir, set<CDir*>& bounds)
+{
+ // root
+ dir->put(CDir::PIN_IMPORTING);
+ dir->state_clear(CDir::STATE_IMPORTING);
+
+ // bounds
+ set<CInode*> didinodes;
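+ // drop the sticky-dirs ref on each bound's inode exactly once, even
+ // when several bound frags share the same inode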
+ for (set<CDir*>::iterator it = bounds.begin();
+ it != bounds.end();
+ it++) {
+ CDir *bd = *it;
+ bd->put(CDir::PIN_IMPORTBOUND);
+ bd->state_clear(CDir::STATE_IMPORTBOUND);
+ CInode *bdi = bd->get_inode();
+ if (didinodes.count(bdi) == 0) {
+ bdi->put_stickydirs();
+ didinodes.insert(bdi);
+ }
+ }
+}
+
+
/*
* note: this does the full work of reversing an import and cleaning up
* state.
- * called by both handle_mds_failure and by handle_import_map (if we are
+ * called by both handle_mds_failure and by handle_resolve (if we are
* a survivor coping with an exporter failure+recovery).
*/
-void Migrator::import_reverse(CDir *dir, bool fix_dir_auth)
+void Migrator::import_reverse(CDir *dir)
{
dout(7) << "import_reverse " << *dir << endl;
+ set<CDir*> bounds;
+ cache->get_subtree_bounds(dir, bounds);
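+ // these bounds drive the unpin below, the bystander notify (if any),
+ // and let the auth-bit walk stop at the subtree edge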
+
+ // remove pins
+ import_remove_pins(dir, bounds);
+
// update auth, with possible subtree merge.
- if (fix_dir_auth) {
- assert(dir->is_subtree_root());
- cache->adjust_subtree_auth(dir, import_peer[dir->dirfrag()]);
- cache->try_subtree_merge(dir);
- }
+ assert(dir->is_subtree_root());
+ cache->adjust_subtree_auth(dir, import_peer[dir->dirfrag()]);
+ cache->try_subtree_merge(dir);
// adjust auth bits.
list<CDir*> q;
list<CDir*> dfs;
in->get_dirfrags(dfs);
for (list<CDir*>::iterator p = dfs.begin(); p != dfs.end(); ++p)
- if (!(*p)->state_test(CDir::STATE_IMPORTBOUND))
+ if (bounds.count(*p) == 0)
q.push_back(*p);
}
}
}
// log our failure
- mds->mdlog->submit_entry(new EImportFinish(dir,false)); // log failure
+ mds->mdlog->submit_entry(new EImportFinish(dir, false)); // log failure
// bystanders?
if (import_bystanders[dir].empty()) {
} else {
// notify them; wait in aborting state
dout(7) << "notifying bystanders of abort" << endl;
- import_notify_abort(dir);
+ import_notify_abort(dir, bounds);
import_state[dir->dirfrag()] = IMPORT_ABORTING;
}
}
-void Migrator::import_notify_abort(CDir *dir)
+void Migrator::import_notify_abort(CDir *dir, set<CDir*>& bounds)
{
dout(7) << "import_notify_abort " << *dir << endl;
- set<CDir*> import_bounds;
- cache->get_subtree_bounds(dir, import_bounds);
-
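+ // (the bounds come from the caller, which captured them before any
+ //  subtree readjustment could hide them)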
for (set<int>::iterator p = import_bystanders[dir].begin();
p != import_bystanders[dir].end();
++p) {
new MExportDirNotify(dir->dirfrag(), true,
pair<int,int>(mds->get_nodeid(), CDIR_AUTH_UNKNOWN),
pair<int,int>(import_peer[dir->dirfrag()], CDIR_AUTH_UNKNOWN));
- notify->copy_bounds(import_bounds);
+ notify->copy_bounds(bounds);
mds->send_message_mds(notify, *p, MDS_PORT_MIGRATOR);
}
}
void Migrator::import_reverse_unfreeze(CDir *dir)
{
dout(7) << "import_reverse_unfreeze " << *dir << endl;
-
- // unfreeze
dir->unfreeze_tree();
-
- // discard expire crap
cache->discard_delayed_expire(dir);
-
- import_reverse_unpin(dir);
-}
-
-void Migrator::import_remove_pins(CDir *dir)
-{
- // root
- dir->put(CDir::PIN_IMPORTING);
- dir->state_clear(CDir::STATE_IMPORTING);
-
- // bounds
- set<CDir*> bounds;
- cache->get_subtree_bounds(dir, bounds);
- set<CInode*> didinodes;
- for (set<CDir*>::iterator it = bounds.begin();
- it != bounds.end();
- it++) {
- CDir *bd = *it;
- bd->put(CDir::PIN_IMPORTBOUND);
- bd->state_clear(CDir::STATE_IMPORTBOUND);
- CInode *bdi = bd->get_inode();
- if (didinodes.count(bdi) == 0) {
- bdi->put_stickydirs();
- didinodes.insert(bdi);
- }
- }
+ import_reverse_final(dir);
}
-void Migrator::import_reverse_unpin(CDir *dir)
+void Migrator::import_reverse_final(CDir *dir)
{
- dout(7) << "import_reverse_unpin " << *dir << endl;
-
- import_remove_pins(dir);
+ dout(7) << "import_reverse_final " << *dir << endl;
// clean up
import_state.erase(dir->dirfrag());
import_peer.erase(dir->dirfrag());
import_bystanders.erase(dir);
+ import_bound_ls.erase(dir);
cache->show_subtrees();
//audit(); // this fails because we munge up the subtree map during handle_resolve (resolve phase)
delete m;
}
-void Migrator::import_finish(CDir *dir, bool now)
+void Migrator::import_finish(CDir *dir)
{
dout(7) << "import_finish on " << *dir << endl;
mds->mdlog->submit_entry(new EImportFinish(dir, true));
// remove pins
- import_remove_pins(dir);
+ set<CDir*> bounds;
+ cache->get_subtree_bounds(dir, bounds);
+ import_remove_pins(dir, bounds);
// unfreeze
dir->unfreeze_tree();
import_state.erase(dir->dirfrag());
import_peer.erase(dir->dirfrag());
import_bystanders.erase(dir);
+ import_bound_ls.erase(dir);
// process delayed expires
cache->process_delayed_expire(dir);