From c5611ad4d0878d4f99b388fadcf79b29dd9e1860 Mon Sep 17 00:00:00 2001 From: sageweil Date: Wed, 15 Aug 2007 16:23:21 +0000 Subject: [PATCH] mds.migrator import bound handling refactor, untested git-svn-id: https://ceph.svn.sf.net/svnroot/ceph@1637 29311d96-e01e-0410-9327-a35deaab8ce9 --- branches/sage/mds/mds/CDir.cc | 11 ++ branches/sage/mds/mds/CDir.h | 3 +- branches/sage/mds/mds/MDCache.cc | 94 ++++++++------ branches/sage/mds/mds/MDS.h | 1 + branches/sage/mds/mds/Migrator.cc | 199 ++++++++++++++++-------------- branches/sage/mds/mds/Migrator.h | 11 +- 6 files changed, 184 insertions(+), 135 deletions(-) diff --git a/branches/sage/mds/mds/CDir.cc b/branches/sage/mds/mds/CDir.cc index c14d9de225f4f..8bdb453780366 100644 --- a/branches/sage/mds/mds/CDir.cc +++ b/branches/sage/mds/mds/CDir.cc @@ -1234,6 +1234,17 @@ bool CDir::is_subtree_root() } } +/** contains(x) + * true if we are x, or an ancestor of x + */ +bool CDir::contains(CDir *x) +{ + while (1) { + if (x == this) return true; + x = x->get_parent_dir(); + if (x == 0) return false; + } +} diff --git a/branches/sage/mds/mds/CDir.h b/branches/sage/mds/mds/CDir.h index e4ac2f2a5c82f..cc43beccfe261 100644 --- a/branches/sage/mds/mds/CDir.h +++ b/branches/sage/mds/mds/CDir.h @@ -295,7 +295,8 @@ private: bool is_subtree_root(); - + bool contains(CDir *x); // true if we are x or an ancestor of x + // for giving to clients void get_dist_spec(set& ls, int auth) { diff --git a/branches/sage/mds/mds/MDCache.cc b/branches/sage/mds/mds/MDCache.cc index 0ee95f853b372..aa1353c61747d 100644 --- a/branches/sage/mds/mds/MDCache.cc +++ b/branches/sage/mds/mds/MDCache.cc @@ -533,7 +533,8 @@ void MDCache::try_subtree_merge_at(CDir *dir) // (this is a large hammer to ensure that dirfragtree updates will // hit the disk before the relevant dirfrags ever close) if (dir->inode->is_auth() && - dir->inode->can_auth_pin()) { + dir->inode->can_auth_pin() && + (mds->is_active() || mds->is_stopping())) { CInode *in = dir->inode; dout(10) << "try_subtree_merge_at journaling merged bound " << *in << endl; @@ -1372,6 +1373,55 @@ void MDCache::handle_resolve(MMDSResolve *m) mds->send_message_mds(ack, from, MDS_PORT_CACHE); } + // am i a surviving ambiguous importer? + if (mds->is_active() || mds->is_stopping()) { + // check for any import success/failure (from this node) + map >::iterator p = my_ambiguous_imports.begin(); + while (p != my_ambiguous_imports.end()) { + map >::iterator next = p; + next++; + CDir *dir = get_dirfrag(p->first); + assert(dir); + dout(10) << "checking ambiguous import " << *dir << endl; + if (migrator->is_importing(dir->dirfrag()) && + migrator->get_import_peer(dir->dirfrag()) == from) { + assert(migrator->get_import_state(dir->dirfrag()) == Migrator::IMPORT_ACKING); + + // check if sender claims the subtree + bool claimed_by_sender = false; + for (map >::iterator q = m->subtrees.begin(); + q != m->subtrees.end(); + ++q) { + CDir *base = get_dirfrag(q->first); + if (!base || !base->contains(dir)) + continue; // base not dir or an ancestor of dir, clearly doesn't claim dir. + + bool inside = true; + for (list::iterator r = q->second.begin(); + r != q->second.end(); + ++r) { + CDir *bound = get_dirfrag(*r); + if (bound && bound->contains(dir)) { + inside = false; // nope, bound is dir or parent of dir, not inside. + break; + } + } + if (inside) + claimed_by_sender = true; + } + + if (claimed_by_sender) { + dout(7) << "ambiguous import failed on " << *dir << endl; + migrator->import_reverse(dir); + } else { + dout(7) << "ambiguous import succeeded on " << *dir << endl; + migrator->import_finish(dir); + } + } + p = next; + } + } + // update my dir_auth values for (map >::iterator pi = m->subtrees.begin(); pi != m->subtrees.end(); @@ -1392,40 +1442,6 @@ void MDCache::handle_resolve(MMDSResolve *m) try_subtree_merge(dir); } - // am i a surviving ambiguous importer? - /* - * note: it would be cleaner to do this check before updating our own - * subtree map.. then the import_finish or _reverse could operate on an - * un-munged subtree map. but... checking for import completion against - * the provided resolve isn't easy. so, we skip audit checks in these - * functions. - */ - if (mds->is_active() || mds->is_stopping()) { - // check for any import success/failure (from this node) - map >::iterator p = my_ambiguous_imports.begin(); - while (p != my_ambiguous_imports.end()) { - map >::iterator n = p; - n++; - CDir *dir = get_dirfrag(p->first); - assert(dir); - dout(10) << "checking ambiguous import " << *dir << endl; - if (migrator->is_importing(dir->dirfrag())) { - assert(migrator->get_import_state(dir->dirfrag()) == Migrator::IMPORT_ACKING); - if (migrator->get_import_peer(dir->dirfrag()) == from) { - if (dir->is_ambiguous_dir_auth()) { - dout(7) << "ambiguous import succeeded on " << *dir << endl; - migrator->import_finish(dir, true); // don't wait for log flush - } else { - dout(7) << "ambiguous import failed on " << *dir << endl; - migrator->import_reverse(dir, false); // don't adjust dir_auth. - } - my_ambiguous_imports.erase(p); - } - } - p = n; - } - } - show_subtrees(); @@ -1613,18 +1629,18 @@ void MDCache::cancel_ambiguous_import(dirfrag_t df) void MDCache::finish_ambiguous_import(dirfrag_t df) { assert(my_ambiguous_imports.count(df)); - list bound_inos; - bound_inos.swap(my_ambiguous_imports[df]); + list bounds; + bounds.swap(my_ambiguous_imports[df]); my_ambiguous_imports.erase(df); dout(10) << "finish_ambiguous_import " << df - << " bounds " << bound_inos + << " bounds " << bounds << endl; CDir *dir = get_dirfrag(df); assert(dir); // adjust dir_auth, import maps - adjust_bounded_subtree_auth(dir, bound_inos, mds->get_nodeid()); + adjust_bounded_subtree_auth(dir, bounds, mds->get_nodeid()); try_subtree_merge(dir); } diff --git a/branches/sage/mds/mds/MDS.h b/branches/sage/mds/mds/MDS.h index 6c243a5b53f90..9122e10e90f31 100644 --- a/branches/sage/mds/mds/MDS.h +++ b/branches/sage/mds/mds/MDS.h @@ -126,6 +126,7 @@ class MDS : public Dispatcher { waiting_for_active_peer[who].push_back(c); } + int get_state() { return state; } bool is_dne() { return state == MDSMap::STATE_DNE; } bool is_failed() { return state == MDSMap::STATE_FAILED; } bool is_creating() { return state == MDSMap::STATE_CREATING; } diff --git a/branches/sage/mds/mds/Migrator.cc b/branches/sage/mds/mds/Migrator.cc index bb049b59bebe5..e7f3168877346 100644 --- a/branches/sage/mds/mds/Migrator.cc +++ b/branches/sage/mds/mds/Migrator.cc @@ -328,24 +328,34 @@ void Migrator::handle_mds_failure_or_stop(int who) dout(10) << "import state=prepping : unpinning base+bounds " << *dir << endl; } assert(dir); - import_reverse_unpin(dir); // unpin + { + set bounds; + cache->map_dirfrag_set(import_bound_ls[dir], bounds); + import_remove_pins(dir, bounds); + import_reverse_final(dir); + } break; case IMPORT_PREPPED: - dout(10) << "import state=prepping : unpinning base+bounds, unfreezing " << *dir << endl; + dout(10) << "import state=prepped : unpinning base+bounds, unfreezing " << *dir << endl; assert(dir); - - // adjust auth back to me - cache->adjust_subtree_auth(dir, import_peer[df]); - cache->try_subtree_merge(dir); - - // bystanders? - if (import_bystanders[dir].empty()) { - import_reverse_unfreeze(dir); - } else { - // notify them; wait in aborting state - import_notify_abort(dir); - import_state[df] = IMPORT_ABORTING; + { + set bounds; + cache->get_subtree_bounds(dir, bounds); + import_remove_pins(dir, bounds); + + // adjust auth back to me + cache->adjust_subtree_auth(dir, import_peer[df]); + cache->try_subtree_merge(dir); + + // bystanders? + if (import_bystanders[dir].empty()) { + import_reverse_unfreeze(dir); + } else { + // notify them; wait in aborting state + import_notify_abort(dir, bounds); + import_state[df] = IMPORT_ABORTING; + } } break; @@ -363,11 +373,23 @@ void Migrator::handle_mds_failure_or_stop(int who) cache->add_ambiguous_import(dir, bounds); } break; - + case IMPORT_ABORTING: dout(10) << "import state=aborting : ignoring repeat failure " << *dir << endl; break; } + } else { + if (q->second == IMPORT_ABORTING && + import_bystanders[dir].count(who)) { + dout(10) << "faking export_notify_ack from mds" << who + << " on aborting import " << *dir << " from mds" << import_peer[df] + << endl; + import_bystanders[dir].erase(who); + if (import_bystanders[dir].empty()) { + import_bystanders.erase(dir); + import_reverse_unfreeze(dir); + } + } } // next! @@ -589,11 +611,12 @@ void Migrator::export_frozen(CDir *dir) } } - // include spanning tree for all nested exports. - // these need to be on the destination _before_ the final export so that - // dir_auth updates on any nested exports are properly absorbed. - // this includes inodes and dirfrags included in the subtree, but - // only the inodes at the bounds. + /* include spanning tree for all nested exports. + * these need to be on the destination _before_ the final export so that + * dir_auth updates on any nested exports are properly absorbed. + * this includes inodes and dirfrags included in the subtree, but + * only the inodes at the bounds. + */ set inodes_added; // include base dirfrag @@ -720,7 +743,6 @@ void Migrator::export_go(CDir *dir) export_state[dir] = EXPORT_EXPORTING; assert(export_data.count(dir) == 0); - assert(dir->get_cum_auth_pins() == 0); // set ambiguous auth @@ -746,9 +768,7 @@ void Migrator::export_go(CDir *dir) // send the export data! MExportDir *req = new MExportDir(dir->dirfrag()); - - // export state - req->set_dirstate( export_data[dir] ); + req->set_dirstate(export_data[dir]); // add bounds to message set bounds; @@ -1013,13 +1033,14 @@ void Migrator::export_reverse(CDir *dir) assert(export_state[dir] == EXPORT_EXPORTING); assert(export_data.count(dir)); + set bounds; + cache->get_subtree_bounds(dir, bounds); + // adjust auth, with possible subtree merge. cache->adjust_subtree_auth(dir, mds->get_nodeid()); cache->try_subtree_merge(dir); // unpin bounds - set bounds; - cache->get_subtree_bounds(dir, bounds); for (set::iterator p = bounds.begin(); p != bounds.end(); ++p) { @@ -1344,13 +1365,13 @@ void Migrator::handle_export_prep(MExportDirPrep *m) cache->show_subtrees(); - // build bound map - map bound_dirfragset; + // build import bound map + map import_bound_fragset; for (list::iterator p = m->get_bounds().begin(); p != m->get_bounds().end(); ++p) { dout(10) << " bound " << *p << endl; - bound_dirfragset[p->ino].insert(p->frag); + import_bound_fragset[p->ino].insert(p->frag); } // assimilate contents? @@ -1365,6 +1386,7 @@ void Migrator::handle_export_prep(MExportDirPrep *m) // change import state import_state[dir->dirfrag()] = IMPORT_PREPPING; + import_bound_ls[dir] = m->get_bounds(); // bystander list import_bystanders[dir] = m->get_bystanders(); @@ -1405,8 +1427,8 @@ void Migrator::handle_export_prep(MExportDirPrep *m) } // make bound sticky - for (map::iterator p = bound_dirfragset.begin(); - p != bound_dirfragset.end(); + for (map::iterator p = import_bound_fragset.begin(); + p != import_bound_fragset.end(); ++p) { CInode *in = cache->get_inode(p->first); assert(in); @@ -1424,8 +1446,8 @@ void Migrator::handle_export_prep(MExportDirPrep *m) // open all bounds set import_bounds; - for (map::iterator p = bound_dirfragset.begin(); - p != bound_dirfragset.end(); + for (map::iterator p = import_bound_fragset.begin(); + p != import_bound_fragset.end(); ++p) { CInode *in = cache->get_inode(p->first); assert(in); @@ -1539,12 +1561,8 @@ void Migrator::handle_export_dir(MExportDir *m) cache->get_subtree_bounds(dir, import_bounds); for (set::iterator it = import_bounds.begin(); it != import_bounds.end(); - it++) { - CDir *bd = *it; - // include bounding dirs in EImportStart - // (now that the interior metadata is already in the event) - le->metablob.add_dir(bd, false); - } + it++) + le->metablob.add_dir(*it, false); // note that parent metadata is already in the event // adjust popularity mds->balancer->add_import(dir); @@ -1569,22 +1587,53 @@ void Migrator::handle_export_dir(MExportDir *m) } +/* + * this is an import helper + * called by import_finish, and import_reverse and friends. + */ +void Migrator::import_remove_pins(CDir *dir, set& bounds) +{ + // root + dir->put(CDir::PIN_IMPORTING); + dir->state_clear(CDir::STATE_IMPORTING); + + // bounds + set didinodes; + for (set::iterator it = bounds.begin(); + it != bounds.end(); + it++) { + CDir *bd = *it; + bd->put(CDir::PIN_IMPORTBOUND); + bd->state_clear(CDir::STATE_IMPORTBOUND); + CInode *bdi = bd->get_inode(); + if (didinodes.count(bdi) == 0) { + bdi->put_stickydirs(); + didinodes.insert(bdi); + } + } +} + + /* * note: this does teh full work of reversing and import and cleaning up * state. - * called by both handle_mds_failure and by handle_import_map (if we are + * called by both handle_mds_failure and by handle_resolve (if we are * a survivor coping with an exporter failure+recovery). */ -void Migrator::import_reverse(CDir *dir, bool fix_dir_auth) +void Migrator::import_reverse(CDir *dir) { dout(7) << "import_reverse " << *dir << endl; + set bounds; + cache->get_subtree_bounds(dir, bounds); + + // remove pins + import_remove_pins(dir, bounds); + // update auth, with possible subtree merge. - if (fix_dir_auth) { - assert(dir->is_subtree_root()); - cache->adjust_subtree_auth(dir, import_peer[dir->dirfrag()]); - cache->try_subtree_merge(dir); - } + assert(dir->is_subtree_root()); + cache->adjust_subtree_auth(dir, import_peer[dir->dirfrag()]); + cache->try_subtree_merge(dir); // adjust auth bits. list q; @@ -1626,14 +1675,14 @@ void Migrator::import_reverse(CDir *dir, bool fix_dir_auth) list dfs; in->get_dirfrags(dfs); for (list::iterator p = dfs.begin(); p != dfs.end(); ++p) - if (!(*p)->state_test(CDir::STATE_IMPORTBOUND)) + if (bounds.count(*p) == 0) q.push_back(*p); } } } // log our failure - mds->mdlog->submit_entry(new EImportFinish(dir,false)); // log failure + mds->mdlog->submit_entry(new EImportFinish(dir, false)); // log failure // bystanders? if (import_bystanders[dir].empty()) { @@ -1642,18 +1691,15 @@ void Migrator::import_reverse(CDir *dir, bool fix_dir_auth) } else { // notify them; wait in aborting state dout(7) << "notifying bystanders of abort" << endl; - import_notify_abort(dir); + import_notify_abort(dir, bounds); import_state[dir->dirfrag()] = IMPORT_ABORTING; } } -void Migrator::import_notify_abort(CDir *dir) +void Migrator::import_notify_abort(CDir *dir, set& bounds) { dout(7) << "import_notify_abort " << *dir << endl; - set import_bounds; - cache->get_subtree_bounds(dir, import_bounds); - for (set::iterator p = import_bystanders[dir].begin(); p != import_bystanders[dir].end(); ++p) { @@ -1663,7 +1709,7 @@ void Migrator::import_notify_abort(CDir *dir) new MExportDirNotify(dir->dirfrag(), true, pair(mds->get_nodeid(), CDIR_AUTH_UNKNOWN), pair(import_peer[dir->dirfrag()], CDIR_AUTH_UNKNOWN)); - notify->copy_bounds(import_bounds); + notify->copy_bounds(bounds); mds->send_message_mds(notify, *p, MDS_PORT_MIGRATOR); } } @@ -1671,50 +1717,20 @@ void Migrator::import_notify_abort(CDir *dir) void Migrator::import_reverse_unfreeze(CDir *dir) { dout(7) << "import_reverse_unfreeze " << *dir << endl; - - // unfreeze dir->unfreeze_tree(); - - // discard expire crap cache->discard_delayed_expire(dir); - - import_reverse_unpin(dir); -} - -void Migrator::import_remove_pins(CDir *dir) -{ - // root - dir->put(CDir::PIN_IMPORTING); - dir->state_clear(CDir::STATE_IMPORTING); - - // bounds - set bounds; - cache->get_subtree_bounds(dir, bounds); - set didinodes; - for (set::iterator it = bounds.begin(); - it != bounds.end(); - it++) { - CDir *bd = *it; - bd->put(CDir::PIN_IMPORTBOUND); - bd->state_clear(CDir::STATE_IMPORTBOUND); - CInode *bdi = bd->get_inode(); - if (didinodes.count(bdi) == 0) { - bdi->put_stickydirs(); - didinodes.insert(bdi); - } - } + import_reverse_final(dir); } -void Migrator::import_reverse_unpin(CDir *dir) +void Migrator::import_reverse_final(CDir *dir) { - dout(7) << "import_reverse_unpin " << *dir << endl; - - import_remove_pins(dir); + dout(7) << "import_reverse_final " << *dir << endl; // clean up import_state.erase(dir->dirfrag()); import_peer.erase(dir->dirfrag()); import_bystanders.erase(dir); + import_bound_ls.erase(dir); cache->show_subtrees(); //audit(); // this fails, bc we munge up the subtree map during handle_import_map (resolve phase) @@ -1746,7 +1762,7 @@ void Migrator::handle_export_finish(MExportDirFinish *m) delete m; } -void Migrator::import_finish(CDir *dir, bool now) +void Migrator::import_finish(CDir *dir) { dout(7) << "import_finish on " << *dir << endl; @@ -1754,7 +1770,9 @@ void Migrator::import_finish(CDir *dir, bool now) mds->mdlog->submit_entry(new EImportFinish(dir, true)); // remove pins - import_remove_pins(dir); + set bounds; + cache->get_subtree_bounds(dir, bounds); + import_remove_pins(dir, bounds); // unfreeze dir->unfreeze_tree(); @@ -1767,6 +1785,7 @@ void Migrator::import_finish(CDir *dir, bool now) import_state.erase(dir->dirfrag()); import_peer.erase(dir->dirfrag()); import_bystanders.erase(dir); + import_bound_ls.erase(dir); // process delayed expires cache->process_delayed_expire(dir); diff --git a/branches/sage/mds/mds/Migrator.h b/branches/sage/mds/mds/Migrator.h index e224faf0d04fb..135bf9415dfaa 100644 --- a/branches/sage/mds/mds/Migrator.h +++ b/branches/sage/mds/mds/Migrator.h @@ -114,6 +114,7 @@ protected: map import_state; // FIXME make these dirfrags map import_peer; map > import_bystanders; + map > import_bound_ls; /* @@ -228,16 +229,16 @@ public: map& imported_client_map); public: - void import_reverse(CDir *dir, bool fix_dir_auth=true); + void import_reverse(CDir *dir); protected: - void import_remove_pins(CDir *dir); + void import_remove_pins(CDir *dir, set& bounds); void import_reverse_unfreeze(CDir *dir); - void import_reverse_unpin(CDir *dir); - void import_notify_abort(CDir *dir); + void import_reverse_final(CDir *dir); + void import_notify_abort(CDir *dir, set& bounds); void import_logged_start(CDir *dir, int from); void handle_export_finish(MExportDirFinish *m); public: - void import_finish(CDir *dir, bool now=false); + void import_finish(CDir *dir); protected: friend class C_MDS_ImportDirLoggedStart; -- 2.39.5