git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: cancel exports in PREPPING state on any failure
author Sage Weil <sage@newdream.net>
Thu, 14 Apr 2011 01:36:33 +0000 (18:36 -0700)
committer Sage Weil <sage@newdream.net>
Fri, 15 Apr 2011 23:32:55 +0000 (16:32 -0700)
The prepping nodes may need to discover bounds from the failed node and
may hang indefinitely.  Meanwhile, we won't send out mds_resolve messages
until in-progress migrations complete.  Deadlock.

In certain cases the importing node can manufacture the replica.  If it
doesn't realize that right off, though, it will get hung up trying to
discover from the wrong node, get referred to the failed node, and block
waiting for recovery.  The replica forging is a bit suspect anyway, so
let's avoid the whole thing if we can!

Signed-off-by: Sage Weil <sage@newdream.net>
src/mds/Migrator.cc

index c9209b239c38e7db0a1496eb240a928ea5f1395f..78992ed06aebd64d69b646f60f297d2e66f16c32 100644 (file)
@@ -205,8 +205,11 @@ void Migrator::handle_mds_failure_or_stop(int who)
     // abort exports:
     //  - that are going to the failed node
     //  - that aren't frozen yet (to avoid auth_pin deadlock)
+    //  - that haven't prepped yet (they may need to discover bounds to do that)
     if (export_peer[dir] == who ||
-       p->second == EXPORT_DISCOVERING || p->second == EXPORT_FREEZING) { 
+       p->second == EXPORT_DISCOVERING ||
+       p->second == EXPORT_FREEZING ||
+       p->second == EXPORT_PREPPING) { 
       // the guy i'm exporting to failed, or we're just freezing.
       dout(10) << "cleaning up export state (" << p->second << ")" << get_export_statename(p->second)
               << " of " << *dir << dendl;
@@ -258,6 +261,8 @@ void Migrator::handle_mds_failure_or_stop(int who)
        export_state.erase(dir); // clean up
        export_locks.erase(dir);
        dir->state_clear(CDir::STATE_EXPORTING);
+       if (export_peer[dir] != who) // tell them.
+         mds->send_message_mds(new MExportDirCancel(dir->dirfrag()), export_peer[dir]);
        break;
        
       case EXPORT_EXPORTING: