]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: handle blacklisting during journal recovery 11413/head
authorJohn Spray <john.spray@redhat.com>
Mon, 19 Sep 2016 16:34:26 +0000 (17:34 +0100)
committerLoic Dachary <ldachary@redhat.com>
Wed, 9 Nov 2016 11:14:39 +0000 (12:14 +0100)
EBLACKLISTED was being incorrectly handled as an
indication of metadata damage.

Fixes: http://tracker.ceph.com/issues/17236
Signed-off-by: John Spray <john.spray@redhat.com>
(cherry picked from commit 19bb8c0df9b48ebbccfd1913126bb48b6337319e)

src/mds/MDLog.cc

index 795ffb3f5b29175d95c8207adf183a6775cae634..260e8747056405fcd8af0182fd95dd8fb8332e87 100644 (file)
@@ -910,6 +910,10 @@ void MDLog::_recovery_thread(MDSInternalContextBase *completion)
     int write_result = jp.save(mds->objecter);
     // Nothing graceful we can do for this
     assert(write_result >= 0);
+  } else if (read_result == -EBLACKLISTED) {
+    derr << "Blacklisted during JournalPointer read!  Respawning..." << dendl;
+    mds->respawn();
+    assert(0); // Should be unreachable because respawn calls execv
   } else if (read_result != 0) {
     mds->clog->error() << "failed to read JournalPointer: " << read_result
                        << " (" << cpp_strerror(read_result) << ")";
@@ -936,7 +940,11 @@ void MDLog::_recovery_thread(MDSInternalContextBase *completion)
     C_SaferCond recover_wait;
     back.recover(&recover_wait);
     int recovery_result = recover_wait.wait();
-    if (recovery_result != 0) {
+    if (recovery_result == -EBLACKLISTED) {
+      derr << "Blacklisted during journal recovery!  Respawning..." << dendl;
+      mds->respawn();
+      assert(0); // Should be unreachable because respawn calls execv
+    } else if (recovery_result != 0) {
       // Journaler.recover succeeds if no journal objects are present: an error
       // means something worse like a corrupt header, which we can't handle here.
       mds->clog->error() << "Error recovering journal " << jp.front << ": "
@@ -979,7 +987,11 @@ void MDLog::_recovery_thread(MDSInternalContextBase *completion)
   int recovery_result = recover_wait.wait();
   dout(4) << "Journal " << jp.front << " recovered." << dendl;
 
-  if (recovery_result != 0) {
+  if (recovery_result == -EBLACKLISTED) {
+    derr << "Blacklisted during journal recovery!  Respawning..." << dendl;
+    mds->respawn();
+    assert(0); // Should be unreachable because respawn calls execv
+  } else if (recovery_result != 0) {
     mds->clog->error() << "Error recovering journal " << jp.front << ": "
       << cpp_strerror(recovery_result);
     mds->damaged_unlocked();