mds: don't assert on read errors in RecoveryQueue

author John Spray <john.spray@redhat.com>
Wed, 15 Mar 2017 22:16:55 +0000 (22:16 +0000)

committer John Spray <john.spray@redhat.com>
Fri, 17 Mar 2017 11:51:15 +0000 (11:51 +0000)
diff --git a/src/mds/RecoveryQueue.cc b/src/mds/RecoveryQueue.cc

index bd8fe0dd88dab4e012e0ab601ef5539d97af0687..aca08bbffe010512699917bc5f5a2e8b9d08e93e 100644 (file)
--- a/src/mds/RecoveryQueue.cc
+++ b/src/mds/RecoveryQueue.cc
@@ -162,8 +162,16 @@ void RecoveryQueue::_recovered(CInode *in, int r, uint64_t size, utime_t mtime)
      if (r == -EBLACKLISTED) {
        mds->respawn();
        return;
+    } else {
+      // Something wrong on the OSD side trying to recover the size
+      // of this inode.  In principle we could record this as a piece
+      // of per-inode damage, but it's actually more likely that
+      // this indicates something wrong with the MDS (like maybe
+      // it has the wrong auth caps?)
+      mds->clog->error() << " OSD read error while recovering size for inode 0x"
+                         << std::hex << in->ino() << std::dec;
+      mds->damaged();
      }
-    assert(0 == "unexpected error from osd during recovery");
    }
  
    file_recovering.erase(in);
author	John Spray <john.spray@redhat.com>
	Wed, 15 Mar 2017 22:16:55 +0000 (22:16 +0000)
committer	John Spray <john.spray@redhat.com>
	Fri, 17 Mar 2017 11:51:15 +0000 (11:51 +0000)