From d98c56bc29219ab2e5bb64b3fdb584c11a061d4f Mon Sep 17 00:00:00 2001
From: John Spray <john.spray@redhat.com>
Date: Wed, 15 Mar 2017 22:16:55 +0000
Subject: [PATCH] mds: don't assert on read errors in RecoveryQueue

On reflection, an OSD read error during file size recovery is much
more likely to be some kind of system/config error than it is to be
something wrong with a particular file's objects. So instead of
asserting, do a rank-wide damaged() rather than inventing a whole
new damage type in DamageTable just for this case.

Fixes: http://tracker.ceph.com/issues/19282
Signed-off-by: John Spray <john.spray@redhat.com>
---
 src/mds/RecoveryQueue.cc | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/mds/RecoveryQueue.cc b/src/mds/RecoveryQueue.cc
index bd8fe0dd88da..aca08bbffe01 100644
--- a/src/mds/RecoveryQueue.cc
+++ b/src/mds/RecoveryQueue.cc
@@ -162,8 +162,16 @@ void RecoveryQueue::_recovered(CInode *in, int r, uint64_t size, utime_t mtime)
     if (r == -EBLACKLISTED) {
       mds->respawn();
       return;
+    } else {
+      // Something wrong on the OSD side trying to recover the size
+      // of this inode.  In principle we could record this as a piece
+      // of per-inode damage, but it's actually more likely that
+      // this indicates something wrong with the MDS (like maybe
+      // it has the wrong auth caps?)
+      mds->clog->error() << " OSD read error while recovering size for inode 0x"
+                         << std::hex << in->ino() << std::dec;
+      mds->damaged();
     }
-    assert(0 == "unexpected error from osd during recovery");
   }
 
   file_recovering.erase(in);
-- 
2.47.3