From d98c56bc29219ab2e5bb64b3fdb584c11a061d4f Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 15 Mar 2017 22:16:55 +0000 Subject: [PATCH] mds: don't assert on read errors in RecoveryQueue On reflection, an error here is much more likely to be some kind of system/config error than it is to be something wrong with a particular file's objects, so do a rank-wide damaged() instead of inventing a whole new type in DamageTable just for this. Fixes: http://tracker.ceph.com/issues/19282 Signed-off-by: John Spray --- src/mds/RecoveryQueue.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mds/RecoveryQueue.cc b/src/mds/RecoveryQueue.cc index bd8fe0dd88da..aca08bbffe01 100644 --- a/src/mds/RecoveryQueue.cc +++ b/src/mds/RecoveryQueue.cc @@ -162,8 +162,16 @@ void RecoveryQueue::_recovered(CInode *in, int r, uint64_t size, utime_t mtime) if (r == -EBLACKLISTED) { mds->respawn(); return; + } else { + // Something wrong on the OSD side trying to recover the size + // of this inode. In principle we could record this as a piece + // of per-inode damage, but it's actually more likely that + // this indicates something wrong with the MDS (like maybe + // it has the wrong auth caps?) + mds->clog->error() << " OSD read error while recovering size for inode 0x" + << std::hex << in->ino() << std::dec; + mds->damaged(); } - assert(0 == "unexpected error from osd during recovery"); } file_recovering.erase(in); -- 2.47.3