From: Samuel Just
Date: Thu, 24 Apr 2014 19:48:44 +0000 (-0700)
Subject: ECBackend::continue_recovery_op: handle a source shard going down
X-Git-Tag: v0.80~8
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=d9106ce5e4437ab02279c4d7dfa23ce0d69c59df;p=ceph.git

ECBackend::continue_recovery_op: handle a source shard going down

get_min_avail_to_read_shards might return an error if there are no
longer enough sources to reconstruct the missing shards. This is
possible if osds went down while we were writing the previous chunk --
we already notice in check_recovery_sources if a source goes down
during a read.

Fixes: #8161
Signed-off-by: Samuel Just
(cherry picked from commit 1885792c517670086332a8bab237c58558ee6dda)
---

diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index 66b7dd5c56f8..3c27288c5325 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -472,6 +472,15 @@ void ECBackend::continue_recovery_op(
       set to_read;
       int r = get_min_avail_to_read_shards(
         op.hoid, want, true, &to_read);
+      if (r != 0) {
+        // we must have lost a recovery source
+        assert(!op.recovery_progress.first);
+        dout(10) << __func__ << ": canceling recovery op for obj " << op.hoid
+                 << dendl;
+        get_parent()->cancel_pull(op.hoid);
+        recovery_ops.erase(op.hoid);
+        return;
+      }
       assert(r == 0);
       m->read(
         this,