From d9106ce5e4437ab02279c4d7dfa23ce0d69c59df Mon Sep 17 00:00:00 2001
From: Samuel Just <sam.just@inktank.com>
Date: Thu, 24 Apr 2014 12:48:44 -0700
Subject: [PATCH] ECBackend::continue_recovery_op: handle a source shard going
 down

get_min_avail_to_read_shards() might return an error if there are
no longer enough sources to reconstruct the missing shards.
This is possible if OSDs went down while we were writing the
previous chunk -- check_recovery_sources() already notices
if a source goes down during a read.

Fixes: #8161
Signed-off-by: Samuel Just <sam.just@inktank.com>
(cherry picked from commit 1885792c517670086332a8bab237c58558ee6dda)
---
 src/osd/ECBackend.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/osd/ECBackend.cc b/src/osd/ECBackend.cc
index 66b7dd5c56f..3c27288c532 100644
--- a/src/osd/ECBackend.cc
+++ b/src/osd/ECBackend.cc
@@ -472,6 +472,15 @@ void ECBackend::continue_recovery_op(
       set<pg_shard_t> to_read;
       int r = get_min_avail_to_read_shards(
 	op.hoid, want, true, &to_read);
+      if (r != 0) {
+	// we must have lost a recovery source
+	assert(!op.recovery_progress.first);
+	dout(10) << __func__ << ": canceling recovery op for obj " << op.hoid
+		 << dendl;
+	get_parent()->cancel_pull(op.hoid);
+	recovery_ops.erase(op.hoid);
+	return;
+      }
       assert(r == 0);
       m->read(
 	this,
-- 
2.47.3