From 0b2e272430dd7433e6763be99b8a4cb127d9be19 Mon Sep 17 00:00:00 2001
From: Samuel Just
Date: Thu, 26 Mar 2015 10:26:48 -0700
Subject: [PATCH] ReplicatedPG::cancel_pull: requeue waiters as well

If we are in recovery_wait, we might not recover that object as part of
recover_primary for some time.  Worse, if we are waiting on a backfill
which is blocked waiting on a copy_from on the missing object in
question, it can become a deadlock.

Fixes: 11244
Backport: firefly
Signed-off-by: Samuel Just
---
 src/osd/ReplicatedPG.cc | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc
index 4964bc4899515..265ead30e8e1f 100644
--- a/src/osd/ReplicatedPG.cc
+++ b/src/osd/ReplicatedPG.cc
@@ -10519,6 +10519,7 @@ void ReplicatedPG::_clear_recovery_state()
 
 void ReplicatedPG::cancel_pull(const hobject_t &soid)
 {
+  dout(20) << __func__ << ": " << soid << dendl;
   assert(recovering.count(soid));
   ObjectContextRef obc = recovering[soid];
   if (obc) {
@@ -10528,6 +10529,18 @@ void ReplicatedPG::cancel_pull(const hobject_t &soid)
   }
   recovering.erase(soid);
   finish_recovery_op(soid);
+  // The pull is cancelled, so recovery may not revisit this object for a
+  // while; requeue any ops waiting on it rather than leaving them parked.
+  if (waiting_for_degraded_object.count(soid)) {
+    dout(20) << " kicking degraded waiters on " << soid << dendl;
+    requeue_ops(waiting_for_degraded_object[soid]);
+    waiting_for_degraded_object.erase(soid);
+  }
+  if (waiting_for_unreadable_object.count(soid)) {
+    dout(20) << " kicking unreadable waiters on " << soid << dendl;
+    requeue_ops(waiting_for_unreadable_object[soid]);
+    waiting_for_unreadable_object.erase(soid);
+  }
   if (is_missing_object(soid))
     pg_log.set_last_requested(0); // get recover_primary to start over
 }
-- 
2.39.5