ReplicatedPG::cancel_pull: requeue waiters as well 4230/head
author Samuel Just <sjust@redhat.com>
Thu, 26 Mar 2015 17:26:48 +0000 (10:26 -0700)
committer Samuel Just <sjust@redhat.com>
Tue, 31 Mar 2015 16:21:36 +0000 (09:21 -0700)
If we are in recovery_wait, we might not recover that object as part of
recover_primary for some time.  Worse, if we are waiting on a backfill
which is blocked waiting on a copy_from on the missing object in
question, it can become a deadlock.

Fixes: 11244
Backport: firefly
Signed-off-by: Samuel Just <sjust@redhat.com>
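
For context, the pattern the patch applies is: when a pull is cancelled, any client ops parked on the cancelled object must be put back on the op queue, or nothing will ever wake them. Below is a minimal, self-contained sketch of that pattern, not Ceph code: the names waiting_for_degraded_object, requeue_ops and cancel_pull are borrowed from the diff that follows, while Op (a stand-in for OpRequestRef), the use of std::string for object ids, and main() are purely illustrative.

// Minimal sketch of the waiter-requeue pattern (illustrative only; types are
// simplified stand-ins for Ceph's OpRequestRef and hobject_t).
#include <iostream>
#include <list>
#include <map>
#include <string>

using Op = std::string;  // stand-in for OpRequestRef

// ops blocked because their target object is degraded, keyed by object id
std::map<std::string, std::list<Op>> waiting_for_degraded_object;

// Stand-in for ReplicatedPG::requeue_ops(): puts blocked ops back on the op
// queue so they get retried (and re-blocked if the object is still degraded).
void requeue_ops(std::list<Op> &ops) {
  for (const Op &op : ops)
    std::cout << "requeue " << op << "\n";
  ops.clear();
}

// The essence of the fix: cancelling a pull must also kick the waiters,
// otherwise they sit in the map until recover_primary happens to revisit
// the object, which can take arbitrarily long (or deadlock behind backfill).
void cancel_pull(const std::string &soid) {
  auto it = waiting_for_degraded_object.find(soid);
  if (it != waiting_for_degraded_object.end()) {
    requeue_ops(it->second);
    waiting_for_degraded_object.erase(it);
  }
}

int main() {
  waiting_for_degraded_object["obj1"].push_back("copy_from(obj1)");
  cancel_pull("obj1");  // prints: requeue copy_from(obj1)
  return 0;
}

In the actual patch the same kick is applied to both waiting_for_degraded_object and waiting_for_unreadable_object, and pg_log.set_last_requested(0) forces recover_primary to rescan from the start.
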
src/osd/ReplicatedPG.cc

index 4964bc489951536220f4c950abfb46c0ac0f4ff6..265ead30e8e1f8d26092f7a074f278c43da6196c 100644
@@ -10519,6 +10519,7 @@ void ReplicatedPG::_clear_recovery_state()
 
 void ReplicatedPG::cancel_pull(const hobject_t &soid)
 {
+  dout(20) << __func__ << ": " << soid << dendl;
   assert(recovering.count(soid));
   ObjectContextRef obc = recovering[soid];
   if (obc) {
@@ -10528,6 +10529,16 @@ void ReplicatedPG::cancel_pull(const hobject_t &soid)
   }
   recovering.erase(soid);
   finish_recovery_op(soid);
+  if (waiting_for_degraded_object.count(soid)) {
+    dout(20) << " kicking degraded waiters on " << soid << dendl;
+    requeue_ops(waiting_for_degraded_object[soid]);
+    waiting_for_degraded_object.erase(soid);
+  }
+  if (waiting_for_unreadable_object.count(soid)) {
+    dout(20) << " kicking unreadable waiters on " << soid << dendl;
+    requeue_ops(waiting_for_unreadable_object[soid]);
+    waiting_for_unreadable_object.erase(soid);
+  }
   if (is_missing_object(soid))
     pg_log.set_last_requested(0); // get recover_primary to start over
 }