From: Sage Weil Date: Mon, 20 Aug 2012 19:33:08 +0000 (-0700) Subject: osd: fix requeue order of dup ops X-Git-Tag: v0.51~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=dd4c1dc9f9dae43e4761caca049bfe7361d9ebfb;p=ceph.git osd: fix requeue order of dup ops The waiting_for_ondisk (and ack) maps get dups of ops that are in progress. If we have a peering change in which the role does not change, we will requeue the in-progress ops but leave these in the waiting_for_ondisk maps, which will then trigger an assert the next time we examine that map and find it didn't match up with what we expected. Fix this by requeuing these on any peering reset in on_change(). This keeps the two queues in sync. Fixes: #2956 Signed-off-by: Sage Weil --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 50cde51a948d..296bffb4387b 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -5793,6 +5793,15 @@ void ReplicatedPG::on_change() requeue_ops(waiting_for_all_missing); waiting_for_all_missing.clear(); + // take commit waiters; these are dups of what + // apply_and_flush_repops() will requeue. + for (map<eversion_t, list<OpRequestRef> >::iterator p = waiting_for_ondisk.begin(); + p != waiting_for_ondisk.end(); + p++) + requeue_ops(p->second); + waiting_for_ondisk.clear(); + waiting_for_ack.clear(); + // this will requeue ops we were working on but didn't finish apply_and_flush_repops(is_primary()); @@ -5808,18 +5817,9 @@ void ReplicatedPG::on_change() void ReplicatedPG::on_role_change() { dout(10) << "on_role_change" << dendl; - - // take commit waiters - for (map<eversion_t, list<OpRequestRef> >::iterator p = waiting_for_ondisk.begin(); - p != waiting_for_ondisk.end(); - p++) - requeue_ops(p->second); - waiting_for_ondisk.clear(); - waiting_for_ack.clear(); } - // clear state. called on recovery completion AND cancellation. void ReplicatedPG::_clear_recovery_state() {