From 17ceec0d10cc6540c958e8f2d5ec1961750ced48 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 27 Aug 2012 14:31:32 -0700 Subject: [PATCH] osd: requeue dup ops inline with in-progress ops We should requeue the dups along with the originals. This avoids situations where, after requeue, the dups are reordered with respect to each other. For example: - client sends A, B, C - osd receives A - connection drops - client sends A', B', C' - osd puts A' in waiting_for_ondisk, starts B' and C' - on_change() requeues everything Final queue order (before this patch) is A, B', C', A' After this patch, the resulting queue order is A, A', B', C' Or somewhat more generally, it might be: A, A', B, B', B'', C', C'', D'', .... Fixes (another source of): #2947 Signed-off-by: Sage Weil Reviewed-by: Samuel Just --- src/osd/OSD.cc | 5 ++--- src/osd/ReplicatedPG.cc | 23 +++++++++++++---------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 161cf1196a405..c2ab64ca813bf 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -5477,8 +5477,7 @@ bool OSD::op_is_discardable(MOSDOp *op) */ void OSD::enqueue_op(PG *pg, OpRequestRef op) { - dout(15) << *pg << " enqueue_op " << op->request << " " - << *(op->request) << dendl; + dout(15) << *pg << " enqueue_op " << op << " " << *(op->request) << dendl; assert(pg->is_locked()); pg->queue_op(op); } @@ -5566,7 +5565,7 @@ void OSD::dequeue_op(PG *pg) op = pg->op_queue.front(); pg->op_queue.pop_front(); - dout(10) << "dequeue_op " << *op->request << " pg " << *pg << dendl; + dout(10) << "dequeue_op " << op << " " << *op->request << " pg " << *pg << dendl; op->mark_reached_pg(); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index d9f868539829f..2850efb09cde8 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -5731,6 +5731,14 @@ void ReplicatedPG::apply_and_flush_repops(bool requeue) dout(10) << " requeuing " << *repop->ctx->op->request << dendl; rq.push_back(repop->ctx->op); repop->ctx->op = OpRequestRef(); + + // also requeue any dups, interleaved into position + map >::iterator p = waiting_for_ondisk.find(repop->v); + if (p != waiting_for_ondisk.end()) { + dout(10) << " also requeuing ondisk waiters " << p->second << dendl; + rq.splice(rq.end(), p->second); + waiting_for_ondisk.erase(p); + } } remove_repop(repop); @@ -5739,6 +5747,9 @@ void ReplicatedPG::apply_and_flush_repops(bool requeue) if (requeue) { requeue_ops(rq); } + + assert(waiting_for_ondisk.empty()); + waiting_for_ack.clear(); } void ReplicatedPG::on_removal() @@ -5793,16 +5804,8 @@ void ReplicatedPG::on_change() requeue_ops(waiting_for_all_missing); waiting_for_all_missing.clear(); - // take commit waiters; these are dups of what - // apply_and_flush_repops() will requeue. - for (map >::reverse_iterator p = waiting_for_ondisk.rbegin(); - p != waiting_for_ondisk.rend(); - p++) - requeue_ops(p->second); - waiting_for_ondisk.clear(); - waiting_for_ack.clear(); - - // this will requeue ops we were working on but didn't finish + // this will requeue ops we were working on but didn't finish, and + // any dups apply_and_flush_repops(is_primary()); // clear pushing/pulling maps -- 2.39.5