From 645ac87b84ae348483c7d52b1968af5f44378dea Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 9 Jul 2012 15:53:31 -0700 Subject: [PATCH] ReplicatedPG: fix replay op ordering After a client reconnect, the client replays outstanding ops. The OSD then immediately responds with success if the op has already committed (version < ReplicatedPG::get_first_in_progress). Otherwise, we stick it in waiting_for_ondisk to be replied to when eval_repop concludes that waitfor_disk is empty. Fixes #2508 Signed-off-by: Samuel Just --- src/osd/ReplicatedPG.cc | 16 +++++++++++----- src/osd/ReplicatedPG.h | 11 +++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 17128a76bf1d3..adf6282685f42 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -846,7 +846,7 @@ void ReplicatedPG::do_op(OpRequestRef op) delete ctx; put_object_context(obc); put_object_contexts(src_obc); - if (oldv <= last_update_ondisk) { + if (already_complete(oldv)) { osd->reply_op_error(op, 0, oldv); } else { dout(10) << " waiting for " << oldv << " to commit" << dendl; @@ -3474,10 +3474,6 @@ void ReplicatedPG::op_commit(RepGather *repop) repop->waitfor_ack.erase(whoami); last_update_ondisk = repop->v; - if (waiting_for_ondisk.count(repop->v)) { - requeue_ops(waiting_for_ondisk[repop->v]); - waiting_for_ondisk.erase(repop->v); - } last_complete_ondisk = repop->pg_local_last_complete; eval_repop(repop); @@ -3526,6 +3522,16 @@ void ReplicatedPG::eval_repop(RepGather *repop) log_op_stats(repop->ctx); update_stats(); + if (waiting_for_ondisk.count(repop->v)) { + assert(waiting_for_ondisk.begin()->first == repop->v); + for (list<OpRequestRef>::iterator i = waiting_for_ondisk[repop->v].begin(); + i != waiting_for_ondisk[repop->v].end(); + ++i) { + osd->reply_op_error(*i, 0, repop->v); + } + waiting_for_ondisk.erase(repop->v); + } + if (m->wants_ondisk() && !repop->sent_disk) { // send commit. 
MOSDOpReply *reply = repop->ctx->reply; diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index da85a6b924230..8c7112110e048 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -470,6 +470,17 @@ protected: // replica ops // [primary|tail] xlist<RepGather*> repop_queue; + bool already_complete(eversion_t v) { + for (xlist<RepGather*>::iterator i = repop_queue.begin(); + !i.end(); + ++i) { + if ((*i)->v > v) + break; + if (!(*i)->waitfor_disk.empty()) + return false; + } + return true; + } map repop_map; void apply_repop(RepGather *repop); -- 2.39.5