From: Sage Weil Date: Mon, 14 Apr 2014 04:31:35 +0000 (-0700) Subject: osd/ReplicatedPG: handle dup ops earlier in do_op X-Git-Tag: v0.80-rc1~41^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8905e3e2285c211e695d3d2747e6feda8ea5e55c;p=ceph.git osd/ReplicatedPG: handle dup ops earlier in do_op Currently, the dup op checks happen in execute_ctx, long after we handle cache ops or get the obc and (potentially) return ENOENT. That means that neither object deletions nor cache ops are properly idempotent. This is easy to fix by moving the check earlier in do_op. Fixes: #8089 Signed-off-by: Sage Weil --- diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 9095df4ca3f1..ee9b31c78641 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1220,6 +1220,35 @@ void ReplicatedPG::do_op(OpRequestRef op) return; } + // dup/replay? + if (op->may_write() || op->may_cache()) { + const pg_log_entry_t *entry = pg_log.get_log().get_request(m->get_reqid()); + if (entry) { + const eversion_t& oldv = entry->version; + dout(3) << __func__ << " dup " << m->get_reqid() + << " was " << oldv << dendl; + if (already_complete(oldv)) { + osd->reply_op_error(op, 0, oldv, entry->user_version); + } else { + if (m->wants_ack()) { + if (already_ack(oldv)) { + MOSDOpReply *reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0, false); + reply->add_flags(CEPH_OSD_FLAG_ACK); + reply->set_reply_versions(oldv, entry->user_version); + osd->send_message_osd_client(reply, m->get_connection()); + } else { + dout(10) << " waiting for " << oldv << " to ack" << dendl; + waiting_for_ack[oldv].push_back(op); + } + } + dout(10) << " waiting for " << oldv << " to commit" << dendl; + waiting_for_ondisk[oldv].push_back(op); // always queue ondisk waiters, so that we can requeue if needed + op->mark_delayed("waiting for ondisk"); + } + return; + } + } + ObjectContextRef obc; bool can_create = op->may_write() || op->may_cache(); hobject_t missing_oid; 
@@ -1618,34 +1647,6 @@ void ReplicatedPG::execute_ctx(OpContext *ctx) ctx->op_t = pgbackend->get_transaction(); if (op->may_write() || op->may_cache()) { - // dup/replay? - const pg_log_entry_t *entry = pg_log.get_log().get_request(ctx->reqid); - if (entry) { - const eversion_t& oldv = entry->version; - dout(3) << "do_op dup " << ctx->reqid << " was " << oldv << dendl; - if (already_complete(oldv)) { - reply_ctx(ctx, 0, oldv, entry->user_version); - } else { - close_op_ctx(ctx, -EBUSY); - - if (m->wants_ack()) { - if (already_ack(oldv)) { - MOSDOpReply *reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0, false); - reply->add_flags(CEPH_OSD_FLAG_ACK); - reply->set_reply_versions(oldv, entry->user_version); - osd->send_message_osd_client(reply, m->get_connection()); - } else { - dout(10) << " waiting for " << oldv << " to ack" << dendl; - waiting_for_ack[oldv].push_back(op); - } - } - dout(10) << " waiting for " << oldv << " to commit" << dendl; - waiting_for_ondisk[oldv].push_back(op); // always queue ondisk waiters, so that we can requeue if needed - op->mark_delayed("waiting for ondisk"); - } - return; - } - op->mark_started(); // snap