git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/ReplicatedPG: handle dup ops earlier in do_op 1667/head
author Sage Weil <sage@inktank.com>
Mon, 14 Apr 2014 04:31:35 +0000 (21:31 -0700)
committer Sage Weil <sage@inktank.com>
Mon, 14 Apr 2014 04:33:16 +0000 (21:33 -0700)
Currently the dup op checks happen in execute_ctx, long after we handle
cache ops or get the obc and (potentially) return ENOENT.  That means that
neither object deletions nor cache ops are properly idempotent.

This is easy to fix by moving the check earlier in do_op.

Fixes: #8089
Signed-off-by: Sage Weil <sage@inktank.com>
src/osd/ReplicatedPG.cc

index 9095df4ca3f1789621cc49df5b7ab1349eacebeb..ee9b31c78641ea1e278570a586280ded53a73d19 100644 (file)
@@ -1220,6 +1220,35 @@ void ReplicatedPG::do_op(OpRequestRef op)
     return;
   }
 
+  // dup/replay?
+  if (op->may_write() || op->may_cache()) {
+    const pg_log_entry_t *entry = pg_log.get_log().get_request(m->get_reqid());
+    if (entry) {
+      const eversion_t& oldv = entry->version;
+      dout(3) << __func__ << " dup " << m->get_reqid()
+             << " was " << oldv << dendl;
+      if (already_complete(oldv)) {
+       osd->reply_op_error(op, 0, oldv, entry->user_version);
+      } else {
+       if (m->wants_ack()) {
+         if (already_ack(oldv)) {
+           MOSDOpReply *reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0, false);
+           reply->add_flags(CEPH_OSD_FLAG_ACK);
+           reply->set_reply_versions(oldv, entry->user_version);
+           osd->send_message_osd_client(reply, m->get_connection());
+         } else {
+           dout(10) << " waiting for " << oldv << " to ack" << dendl;
+           waiting_for_ack[oldv].push_back(op);
+         }
+       }
+       dout(10) << " waiting for " << oldv << " to commit" << dendl;
+       waiting_for_ondisk[oldv].push_back(op);  // always queue ondisk waiters, so that we can requeue if needed
+       op->mark_delayed("waiting for ondisk");
+      }
+      return;
+    }
+  }
+
   ObjectContextRef obc;
   bool can_create = op->may_write() || op->may_cache();
   hobject_t missing_oid;
@@ -1618,34 +1647,6 @@ void ReplicatedPG::execute_ctx(OpContext *ctx)
   ctx->op_t = pgbackend->get_transaction();
 
   if (op->may_write() || op->may_cache()) {
-    // dup/replay?
-    const pg_log_entry_t *entry = pg_log.get_log().get_request(ctx->reqid);
-    if (entry) {
-      const eversion_t& oldv = entry->version;
-      dout(3) << "do_op dup " << ctx->reqid << " was " << oldv << dendl;
-      if (already_complete(oldv)) {
-       reply_ctx(ctx, 0, oldv, entry->user_version);
-      } else {
-       close_op_ctx(ctx, -EBUSY);
-
-       if (m->wants_ack()) {
-         if (already_ack(oldv)) {
-           MOSDOpReply *reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0, false);
-           reply->add_flags(CEPH_OSD_FLAG_ACK);
-           reply->set_reply_versions(oldv, entry->user_version);
-           osd->send_message_osd_client(reply, m->get_connection());
-         } else {
-           dout(10) << " waiting for " << oldv << " to ack" << dendl;
-           waiting_for_ack[oldv].push_back(op);
-         }
-       }
-       dout(10) << " waiting for " << oldv << " to commit" << dendl;
-       waiting_for_ondisk[oldv].push_back(op);  // always queue ondisk waiters, so that we can requeue if needed
-       op->mark_delayed("waiting for ondisk");
-      }
-      return;
-    }
-
     op->mark_started();
 
     // snap