From 18f08cd9c0a3972149eef635fb1035dfc4fcb662 Mon Sep 17 00:00:00 2001 From: Josh Durgin Date: Mon, 17 Feb 2014 22:35:13 -0800 Subject: [PATCH] ReplicatedPG: clear osd op reply output for writes Since the vector of OSDOps used by the reply is the same as the one processed by do_osd_ops() now, any output data needs to be cleared for writes. To be compatible with current behavior, allow writes that aren't applying anything or have failed to return data still. Add a new parameter to the MOSDOpReply constructor to determine whether the output data should be cleared. Clear it for successful writes, and remove a redundant result < 0 -> result > 0 check in the process. This was caught by ceph_test_cls_hello and its writes_dont_return_data method. Signed-off-by: Josh Durgin --- src/messages/MOSDOpReply.h | 9 ++++++--- src/osd/OSD.cc | 3 ++- src/osd/ReplicatedPG.cc | 25 +++++++++++++------------ 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/messages/MOSDOpReply.h b/src/messages/MOSDOpReply.h index c0e989f7c3a05..9739ae135578b 100644 --- a/src/messages/MOSDOpReply.h +++ b/src/messages/MOSDOpReply.h @@ -124,7 +124,7 @@ public: public: MOSDOpReply() : Message(CEPH_MSG_OSD_OPREPLY, HEAD_VERSION, COMPAT_VERSION) { } - MOSDOpReply(MOSDOp *req, int r, epoch_t e, int acktype) + MOSDOpReply(MOSDOp *req, int r, epoch_t e, int acktype, bool ignore_out_data) : Message(CEPH_MSG_OSD_OPREPLY, HEAD_VERSION, COMPAT_VERSION) { set_tid(req->get_tid()); ops = req->ops; @@ -137,9 +137,12 @@ public: user_version = 0; retry_attempt = req->get_retry_attempt(); - // zero out ops payload_len - for (unsigned i = 0; i < ops.size(); i++) + // zero out ops payload_len and possibly out data + for (unsigned i = 0; i < ops.size(); i++) { ops[i].op.payload_len = 0; + if (ignore_out_data) + ops[i].outdata.clear(); + } } private: ~MOSDOpReply() {} diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index a4f1b5193decd..76896ff4a9bbc 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7143,7 +7143,8 @@ void OSDService::reply_op_error(OpRequestRef op, int err, eversion_t v, int flags; flags = m->get_flags() & (CEPH_OSD_FLAG_ACK|CEPH_OSD_FLAG_ONDISK); - MOSDOpReply *reply = new MOSDOpReply(m, err, osdmap->get_epoch(), flags); + MOSDOpReply *reply = new MOSDOpReply(m, err, osdmap->get_epoch(), flags, + true); reply->set_reply_versions(v, uv); m->get_connection()->get_messenger()->send_message(reply, m->get_connection()); } diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index 9a04358ec5c61..585c9bfe7e320 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -968,7 +968,8 @@ void ReplicatedPG::do_pg_op(OpRequestRef op) // reply MOSDOpReply *reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), - CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK); + CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK, + false); reply->claim_op_out_data(ops); reply->set_result(result); reply->set_reply_versions(info.last_update, info.last_user_version); @@ -1548,7 +1549,7 @@ void ReplicatedPG::do_cache_redirect(OpRequestRef op, ObjectContextRef obc) MOSDOp *m = static_cast(op->get_req()); int flags = m->get_flags() & (CEPH_OSD_FLAG_ACK|CEPH_OSD_FLAG_ONDISK); MOSDOpReply *reply = new MOSDOpReply(m, -ENOENT, - get_osdmap()->get_epoch(), flags); + get_osdmap()->get_epoch(), flags, false); request_redirect_t redir(m->get_object_locator(), pool.info.tier_of); reply->set_redirect(redir); dout(10) << "sending redirect to pool " << pool.info.tier_of << " for op " @@ -1625,7 +1626,7 @@ void ReplicatedPG::execute_ctx(OpContext *ctx) if (m->wants_ack()) { if (already_ack(oldv)) { - MOSDOpReply *reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0); + MOSDOpReply *reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0, false); reply->add_flags(CEPH_OSD_FLAG_ACK); reply->set_reply_versions(oldv, entry->user_version); osd->send_message_osd_client(reply, m->get_connection()); @@ -1721,8 +1722,10 @@ void ReplicatedPG::execute_ctx(OpContext *ctx) return; } + bool successful_write = !ctx->op_t->empty() && ctx->modify && result >= 0; // prepare the reply - ctx->reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0); + ctx->reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0, + successful_write); // Write operations aren't allowed to return a data payload because // we can't do so reliably. If the client has to resend the request @@ -1731,12 +1734,10 @@ void ReplicatedPG::execute_ctx(OpContext *ctx) // possible to construct an operation that does a read, does a guard // check (e.g., CMPXATTR), and then a write. Then we either succeed // with the write, or return a CMPXATTR and the read value. - if (!((ctx->op_t->empty() && !ctx->modify) || result < 0)) { + if (successful_write) { // write. normalize the result code. - if (result > 0) { - dout(20) << " zeroing write result code " << result << dendl; - result = 0; - } + dout(20) << " zeroing write result code " << result << dendl; + result = 0; } ctx->reply->set_result(result); @@ -6224,7 +6225,7 @@ void ReplicatedPG::eval_repop(RepGather *repop) if (reply) repop->ctx->reply = NULL; else { - reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0); + reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0, true); reply->set_reply_versions(repop->ctx->at_version, repop->ctx->user_at_version); } @@ -6248,7 +6249,7 @@ void ReplicatedPG::eval_repop(RepGather *repop) i != waiting_for_ack[repop->v].end(); ++i) { MOSDOp *m = (MOSDOp*)(*i)->get_req(); - MOSDOpReply *reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0); + MOSDOpReply *reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0, true); reply->set_reply_versions(repop->ctx->at_version, repop->ctx->user_at_version); reply->add_flags(CEPH_OSD_FLAG_ACK); @@ -6263,7 +6264,7 @@ void ReplicatedPG::eval_repop(RepGather *repop) if (reply) repop->ctx->reply = NULL; else { - reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0); + reply = new MOSDOpReply(m, 0, get_osdmap()->get_epoch(), 0, true); reply->set_reply_versions(repop->ctx->at_version, repop->ctx->user_at_version); } -- 2.39.5