From: Sage Weil Date: Fri, 27 Dec 2013 19:15:19 +0000 (-0800) Subject: osd: add rados CACHE mode (different from RD and WR) X-Git-Tag: v0.77~22^2~36 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=90e352ca73111b429b044747b01e46aac5e66c59;p=ceph.git osd: add rados CACHE mode (different from RD and WR) It is useful to distinguish cache operations from read and modify operations. Specifically, we will allow cache ops to be sent for snaps and also allow those ops to result in a write. Signed-off-by: Sage Weil --- diff --git a/src/include/rados.h b/src/include/rados.h index 1994cd385ee3..c4aa5035fb36 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -157,6 +157,7 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSD_OP_MODE_WR 0x2000 #define CEPH_OSD_OP_MODE_RMW 0x3000 #define CEPH_OSD_OP_MODE_SUB 0x4000 +#define CEPH_OSD_OP_MODE_CACHE 0x8000 #define CEPH_OSD_OP_TYPE 0x0f00 #define CEPH_OSD_OP_TYPE_LOCK 0x0100 @@ -319,6 +320,10 @@ static inline int ceph_osd_op_mode_modify(int op) { return op & CEPH_OSD_OP_MODE_WR; } +static inline int ceph_osd_op_mode_cache(int op) +{ + return op & CEPH_OSD_OP_MODE_CACHE; +} /* * note that the following tmap stuff is also defined in the ceph librados.h diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index aa6820f53600..e922402e6951 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7387,6 +7387,9 @@ int OSD::init_op_flags(OpRequestRef op) if (ceph_osd_op_type_pg(iter->op.op)) op->set_pg_op(); + if (ceph_osd_op_mode_cache(iter->op.op)) + op->set_cache(); + switch (iter->op.op) { case CEPH_OSD_OP_CALL: { diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc index 0730ce86577d..33e7fbdb9d25 100644 --- a/src/osd/OpRequest.cc +++ b/src/osd/OpRequest.cc @@ -64,6 +64,7 @@ bool OpRequest::check_rmw(int flag) { } bool OpRequest::may_read() { return need_read_cap() || need_class_read_cap(); } bool OpRequest::may_write() { return need_write_cap() || need_class_write_cap(); } +bool OpRequest::may_cache() { return check_rmw(CEPH_OSD_RMW_FLAG_CACHE); } bool OpRequest::includes_pg_op() { return check_rmw(CEPH_OSD_RMW_FLAG_PGOP); } bool OpRequest::need_read_cap() { return check_rmw(CEPH_OSD_RMW_FLAG_READ); @@ -82,3 +83,4 @@ void OpRequest::set_write() { rmw_flags |= CEPH_OSD_RMW_FLAG_WRITE; } void OpRequest::set_class_read() { rmw_flags |= CEPH_OSD_RMW_FLAG_CLASS_READ; } void OpRequest::set_class_write() { rmw_flags |= CEPH_OSD_RMW_FLAG_CLASS_WRITE; } void OpRequest::set_pg_op() { rmw_flags |= CEPH_OSD_RMW_FLAG_PGOP; } +void OpRequest::set_cache() { rmw_flags |= CEPH_OSD_RMW_FLAG_CACHE; } diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h index 6d367db1a9e7..96bb5a4864f6 100644 --- a/src/osd/OpRequest.h +++ b/src/osd/OpRequest.h @@ -59,6 +59,7 @@ struct OpRequest : public TrackedOp { bool check_rmw(int flag); bool may_read(); bool may_write(); + bool may_cache(); bool includes_pg_op(); bool need_read_cap(); bool need_write_cap(); @@ -66,6 +67,7 @@ struct OpRequest : public TrackedOp { bool need_class_write_cap(); void set_read(); void set_write(); + void set_cache(); void set_class_read(); void set_class_write(); void set_pg_op(); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index e30c79d4c281..686fe9c361d0 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -4791,7 +4791,7 @@ bool PG::can_discard_op(OpRequestRef op) if (OSD::op_is_discardable(m)) { dout(20) << " discard " << *m << dendl; return true; - } else if (op->may_write() && + } else if ((op->may_write() || op->may_cache()) && (!is_primary() || !same_for_modify_since(m->get_map_epoch()))) { osd->handle_misdirected_op(this, op); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index d83cb4352026..1061ee44d5ac 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1068,11 +1068,15 @@ void ReplicatedPG::do_op(OpRequestRef op) } // order this op as a write? - bool write_ordered = op->may_write() || (m->get_flags() & CEPH_OSD_FLAG_RWORDERED); + bool write_ordered = + op->may_write() || + op->may_cache() || + (m->get_flags() & CEPH_OSD_FLAG_RWORDERED); dout(10) << "do_op " << *m << (op->may_write() ? " may_write" : "") << (op->may_read() ? " may_read" : "") + << (op->may_cache() ? " may_cache" : "") << " -> " << (write_ordered ? "write-ordered" : "read-ordered") << " flags " << ceph_osd_flag_string(m->get_flags()) << dendl; @@ -1123,7 +1127,7 @@ void ReplicatedPG::do_op(OpRequestRef op) } ObjectContextRef obc; - bool can_create = op->may_write(); + bool can_create = op->may_write() || op->may_cache(); hobject_t missing_oid; hobject_t oid(m->get_oid(), m->get_object_locator().key, @@ -1330,8 +1334,8 @@ void ReplicatedPG::do_op(OpRequestRef op) osd->reply_op_error(op, -ENFILE); return; } - if (!op->may_write() && (!obc->obs.exists || - obc->obs.oi.is_whiteout())) { + if (!op->may_write() && !op->may_cache() && (!obc->obs.exists || + obc->obs.oi.is_whiteout())) { close_op_ctx(ctx); osd->reply_op_error(op, -ENOENT); return; @@ -1494,8 +1498,8 @@ void ReplicatedPG::execute_ctx(OpContext *ctx) ctx->op_t = ObjectStore::Transaction(); ctx->local_t = ObjectStore::Transaction(); - // dup/replay? - if (op->may_write()) { + if (op->may_write() || op->may_cache()) { + // dup/replay? const pg_log_entry_t *entry = pg_log.get_log().get_request(ctx->reqid); if (entry) { const eversion_t& oldv = entry->version; @@ -1650,7 +1654,7 @@ void ReplicatedPG::execute_ctx(OpContext *ctx) ctx->reply->set_reply_versions(ctx->at_version, ctx->user_at_version); - assert(op->may_write()); + assert(op->may_write() || op->may_cache()); // trim log? calc_trim_to(); @@ -1735,7 +1739,7 @@ void ReplicatedPG::log_op_stats(OpContext *ctx) osd->logger->inc(l_osd_op_r); osd->logger->inc(l_osd_op_r_outb, outb); osd->logger->tinc(l_osd_op_r_lat, latency); - } else if (op->may_write()) { + } else if (op->may_write() || op->may_cache()) { osd->logger->inc(l_osd_op_w); osd->logger->inc(l_osd_op_w_inb, inb); osd->logger->tinc(l_osd_op_w_rlat, rlatency); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 38946e9f60a6..82605f8fb065 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -507,7 +507,7 @@ protected: * @return true on success, false if we are queued */ bool get_rw_locks(OpContext *ctx) { - if (ctx->op->may_write()) { + if (ctx->op->may_write() || ctx->op->may_cache()) { if (ctx->obc->get_write(ctx->op)) { ctx->lock_to_release = OpContext::W_LOCK; return true; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index f56d6e6b756d..09ecc45e5ebf 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -206,6 +206,7 @@ enum { CEPH_OSD_RMW_FLAG_CLASS_READ = (1 << 3), CEPH_OSD_RMW_FLAG_CLASS_WRITE = (1 << 4), CEPH_OSD_RMW_FLAG_PGOP = (1 << 5), + CEPH_OSD_RMW_FLAG_CACHE = (1 << 6), };