From: Sage Weil Date: Fri, 27 Dec 2013 19:15:19 +0000 (-0800) Subject: osd: add rados CACHE mode (different from RD and WR) X-Git-Tag: v0.77~22^2~36 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=90e352ca73111b429b044747b01e46aac5e66c59;p=ceph.git osd: add rados CACHE mode (different from RD and WR) It is useful to distinguish cache operations from read and modify operations. Specifically, we will allow cache ops to be sent for snaps and also allow those ops to result in a write. Signed-off-by: Sage Weil --- diff --git a/src/include/rados.h b/src/include/rados.h index 1994cd385ee..c4aa5035fb3 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -157,6 +157,7 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSD_OP_MODE_WR 0x2000 #define CEPH_OSD_OP_MODE_RMW 0x3000 #define CEPH_OSD_OP_MODE_SUB 0x4000 +#define CEPH_OSD_OP_MODE_CACHE 0x8000 #define CEPH_OSD_OP_TYPE 0x0f00 #define CEPH_OSD_OP_TYPE_LOCK 0x0100 @@ -319,6 +320,10 @@ static inline int ceph_osd_op_mode_modify(int op) { return op & CEPH_OSD_OP_MODE_WR; } +static inline int ceph_osd_op_mode_cache(int op) +{ + return op & CEPH_OSD_OP_MODE_CACHE; +} /* * note that the following tmap stuff is also defined in the ceph librados.h diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index aa6820f5360..e922402e695 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7387,6 +7387,9 @@ int OSD::init_op_flags(OpRequestRef op) if (ceph_osd_op_type_pg(iter->op.op)) op->set_pg_op(); + if (ceph_osd_op_mode_cache(iter->op.op)) + op->set_cache(); + switch (iter->op.op) { case CEPH_OSD_OP_CALL: { diff --git a/src/osd/OpRequest.cc b/src/osd/OpRequest.cc index 0730ce86577..33e7fbdb9d2 100644 --- a/src/osd/OpRequest.cc +++ b/src/osd/OpRequest.cc @@ -64,6 +64,7 @@ bool OpRequest::check_rmw(int flag) { } bool OpRequest::may_read() { return need_read_cap() || need_class_read_cap(); } bool OpRequest::may_write() { return need_write_cap() || need_class_write_cap(); } +bool OpRequest::may_cache() { return check_rmw(CEPH_OSD_RMW_FLAG_CACHE); } bool OpRequest::includes_pg_op() { return check_rmw(CEPH_OSD_RMW_FLAG_PGOP); } bool OpRequest::need_read_cap() { return check_rmw(CEPH_OSD_RMW_FLAG_READ); @@ -82,3 +83,4 @@ void OpRequest::set_write() { rmw_flags |= CEPH_OSD_RMW_FLAG_WRITE; } void OpRequest::set_class_read() { rmw_flags |= CEPH_OSD_RMW_FLAG_CLASS_READ; } void OpRequest::set_class_write() { rmw_flags |= CEPH_OSD_RMW_FLAG_CLASS_WRITE; } void OpRequest::set_pg_op() { rmw_flags |= CEPH_OSD_RMW_FLAG_PGOP; } +void OpRequest::set_cache() { rmw_flags |= CEPH_OSD_RMW_FLAG_CACHE; } diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h index 6d367db1a9e..96bb5a4864f 100644 --- a/src/osd/OpRequest.h +++ b/src/osd/OpRequest.h @@ -59,6 +59,7 @@ struct OpRequest : public TrackedOp { bool check_rmw(int flag); bool may_read(); bool may_write(); + bool may_cache(); bool includes_pg_op(); bool need_read_cap(); bool need_write_cap(); @@ -66,6 +67,7 @@ struct OpRequest : public TrackedOp { bool need_class_write_cap(); void set_read(); void set_write(); + void set_cache(); void set_class_read(); void set_class_write(); void set_pg_op(); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index e30c79d4c28..686fe9c361d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -4791,7 +4791,7 @@ bool PG::can_discard_op(OpRequestRef op) if (OSD::op_is_discardable(m)) { dout(20) << " discard " << *m << dendl; return true; - } else if (op->may_write() && + } else if ((op->may_write() || op->may_cache()) && (!is_primary() || !same_for_modify_since(m->get_map_epoch()))) { osd->handle_misdirected_op(this, op); diff --git a/src/osd/ReplicatedPG.cc b/src/osd/ReplicatedPG.cc index d83cb435202..1061ee44d5a 100644 --- a/src/osd/ReplicatedPG.cc +++ b/src/osd/ReplicatedPG.cc @@ -1068,11 +1068,15 @@ void ReplicatedPG::do_op(OpRequestRef op) } // order this op as a write? - bool write_ordered = op->may_write() || (m->get_flags() & CEPH_OSD_FLAG_RWORDERED); + bool write_ordered = + op->may_write() || + op->may_cache() || + (m->get_flags() & CEPH_OSD_FLAG_RWORDERED); dout(10) << "do_op " << *m << (op->may_write() ? " may_write" : "") << (op->may_read() ? " may_read" : "") + << (op->may_cache() ? " may_cache" : "") << " -> " << (write_ordered ? "write-ordered" : "read-ordered") << " flags " << ceph_osd_flag_string(m->get_flags()) << dendl; @@ -1123,7 +1127,7 @@ void ReplicatedPG::do_op(OpRequestRef op) } ObjectContextRef obc; - bool can_create = op->may_write(); + bool can_create = op->may_write() || op->may_cache(); hobject_t missing_oid; hobject_t oid(m->get_oid(), m->get_object_locator().key, @@ -1330,8 +1334,8 @@ void ReplicatedPG::do_op(OpRequestRef op) osd->reply_op_error(op, -ENFILE); return; } - if (!op->may_write() && (!obc->obs.exists || - obc->obs.oi.is_whiteout())) { + if (!op->may_write() && !op->may_cache() && (!obc->obs.exists || + obc->obs.oi.is_whiteout())) { close_op_ctx(ctx); osd->reply_op_error(op, -ENOENT); return; @@ -1494,8 +1498,8 @@ void ReplicatedPG::execute_ctx(OpContext *ctx) ctx->op_t = ObjectStore::Transaction(); ctx->local_t = ObjectStore::Transaction(); - // dup/replay? - if (op->may_write()) { + if (op->may_write() || op->may_cache()) { + // dup/replay? const pg_log_entry_t *entry = pg_log.get_log().get_request(ctx->reqid); if (entry) { const eversion_t& oldv = entry->version; @@ -1650,7 +1654,7 @@ void ReplicatedPG::execute_ctx(OpContext *ctx) ctx->reply->set_reply_versions(ctx->at_version, ctx->user_at_version); - assert(op->may_write()); + assert(op->may_write() || op->may_cache()); // trim log? calc_trim_to(); @@ -1735,7 +1739,7 @@ void ReplicatedPG::log_op_stats(OpContext *ctx) osd->logger->inc(l_osd_op_r); osd->logger->inc(l_osd_op_r_outb, outb); osd->logger->tinc(l_osd_op_r_lat, latency); - } else if (op->may_write()) { + } else if (op->may_write() || op->may_cache()) { osd->logger->inc(l_osd_op_w); osd->logger->inc(l_osd_op_w_inb, inb); osd->logger->tinc(l_osd_op_w_rlat, rlatency); diff --git a/src/osd/ReplicatedPG.h b/src/osd/ReplicatedPG.h index 38946e9f60a..82605f8fb06 100644 --- a/src/osd/ReplicatedPG.h +++ b/src/osd/ReplicatedPG.h @@ -507,7 +507,7 @@ protected: * @return true on success, false if we are queued */ bool get_rw_locks(OpContext *ctx) { - if (ctx->op->may_write()) { + if (ctx->op->may_write() || ctx->op->may_cache()) { if (ctx->obc->get_write(ctx->op)) { ctx->lock_to_release = OpContext::W_LOCK; return true; diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index f56d6e6b756..09ecc45e5eb 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -206,6 +206,7 @@ enum { CEPH_OSD_RMW_FLAG_CLASS_READ = (1 << 3), CEPH_OSD_RMW_FLAG_CLASS_WRITE = (1 << 4), CEPH_OSD_RMW_FLAG_PGOP = (1 << 5), + CEPH_OSD_RMW_FLAG_CACHE = (1 << 6), };