From 0db90486ee0bea92e3e51d9d3df89b66a23f33a6 Mon Sep 17 00:00:00 2001 From: Alice Zhao Date: Mon, 11 Apr 2022 08:35:57 -0400 Subject: [PATCH] osd: don't require RWEXCL lock for stat+write ops. In librbd, a stat op is inserted before the write op for a cloned image. The OSD used to take RWEXCL for such requests, so they were processed one by one. With this fix, the OSD uses RWWRITE rather than RWEXCL for such [stat,write] requests, allowing multiple [stat,write] requests on the same object at once and improving performance. Signed-off-by: Alice Zhao (cherry picked from commit 9be266b0a0304aaaaa0ca12f28fcd3e9cce1f9d7) --- src/osd/OpRequest.h | 1 + src/osd/PrimaryLogPG.cc | 6 +++++- src/osd/osd_op_util.cc | 19 ++++++++++++++++++- src/osd/osd_op_util.h | 2 ++ src/osd/osd_types.h | 1 + 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h index 1a608b58341b2..832c793535d81 100644 --- a/src/osd/OpRequest.h +++ b/src/osd/OpRequest.h @@ -35,6 +35,7 @@ public: bool op_info_needs_init() const { return op_info.get_flags() == 0; } bool check_rmw(int flag) const { return op_info.check_rmw(flag); } bool may_read() const { return op_info.may_read(); } + bool may_read_data() const { return op_info.may_read_data(); } bool may_write() const { return op_info.may_write(); } bool may_cache() const { return op_info.may_cache(); } bool rwordered_forced() const { return op_info.rwordered_forced(); } diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 243e127eb0a8d..913d9b6aad737 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -1667,7 +1667,11 @@ bool PrimaryLogPG::get_rw_locks(bool write_ordered, OpContext *ctx) * to get the second. 
*/ if (write_ordered && ctx->op->may_read()) { - ctx->lock_type = RWState::RWEXCL; + if (ctx->op->may_read_data()) { + ctx->lock_type = RWState::RWEXCL; + } else { + ctx->lock_type = RWState::RWWRITE; + } } else if (write_ordered) { ctx->lock_type = RWState::RWWRITE; } else { diff --git a/src/osd/osd_op_util.cc b/src/osd/osd_op_util.cc index a33e2f110d0d4..d400a94dbf4db 100644 --- a/src/osd/osd_op_util.cc +++ b/src/osd/osd_op_util.cc @@ -16,6 +16,7 @@ bool OpInfo::check_rmw(int flag) const { ceph_assert(rmw_flags != 0); return rmw_flags & flag; } +// Returns true if op performs a read (including of the object_info). bool OpInfo::may_read() const { return need_read_cap() || check_rmw(CEPH_OSD_RMW_FLAG_CLASS_READ); } @@ -51,6 +52,16 @@ bool OpInfo::need_skip_promote() const { bool OpInfo::allows_returnvec() const { return check_rmw(CEPH_OSD_RMW_FLAG_RETURNVEC); } +/** + * may_read_data() + * + * Returns true if op reads information other than the object_info. Requires that the + * osd flush any prior writes prior to servicing this op. Includes any information not + * cached by the osd in the object_info or snapset. 
+ */ +bool OpInfo::may_read_data() const { + return check_rmw(CEPH_OSD_RMW_FLAG_READ_DATA); +} void OpInfo::set_rmw_flags(int flags) { rmw_flags |= flags; @@ -67,6 +78,7 @@ void OpInfo::set_skip_handle_cache() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_HAND void OpInfo::set_skip_promote() { set_rmw_flags(CEPH_OSD_RMW_FLAG_SKIP_PROMOTE); } void OpInfo::set_force_rwordered() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RWORDERED); } void OpInfo::set_returnvec() { set_rmw_flags(CEPH_OSD_RMW_FLAG_RETURNVEC); } +void OpInfo::set_read_data() { set_rmw_flags(CEPH_OSD_RMW_FLAG_READ_DATA); } int OpInfo::set_from_op( @@ -108,8 +120,12 @@ int OpInfo::set_from_op( if (ceph_osd_op_mode_modify(iter->op.op)) set_write(); } - if (ceph_osd_op_mode_read(iter->op.op)) + if (ceph_osd_op_mode_read(iter->op.op)) { set_read(); + if (iter->op.op != CEPH_OSD_OP_STAT) { + set_read_data(); + } + } // set READ flag if there are src_oids if (iter->soid.oid.name.length()) @@ -202,6 +218,7 @@ int OpInfo::set_from_op( // watch state (and may return early if the watch exists) or, in // the case of ping, is simply a read op. 
set_read(); + set_read_data(); // fall through case CEPH_OSD_OP_NOTIFY: case CEPH_OSD_OP_NOTIFY_ACK: diff --git a/src/osd/osd_op_util.h b/src/osd/osd_op_util.h index 300fe40cc87fd..fcd06c74ba3a5 100644 --- a/src/osd/osd_op_util.h +++ b/src/osd/osd_op_util.h @@ -47,6 +47,7 @@ public: bool check_rmw(int flag) const ; bool may_read() const; + bool may_read_data() const; bool may_write() const; bool may_cache() const; bool rwordered_forced() const; @@ -70,6 +71,7 @@ public: void set_skip_promote(); void set_force_rwordered(); void set_returnvec(); + void set_read_data(); int set_from_op( const MOSDOp *m, diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index afed5fa835103..8b0e410e5bba4 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -367,6 +367,7 @@ enum { CEPH_OSD_RMW_FLAG_SKIP_PROMOTE = (1 << 9), CEPH_OSD_RMW_FLAG_RWORDERED = (1 << 10), CEPH_OSD_RMW_FLAG_RETURNVEC = (1 << 11), + CEPH_OSD_RMW_FLAG_READ_DATA = (1 << 12), }; -- 2.39.5