From: wangzhengyong
Date: Fri, 21 Apr 2017 08:51:09 +0000 (+0800)
Subject: ceph osd: add support for new op cmpext
X-Git-Tag: v12.0.3~228^2~4
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=bfc334b5376768e7f4cb41f577c73c90ae586e25;p=ceph.git

ceph osd: add support for new op cmpext

This adds support for a new op, cmpext. The request carries
extent.length bytes of data and compares them with the extent.length
bytes stored at extent.offset on disk.

Returns 0 on success, a negative error code on failure, and
(-MAX_ERRNO - mismatch_off) on mismatch, where mismatch_off is the
offset, within the compared extent, of the first differing byte.

rbd will use this in a multi-op request to implement the SCSI
COMPARE_AND_WRITE request, which VMware uses for its atomic
test-and-set operation.

Signed-off-by: Zhengyong Wang
Signed-off-by: Mike Christie
[ddiss@suse.de: ReplicatedPG -> PrimaryLogPG]
Reviewed-by: David Disseldorp
---

diff --git a/src/include/rados.h b/src/include/rados.h
index 4c5d379fdf07..4eb09b89cc2e 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -270,6 +270,7 @@ extern const char *ceph_osd_state_name(int s);
                                                                            \
        /* ESX/SCSI */                                                      \
        f(WRITESAME,    __CEPH_OSD_OP(WR, DATA, 38),    "write-same")       \
+       f(CMPEXT,       __CEPH_OSD_OP(RD, DATA, 32),    "cmpext")           \
                                                                            \
        /** attrs **/                                                       \
        /* read */                                                          \
@@ -361,6 +362,7 @@ static inline int ceph_osd_op_uses_extent(int op)
        case CEPH_OSD_OP_ZERO:
        case CEPH_OSD_OP_APPEND:
        case CEPH_OSD_OP_TRIMTRUNC:
+       case CEPH_OSD_OP_CMPEXT:
                return true;
        default:
                return false;
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 7745607c8c07..1ef56d0c5977 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -3782,6 +3782,37 @@ int PrimaryLogPG::do_xattr_cmp_str(int op, string& v1s, bufferlist& xattr)
   }
 }
 
+int PrimaryLogPG::do_extent_cmp(OpContext *ctx, OSDOp& osd_op)
+{
+  ceph_osd_op& op = osd_op.op;
+  vector<OSDOp> read_ops(1);
+  OSDOp& read_op = read_ops[0];
+  int result = 0;
+
+  read_op.op.op = CEPH_OSD_OP_SYNC_READ;
+  read_op.op.extent.offset = op.extent.offset;
+  read_op.op.extent.length = op.extent.length;
+  read_op.op.extent.truncate_seq = op.extent.truncate_seq;
+  read_op.op.extent.truncate_size = op.extent.truncate_size;
+
+  result = do_osd_ops(ctx, read_ops);
+  if (result < 0) {
+    derr << "do_extent_cmp do_osd_ops failed " << result << dendl;
+    return result;
+  }
+
+  if (read_op.outdata.length() != osd_op.indata.length())
+    return -EINVAL;
+
+  for (uint64_t p = 0; p < osd_op.indata.length(); p++) {
+    if (read_op.outdata[p] != osd_op.indata[p]) {
+      return (-MAX_ERRNO - p);
+    }
+  }
+
+  return result;
+}
+
 int PrimaryLogPG::do_writesame(OpContext *ctx, OSDOp& osd_op)
 {
   ceph_osd_op& op = osd_op.op;
@@ -4476,6 +4507,12 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
 
       // --- READS ---
 
+    case CEPH_OSD_OP_CMPEXT:
+      ++ctx->num_read;
+      tracepoint(osd, do_osd_op_pre_extent_cmp, soid.oid.name.c_str(), soid.snap.val, oi.size, oi.truncate_seq, op.extent.offset, op.extent.length, op.extent.truncate_size, op.extent.truncate_seq);
+      result = do_extent_cmp(ctx, osd_op);
+      break;
+
     case CEPH_OSD_OP_SYNC_READ:
       if (pool.info.require_rollback()) {
         result = -EOPNOTSUPP;
diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h
index 8f56f060edf7..1ebde000c5b7 100644
--- a/src/osd/PrimaryLogPG.h
+++ b/src/osd/PrimaryLogPG.h
@@ -1311,6 +1311,7 @@ protected:
 
   friend class C_ChecksumRead;
 
+  int do_extent_cmp(OpContext *ctx, OSDOp& osd_op);
   int do_writesame(OpContext *ctx, OSDOp& osd_op);
 
   bool pgls_filter(PGLSFilter *filter, hobject_t& sobj, bufferlist& outdata);
diff --git a/src/tracing/osd.tp b/src/tracing/osd.tp
index 3582ce63b8b8..6f199fa2edba 100644
--- a/src/tracing/osd.tp
+++ b/src/tracing/osd.tp
@@ -91,6 +91,28 @@ TRACEPOINT_EVENT(osd, do_osd_op_pre,
     )
 )
 
+TRACEPOINT_EVENT(osd, do_osd_op_pre_extent_cmp,
+    TP_ARGS(
+        const char*, oid,
+        uint64_t, snap,
+        uint64_t, osize,
+        uint32_t, oseq,
+        uint64_t, offset,
+        uint64_t, length,
+        uint64_t, truncate_size,
+        uint32_t, truncate_seq),
+    TP_FIELDS(
+        ctf_string(oid, oid)
+        ctf_integer(uint64_t, snap, snap)
+        ctf_integer(uint64_t, osize, osize)
+        ctf_integer(uint32_t, oseq, oseq)
+        ctf_integer(uint64_t, offset, offset)
+        ctf_integer(uint64_t, length, length)
+        ctf_integer(uint64_t, truncate_size, truncate_size)
+        ctf_integer(uint32_t, truncate_seq, truncate_seq)
+    )
+)
+
 TRACEPOINT_EVENT(osd, do_osd_op_pre_read,
     TP_ARGS(
         const char*, oid,