]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph osd: add support for new op cmpext
author wangzhengyong <wangzhengyong@cmss.chinamobile.com>
Fri, 21 Apr 2017 08:51:09 +0000 (16:51 +0800)
committer wangzhengyong <wangzhengyong@cmss.chinamobile.com>
Wed, 26 Apr 2017 01:08:22 +0000 (09:08 +0800)
This adds support for a new op, cmpext. The op compares the extent.length
bytes supplied with the request against the extent.length bytes stored at
extent.offset on disk. It returns 0 on success, a negative error code
on failure, and (-MAX_ERRNO - mismatch_off) on mismatch.

rbd will use this in a multi-op request to implement the
SCSI COMPARE_AND_WRITE command, which is used by VMware for
its atomic test-and-set operation.

Signed-off-by: Zhengyong Wang <wangzhengyong@cmss.chinamobile.com>
Signed-off-by: Mike Christie <mchristi@redhat.com>
[ddiss@suse.de: ReplicatedPG -> PrimaryLogPG]
Reviewed-by: David Disseldorp <ddiss@suse.de>
src/include/rados.h
src/osd/PrimaryLogPG.cc
src/osd/PrimaryLogPG.h
src/tracing/osd.tp

index 4c5d379fdf0742e265a28652a7bddccf6e6d719f..4eb09b89cc2eef68f71f62671c314a619a5776eb 100644 (file)
@@ -270,6 +270,7 @@ extern const char *ceph_osd_state_name(int s);
                                                                            \
        /* ESX/SCSI */                                                      \
        f(WRITESAME,    __CEPH_OSD_OP(WR, DATA, 38),    "write-same")       \
+       f(CMPEXT,       __CEPH_OSD_OP(RD, DATA, 32),    "cmpext")           \
                                                                            \
        /** attrs **/                                                       \
        /* read */                                                          \
@@ -361,6 +362,7 @@ static inline int ceph_osd_op_uses_extent(int op)
        case CEPH_OSD_OP_ZERO:
        case CEPH_OSD_OP_APPEND:
        case CEPH_OSD_OP_TRIMTRUNC:
+       case CEPH_OSD_OP_CMPEXT:
                return true;
        default:
                return false;
index 7745607c8c0753dd29ff081081831c2b2b69097f..1ef56d0c59773d76672a0a7f33ed6f0d4ec56b19 100644 (file)
@@ -3782,6 +3782,37 @@ int PrimaryLogPG::do_xattr_cmp_str(int op, string& v1s, bufferlist& xattr)
   }
 }
 
+// Compare osd_op.indata against the object's extent (op.extent.offset/length).
+// Returns the non-negative read result on a match, a negative error code on
+// failure and (-MAX_ERRNO - offset_of_first_mismatch) when the data differs.
+int PrimaryLogPG::do_extent_cmp(OpContext *ctx, OSDOp& osd_op)
+{
+  ceph_osd_op& op = osd_op.op;
+  vector<OSDOp> read_ops(1);
+  OSDOp& read_op = read_ops[0];
+  // Fetch the extent under comparison by issuing a SYNC_READ sub-op.
+  read_op.op.op = CEPH_OSD_OP_SYNC_READ;
+  read_op.op.extent.offset = op.extent.offset;
+  read_op.op.extent.length = op.extent.length;
+  read_op.op.extent.truncate_seq = op.extent.truncate_seq;
+  read_op.op.extent.truncate_size = op.extent.truncate_size;
+  const int result = do_osd_ops(ctx, read_ops);
+  if (result < 0) {
+    derr << "do_extent_cmp do_osd_ops failed " << result << dendl;
+    return result;
+  }
+  // The data read and the supplied payload must be the same size.
+  const uint64_t len = osd_op.indata.length();
+  if (read_op.outdata.length() != len)
+    return -EINVAL;
+  for (uint64_t p = 0; p < len; p++) {
+    // Cast p so the code is computed in signed arithmetic (no wrap/narrowing).
+    if (read_op.outdata[p] != osd_op.indata[p])
+      return -MAX_ERRNO - static_cast<int>(p);
+  }
+  return result;
+}
+
 int PrimaryLogPG::do_writesame(OpContext *ctx, OSDOp& osd_op)
 {
   ceph_osd_op& op = osd_op.op;
@@ -4476,6 +4507,12 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
       
       // --- READS ---
 
+    case CEPH_OSD_OP_CMPEXT:
+      ++ctx->num_read;
+      tracepoint(osd, do_osd_op_pre_extent_cmp, soid.oid.name.c_str(), soid.snap.val, oi.size, oi.truncate_seq, op.extent.offset, op.extent.length, op.extent.truncate_size, op.extent.truncate_seq);
+      result = do_extent_cmp(ctx, osd_op);
+      break;
+
     case CEPH_OSD_OP_SYNC_READ:
       if (pool.info.require_rollback()) {
        result = -EOPNOTSUPP;
index 8f56f060edf7d6d79b84fdc46f73e8f21ed12d4e..1ebde000c5b7a7bf2c929b167e14240707536a95 100644 (file)
@@ -1311,6 +1311,7 @@ protected:
 
   friend class C_ChecksumRead;
 
+  int do_extent_cmp(OpContext *ctx, OSDOp& osd_op);
   int do_writesame(OpContext *ctx, OSDOp& osd_op);
 
   bool pgls_filter(PGLSFilter *filter, hobject_t& sobj, bufferlist& outdata);
index 3582ce63b8b811ecbfc94fb0d78b5c23b2f1750e..6f199fa2edbaf8b88d2d30bf02366dacbd0653de 100644 (file)
@@ -91,6 +91,28 @@ TRACEPOINT_EVENT(osd, do_osd_op_pre,
     )
 )
 
+TRACEPOINT_EVENT(osd, do_osd_op_pre_extent_cmp, /* fired just before do_extent_cmp() is called */
+    TP_ARGS(
+        const char*, oid,            /* soid.oid.name at the call site */
+        uint64_t, snap,              /* soid.snap.val at the call site */
+        uint64_t, osize,             /* oi.size at the call site */
+        uint32_t, oseq,              /* oi.truncate_seq at the call site */
+        uint64_t, offset,            /* op.extent.offset */
+        uint64_t, length,            /* op.extent.length */
+        uint64_t, truncate_size,     /* op.extent.truncate_size */
+        uint32_t, truncate_seq),     /* op.extent.truncate_seq */
+    TP_FIELDS(                       /* one field per argument, same name and type */
+        ctf_string(oid, oid)
+        ctf_integer(uint64_t, snap, snap)
+        ctf_integer(uint64_t, osize, osize)
+        ctf_integer(uint32_t, oseq, oseq)
+        ctf_integer(uint64_t, offset, offset)
+        ctf_integer(uint64_t, length, length)
+        ctf_integer(uint64_t, truncate_size, truncate_size)
+        ctf_integer(uint32_t, truncate_seq, truncate_seq)
+    )
+)
+
 TRACEPOINT_EVENT(osd, do_osd_op_pre_read,
     TP_ARGS(
         const char*, oid,