git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librados: add writesame API
author: David Disseldorp <ddiss@suse.de>
Wed, 6 Apr 2016 17:13:18 +0000 (19:13 +0200)
committer: David Disseldorp <ddiss@suse.de>
Mon, 25 Apr 2016 13:07:57 +0000 (15:07 +0200)
The writesame operation allows callers to write the same data buffer
multiple times to a given object.

Signed-off-by: David Disseldorp <ddiss@suse.de>
src/include/rados/librados.h
src/include/rados/librados.hpp
src/librados/IoCtxImpl.cc
src/librados/IoCtxImpl.h
src/librados/librados.cc
src/osdc/Objecter.h
src/tracing/librados.tp

index 44373ffb4438d527c7da595943f21e3b390b97fd..502c97522e6e9cfae0875637aebb309bc10a9775 100644 (file)
@@ -283,8 +283,8 @@ struct rados_cluster_stat_t {
  *   rados_write_op_assert_version()
  * - Creating objects: rados_write_op_create()
  * - IO on objects: rados_write_op_append(), rados_write_op_write(), rados_write_op_zero
- *   rados_write_op_write_full(), rados_write_op_remove, rados_write_op_truncate(),
- *   rados_write_op_zero()
+ *   rados_write_op_write_full(), rados_write_op_writesame(), rados_write_op_remove,
+ *   rados_write_op_truncate(), rados_write_op_zero()
  * - Hints: rados_write_op_set_alloc_hint()
  * - Performing the operation: rados_write_op_operate(), rados_aio_write_op_operate()
  */
@@ -1333,6 +1333,24 @@ CEPH_RADOS_API int rados_write(rados_ioctx_t io, const char *oid,
 CEPH_RADOS_API int rados_write_full(rados_ioctx_t io, const char *oid,
                                     const char *buf, size_t len);
 
+/**
+ * Write the same *data_len* bytes from *buf* multiple times into the
+ * *oid* object. *write_len* bytes are written in total, which must be
+ * a multiple of *data_len*. *data_len* must be non-zero, and both
+ * *write_len* and *data_len* must be <= UINT_MAX/2.
+ *
+ * @param io the io context in which the write will occur
+ * @param oid name of the object
+ * @param buf data to write
+ * @param data_len length of the data, in bytes
+ * @param write_len the total number of bytes to write
+ * @param off byte offset in the object to begin writing at
+ * @returns 0 on success, negative error code on failure
+ */
+CEPH_RADOS_API int rados_writesame(rados_ioctx_t io, const char *oid,
+                                   const char *buf, size_t data_len,
+                                   size_t write_len, uint64_t off);
+
 /**
  * Efficiently copy a portion of one object to another
  *
@@ -1889,6 +1907,29 @@ CEPH_RADOS_API int rados_aio_write_full(rados_ioctx_t io, const char *oid,
                                        rados_completion_t completion,
                                        const char *buf, size_t len);
 
+/**
+ * Asynchronously write the same buffer multiple times
+ *
+ * Queues the writesame and returns.
+ *
+ * The return value of the completion will be 0 on success, negative
+ * error code on failure.
+ *
+ * @param io the io context in which the write will occur
+ * @param oid name of the object
+ * @param completion what to do when the writesame is safe and complete
+ * @param buf data to write
+ * @param data_len length of the data, in bytes
+ * @param write_len the total number of bytes to write
+ * @param off byte offset in the object to begin writing at
+ * @returns 0 on success, -E2BIG or -EINVAL for invalid lengths, -EROFS if
+ * the io context specifies a snap_seq other than LIBRADOS_SNAP_HEAD
+ */
+CEPH_RADOS_API int rados_aio_writesame(rados_ioctx_t io, const char *oid,
+                                      rados_completion_t completion,
+                                      const char *buf, size_t data_len,
+                                      size_t write_len, uint64_t off);
+
 /**
  * Asychronously remove an object
  *
@@ -2510,6 +2551,20 @@ CEPH_RADOS_API void rados_write_op_write_full(rados_write_op_t write_op,
                                               const char *buffer,
                                               size_t len);
 
+/**
+ * Write the same buffer multiple times
+ * @param write_op operation to add this action to
+ * @param buffer bytes to write
+ * @param data_len length of buffer
+ * @param write_len total number of bytes to write, as a multiple of *data_len*
+ * @param offset offset to write to
+ */
+CEPH_RADOS_API void rados_write_op_writesame(rados_write_op_t write_op,
+                                             const char *buffer,
+                                             size_t data_len,
+                                             size_t write_len,
+                                             uint64_t offset);
+
 /**
  * Append to end of object.
  * @param write_op operation to add this action to
index 8450f22a033e46f002159bb10e4113ee2922dfe2..5c6abdb63f0ea418b865ad961a863908c04b943f 100644 (file)
@@ -369,6 +369,8 @@ namespace librados
 
     void write(uint64_t off, const bufferlist& bl);
     void write_full(const bufferlist& bl);
+    void writesame(uint64_t off, uint64_t write_len,
+                  const bufferlist& bl);
     void append(const bufferlist& bl);
     void remove();
     void truncate(uint64_t off);
@@ -683,6 +685,8 @@ namespace librados
      * NOTE: this call steals the contents of @param bl.
      */
     int write_full(const std::string& oid, bufferlist& bl);
+    int writesame(const std::string& oid, bufferlist& bl,
+                 size_t write_len, uint64_t off);
     int clone_range(const std::string& dst_oid, uint64_t dst_off,
                    const std::string& src_oid, uint64_t src_off,
                    size_t len);
@@ -908,6 +912,8 @@ namespace librados
     int aio_append(const std::string& oid, AioCompletion *c, const bufferlist& bl,
                  size_t len);
     int aio_write_full(const std::string& oid, AioCompletion *c, const bufferlist& bl);
+    int aio_writesame(const std::string& oid, AioCompletion *c, const bufferlist& bl,
+                     size_t write_len, uint64_t off);
 
     /**
      * Asychronously remove an object
index ce1a220d4e59a856031adcb76aba565d1cf0e1a7..3880e159ab98e545c325ec70ba2020caefd39ca1 100644 (file)
@@ -644,6 +644,21 @@ int librados::IoCtxImpl::write_full(const object_t& oid, bufferlist& bl)
   return operate(oid, &op, NULL);
 }
 
+int librados::IoCtxImpl::writesame(const object_t& oid, bufferlist& bl,
+                                  size_t write_len, uint64_t off)
+{ // builds a single writesame op (pattern bl, write_len bytes at off) and submits it through operate()
+  if ((bl.length() > UINT_MAX/2) || (write_len > UINT_MAX/2)) // both lengths are capped at UINT_MAX/2
+    return -E2BIG;
+  if ((bl.length() == 0) || (write_len % bl.length())) // pattern must be non-empty and divide write_len evenly
+    return -EINVAL;
+  ::ObjectOperation op;
+  prepare_assert_ops(&op);
+  bufferlist mybl;
+  mybl.substr_of(bl, 0, bl.length()); // mybl spans all of bl; presumably so the op's claim_append() leaves the caller's bl intact - confirm
+  op.writesame(off, write_len, mybl);
+  return operate(oid, &op, NULL); // returns operate()'s result unchanged
+}
+
 int librados::IoCtxImpl::clone_range(const object_t& dst_oid,
                                     uint64_t dst_offset,
                                     const object_t& src_oid,
@@ -934,6 +949,38 @@ int librados::IoCtxImpl::aio_write_full(const object_t &oid,
   return 0;
 }
 
+int librados::IoCtxImpl::aio_writesame(const object_t &oid,
+                                      AioCompletionImpl *c,
+                                      const bufferlist& bl,
+                                      size_t write_len,
+                                      uint64_t off)
+{ // validates the lengths, then queues a writesame op whose outcome is reported through completion c
+  auto ut = ceph::real_clock::now(client->cct); // passed below as the op's mtime
+
+  if ((bl.length() > UINT_MAX/2) || (write_len > UINT_MAX/2)) // both lengths are capped at UINT_MAX/2
+    return -E2BIG;
+  if ((bl.length() == 0) || (write_len % bl.length())) // pattern must be non-empty and divide write_len evenly
+    return -EINVAL;
+  /* can't write to a snapshot */
+  if (snap_seq != CEPH_NOSNAP)
+    return -EROFS;
+
+  Context *onack = new C_aio_Ack(c); // contexts are allocated only after every early return above
+  Context *onsafe = new C_aio_Safe(c);
+
+  c->io = this;
+  queue_aio_write(c);
+
+  Objecter::Op *o = objecter->prepare_writesame_op(
+    oid, oloc,
+    write_len, off,
+    snapc, bl, ut, 0, // ut is the mtime argument, 0 is the flags argument
+    onack, onsafe, &c->objver);
+  objecter->op_submit(o, &c->tid);
+
+  return 0; // 0 here means the op was submitted, not that it completed
+}
+
 int librados::IoCtxImpl::aio_remove(const object_t &oid, AioCompletionImpl *c)
 {
   auto ut = ceph::real_clock::now(client->cct);
index 47ed90ab01296376a3a152d80360c7684aedb54e..4f85a2ba30f6127bfb1b8ca3d4d7913fbf40478a 100644 (file)
@@ -127,6 +127,8 @@ struct librados::IoCtxImpl {
   int write(const object_t& oid, bufferlist& bl, size_t len, uint64_t off);
   int append(const object_t& oid, bufferlist& bl, size_t len);
   int write_full(const object_t& oid, bufferlist& bl);
+  int writesame(const object_t& oid, bufferlist& bl,
+               size_t write_len, uint64_t offset);
   int clone_range(const object_t& dst_oid, uint64_t dst_offset,
                   const object_t& src_oid, uint64_t src_offset, uint64_t len);
   int read(const object_t& oid, bufferlist& bl, size_t len, uint64_t off);
@@ -201,6 +203,8 @@ struct librados::IoCtxImpl {
                 const bufferlist& bl, size_t len);
   int aio_write_full(const object_t &oid, AioCompletionImpl *c,
                     const bufferlist& bl);
+  int aio_writesame(const object_t &oid, AioCompletionImpl *c,
+                   const bufferlist& bl, size_t write_len, uint64_t off);
   int aio_remove(const object_t &oid, AioCompletionImpl *c);
   int aio_exec(const object_t& oid, AioCompletionImpl *c, const char *cls,
               const char *method, bufferlist& inbl, bufferlist *outbl);
index 05dfac509f712710513f5aaddc8452668caae07f..4182aa2cc71a9800743dc8240a342993483ac886 100644 (file)
@@ -400,6 +400,14 @@ void librados::ObjectWriteOperation::write_full(const bufferlist& bl)
   o->write_full(c);
 }
 
+void librados::ObjectWriteOperation::writesame(uint64_t off, uint64_t write_len,
+                                              const bufferlist& bl)
+{ // appends a writesame step (pattern bl, write_len bytes at off) to this compound write operation
+  ::ObjectOperation *o = &impl->o;
+  bufferlist c = bl; // mutable copy: ::ObjectOperation::writesame() takes a non-const bufferlist&
+  o->writesame(off, write_len, c);
+}
+
 void librados::ObjectWriteOperation::append(const bufferlist& bl)
 {
   ::ObjectOperation *o = &impl->o;
@@ -1184,6 +1192,13 @@ int librados::IoCtx::write_full(const std::string& oid, bufferlist& bl)
   return io_ctx_impl->write_full(obj, bl);
 }
 
+int librados::IoCtx::writesame(const std::string& oid, bufferlist& bl,
+                              size_t write_len, uint64_t off)
+{ // C++ API entry point: forwards to IoCtxImpl::writesame() and returns its result
+  object_t obj(oid); // the impl layer identifies objects by object_t rather than std::string
+  return io_ctx_impl->writesame(obj, bl, write_len, off);
+}
+
 int librados::IoCtx::clone_range(const std::string& dst_oid, uint64_t dst_off,
                                 const std::string& src_oid, uint64_t src_off,
                                 size_t len)
@@ -1796,6 +1811,14 @@ int librados::IoCtx::aio_write_full(const std::string& oid, librados::AioComplet
   return io_ctx_impl->aio_write_full(obj, c->pc, bl);
 }
 
+int librados::IoCtx::aio_writesame(const std::string& oid, librados::AioCompletion *c,
+                                  const bufferlist& bl, size_t write_len,
+                                  uint64_t off)
+{ // C++ API entry point: forwards to IoCtxImpl::aio_writesame() with the completion's impl object (c->pc)
+  return io_ctx_impl->aio_writesame(oid, c->pc, bl, write_len, off); // oid is converted to the impl's object_t parameter at this call
+}
+
+
 int librados::IoCtx::aio_remove(const std::string& oid, librados::AioCompletion *c)
 {
   return io_ctx_impl->aio_remove(oid, c->pc);
@@ -3279,6 +3302,23 @@ extern "C" int rados_write_full(rados_ioctx_t io, const char *o, const char *buf
   return retval;
 }
 
+extern "C" int rados_writesame(rados_ioctx_t io,
+                               const char *o,
+                               const char *buf,
+                               size_t data_len,
+                               size_t write_len,
+                               uint64_t off)
+{ // C binding: wraps buf in a bufferlist and calls IoCtxImpl::writesame(), tracing entry and exit
+  tracepoint(librados, rados_writesame_enter, io, o, buf, data_len, write_len, off);
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; // the opaque rados_ioctx_t handle is used as an IoCtxImpl pointer
+  object_t oid(o);
+  bufferlist bl;
+  bl.append(buf, data_len); // data_len bytes of buf form the pattern; length checks happen inside writesame()
+  int retval = ctx->writesame(oid, bl, write_len, off);
+  tracepoint(librados, rados_writesame_exit, retval);
+  return retval;
+}
+
 extern "C" int rados_clone_range(rados_ioctx_t io, const char *dst, uint64_t dst_off,
                                  const char *src, uint64_t src_off, size_t len)
 {
@@ -4338,6 +4378,23 @@ extern "C" int rados_aio_write_full(rados_ioctx_t io, const char *o,
   return retval;
 }
 
+extern "C" int rados_aio_writesame(rados_ioctx_t io, const char *o,
+                                  rados_completion_t completion,
+                                  const char *buf, size_t data_len,
+                                  size_t write_len, uint64_t off)
+{
+  tracepoint(librados, rados_aio_writesame_enter, io, o, completion, buf,
+                                               data_len, write_len, off);
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  bufferlist bl;
+  bl.append(buf, data_len);
+  int retval = ctx->aio_writesame(o, (librados::AioCompletionImpl*)completion,
+                                 bl, write_len, off);
+  tracepoint(librados, rados_aio_writesame_exit, retval);
+  return retval;
+}
+
 extern "C" int rados_aio_remove(rados_ioctx_t io, const char *o,
                                rados_completion_t completion)
 {
@@ -4876,6 +4933,19 @@ extern "C" void rados_write_op_write_full(rados_write_op_t write_op,
   tracepoint(librados, rados_write_op_write_full_exit);
 }
 
+extern "C" void rados_write_op_writesame(rados_write_op_t write_op,
+                                        const char *buffer,
+                                        size_t data_len,
+                                        size_t write_len,
+                                        uint64_t offset)
+{ // C binding: adds a writesame step to write_op; this function performs no length validation itself
+  tracepoint(librados, rados_write_op_writesame_enter, write_op, buffer, data_len, write_len, offset);
+  bufferlist bl;
+  bl.append(buffer, data_len); // data_len bytes of buffer form the pattern
+  ((::ObjectOperation *)write_op)->writesame(offset, write_len, bl); // the opaque handle is used as an ::ObjectOperation pointer
+  tracepoint(librados, rados_write_op_writesame_exit);
+}
+
 extern "C" void rados_write_op_append(rados_write_op_t write_op,
                                      const char *buffer,
                                      size_t len)
index 6b519ffd8d5a81c30ac3e4f38fd4096def24e0a9..163e7db384cbe5553983ad83f00f80c77c457ea9 100644 (file)
@@ -142,6 +142,14 @@ struct ObjectOperation {
     osd_op.op.extent.length = len;
     osd_op.indata.claim_append(bl);
   }
+  void add_writesame(int op, uint64_t off, uint64_t write_len,
+                    bufferlist& bl) { // adds an OSD op of type `op` and fills its writesame extent and payload
+    OSDOp& osd_op = add_op(op);
+    osd_op.op.writesame.offset = off;
+    osd_op.op.writesame.length = write_len; // total bytes to cover
+    osd_op.op.writesame.data_length = bl.length(); // pattern size, read before bl is handed to claim_append()
+    osd_op.indata.claim_append(bl); // NOTE(review): claim_append presumably takes over bl's buffers, leaving bl empty - confirm
+  }
   void add_clone_range(int op, uint64_t off, uint64_t len,
                       const object_t& srcoid, uint64_t srcoff,
                       snapid_t srcsnapid) {
@@ -370,6 +378,9 @@ struct ObjectOperation {
   void write_full(bufferlist& bl) {
     add_data(CEPH_OSD_OP_WRITEFULL, 0, bl.length(), bl);
   }
+  void writesame(uint64_t off, uint64_t write_len, bufferlist& bl) { // queues a CEPH_OSD_OP_WRITESAME step on this operation
+    add_writesame(CEPH_OSD_OP_WRITESAME, off, write_len, bl);
+  }
   void append(bufferlist& bl) {
     add_data(CEPH_OSD_OP_APPEND, 0, bl.length(), bl);
   }
@@ -2598,6 +2609,44 @@ public:
     op_submit(o, &tid);
     return tid;
   }
+  Op *prepare_writesame_op(
+    const object_t& oid, const object_locator_t& oloc,
+    uint64_t write_len, uint64_t off,
+    const SnapContext& snapc, const bufferlist &bl,
+    ceph::real_time mtime, int flags, Context *onack,
+    Context *oncommit, version_t *objver = NULL,
+    ObjectOperation *extra_ops = NULL, int op_flags = 0) { // builds, but does not submit, an Op carrying one writesame
+
+    vector<OSDOp> ops;
+    int i = init_ops(ops, 1, extra_ops); // presumably i is the slot left for this op after any extra_ops - confirm
+    ops[i].op.op = CEPH_OSD_OP_WRITESAME;
+    ops[i].op.writesame.offset = off;
+    ops[i].op.writesame.length = write_len; // total bytes to cover
+    ops[i].op.writesame.data_length = bl.length(); // size of the repeated pattern
+    ops[i].indata = bl;
+    ops[i].op.flags = op_flags;
+    Op *o = new Op(oid, oloc, ops, flags | global_op_flags.read() |
+                  CEPH_OSD_FLAG_WRITE, onack, oncommit, objver); // CEPH_OSD_FLAG_WRITE is always OR'd into the caller's flags
+    o->mtime = mtime;
+    o->snapc = snapc;
+    return o; // heap-allocated with new; the caller receives the raw pointer
+  }
+  ceph_tid_t writesame(
+    const object_t& oid, const object_locator_t& oloc,
+    uint64_t write_len, uint64_t off,
+    const SnapContext& snapc, const bufferlist &bl,
+    ceph::real_time mtime, int flags, Context *onack,
+    Context *oncommit, version_t *objver = NULL,
+    ObjectOperation *extra_ops = NULL, int op_flags = 0) { // prepare_writesame_op() followed by op_submit(); returns the tid
+
+    Op *o = prepare_writesame_op(oid, oloc, write_len, off, snapc, bl,
+                                mtime, flags, onack, oncommit, objver,
+                                extra_ops, op_flags); // every argument is forwarded unchanged
+
+    ceph_tid_t tid;
+    op_submit(o, &tid); // tid is filled in by op_submit()
+    return tid;
+  }
   ceph_tid_t trunc(const object_t& oid, const object_locator_t& oloc,
                   const SnapContext& snapc, ceph::real_time mtime, int flags,
                   uint64_t trunc_size, __u32 trunc_seq, Context *onack,
index a66464cc8ba9cbe62f34aa3d61721173f809b54c..8afe1e29ebf43787e17ef919f924032a0708feb6 100644 (file)
@@ -815,6 +815,31 @@ TRACEPOINT_EVENT(librados, rados_write_full_exit,
     )
 )
 
+TRACEPOINT_EVENT(librados, rados_writesame_enter,
+    TP_ARGS(
+       rados_ioctx_t, ioctx,
+       const char*, oid,
+       const void*, buf,
+       size_t, data_len,
+       size_t, write_len,
+       uint64_t, off),
+    TP_FIELDS(
+       ctf_integer_hex(rados_ioctx_t, ioctx, ioctx)
+       ctf_string(oid, oid)
+       ceph_ctf_sequence(unsigned char, buf, buf, size_t, data_len)
+       ctf_integer(size_t, write_len, write_len)
+       ctf_integer(uint64_t, off, off)
+    )
+)
+
+TRACEPOINT_EVENT(librados, rados_writesame_exit,
+    TP_ARGS(
+       int, retval),
+    TP_FIELDS(
+       ctf_integer(int, retval, retval)
+    )
+)
+
 TRACEPOINT_EVENT(librados, rados_clone_range_enter,
     TP_ARGS(
         rados_ioctx_t, ioctx,
@@ -2147,6 +2172,35 @@ TRACEPOINT_EVENT(librados, rados_aio_write_full_exit,
     )
 )
 
+TRACEPOINT_EVENT(librados, rados_aio_writesame_enter,
+    TP_ARGS(
+       rados_ioctx_t, ioctx,
+       const char*, oid,
+       rados_completion_t, completion,
+       const char*, buf,
+       size_t, data_len,
+       size_t, write_len,
+       uint64_t, off),
+    TP_FIELDS(
+       ctf_integer_hex(rados_ioctx_t, ioctx, ioctx)
+       ctf_string(oid, oid)
+       ctf_integer_hex(rados_completion_t, completion, completion)
+       ctf_sequence(unsigned char, buf, buf, size_t, CEPH_MIN(data_len, CEPH_TRACE_BUF_TRUNC_LEN))
+       ctf_integer(size_t, data_len, data_len)
+       ctf_integer(size_t, write_len, write_len)
+       ctf_integer(uint64_t, off, off)
+    )
+)
+
+TRACEPOINT_EVENT(librados, rados_aio_writesame_exit,
+    TP_ARGS(
+       int, retval),
+    TP_FIELDS(
+       ctf_integer(int, retval, retval)
+    )
+)
+
+
 TRACEPOINT_EVENT(librados, rados_aio_remove_enter,
     TP_ARGS(
         rados_ioctx_t, ioctx,
@@ -2878,6 +2932,26 @@ TRACEPOINT_EVENT(librados, rados_write_op_write_full_exit,
     TP_FIELDS()
 )
 
+TRACEPOINT_EVENT(librados, rados_write_op_writesame_enter,
+    TP_ARGS(
+       rados_write_op_t, op,
+       const char*, buffer,
+       size_t, data_len,
+       size_t, write_len,
+       uint64_t, offset),
+    TP_FIELDS(
+       ctf_integer_hex(rados_write_op_t, op, op)
+       ceph_ctf_sequence(unsigned char, buffer, buffer, size_t, data_len)
+       ctf_integer(size_t, write_len, write_len)
+       ctf_integer(uint64_t, offset, offset)
+    )
+)
+
+TRACEPOINT_EVENT(librados, rados_write_op_writesame_exit,
+    TP_ARGS(),
+    TP_FIELDS()
+)
+
 TRACEPOINT_EVENT(librados, rados_write_op_append_enter,
     TP_ARGS(
         rados_write_op_t, op,