git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: add 'write_zeroes' public C/C++ API methods
author Jason Dillaman <dillaman@redhat.com>
Tue, 30 Jun 2020 12:44:38 +0000 (08:44 -0400)
committer Jason Dillaman <dillaman@redhat.com>
Tue, 30 Jun 2020 21:53:24 +0000 (17:53 -0400)
Unlike the existing 'discard' option which is more of a hint to
attempt to release space, the new 'write_zeroes' APIs will ensure
that the entire provided extent is fully zeroed.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/include/rbd/librbd.h
src/include/rbd/librbd.hpp
src/librbd/api/Io.cc
src/librbd/api/Io.h
src/librbd/librbd.cc
src/test/librbd/test_librbd.cc

index 50e2e990262cbb62de313f7205e9711b33927140..7113a0e2970648d9fea083fb2165020ddf993fef 100644 (file)
@@ -47,6 +47,7 @@ extern "C" {
 #define LIBRBD_SUPPORTS_IOVEC 1
 #define LIBRBD_SUPPORTS_WATCH 0
 #define LIBRBD_SUPPORTS_WRITESAME 1
+#define LIBRBD_SUPPORTS_WRITE_ZEROES 1
 
 #if __GNUC__ >= 4
   #define CEPH_RBD_API          __attribute__ ((visibility ("default")))
@@ -1100,10 +1101,15 @@ CEPH_RBD_API ssize_t rbd_write2(rbd_image_t image, uint64_t ofs, size_t len,
                                 const char *buf, int op_flags);
 CEPH_RBD_API int rbd_discard(rbd_image_t image, uint64_t ofs, uint64_t len);
 CEPH_RBD_API ssize_t rbd_writesame(rbd_image_t image, uint64_t ofs, size_t len,
-                                   const char *buf, size_t data_len, int op_flags);
+                                   const char *buf, size_t data_len,
+                                   int op_flags);
+/**
+ * Synchronously zero the extent [ofs, ofs + len) of the image.
+ *
+ * @param zero_flags must currently be 0
+ * @returns the number of bytes zeroed on success, negative error code on
+ *          failure
+ *
+ * `len` is a size_t so that this prototype matches the definition in
+ * librbd.cc (and the ssize_t return value) on every ABI.
+ */
+CEPH_RBD_API ssize_t rbd_write_zeroes(rbd_image_t image, uint64_t ofs,
+                                      size_t len, int zero_flags,
+                                      int op_flags);
 CEPH_RBD_API ssize_t rbd_compare_and_write(rbd_image_t image, uint64_t ofs,
                                            size_t len, const char *cmp_buf,
-                                           const char *buf, uint64_t *mismatch_off,
+                                           const char *buf,
+                                           uint64_t *mismatch_off,
                                            int op_flags);
 
 CEPH_RBD_API int rbd_aio_write(rbd_image_t image, uint64_t off, size_t len,
@@ -1131,10 +1137,15 @@ CEPH_RBD_API int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len,
 CEPH_RBD_API int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len,
                                    const char *buf, size_t data_len,
                                    rbd_completion_t c, int op_flags);
+CEPH_RBD_API int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, /* asynchronous counterpart of rbd_write_zeroes */
+                                      size_t len, rbd_completion_t c, /* the outcome is reported through c */
+                                      int zero_flags, int op_flags); /* zero_flags must currently be 0 */
 CEPH_RBD_API ssize_t rbd_aio_compare_and_write(rbd_image_t image,
                                                uint64_t off, size_t len,
-                                               const char *cmp_buf, const char *buf,
-                                               rbd_completion_t c, uint64_t *mismatch_off,
+                                               const char *cmp_buf,
+                                               const char *buf,
+                                               rbd_completion_t c,
+                                               uint64_t *mismatch_off,
                                                int op_flags);
 
 CEPH_RBD_API int rbd_aio_create_completion(void *cb_arg,
index 987a76eb9da8f0f3d67859c35b88a4a20834b683..76acfa9b393eca724134b08c40bef4151478dbaa 100644 (file)
@@ -679,8 +679,11 @@ public:
   ssize_t write(uint64_t ofs, size_t len, ceph::bufferlist& bl);
   /* @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */
   ssize_t write2(uint64_t ofs, size_t len, ceph::bufferlist& bl, int op_flags);
+
   int discard(uint64_t ofs, uint64_t len);
   ssize_t writesame(uint64_t ofs, size_t len, ceph::bufferlist &bl, int op_flags);
+  ssize_t write_zeroes(uint64_t ofs, size_t len, int zero_flags, int op_flags); /* zero_flags must currently be 0; returns bytes zeroed or a negative error */
+
   ssize_t compare_and_write(uint64_t ofs, size_t len, ceph::bufferlist &cmp_bl,
                             ceph::bufferlist& bl, uint64_t *mismatch_off, int op_flags);
 
@@ -688,11 +691,17 @@ public:
   /* @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */
   int aio_write2(uint64_t off, size_t len, ceph::bufferlist& bl,
                  RBD::AioCompletion *c, int op_flags);
+
+  int aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c);
   int aio_writesame(uint64_t off, size_t len, ceph::bufferlist& bl,
                     RBD::AioCompletion *c, int op_flags);
+  /* asynchronously zero [off, off + len); the outcome is reported through c
+   * @param zero_flags must currently be 0 */
+  int aio_write_zeroes(uint64_t off, size_t len, RBD::AioCompletion *c,
+                       int zero_flags, int op_flags);
+
   int aio_compare_and_write(uint64_t off, size_t len, ceph::bufferlist& cmp_bl,
                             ceph::bufferlist& bl, RBD::AioCompletion *c,
                             uint64_t *mismatch_off, int op_flags);
+
   /**
    * read async from image
    *
@@ -714,7 +723,6 @@ public:
   /* @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */
   int aio_read2(uint64_t off, size_t len, ceph::bufferlist& bl,
                  RBD::AioCompletion *c, int op_flags);
-  int aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c);
 
   int flush();
   /**
index a91b5556861943537c1446e88dcbb1bc899b1ba8..4d97b9d8a8cfb10a7ae3a0050e100f553c236f3b 100644 (file)
@@ -133,6 +133,32 @@ ssize_t Io<I>::write_same(
   return len;
 }
 
+template <typename I>
+ssize_t Io<I>::write_zeroes(I& image_ctx, uint64_t off, uint64_t len,
+                            int zero_flags, int op_flags) {
+  auto cct = image_ctx.cct;
+  ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
+                 << "len = " << len << dendl;
+
+  image_ctx.image_lock.lock_shared();
+  int r = clip_io(util::get_image_ctx(&image_ctx), off, &len);
+  image_ctx.image_lock.unlock_shared();
+  if (r < 0) {
+    lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
+    return r;
+  }
+
+  C_SaferCond ctx;
+  auto aio_comp = io::AioCompletion::create(&ctx);
+  aio_write_zeroes(image_ctx, aio_comp, off, len, zero_flags, op_flags, false);
+
+  r = ctx.wait();
+  if (r < 0) {
+    return r;
+  }
+  return len;
+}
+
 template <typename I>
 ssize_t Io<I>::compare_and_write(
     I &image_ctx, uint64_t off, uint64_t len, bufferlist &&cmp_bl,
@@ -303,6 +329,46 @@ void Io<I>::aio_write_same(I &image_ctx, io::AioCompletion *aio_comp,
   req->send();
 }
 
+template <typename I>
+void Io<I>::aio_write_zeroes(I& image_ctx, io::AioCompletion *aio_comp,
+                             uint64_t off, uint64_t len, int zero_flags,
+                             int op_flags, bool native_async) {
+  auto cct = image_ctx.cct;
+  FUNCTRACE(cct);
+  ZTracer::Trace trace;
+  if (image_ctx.blkin_trace_all) {
+    trace.init("io: write_zeroes", &image_ctx.trace_endpoint);
+    trace.event("init");
+  }
+
+  aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_DISCARD);
+  ldout(cct, 20) << "ictx=" << &image_ctx << ", "
+                 << "completion=" << aio_comp << ", off=" << off << ", "
+                 << "len=" << len << dendl;
+
+  if (native_async && image_ctx.event_socket.is_valid()) {
+    aio_comp->set_event_notify(true);
+  }
+
+  // validate the supported flags
+  if (zero_flags != 0U) {
+    aio_comp->fail(-EINVAL);
+    return;
+  }
+
+  if (!is_valid_io(image_ctx, aio_comp)) {
+    return;
+  }
+
+  // enable partial discard (zeroing) of objects
+  uint32_t discard_granularity_bytes = 0;
+
+  auto req = io::ImageDispatchSpec<I>::create_discard(
+    image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, off, len,
+    discard_granularity_bytes, trace, 0);
+  req->send();
+}
+
 template <typename I>
 void Io<I>::aio_compare_and_write(I &image_ctx, io::AioCompletion *aio_comp,
                                   uint64_t off, uint64_t len,
index 95120d499f1f48a7431959d3da9588e5c0e81775..4e2ec5028e4326d9a38c4cae54f6808ce3bf180b 100644 (file)
@@ -24,6 +24,8 @@ struct Io {
                          uint32_t discard_granularity_bytes);
   static ssize_t write_same(ImageCtxT &image_ctx, uint64_t off, uint64_t len,
                             bufferlist &&bl, int op_flags);
+  static ssize_t write_zeroes(ImageCtxT &image_ctx, uint64_t off, uint64_t len, // blocking variant of aio_write_zeroes
+                              int zero_flags, int op_flags); // returns the clipped length or a negative error
   static ssize_t compare_and_write(ImageCtxT &image_ctx, uint64_t off,
                                    uint64_t len, bufferlist &&cmp_bl,
                                    bufferlist &&bl, uint64_t *mismatch_off,
@@ -43,6 +45,9 @@ struct Io {
   static void aio_write_same(ImageCtxT &image_ctx, io::AioCompletion *c,
                              uint64_t off, uint64_t len, bufferlist &&bl,
                              int op_flags, bool native_async);
+  static void aio_write_zeroes(ImageCtxT &image_ctx, io::AioCompletion *c, // the outcome is reported through c
+                               uint64_t off, uint64_t len, int zero_flags, // a non-zero zero_flags fails c with -EINVAL
+                               int op_flags, bool native_async);
   static void aio_compare_and_write(ImageCtxT &image_ctx, io::AioCompletion *c,
                                     uint64_t off, uint64_t len,
                                     bufferlist &&cmp_bl, bufferlist &&bl,
index 83722258971a794157f06d8904e9b3eccb6a6bce..2e10188acea9f7d03c1bfc89255860d20de13fb2 100644 (file)
@@ -2570,8 +2570,8 @@ namespace librbd {
     }
 
     bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
-    if (discard_zero && mem_is_zero(bl.c_str(), bl.length())) {
-      int r = api::Io<>::discard(*ictx, ofs, len, 0);
+    if (discard_zero && bl.is_zero()) {
+      int r = api::Io<>::write_zeroes(*ictx, ofs, len, 0U, op_flags);
       tracepoint(librbd, writesame_exit, r);
       return r;
     }
@@ -2581,6 +2581,13 @@ namespace librbd {
     return r;
   }
 
+  ssize_t Image::write_zeroes(uint64_t ofs, size_t len, int zero_flags,
+                              int op_flags)
+  {
+    // synchronous zeroing: forwards straight to the internal I/O API
+    auto &image_ctx = *(ImageCtx *)ctx;
+    return api::Io<>::write_zeroes(image_ctx, ofs, len, zero_flags, op_flags);
+  }
+
   ssize_t Image::compare_and_write(uint64_t ofs, size_t len,
                                    ceph::bufferlist &cmp_bl, ceph::bufferlist& bl,
                                    uint64_t *mismatch_off, int op_flags)
@@ -2638,17 +2645,6 @@ namespace librbd {
     return 0;
   }
 
-  int Image::aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c)
-  {
-    ImageCtx *ictx = (ImageCtx *)ctx;
-    tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, c->pc);
-    api::Io<>::aio_discard(
-      *ictx,  get_aio_completion(c), off, len, ictx->discard_granularity_bytes,
-      true);
-    tracepoint(librbd, aio_discard_exit, 0);
-    return 0;
-  }
-
   int Image::aio_read(uint64_t off, size_t len, bufferlist& bl,
                      RBD::AioCompletion *c)
   {
@@ -2696,6 +2692,17 @@ namespace librbd {
     return 0;
   }
 
+  int Image::aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c)
+  {
+    ImageCtx *ictx = (ImageCtx *)ctx;
+    tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, c->pc);
+    api::Io<>::aio_discard(
+      *ictx,  get_aio_completion(c), off, len, ictx->discard_granularity_bytes,
+      true);
+    tracepoint(librbd, aio_discard_exit, 0);
+    return 0;
+  }
+
   int Image::aio_writesame(uint64_t off, size_t len, bufferlist& bl,
                            RBD::AioCompletion *c, int op_flags)
   {
@@ -2709,8 +2716,9 @@ namespace librbd {
     }
 
     bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
-    if (discard_zero && mem_is_zero(bl.c_str(), bl.length())) {
-      api::Io<>::aio_discard(*ictx, get_aio_completion(c), off, len, 0, true);
+    if (discard_zero && bl.is_zero()) {
+      api::Io<>::aio_write_zeroes(*ictx, get_aio_completion(c), off, len, 0U,
+                                  op_flags, true);
       tracepoint(librbd, aio_writesame_exit, 0);
       return 0;
     }
@@ -2721,6 +2729,15 @@ namespace librbd {
     return 0;
   }
 
+  int Image::aio_write_zeroes(uint64_t off, size_t len, RBD::AioCompletion *c,
+                              int zero_flags, int op_flags)
+  {
+    ImageCtx *ictx = (ImageCtx *)ctx;
+    api::Io<>::aio_write_zeroes(*ictx, get_aio_completion(c), off, len,
+                                zero_flags, op_flags, true);
+    return 0;
+  }
+
   int Image::aio_compare_and_write(uint64_t off, size_t len,
                                    ceph::bufferlist& cmp_bl, ceph::bufferlist& bl,
                                    RBD::AioCompletion *c, uint64_t *mismatch_off,
@@ -5929,7 +5946,7 @@ extern "C" ssize_t rbd_writesame(rbd_image_t image, uint64_t ofs, size_t len,
 
   bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
   if (discard_zero && mem_is_zero(buf, data_len)) {
-    int r = librbd::api::Io<>::discard(*ictx, ofs, len, 0);
+    int r = librbd::api::Io<>::write_zeroes(*ictx, ofs, len, 0, op_flags);
     tracepoint(librbd, writesame_exit, r);
     return r;
   }
@@ -5942,6 +5959,13 @@ extern "C" ssize_t rbd_writesame(rbd_image_t image, uint64_t ofs, size_t len,
   return r;
 }
 
+extern "C" ssize_t rbd_write_zeroes(rbd_image_t image, uint64_t ofs, size_t len,
+                                    int zero_flags, int op_flags)
+{
+  // C entry point: the opaque handle is the ImageCtx the request runs against
+  auto &image_ctx = *(librbd::ImageCtx *)image;
+  return librbd::api::Io<>::write_zeroes(image_ctx, ofs, len, zero_flags,
+                                         op_flags);
+}
+
 extern "C" ssize_t rbd_compare_and_write(rbd_image_t image,
                                          uint64_t ofs, size_t len,
                                          const char *cmp_buf,
@@ -6045,19 +6069,6 @@ extern "C" int rbd_aio_writev(rbd_image_t image, const struct iovec *iov,
   return r;
 }
 
-extern "C" int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len,
-                              rbd_completion_t c)
-{
-  librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
-  librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
-  tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, comp->pc);
-  librbd::api::Io<>::aio_discard(
-    *ictx,  get_aio_completion(comp), off, len,
-    ictx->discard_granularity_bytes, true);
-  tracepoint(librbd, aio_discard_exit, 0);
-  return 0;
-}
-
 extern "C" int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len,
                            char *buf, rbd_completion_t c)
 {
@@ -6142,6 +6153,19 @@ extern "C" int rbd_aio_flush(rbd_image_t image, rbd_completion_t c)
   return 0;
 }
 
+extern "C" int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len,
+                              rbd_completion_t c)
+{
+  librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+  librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
+  tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, comp->pc);
+  librbd::api::Io<>::aio_discard(
+    *ictx,  get_aio_completion(comp), off, len,
+    ictx->discard_granularity_bytes, true);
+  tracepoint(librbd, aio_discard_exit, 0);
+  return 0;
+}
+
 extern "C" int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len,
                                  const char *buf, size_t data_len, rbd_completion_t c,
                                  int op_flags)
@@ -6159,8 +6183,8 @@ extern "C" int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len,
 
   bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
   if (discard_zero && mem_is_zero(buf, data_len)) {
-    librbd::api::Io<>::aio_discard(
-      *ictx, get_aio_completion(comp), off, len, 0, true);
+    librbd::api::Io<>::aio_write_zeroes(
+      *ictx, get_aio_completion(comp), off, len, 0, op_flags, true);
     tracepoint(librbd, aio_writesame_exit, 0);
     return 0;
   }
@@ -6174,6 +6198,18 @@ extern "C" int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len,
   return 0;
 }
 
+extern "C" int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, size_t len,
+                                    rbd_completion_t c, int zero_flags,
+                                    int op_flags)
+{
+  librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+  librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
+
+  librbd::api::Io<>::aio_write_zeroes(*ictx, get_aio_completion(comp), off, len,
+                                      zero_flags, op_flags, true);
+  return 0;
+}
+
 extern "C" ssize_t rbd_aio_compare_and_write(rbd_image_t image, uint64_t off,
                                              size_t len, const char *cmp_buf,
                                              const char *buf, rbd_completion_t c,
index f046efe91bd7107d6062580dc5a417a17dc624a3..190016ef3ced08e2389c35e77693d7bfeaaa77ae 100644 (file)
@@ -8513,6 +8513,65 @@ TEST_F(TestLibRBD, QuiesceWatchTimeout)
   ioctx.close();
 }
 
+TEST_F(TestLibRBD, WriteZeroes) {  // exercises Image::write_zeroes on a 2 MiB image
+  librbd::RBD rbd;
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+  std::string name = get_temp_image_name();
+  int order = 0;
+  uint64_t size = 2 << 20;
+  ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+
+  librbd::Image image;
+  ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
+
+  // 1s from [0, 256) / length 256
+  char data[256];
+  memset(data, 1, sizeof(data));
+  bufferlist bl;
+  bl.append(data, 256);
+  ASSERT_EQ(256, image.write(0, 256, bl));
+
+  interval_set<uint64_t> diff;
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  auto expected_diff = interval_set<uint64_t>{{{0, 256}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // zero from offset 192 to the end of the image, trimming the tail of the
+  // written extent. Now 1s from [0, 192) / length 192
+  ASSERT_EQ(size - 192,
+            image.write_zeroes(192, size - 192, 0U, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 192}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // zero the first 64 bytes of the existing extent
+  // Now 1s from [64, 192) / length 128
+  ASSERT_EQ(64, image.write_zeroes(0, 64, 0U, 0));
+
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 192}}};  // the diff is expected to still cover [0, 192)
+  ASSERT_EQ(expected_diff, diff);
+
+  bufferlist expected_bl;  // expected image contents: 64 zeroes, 128 ones, zeroes to the end
+  expected_bl.append_zero(64);
+  bufferlist sub_bl;
+  sub_bl.substr_of(bl, 0, 128);
+  expected_bl.claim_append(sub_bl);
+  expected_bl.append_zero(size - 192);
+
+  bufferlist read_bl;
+  EXPECT_EQ(size, image.read(0, size, read_bl));
+  EXPECT_EQ(expected_bl, read_bl);
+
+  ASSERT_EQ(0, image.close());
+}
+
 // poorman's ceph_assert()
 namespace ceph {
   void __ceph_assert_fail(const char *assertion, const char *file, int line,