From: Jason Dillaman Date: Tue, 30 Jun 2020 12:44:38 +0000 (-0400) Subject: librbd: add 'write_zeroes' public C/C++ API methods X-Git-Tag: v16.1.0~1845^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ae6dd86b22e928dc23a385faf41cf76b0d293576;p=ceph.git librbd: add 'write_zeroes' public C/C++ API methods Unlike the existing 'discard' option which is more of a hint to attempt to release space, the new 'write_zeroes' APIs will ensure that the entire provided extent is fully zeroed. Signed-off-by: Jason Dillaman --- diff --git a/src/include/rbd/librbd.h b/src/include/rbd/librbd.h index 50e2e990262c..7113a0e29706 100644 --- a/src/include/rbd/librbd.h +++ b/src/include/rbd/librbd.h @@ -47,6 +47,7 @@ extern "C" { #define LIBRBD_SUPPORTS_IOVEC 1 #define LIBRBD_SUPPORTS_WATCH 0 #define LIBRBD_SUPPORTS_WRITESAME 1 +#define LIBRBD_SUPPORTS_WRITE_ZEROES 1 #if __GNUC__ >= 4 #define CEPH_RBD_API __attribute__ ((visibility ("default"))) @@ -1100,10 +1101,15 @@ CEPH_RBD_API ssize_t rbd_write2(rbd_image_t image, uint64_t ofs, size_t len, const char *buf, int op_flags); CEPH_RBD_API int rbd_discard(rbd_image_t image, uint64_t ofs, uint64_t len); CEPH_RBD_API ssize_t rbd_writesame(rbd_image_t image, uint64_t ofs, size_t len, - const char *buf, size_t data_len, int op_flags); + const char *buf, size_t data_len, + int op_flags); +CEPH_RBD_API ssize_t rbd_write_zeroes(rbd_image_t image, uint64_t ofs, + uint64_t len, int zero_flags, + int op_flags); CEPH_RBD_API ssize_t rbd_compare_and_write(rbd_image_t image, uint64_t ofs, size_t len, const char *cmp_buf, - const char *buf, uint64_t *mismatch_off, + const char *buf, + uint64_t *mismatch_off, int op_flags); CEPH_RBD_API int rbd_aio_write(rbd_image_t image, uint64_t off, size_t len, @@ -1131,10 +1137,15 @@ CEPH_RBD_API int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, CEPH_RBD_API int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len, const char *buf, size_t data_len, 
rbd_completion_t c, int op_flags); +CEPH_RBD_API int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, + size_t len, rbd_completion_t c, + int zero_flags, int op_flags); CEPH_RBD_API ssize_t rbd_aio_compare_and_write(rbd_image_t image, uint64_t off, size_t len, - const char *cmp_buf, const char *buf, - rbd_completion_t c, uint64_t *mismatch_off, + const char *cmp_buf, + const char *buf, + rbd_completion_t c, + uint64_t *mismatch_off, int op_flags); CEPH_RBD_API int rbd_aio_create_completion(void *cb_arg, diff --git a/src/include/rbd/librbd.hpp b/src/include/rbd/librbd.hpp index 987a76eb9da8..76acfa9b393e 100644 --- a/src/include/rbd/librbd.hpp +++ b/src/include/rbd/librbd.hpp @@ -679,8 +679,11 @@ public: ssize_t write(uint64_t ofs, size_t len, ceph::bufferlist& bl); /* @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */ ssize_t write2(uint64_t ofs, size_t len, ceph::bufferlist& bl, int op_flags); + int discard(uint64_t ofs, uint64_t len); ssize_t writesame(uint64_t ofs, size_t len, ceph::bufferlist &bl, int op_flags); + ssize_t write_zeroes(uint64_t ofs, size_t len, int zero_flags, int op_flags); + ssize_t compare_and_write(uint64_t ofs, size_t len, ceph::bufferlist &cmp_bl, ceph::bufferlist& bl, uint64_t *mismatch_off, int op_flags); @@ -688,11 +691,17 @@ public: /* @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */ int aio_write2(uint64_t off, size_t len, ceph::bufferlist& bl, RBD::AioCompletion *c, int op_flags); + + int aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c); int aio_writesame(uint64_t off, size_t len, ceph::bufferlist& bl, RBD::AioCompletion *c, int op_flags); + int aio_write_zeroes(uint64_t ofs, size_t len, RBD::AioCompletion *c, + int zero_flags, int op_flags); + int aio_compare_and_write(uint64_t off, size_t len, ceph::bufferlist& cmp_bl, ceph::bufferlist& bl, RBD::AioCompletion *c, uint64_t *mismatch_off, int op_flags); + /** * read async from image * @@ -714,7 +723,6 @@ public: 
/* @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */ int aio_read2(uint64_t off, size_t len, ceph::bufferlist& bl, RBD::AioCompletion *c, int op_flags); - int aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c); int flush(); /** diff --git a/src/librbd/api/Io.cc b/src/librbd/api/Io.cc index a91b55568619..4d97b9d8a8cf 100644 --- a/src/librbd/api/Io.cc +++ b/src/librbd/api/Io.cc @@ -133,6 +133,32 @@ ssize_t Io::write_same( return len; } +template +ssize_t Io::write_zeroes(I& image_ctx, uint64_t off, uint64_t len, + int zero_flags, int op_flags) { + auto cct = image_ctx.cct; + ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", " + << "len = " << len << dendl; + + image_ctx.image_lock.lock_shared(); + int r = clip_io(util::get_image_ctx(&image_ctx), off, &len); + image_ctx.image_lock.unlock_shared(); + if (r < 0) { + lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl; + return r; + } + + C_SaferCond ctx; + auto aio_comp = io::AioCompletion::create(&ctx); + aio_write_zeroes(image_ctx, aio_comp, off, len, zero_flags, op_flags, false); + + r = ctx.wait(); + if (r < 0) { + return r; + } + return len; +} + template ssize_t Io::compare_and_write( I &image_ctx, uint64_t off, uint64_t len, bufferlist &&cmp_bl, @@ -303,6 +329,46 @@ void Io::aio_write_same(I &image_ctx, io::AioCompletion *aio_comp, req->send(); } +template +void Io::aio_write_zeroes(I& image_ctx, io::AioCompletion *aio_comp, + uint64_t off, uint64_t len, int zero_flags, + int op_flags, bool native_async) { + auto cct = image_ctx.cct; + FUNCTRACE(cct); + ZTracer::Trace trace; + if (image_ctx.blkin_trace_all) { + trace.init("io: write_zeroes", &image_ctx.trace_endpoint); + trace.event("init"); + } + + aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_DISCARD); + ldout(cct, 20) << "ictx=" << &image_ctx << ", " + << "completion=" << aio_comp << ", off=" << off << ", " + << "len=" << len << dendl; + + if (native_async && 
image_ctx.event_socket.is_valid()) { + aio_comp->set_event_notify(true); + } + + // validate the supported flags + if (zero_flags != 0U) { + aio_comp->fail(-EINVAL); + return; + } + + if (!is_valid_io(image_ctx, aio_comp)) { + return; + } + + // enable partial discard (zeroing) of objects + uint32_t discard_granularity_bytes = 0; + + auto req = io::ImageDispatchSpec::create_discard( + image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, off, len, + discard_granularity_bytes, trace, 0); + req->send(); +} + template void Io::aio_compare_and_write(I &image_ctx, io::AioCompletion *aio_comp, uint64_t off, uint64_t len, diff --git a/src/librbd/api/Io.h b/src/librbd/api/Io.h index 95120d499f1f..4e2ec5028e43 100644 --- a/src/librbd/api/Io.h +++ b/src/librbd/api/Io.h @@ -24,6 +24,8 @@ struct Io { uint32_t discard_granularity_bytes); static ssize_t write_same(ImageCtxT &image_ctx, uint64_t off, uint64_t len, bufferlist &&bl, int op_flags); + static ssize_t write_zeroes(ImageCtxT &image_ctx, uint64_t off, uint64_t len, + int zero_flags, int op_flags); static ssize_t compare_and_write(ImageCtxT &image_ctx, uint64_t off, uint64_t len, bufferlist &&cmp_bl, bufferlist &&bl, uint64_t *mismatch_off, @@ -43,6 +45,9 @@ struct Io { static void aio_write_same(ImageCtxT &image_ctx, io::AioCompletion *c, uint64_t off, uint64_t len, bufferlist &&bl, int op_flags, bool native_async); + static void aio_write_zeroes(ImageCtxT &image_ctx, io::AioCompletion *c, + uint64_t off, uint64_t len, int zero_flags, + int op_flags, bool native_async); static void aio_compare_and_write(ImageCtxT &image_ctx, io::AioCompletion *c, uint64_t off, uint64_t len, bufferlist &&cmp_bl, bufferlist &&bl, diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc index 83722258971a..2e10188acea9 100644 --- a/src/librbd/librbd.cc +++ b/src/librbd/librbd.cc @@ -2570,8 +2570,8 @@ namespace librbd { } bool discard_zero = ictx->config.get_val("rbd_discard_on_zeroed_write_same"); - if (discard_zero && 
mem_is_zero(bl.c_str(), bl.length())) { - int r = api::Io<>::discard(*ictx, ofs, len, 0); + if (discard_zero && bl.is_zero()) { + int r = api::Io<>::write_zeroes(*ictx, ofs, len, 0U, op_flags); tracepoint(librbd, writesame_exit, r); return r; } @@ -2581,6 +2581,13 @@ namespace librbd { return r; } + ssize_t Image::write_zeroes(uint64_t ofs, size_t len, int zero_flags, + int op_flags) + { + ImageCtx *ictx = (ImageCtx *)ctx; + return api::Io<>::write_zeroes(*ictx, ofs, len, zero_flags, op_flags); + } + ssize_t Image::compare_and_write(uint64_t ofs, size_t len, ceph::bufferlist &cmp_bl, ceph::bufferlist& bl, uint64_t *mismatch_off, int op_flags) @@ -2638,17 +2645,6 @@ namespace librbd { return 0; } - int Image::aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c) - { - ImageCtx *ictx = (ImageCtx *)ctx; - tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, c->pc); - api::Io<>::aio_discard( - *ictx, get_aio_completion(c), off, len, ictx->discard_granularity_bytes, - true); - tracepoint(librbd, aio_discard_exit, 0); - return 0; - } - int Image::aio_read(uint64_t off, size_t len, bufferlist& bl, RBD::AioCompletion *c) { @@ -2696,6 +2692,17 @@ namespace librbd { return 0; } + int Image::aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c) + { + ImageCtx *ictx = (ImageCtx *)ctx; + tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, c->pc); + api::Io<>::aio_discard( + *ictx, get_aio_completion(c), off, len, ictx->discard_granularity_bytes, + true); + tracepoint(librbd, aio_discard_exit, 0); + return 0; + } + int Image::aio_writesame(uint64_t off, size_t len, bufferlist& bl, RBD::AioCompletion *c, int op_flags) { @@ -2709,8 +2716,9 @@ namespace librbd { } bool discard_zero = ictx->config.get_val("rbd_discard_on_zeroed_write_same"); - if (discard_zero && mem_is_zero(bl.c_str(), bl.length())) { - api::Io<>::aio_discard(*ictx, 
get_aio_completion(c), off, len, 0, true); + if (discard_zero && bl.is_zero()) { + api::Io<>::aio_write_zeroes(*ictx, get_aio_completion(c), off, len, 0U, + op_flags, true); tracepoint(librbd, aio_writesame_exit, 0); return 0; } @@ -2721,6 +2729,15 @@ namespace librbd { return 0; } + int Image::aio_write_zeroes(uint64_t off, size_t len, RBD::AioCompletion *c, + int zero_flags, int op_flags) + { + ImageCtx *ictx = (ImageCtx *)ctx; + api::Io<>::aio_write_zeroes(*ictx, get_aio_completion(c), off, len, + zero_flags, op_flags, true); + return 0; + } + int Image::aio_compare_and_write(uint64_t off, size_t len, ceph::bufferlist& cmp_bl, ceph::bufferlist& bl, RBD::AioCompletion *c, uint64_t *mismatch_off, @@ -5929,7 +5946,7 @@ extern "C" ssize_t rbd_writesame(rbd_image_t image, uint64_t ofs, size_t len, bool discard_zero = ictx->config.get_val("rbd_discard_on_zeroed_write_same"); if (discard_zero && mem_is_zero(buf, data_len)) { - int r = librbd::api::Io<>::discard(*ictx, ofs, len, 0); + int r = librbd::api::Io<>::write_zeroes(*ictx, ofs, len, 0, op_flags); tracepoint(librbd, writesame_exit, r); return r; } @@ -5942,6 +5959,13 @@ extern "C" ssize_t rbd_writesame(rbd_image_t image, uint64_t ofs, size_t len, return r; } +extern "C" ssize_t rbd_write_zeroes(rbd_image_t image, uint64_t ofs, size_t len, + int zero_flags, int op_flags) +{ + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + return librbd::api::Io<>::write_zeroes(*ictx, ofs, len, zero_flags, op_flags); +} + extern "C" ssize_t rbd_compare_and_write(rbd_image_t image, uint64_t ofs, size_t len, const char *cmp_buf, @@ -6045,19 +6069,6 @@ extern "C" int rbd_aio_writev(rbd_image_t image, const struct iovec *iov, return r; } -extern "C" int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, - rbd_completion_t c) -{ - librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; - librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; - tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), 
ictx->snap_name.c_str(), ictx->read_only, off, len, comp->pc); - librbd::api::Io<>::aio_discard( - *ictx, get_aio_completion(comp), off, len, - ictx->discard_granularity_bytes, true); - tracepoint(librbd, aio_discard_exit, 0); - return 0; -} - extern "C" int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len, char *buf, rbd_completion_t c) { @@ -6142,6 +6153,19 @@ extern "C" int rbd_aio_flush(rbd_image_t image, rbd_completion_t c) return 0; } +extern "C" int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len, + rbd_completion_t c) +{ + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; + tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, comp->pc); + librbd::api::Io<>::aio_discard( + *ictx, get_aio_completion(comp), off, len, + ictx->discard_granularity_bytes, true); + tracepoint(librbd, aio_discard_exit, 0); + return 0; +} + extern "C" int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len, const char *buf, size_t data_len, rbd_completion_t c, int op_flags) @@ -6159,8 +6183,8 @@ extern "C" int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len, bool discard_zero = ictx->config.get_val("rbd_discard_on_zeroed_write_same"); if (discard_zero && mem_is_zero(buf, data_len)) { - librbd::api::Io<>::aio_discard( - *ictx, get_aio_completion(comp), off, len, 0, true); + librbd::api::Io<>::aio_write_zeroes( + *ictx, get_aio_completion(comp), off, len, 0, op_flags, true); tracepoint(librbd, aio_writesame_exit, 0); return 0; } @@ -6174,6 +6198,18 @@ extern "C" int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len, return 0; } +extern "C" int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, size_t len, + rbd_completion_t c, int zero_flags, + int op_flags) +{ + librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; + librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c; + + 
librbd::api::Io<>::aio_write_zeroes(*ictx, get_aio_completion(comp), off, len, + zero_flags, op_flags, true); + return 0; +} + extern "C" ssize_t rbd_aio_compare_and_write(rbd_image_t image, uint64_t off, size_t len, const char *cmp_buf, const char *buf, rbd_completion_t c, diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc index f046efe91bd7..190016ef3ced 100644 --- a/src/test/librbd/test_librbd.cc +++ b/src/test/librbd/test_librbd.cc @@ -8513,6 +8513,65 @@ TEST_F(TestLibRBD, QuiesceWatchTimeout) ioctx.close(); } +TEST_F(TestLibRBD, WriteZeroes) { + librbd::RBD rbd; + librados::IoCtx ioctx; + ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx)); + std::string name = get_temp_image_name(); + int order = 0; + uint64_t size = 2 << 20; + ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order)); + + librbd::Image image; + ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL)); + + // 1s from [0, 256) / length 256 + char data[256]; + memset(data, 1, sizeof(data)); + bufferlist bl; + bl.append(data, 256); + ASSERT_EQ(256, image.write(0, 256, bl)); + + interval_set diff; + ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false, + iterate_cb, (void *)&diff)); + auto expected_diff = interval_set{{{0, 256}}}; + ASSERT_EQ(expected_diff, diff); + + // writes zeroes past the current end extents. 
+ // Now 1s from [0, 192) / length 192 + ASSERT_EQ(size - 192, + image.write_zeroes(192, size - 192, 0U, 0)); + diff.clear(); + ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false, + iterate_cb, (void *)&diff)); + expected_diff = interval_set{{{0, 192}}}; + ASSERT_EQ(expected_diff, diff); + + // zero an existing extent and truncate some off the end + // Now 1s from [64, 192) / length 192 + ASSERT_EQ(64, image.write_zeroes(0, 64, 0U, 0)); + + diff.clear(); + ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false, + iterate_cb, (void *)&diff)); + expected_diff = interval_set{{{0, 192}}}; + ASSERT_EQ(expected_diff, diff); + + bufferlist expected_bl; + expected_bl.append_zero(64); + bufferlist sub_bl; + sub_bl.substr_of(bl, 0, 128); + expected_bl.claim_append(sub_bl); + expected_bl.append_zero(size - 192); + + bufferlist read_bl; + EXPECT_EQ(size, image.read(0, size, read_bl)); + EXPECT_EQ(expected_bl, read_bl); + + ASSERT_EQ(0, image.close()); +} + // poorman's ceph_assert() namespace ceph { void __ceph_assert_fail(const char *assertion, const char *file, int line,