From 0d10b0af65ec6ccfbbfa733323dee45a3e1edd14 Mon Sep 17 00:00:00 2001
From: Jason Dillaman
Date: Tue, 30 Jun 2020 16:59:47 -0400
Subject: [PATCH] librbd: add flag to force thick-provisioning during write_zeroes API

When RBD_WRITE_ZEROES_FLAG_THICK_PROVISION is passed, aio_write_zeroes
issues zero-filled write / write-same requests instead of a discard.
Write-same is issued with a 512-byte zero buffer, so any unaligned head
or tail of the extent is covered by a plain write, and all sub-requests
are tied back to the caller's completion.

Signed-off-by: Jason Dillaman
---
 src/include/rbd/librbd.h       |   5 ++
 src/librbd/api/Io.cc           | 111 ++++++++++++++++++++++++++++++++-
 src/pybind/rbd/rbd.pyx         |   4 ++
 src/test/librbd/test_librbd.cc |  81 ++++++++++++++++++++++++
 src/test/pybind/test_rbd.py    |  11 +++-
 5 files changed, 210 insertions(+), 2 deletions(-)

diff --git a/src/include/rbd/librbd.h b/src/include/rbd/librbd.h
index 7113a0e2970..fce85e6e5a0 100644
--- a/src/include/rbd/librbd.h
+++ b/src/include/rbd/librbd.h
@@ -366,6 +366,11 @@ typedef enum {
   RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS
 } rbd_pool_stat_option_t;
 
+/* rbd_write_zeroes / rbd_aio_write_zeroes flags */
+enum {
+  RBD_WRITE_ZEROES_FLAG_THICK_PROVISION = (1U<<0), /* fully allocated zeroed extent */
+};
+
 CEPH_RBD_API void rbd_image_options_create(rbd_image_options_t* opts);
 CEPH_RBD_API void rbd_image_options_destroy(rbd_image_options_t opts);
 CEPH_RBD_API int rbd_image_options_set_string(rbd_image_options_t opts,
diff --git a/src/librbd/api/Io.cc b/src/librbd/api/Io.cc
index 4d97b9d8a8c..76d43c4a344 100644
--- a/src/librbd/api/Io.cc
+++ b/src/librbd/api/Io.cc
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "librbd/api/Io.h"
+#include "include/intarith.h"
 #include "common/dout.h"
 #include "common/errno.h"
 #include "common/Cond.h"
@@ -341,7 +342,13 @@ void Io::aio_write_zeroes(I& image_ctx, io::AioCompletion *aio_comp,
     trace.event("init");
   }
 
-  aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_DISCARD);
+  auto io_type = io::AIO_TYPE_DISCARD;
+  if ((zero_flags & RBD_WRITE_ZEROES_FLAG_THICK_PROVISION) != 0) {
+    zero_flags &= ~RBD_WRITE_ZEROES_FLAG_THICK_PROVISION;
+    io_type = io::AIO_TYPE_WRITESAME;
+  }
+
+  aio_comp->init_time(util::get_image_ctx(&image_ctx), io_type);
   ldout(cct, 20) << "ictx=" << &image_ctx << ", "
                  << "completion=" << aio_comp << ", off=" << off << ", "
                  << "len=" << len << dendl;
@@ -360,6 +367,108 @@ void Io::aio_write_zeroes(I& image_ctx, io::AioCompletion *aio_comp,
     return;
   }
 
+  if (io_type == io::AIO_TYPE_WRITESAME) {
+    // write-same needs to be aligned to its buffer but librbd has never forced
+    // block alignment. Hide that requirement from the user by adding optional
+    // writes.
+    const uint64_t data_length = 512;
+    uint64_t write_same_offset = p2roundup(off, data_length);
+    uint64_t write_same_offset_end = p2align(off + len, data_length);
+    uint64_t write_same_length = 0;
+    if (write_same_offset_end > write_same_offset) {
+      write_same_length = write_same_offset_end - write_same_offset;
+    }
+
+    uint64_t prepend_offset = off;
+    uint64_t prepend_length = write_same_offset - off;
+    uint64_t append_offset = write_same_offset + write_same_length;
+    uint64_t append_length = len - prepend_length - write_same_length;
+    ldout(cct, 20) << "prepend_offset=" << prepend_offset << ", "
+                   << "prepend_length=" << prepend_length << ", "
+                   << "write_same_offset=" << write_same_offset << ", "
+                   << "write_same_length=" << write_same_length << ", "
+                   << "append_offset=" << append_offset << ", "
+                   << "append_length=" << append_length << dendl;
+    ceph_assert(prepend_length + write_same_length + append_length == len);
+
+    if (write_same_length <= data_length) {
+      // unaligned or small write-zeroes request -- use single write
+      bufferlist bl;
+      bl.append_zero(len);
+
+      aio_comp->aio_type = io::AIO_TYPE_WRITE;
+      auto req = io::ImageDispatchSpec::create_write(
+        image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, {{off, len}},
+        std::move(bl), op_flags, trace, 0);
+      req->send();
+      return;
+    } else if (prepend_length == 0 && append_length == 0) {
+      // fully aligned -- use a single write-same image request
+      bufferlist bl;
+      bl.append_zero(data_length);
+
+      auto req = io::ImageDispatchSpec::create_write_same(
+        image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, off, len,
+        std::move(bl), op_flags, trace, 0);
+      req->send();
+      return;
+    }
+
+    // to reach this point, we need at least one prepend/append write along with
+    // a write-same -- therefore we will need to wrap the provided AioCompletion
+    auto request_count = 1;
+    if (prepend_length > 0) {
+      ++request_count;
+    }
+    if (append_length > 0) {
+      ++request_count;
+    }
+
+    ceph_assert(request_count > 1);
+    aio_comp->start_op();
+    aio_comp->set_request_count(request_count);
+
+    if (prepend_length > 0) {
+      bufferlist bl;
+      bl.append_zero(prepend_length);
+
+      Context* prepend_ctx = new io::C_AioRequest(aio_comp);
+      auto prepend_aio_comp = io::AioCompletion::create_and_start(
+        prepend_ctx, &image_ctx, io::AIO_TYPE_WRITE);
+      auto prepend_req = io::ImageDispatchSpec::create_write(
+        image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, prepend_aio_comp,
+        {{prepend_offset, prepend_length}}, std::move(bl), op_flags, trace,
+        0);
+      prepend_req->send();
+    }
+
+    if (append_length > 0) {
+      bufferlist bl;
+      bl.append_zero(append_length);
+
+      Context* append_ctx = new io::C_AioRequest(aio_comp);
+      auto append_aio_comp = io::AioCompletion::create_and_start(
+        append_ctx, &image_ctx, io::AIO_TYPE_WRITE);
+      auto append_req = io::ImageDispatchSpec::create_write(
+        image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, append_aio_comp,
+        {{append_offset, append_length}}, std::move(bl), op_flags, trace, 0);
+      append_req->send();
+    }
+
+    bufferlist bl;
+    bl.append_zero(data_length);
+
+    Context* write_same_ctx = new io::C_AioRequest(aio_comp);
+    auto write_same_aio_comp = io::AioCompletion::create_and_start(
+      write_same_ctx, &image_ctx, io::AIO_TYPE_WRITESAME);
+    auto req = io::ImageDispatchSpec::create_write_same(
+      image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, write_same_aio_comp,
+      write_same_offset, write_same_length, std::move(bl), op_flags, trace,
+      0);
+    req->send();
+    return;
+  }
+
   // enable partial discard (zeroing) of objects
   uint32_t discard_granularity_bytes = 0;
 
diff --git a/src/pybind/rbd/rbd.pyx b/src/pybind/rbd/rbd.pyx
index 8e081e3a4b5..fe1ec95e99c 100644
--- a/src/pybind/rbd/rbd.pyx
+++ b/src/pybind/rbd/rbd.pyx
@@ -105,6 +105,8 @@ cdef extern from "rbd/librbd.h" nogil:
         _RBD_SNAP_REMOVE_FLATTEN "RBD_SNAP_REMOVE_FLATTEN"
         _RBD_SNAP_REMOVE_FORCE "RBD_SNAP_REMOVE_FORCE"
 
+        _RBD_WRITE_ZEROES_FLAG_THICK_PROVISION "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION"
+
     ctypedef void* rados_t
     ctypedef void* rados_ioctx_t
     ctypedef void* rbd_image_t
@@ -816,6 +818,8 @@ RBD_SNAP_REMOVE_UNPROTECT = _RBD_SNAP_REMOVE_UNPROTECT
 RBD_SNAP_REMOVE_FLATTEN = _RBD_SNAP_REMOVE_FLATTEN
 RBD_SNAP_REMOVE_FORCE = _RBD_SNAP_REMOVE_FORCE
 
+RBD_WRITE_ZEROES_FLAG_THICK_PROVISION = _RBD_WRITE_ZEROES_FLAG_THICK_PROVISION
+
 class Error(Exception):
     pass
 
diff --git a/src/test/librbd/test_librbd.cc b/src/test/librbd/test_librbd.cc
index 190016ef3ce..a69662efa97 100644
--- a/src/test/librbd/test_librbd.cc
+++ b/src/test/librbd/test_librbd.cc
@@ -8572,6 +8572,87 @@ TEST_F(TestLibRBD, WriteZeroes) {
   ASSERT_EQ(0, image.close());
 }
 
+TEST_F(TestLibRBD, WriteZeroesThickProvision) {
+  librbd::RBD rbd;
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+  std::string name = get_temp_image_name();
+  int order = 0;
+  uint64_t size = 2 << 20;
+  ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+
+  librbd::Image image;
+  ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
+
+  interval_set<uint64_t> diff;
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  auto expected_diff = interval_set<uint64_t>{{}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // writes unaligned zeroes as a prepend
+  ASSERT_EQ(128, image.write_zeroes(
+    0, 128, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 128}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  ASSERT_EQ(512, image.write_zeroes(
+    384, 512, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 896}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // prepend with write-same
+  ASSERT_EQ(640, image.write_zeroes(
+    896, 640, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 1536}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // write-same with append
+  ASSERT_EQ(640, image.write_zeroes(
+    1536, 640, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 2176}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // prepend + write-same + append
+  ASSERT_EQ(768, image.write_zeroes(
+    2176, 768, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 2944}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // write-same
+  ASSERT_EQ(1024, image.write_zeroes(
+    3072, 1024, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 4096}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  bufferlist expected_bl;
+  expected_bl.append_zero(size);
+
+  bufferlist read_bl;
+  EXPECT_EQ(size, image.read(0, size, read_bl));
+  EXPECT_EQ(expected_bl, read_bl);
+
+  ASSERT_EQ(0, image.close());
+}
+
 // poorman's ceph_assert()
 namespace ceph {
   void __ceph_assert_fail(const char *assertion, const char *file, int line,
diff --git a/src/test/pybind/test_rbd.py b/src/test/pybind/test_rbd.py
index b272748173b..09b47e5660f 100644
--- a/src/test/pybind/test_rbd.py
+++ b/src/test/pybind/test_rbd.py
@@ -40,7 +40,8 @@ from rbd import (RBD, Group, Image, ImageNotFound, InvalidArgument, ImageExists,
                  RBD_SNAP_REMOVE_UNPROTECT, RBD_SNAP_MIRROR_STATE_PRIMARY,
                  RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED,
                  RBD_SNAP_CREATE_SKIP_QUIESCE,
-                 RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR)
+                 RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR,
+                 RBD_WRITE_ZEROES_FLAG_THICK_PROVISION)
 
 rados = None
 ioctx = None
@@ -608,6 +609,14 @@ class TestImage(object):
         self.image.write(data, 0)
         self.image.write_zeroes(0, 256)
         eq(self.image.read(256, 256), b'\0' * 256)
+        check_diff(self.image, 0, IMG_SIZE, None, [])
+
+    def test_write_zeroes_thick_provision(self):
+        data = rand_data(256)
+        self.image.write(data, 0)
+        self.image.write_zeroes(0, 256, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION)
+        eq(self.image.read(0, 256), b'\0' * 256)
+        check_diff(self.image, 0, IMG_SIZE, None, [(0, 256, True)])
 
     def test_read(self):
         data = self.image.read(0, 20)
-- 
2.39.5