RBD_POOL_STAT_OPTION_TRASH_SNAPSHOTS
} rbd_pool_stat_option_t;
+/*
+ * Flags for rbd_write_zeroes / rbd_aio_write_zeroes.
+ *
+ * Each flag occupies its own bit; only bit 0 is defined here.
+ */
+enum {
+ RBD_WRITE_ZEROES_FLAG_THICK_PROVISION = (1U<<0), /* fully allocated zeroed extent */
+};
+
CEPH_RBD_API void rbd_image_options_create(rbd_image_options_t* opts);
CEPH_RBD_API void rbd_image_options_destroy(rbd_image_options_t opts);
CEPH_RBD_API int rbd_image_options_set_string(rbd_image_options_t opts,
// vim: ts=8 sw=2 smarttab
#include "librbd/api/Io.h"
+#include "include/intarith.h"
#include "common/dout.h"
#include "common/errno.h"
#include "common/Cond.h"
trace.event("init");
}
- aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_DISCARD);
+ auto io_type = io::AIO_TYPE_DISCARD;
+ if ((zero_flags & RBD_WRITE_ZEROES_FLAG_THICK_PROVISION) != 0) {
+ zero_flags &= ~RBD_WRITE_ZEROES_FLAG_THICK_PROVISION;
+ io_type = io::AIO_TYPE_WRITESAME;
+ }
+
+ aio_comp->init_time(util::get_image_ctx(&image_ctx), io_type);
ldout(cct, 20) << "ictx=" << &image_ctx << ", "
<< "completion=" << aio_comp << ", off=" << off << ", "
<< "len=" << len << dendl;
return;
}
+ if (io_type == io::AIO_TYPE_WRITESAME) {
+ // Write-same path: the request is split into up to three pieces -- an
+ // optional unaligned head ("prepend"), a data_length-aligned middle that is
+ // issued as a write-same, and an optional unaligned tail ("append").
+ //
+ // write-same needs to be aligned to its buffer but librbd has never forced
+ // block alignment. Hide that requirement from the user by adding optional
+ // writes.
+ // data_length is the size of the zero-filled buffer handed to write-same.
+ const uint64_t data_length = 512;
+ // NOTE(review): p2roundup/p2align presumably round up/down to a multiple of
+ // data_length (helpers from include/intarith.h) -- confirm.
+ uint64_t write_same_offset = p2roundup(off, data_length);
+ uint64_t write_same_offset_end = p2align(off + len, data_length);
+ uint64_t write_same_length = 0;
+ if (write_same_offset_end > write_same_offset) {
+ write_same_length = write_same_offset_end - write_same_offset;
+ }
+
+ // NOTE(review): if the request lies entirely inside one data_length block
+ // (write_same_length == 0), prepend_length can exceed len and append_length
+ // wraps around as an unsigned value. The sum checked by the assert below
+ // still equals len modulo 2^64, and such requests always take the
+ // single-write branch, so the wrapped values are only ever logged.
+ uint64_t prepend_offset = off;
+ uint64_t prepend_length = write_same_offset - off;
+ uint64_t append_offset = write_same_offset + write_same_length;
+ uint64_t append_length = len - prepend_length - write_same_length;
+ ldout(cct, 20) << "prepend_offset=" << prepend_offset << ", "
+ << "prepend_length=" << prepend_length << ", "
+ << "write_same_offset=" << write_same_offset << ", "
+ << "write_same_length=" << write_same_length << ", "
+ << "append_offset=" << append_offset << ", "
+ << "append_length=" << append_length << dendl;
+ ceph_assert(prepend_length + write_same_length + append_length == len);
+
+ if (write_same_length <= data_length) {
+ // unaligned or small write-zeroes request -- use single write
+ // The whole [off, off + len) range is written as one zero-filled buffer
+ // and the completion is retagged as a plain write.
+ bufferlist bl;
+ bl.append_zero(len);
+
+ aio_comp->aio_type = io::AIO_TYPE_WRITE;
+ auto req = io::ImageDispatchSpec<I>::create_write(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, {{off, len}},
+ std::move(bl), op_flags, trace, 0);
+ req->send();
+ return;
+ } else if (prepend_length == 0 && append_length == 0) {
+ // fully aligned -- use a single write-same image request
+ // A single data_length-sized zero buffer is passed for the whole range.
+ bufferlist bl;
+ bl.append_zero(data_length);
+
+ auto req = io::ImageDispatchSpec<I>::create_write_same(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, off, len,
+ std::move(bl), op_flags, trace, 0);
+ req->send();
+ return;
+ }
+
+ // to reach this point, we need at least one prepend/append write along with
+ // a write-same -- therefore we will need to wrap the provided AioCompletion
+ // request_count starts at 1 for the write-same itself.
+ auto request_count = 1;
+ if (prepend_length > 0) {
+ ++request_count;
+ }
+ if (append_length > 0) {
+ ++request_count;
+ }
+
+ ceph_assert(request_count > 1);
+ // NOTE(review): the caller's completion is started once and told how many
+ // sub-requests to expect; presumably it completes only after every
+ // C_AioRequest created below has fired -- confirm against AioCompletion.
+ aio_comp->start_op();
+ aio_comp->set_request_count(request_count);
+
+ if (prepend_length > 0) {
+ // Unaligned head: regular write of zeros through its own AioCompletion.
+ bufferlist bl;
+ bl.append_zero(prepend_length);
+
+ Context* prepend_ctx = new io::C_AioRequest(aio_comp);
+ auto prepend_aio_comp = io::AioCompletion::create_and_start(
+ prepend_ctx, &image_ctx, io::AIO_TYPE_WRITE);
+ auto prepend_req = io::ImageDispatchSpec<I>::create_write(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, prepend_aio_comp,
+ {{prepend_offset, prepend_length}}, std::move(bl), op_flags, trace,
+ 0);
+ prepend_req->send();
+ }
+
+ if (append_length > 0) {
+ // Unaligned tail: regular write of zeros through its own AioCompletion.
+ bufferlist bl;
+ bl.append_zero(append_length);
+
+ Context* append_ctx = new io::C_AioRequest(aio_comp);
+ auto append_aio_comp = io::AioCompletion::create_and_start(
+ append_ctx, &image_ctx, io::AIO_TYPE_WRITE);
+ auto append_req = io::ImageDispatchSpec<I>::create_write(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, append_aio_comp,
+ {{append_offset, append_length}}, std::move(bl), op_flags, trace, 0);
+ append_req->send();
+ }
+
+ // Aligned middle: write-same of a data_length-sized zero buffer over
+ // [write_same_offset, write_same_offset + write_same_length).
+ bufferlist bl;
+ bl.append_zero(data_length);
+
+ Context* write_same_ctx = new io::C_AioRequest(aio_comp);
+ auto write_same_aio_comp = io::AioCompletion::create_and_start(
+ write_same_ctx, &image_ctx, io::AIO_TYPE_WRITESAME);
+ auto req = io::ImageDispatchSpec<I>::create_write_same(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, write_same_aio_comp,
+ write_same_offset, write_same_length, std::move(bl), op_flags, trace,
+ 0);
+ req->send();
+ return;
+ }
+
// enable partial discard (zeroing) of objects
uint32_t discard_granularity_bytes = 0;
_RBD_SNAP_REMOVE_FLATTEN "RBD_SNAP_REMOVE_FLATTEN"
_RBD_SNAP_REMOVE_FORCE "RBD_SNAP_REMOVE_FORCE"
+ _RBD_WRITE_ZEROES_FLAG_THICK_PROVISION "RBD_WRITE_ZEROES_FLAG_THICK_PROVISION"
+
ctypedef void* rados_t
ctypedef void* rados_ioctx_t
ctypedef void* rbd_image_t
RBD_SNAP_REMOVE_FLATTEN = _RBD_SNAP_REMOVE_FLATTEN
RBD_SNAP_REMOVE_FORCE = _RBD_SNAP_REMOVE_FORCE
+RBD_WRITE_ZEROES_FLAG_THICK_PROVISION = _RBD_WRITE_ZEROES_FLAG_THICK_PROVISION
+
class Error(Exception):
pass
ASSERT_EQ(0, image.close());
}
+// Exercises write_zeroes() with RBD_WRITE_ZEROES_FLAG_THICK_PROVISION over a
+// series of aligned and unaligned ranges. After every write the test checks
+// the extent reported by diff_iterate2(), and at the end it checks that the
+// whole image reads back as zeroes.
+TEST_F(TestLibRBD, WriteZeroesThickProvision) {
+  librbd::RBD rbd;
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+  std::string name = get_temp_image_name();
+  int order = 0;
+  uint64_t size = 2 << 20;
+  ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+
+  librbd::Image image;
+  ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
+
+  // a freshly created image must not report any allocated extents
+  interval_set<uint64_t> diff;
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  auto expected_diff = interval_set<uint64_t>{{}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // writes unaligned zeroes as a prepend
+  ASSERT_EQ(128, image.write_zeroes(
+    0, 128, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 128}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  ASSERT_EQ(512, image.write_zeroes(
+    384, 512, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 896}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // prepend with write-same
+  ASSERT_EQ(640, image.write_zeroes(
+    896, 640, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 1536}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // write-same with append
+  ASSERT_EQ(640, image.write_zeroes(
+    1536, 640, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 2176}}};
+  ASSERT_EQ(expected_diff, diff);
+
+  // prepend + write-same + append
+  ASSERT_EQ(768, image.write_zeroes(
+    2176, 768, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 2944}}};
+  // previously computed but never compared -- verify this scenario too
+  ASSERT_EQ(expected_diff, diff);
+
+  // write-same
+  ASSERT_EQ(1024, image.write_zeroes(
+    3072, 1024, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION, 0));
+  diff.clear();
+  ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+                                   iterate_cb, (void *)&diff));
+  expected_diff = interval_set<uint64_t>{{{0, 4096}}};
+  // previously computed but never compared -- verify this scenario too
+  ASSERT_EQ(expected_diff, diff);
+
+  // the entire image, written or not, must read back as zeroes
+  bufferlist expected_bl;
+  expected_bl.append_zero(size);
+
+  bufferlist read_bl;
+  EXPECT_EQ(size, image.read(0, size, read_bl));
+  EXPECT_EQ(expected_bl, read_bl);
+
+  ASSERT_EQ(0, image.close());
+}
+
// poorman's ceph_assert()
namespace ceph {
void __ceph_assert_fail(const char *assertion, const char *file, int line,
RBD_SNAP_REMOVE_UNPROTECT, RBD_SNAP_MIRROR_STATE_PRIMARY,
RBD_SNAP_MIRROR_STATE_PRIMARY_DEMOTED,
RBD_SNAP_CREATE_SKIP_QUIESCE,
- RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR)
+ RBD_SNAP_CREATE_IGNORE_QUIESCE_ERROR,
+ RBD_WRITE_ZEROES_FLAG_THICK_PROVISION)
rados = None
ioctx = None
self.image.write(data, 0)
self.image.write_zeroes(0, 256)
eq(self.image.read(256, 256), b'\0' * 256)
+ check_diff(self.image, 0, IMG_SIZE, None, [])
+
+ def test_write_zeroes_thick_provision(self):
+ data = rand_data(256)
+ self.image.write(data, 0)
+ self.image.write_zeroes(0, 256, RBD_WRITE_ZEROES_FLAG_THICK_PROVISION)
+ eq(self.image.read(256, 256), b'\0' * 256)
+ check_diff(self.image, 0, IMG_SIZE, None, [(0, 256, True)])
def test_read(self):
data = self.image.read(0, 20)