#define LIBRBD_SUPPORTS_IOVEC 1
#define LIBRBD_SUPPORTS_WATCH 0
#define LIBRBD_SUPPORTS_WRITESAME 1
+#define LIBRBD_SUPPORTS_WRITE_ZEROES 1
#if __GNUC__ >= 4
#define CEPH_RBD_API __attribute__ ((visibility ("default")))
const char *buf, int op_flags);
CEPH_RBD_API int rbd_discard(rbd_image_t image, uint64_t ofs, uint64_t len);
CEPH_RBD_API ssize_t rbd_writesame(rbd_image_t image, uint64_t ofs, size_t len,
- const char *buf, size_t data_len, int op_flags);
+ const char *buf, size_t data_len,
+ int op_flags);
+/**
+ * Synchronously zero a byte range of an image.
+ *
+ * @param image handle of the image to modify
+ * @param ofs byte offset at which zeroing starts
+ * @param len number of bytes to zero; declared as size_t so that this
+ *            prototype matches the extern "C" definition and the
+ *            rbd_aio_write_zeroes() prototype
+ * @param zero_flags must be 0; a non-zero value fails the request with -EINVAL
+ * @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG
+ * @returns the length of the zeroed range on success, negative error code
+ *          on failure
+ */
+CEPH_RBD_API ssize_t rbd_write_zeroes(rbd_image_t image, uint64_t ofs,
+                                      size_t len, int zero_flags,
+                                      int op_flags);
CEPH_RBD_API ssize_t rbd_compare_and_write(rbd_image_t image, uint64_t ofs,
size_t len, const char *cmp_buf,
- const char *buf, uint64_t *mismatch_off,
+ const char *buf,
+ uint64_t *mismatch_off,
int op_flags);
CEPH_RBD_API int rbd_aio_write(rbd_image_t image, uint64_t off, size_t len,
CEPH_RBD_API int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len,
const char *buf, size_t data_len,
rbd_completion_t c, int op_flags);
+/*
+ * Asynchronously zero len bytes of the image starting at off.
+ * The call itself returns 0; the outcome of the request is delivered
+ * through the completion c.
+ * @param zero_flags must be 0; a non-zero value fails the completion
+ *                   with -EINVAL
+ * @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG
+ */
+CEPH_RBD_API int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off,
+ size_t len, rbd_completion_t c,
+ int zero_flags, int op_flags);
CEPH_RBD_API ssize_t rbd_aio_compare_and_write(rbd_image_t image,
uint64_t off, size_t len,
- const char *cmp_buf, const char *buf,
- rbd_completion_t c, uint64_t *mismatch_off,
+ const char *cmp_buf,
+ const char *buf,
+ rbd_completion_t c,
+ uint64_t *mismatch_off,
int op_flags);
CEPH_RBD_API int rbd_aio_create_completion(void *cb_arg,
ssize_t write(uint64_t ofs, size_t len, ceph::bufferlist& bl);
/* @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */
ssize_t write2(uint64_t ofs, size_t len, ceph::bufferlist& bl, int op_flags);
+
int discard(uint64_t ofs, uint64_t len);
ssize_t writesame(uint64_t ofs, size_t len, ceph::bufferlist &bl, int op_flags);
+ /* zero len bytes of the image starting at ofs (blocking call)
+ * @param zero_flags must be 0; a non-zero value fails the request with -EINVAL
+ * @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */
+ ssize_t write_zeroes(uint64_t ofs, size_t len, int zero_flags, int op_flags);
+
ssize_t compare_and_write(uint64_t ofs, size_t len, ceph::bufferlist &cmp_bl,
ceph::bufferlist& bl, uint64_t *mismatch_off, int op_flags);
/* @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */
int aio_write2(uint64_t off, size_t len, ceph::bufferlist& bl,
RBD::AioCompletion *c, int op_flags);
+
+ /* asynchronously discard len bytes starting at off; the completion c is
+ * handed to the I/O layer and the call itself returns 0 */
+ int aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c);
int aio_writesame(uint64_t off, size_t len, ceph::bufferlist& bl,
RBD::AioCompletion *c, int op_flags);
+ /* asynchronously zero len bytes starting at ofs; the call itself returns 0
+ * and the outcome is delivered through the completion c
+ * @param zero_flags must be 0; a non-zero value fails the completion
+ * with -EINVAL
+ * @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */
+ int aio_write_zeroes(uint64_t ofs, size_t len, RBD::AioCompletion *c,
+ int zero_flags, int op_flags);
+
int aio_compare_and_write(uint64_t off, size_t len, ceph::bufferlist& cmp_bl,
ceph::bufferlist& bl, RBD::AioCompletion *c,
uint64_t *mismatch_off, int op_flags);
+
/**
* read async from image
*
/* @param op_flags see librados.h constants beginning with LIBRADOS_OP_FLAG */
int aio_read2(uint64_t off, size_t len, ceph::bufferlist& bl,
RBD::AioCompletion *c, int op_flags);
- int aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c);
int flush();
/**
return len;
}
+// Blocking write-zeroes: validates the requested range, issues the request
+// through aio_write_zeroes() and waits for it to complete.
+//
+// @param image_ctx  image to operate on
+// @param off        byte offset of the range
+// @param len        length of the range in bytes
+// @param zero_flags forwarded unchanged to aio_write_zeroes()
+// @param op_flags   forwarded unchanged to aio_write_zeroes()
+// @return len on success; otherwise the negative value returned by
+//         clip_io() or by waiting on the request
+template <typename I>
+ssize_t Io<I>::write_zeroes(I& image_ctx, uint64_t off, uint64_t len,
+ int zero_flags, int op_flags) {
+ auto cct = image_ctx.cct;
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", off=" << off << ", "
+ << "len = " << len << dendl;
+
+ // clip_io() runs with image_lock held shared; len is passed by pointer, so
+ // it may be adjusted by the call -- TODO confirm against clip_io()
+ image_ctx.image_lock.lock_shared();
+ int r = clip_io(util::get_image_ctx(&image_ctx), off, &len);
+ image_ctx.image_lock.unlock_shared();
+ if (r < 0) {
+ lderr(cct) << "invalid IO request: " << cpp_strerror(r) << dendl;
+ return r;
+ }
+
+ // native_async=false: this internal completion is waited on below, so
+ // aio_write_zeroes() does not enable event notification for it
+ C_SaferCond ctx;
+ auto aio_comp = io::AioCompletion::create(&ctx);
+ aio_write_zeroes(image_ctx, aio_comp, off, len, zero_flags, op_flags, false);
+
+ r = ctx.wait();
+ if (r < 0) {
+ return r;
+ }
+ return len;
+}
+
template <typename I>
ssize_t Io<I>::compare_and_write(
I &image_ctx, uint64_t off, uint64_t len, bufferlist &&cmp_bl,
req->send();
}
+// Asynchronous write-zeroes, implemented by dispatching a discard request
+// with a discard granularity of 0.
+//
+// @param image_ctx    image to operate on
+// @param aio_comp     completion that receives the result
+// @param off          byte offset of the range
+// @param len          length of the range in bytes
+// @param zero_flags   must be 0; any other value fails aio_comp with -EINVAL
+// @param op_flags     NOTE(review): accepted but not referenced anywhere in
+//                     this function -- confirm whether it should be honored
+// @param native_async when true and the image has a valid event socket,
+//                     event notification is enabled on aio_comp
+template <typename I>
+void Io<I>::aio_write_zeroes(I& image_ctx, io::AioCompletion *aio_comp,
+ uint64_t off, uint64_t len, int zero_flags,
+ int op_flags, bool native_async) {
+ auto cct = image_ctx.cct;
+ FUNCTRACE(cct);
+ ZTracer::Trace trace;
+ if (image_ctx.blkin_trace_all) {
+ trace.init("io: write_zeroes", &image_ctx.trace_endpoint);
+ trace.event("init");
+ }
+
+ // the completion is typed as a discard (AIO_TYPE_DISCARD)
+ aio_comp->init_time(util::get_image_ctx(&image_ctx), io::AIO_TYPE_DISCARD);
+ ldout(cct, 20) << "ictx=" << &image_ctx << ", "
+ << "completion=" << aio_comp << ", off=" << off << ", "
+ << "len=" << len << dendl;
+
+ // event notification is configured before any early-exit path below
+ if (native_async && image_ctx.event_socket.is_valid()) {
+ aio_comp->set_event_notify(true);
+ }
+
+ // validate the supported flags
+ // no flag value other than 0 is accepted here
+ // NOTE(review): zero_flags is an int compared against the unsigned 0U
+ if (zero_flags != 0U) {
+ aio_comp->fail(-EINVAL);
+ return;
+ }
+
+ // nothing further is done here when is_valid_io() rejects the request
+ if (!is_valid_io(image_ctx, aio_comp)) {
+ return;
+ }
+
+ // enable partial discard (zeroing) of objects
+ uint32_t discard_granularity_bytes = 0;
+
+ auto req = io::ImageDispatchSpec<I>::create_discard(
+ image_ctx, io::IMAGE_DISPATCH_LAYER_API_START, aio_comp, off, len,
+ discard_granularity_bytes, trace, 0);
+ req->send();
+}
+
template <typename I>
void Io<I>::aio_compare_and_write(I &image_ctx, io::AioCompletion *aio_comp,
uint64_t off, uint64_t len,
uint32_t discard_granularity_bytes);
static ssize_t write_same(ImageCtxT &image_ctx, uint64_t off, uint64_t len,
bufferlist &&bl, int op_flags);
+ // Blocking write-zeroes of [off, off + len); returns len on success or a
+ // negative error code.
+ static ssize_t write_zeroes(ImageCtxT &image_ctx, uint64_t off, uint64_t len,
+ int zero_flags, int op_flags);
static ssize_t compare_and_write(ImageCtxT &image_ctx, uint64_t off,
uint64_t len, bufferlist &&cmp_bl,
bufferlist &&bl, uint64_t *mismatch_off,
static void aio_write_same(ImageCtxT &image_ctx, io::AioCompletion *c,
uint64_t off, uint64_t len, bufferlist &&bl,
int op_flags, bool native_async);
+ // Asynchronous write-zeroes of [off, off + len); the result is delivered
+ // through the completion c. A non-zero zero_flags fails c with -EINVAL.
+ static void aio_write_zeroes(ImageCtxT &image_ctx, io::AioCompletion *c,
+ uint64_t off, uint64_t len, int zero_flags,
+ int op_flags, bool native_async);
static void aio_compare_and_write(ImageCtxT &image_ctx, io::AioCompletion *c,
uint64_t off, uint64_t len,
bufferlist &&cmp_bl, bufferlist &&bl,
}
bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
- if (discard_zero && mem_is_zero(bl.c_str(), bl.length())) {
- int r = api::Io<>::discard(*ictx, ofs, len, 0);
+ if (discard_zero && bl.is_zero()) {
+ int r = api::Io<>::write_zeroes(*ictx, ofs, len, 0U, op_flags);
tracepoint(librbd, writesame_exit, r);
return r;
}
return r;
}
+  /**
+   * Synchronously zero len bytes of the image starting at ofs.
+   *
+   * Thin wrapper: the request is handed to api::Io<>::write_zeroes() and its
+   * result is returned unchanged.
+   */
+  ssize_t Image::write_zeroes(uint64_t ofs, size_t len, int zero_flags,
+                              int op_flags)
+  {
+    ImageCtx &image_ctx = *(ImageCtx *)ctx;
+    const ssize_t result = api::Io<>::write_zeroes(image_ctx, ofs, len,
+                                                   zero_flags, op_flags);
+    return result;
+  }
+
ssize_t Image::compare_and_write(uint64_t ofs, size_t len,
ceph::bufferlist &cmp_bl, ceph::bufferlist& bl,
uint64_t *mismatch_off, int op_flags)
return 0;
}
- int Image::aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c)
- {
- ImageCtx *ictx = (ImageCtx *)ctx;
- tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, c->pc);
- api::Io<>::aio_discard(
- *ictx, get_aio_completion(c), off, len, ictx->discard_granularity_bytes,
- true);
- tracepoint(librbd, aio_discard_exit, 0);
- return 0;
- }
-
int Image::aio_read(uint64_t off, size_t len, bufferlist& bl,
RBD::AioCompletion *c)
{
return 0;
}
+ // Asynchronously discard len bytes starting at off. The request is issued
+ // with the image's configured discard_granularity_bytes; the call itself
+ // always returns 0.
+ int Image::aio_discard(uint64_t off, uint64_t len, RBD::AioCompletion *c)
+ {
+ ImageCtx *ictx = (ImageCtx *)ctx;
+ tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, c->pc);
+ // trailing 'true' is presumably the native_async flag -- confirm against
+ // Io<>::aio_discard
+ api::Io<>::aio_discard(
+ *ictx, get_aio_completion(c), off, len, ictx->discard_granularity_bytes,
+ true);
+ // the exit tracepoint always records 0, matching the return value
+ tracepoint(librbd, aio_discard_exit, 0);
+ return 0;
+ }
+
int Image::aio_writesame(uint64_t off, size_t len, bufferlist& bl,
RBD::AioCompletion *c, int op_flags)
{
}
bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
- if (discard_zero && mem_is_zero(bl.c_str(), bl.length())) {
- api::Io<>::aio_discard(*ictx, get_aio_completion(c), off, len, 0, true);
+ if (discard_zero && bl.is_zero()) {
+ api::Io<>::aio_write_zeroes(*ictx, get_aio_completion(c), off, len, 0U,
+ op_flags, true);
tracepoint(librbd, aio_writesame_exit, 0);
return 0;
}
return 0;
}
+  /**
+   * Asynchronously zero len bytes of the image starting at off.
+   *
+   * The request is forwarded to api::Io<>::aio_write_zeroes() with
+   * native_async enabled; this call itself always returns 0.
+   */
+  int Image::aio_write_zeroes(uint64_t off, size_t len, RBD::AioCompletion *c,
+                              int zero_flags, int op_flags)
+  {
+    ImageCtx &image_ctx = *(ImageCtx *)ctx;
+    const bool native_async = true;
+    api::Io<>::aio_write_zeroes(image_ctx, get_aio_completion(c), off, len,
+                                zero_flags, op_flags, native_async);
+    return 0;
+  }
+
int Image::aio_compare_and_write(uint64_t off, size_t len,
ceph::bufferlist& cmp_bl, ceph::bufferlist& bl,
RBD::AioCompletion *c, uint64_t *mismatch_off,
bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
if (discard_zero && mem_is_zero(buf, data_len)) {
- int r = librbd::api::Io<>::discard(*ictx, ofs, len, 0);
+ int r = librbd::api::Io<>::write_zeroes(*ictx, ofs, len, 0, op_flags);
tracepoint(librbd, writesame_exit, r);
return r;
}
return r;
}
+/*
+ * C binding for the blocking write-zeroes call: reinterprets the opaque
+ * image handle as an ImageCtx and returns whatever
+ * librbd::api::Io<>::write_zeroes() returns.
+ */
+extern "C" ssize_t rbd_write_zeroes(rbd_image_t image, uint64_t ofs, size_t len,
+                                    int zero_flags, int op_flags)
+{
+  librbd::ImageCtx &image_ctx = *(librbd::ImageCtx *)image;
+  const ssize_t result = librbd::api::Io<>::write_zeroes(
+    image_ctx, ofs, len, zero_flags, op_flags);
+  return result;
+}
+
extern "C" ssize_t rbd_compare_and_write(rbd_image_t image,
uint64_t ofs, size_t len,
const char *cmp_buf,
return r;
}
-extern "C" int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len,
- rbd_completion_t c)
-{
- librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
- librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
- tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, comp->pc);
- librbd::api::Io<>::aio_discard(
- *ictx, get_aio_completion(comp), off, len,
- ictx->discard_granularity_bytes, true);
- tracepoint(librbd, aio_discard_exit, 0);
- return 0;
-}
-
extern "C" int rbd_aio_read(rbd_image_t image, uint64_t off, size_t len,
char *buf, rbd_completion_t c)
{
return 0;
}
+// C binding for asynchronous discard. The opaque handles are cast to their
+// C++ types, the request is issued with the image's configured
+// discard_granularity_bytes, and the call itself always returns 0.
+extern "C" int rbd_aio_discard(rbd_image_t image, uint64_t off, uint64_t len,
+ rbd_completion_t c)
+{
+ librbd::ImageCtx *ictx = (librbd::ImageCtx *)image;
+ librbd::RBD::AioCompletion *comp = (librbd::RBD::AioCompletion *)c;
+ tracepoint(librbd, aio_discard_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, off, len, comp->pc);
+ // trailing 'true' is presumably the native_async flag -- confirm against
+ // Io<>::aio_discard
+ librbd::api::Io<>::aio_discard(
+ *ictx, get_aio_completion(comp), off, len,
+ ictx->discard_granularity_bytes, true);
+ // the exit tracepoint always records 0, matching the return value
+ tracepoint(librbd, aio_discard_exit, 0);
+ return 0;
+}
+
extern "C" int rbd_aio_writesame(rbd_image_t image, uint64_t off, size_t len,
const char *buf, size_t data_len, rbd_completion_t c,
int op_flags)
bool discard_zero = ictx->config.get_val<bool>("rbd_discard_on_zeroed_write_same");
if (discard_zero && mem_is_zero(buf, data_len)) {
- librbd::api::Io<>::aio_discard(
- *ictx, get_aio_completion(comp), off, len, 0, true);
+ librbd::api::Io<>::aio_write_zeroes(
+ *ictx, get_aio_completion(comp), off, len, 0, op_flags, true);
tracepoint(librbd, aio_writesame_exit, 0);
return 0;
}
return 0;
}
+/*
+ * C binding for asynchronous write-zeroes: casts the opaque image and
+ * completion handles to their C++ types and forwards the request to
+ * librbd::api::Io<>::aio_write_zeroes() with native_async enabled.
+ * Always returns 0.
+ */
+extern "C" int rbd_aio_write_zeroes(rbd_image_t image, uint64_t off, size_t len,
+                                    rbd_completion_t c, int zero_flags,
+                                    int op_flags)
+{
+  librbd::ImageCtx &image_ctx = *(librbd::ImageCtx *)image;
+  librbd::RBD::AioCompletion *completion = (librbd::RBD::AioCompletion *)c;
+  const bool native_async = true;
+
+  librbd::api::Io<>::aio_write_zeroes(
+    image_ctx, get_aio_completion(completion), off, len, zero_flags, op_flags,
+    native_async);
+  return 0;
+}
+
extern "C" ssize_t rbd_aio_compare_and_write(rbd_image_t image, uint64_t off,
size_t len, const char *cmp_buf,
const char *buf, rbd_completion_t c,
ioctx.close();
}
+// Exercises Image::write_zeroes(): writes 256 bytes of 1s, zeroes the tail of
+// the image and then the first 64 bytes, checking diff_iterate2() output and
+// the final image contents after each step.
+TEST_F(TestLibRBD, WriteZeroes) {
+ librbd::RBD rbd;
+ librados::IoCtx ioctx;
+ ASSERT_EQ(0, _rados.ioctx_create(m_pool_name.c_str(), ioctx));
+ std::string name = get_temp_image_name();
+ int order = 0;
+ // 2 MiB image
+ uint64_t size = 2 << 20;
+ ASSERT_EQ(0, create_image_pp(rbd, ioctx, name.c_str(), size, &order));
+
+ librbd::Image image;
+ ASSERT_EQ(0, rbd.open(ioctx, image, name.c_str(), NULL));
+
+ // 1s from [0, 256) / length 256
+ char data[256];
+ memset(data, 1, sizeof(data));
+ bufferlist bl;
+ bl.append(data, 256);
+ ASSERT_EQ(256, image.write(0, 256, bl));
+
+ interval_set<uint64_t> diff;
+ ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+ iterate_cb, (void *)&diff));
+ auto expected_diff = interval_set<uint64_t>{{{0, 256}}};
+ ASSERT_EQ(expected_diff, diff);
+
+ // write zeroes from offset 192 through the end of the image, i.e. past the
+ // end of the written extent.
+ // Now 1s from [0, 192) / length 192
+ ASSERT_EQ(size - 192,
+ image.write_zeroes(192, size - 192, 0U, 0));
+ diff.clear();
+ ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+ iterate_cb, (void *)&diff));
+ expected_diff = interval_set<uint64_t>{{{0, 192}}};
+ ASSERT_EQ(expected_diff, diff);
+
+ // zero the first 64 bytes of the existing extent
+ // Now 1s from [64, 192); the diff is still expected to report [0, 192)
+ ASSERT_EQ(64, image.write_zeroes(0, 64, 0U, 0));
+
+ diff.clear();
+ ASSERT_EQ(0, image.diff_iterate2(nullptr, 0, size, false, false,
+ iterate_cb, (void *)&diff));
+ expected_diff = interval_set<uint64_t>{{{0, 192}}};
+ ASSERT_EQ(expected_diff, diff);
+
+ // expected contents: 64 zero bytes, 128 bytes of 1s, zeroes to end of image
+ bufferlist expected_bl;
+ expected_bl.append_zero(64);
+ bufferlist sub_bl;
+ sub_bl.substr_of(bl, 0, 128);
+ expected_bl.claim_append(sub_bl);
+ expected_bl.append_zero(size - 192);
+
+ bufferlist read_bl;
+ EXPECT_EQ(size, image.read(0, size, read_bl));
+ EXPECT_EQ(expected_bl, read_bl);
+
+ ASSERT_EQ(0, image.close());
+}
+
// poorman's ceph_assert()
namespace ceph {
void __ceph_assert_fail(const char *assertion, const char *file, int line,