From a8b444c9bbedaf2eba5570e3753907291fc15de4 Mon Sep 17 00:00:00 2001
From: Mykola Golub
Date: Wed, 30 Jan 2019 13:12:04 +0000
Subject: [PATCH] cls/rbd: add sparsify method

Signed-off-by: Mykola Golub
---
 src/cls/rbd/cls_rbd.cc           | 174 +++++++++++++++++++++++++++++--
 src/cls/rbd/cls_rbd_client.cc    |  18 ++++
 src/cls/rbd/cls_rbd_client.h     |  26 +++---
 src/test/cls_rbd/test_cls_rbd.cc | 133 +++++++++++++++++++++++++++
 4 files changed, 332 insertions(+), 19 deletions(-)

diff --git a/src/cls/rbd/cls_rbd.cc b/src/cls/rbd/cls_rbd.cc
index 92e59ff166d7d..a6a767195d0c2 100644
--- a/src/cls/rbd/cls_rbd.cc
+++ b/src/cls/rbd/cls_rbd.cc
@@ -77,6 +77,52 @@ uint64_t get_encode_features(cls_method_context_t hctx) {
   return features;
 }
 
+/**
+ * Examine the next extent (at most sparse_size bytes) of the buffer and
+ * update the pending write range accordingly.
+ *
+ * A zeroed extent met while nothing is pending just moves *write_offset
+ * forward; a non-zeroed extent is added to *write_length.
+ *
+ * @param bp contiguous buffer with the object data
+ * @param sparse_size extent granularity in bytes, must not be 0
+ * @param length number of bytes to scan, *offset must be less than it
+ * @param write_offset start of the pending range (updated)
+ * @param write_length length of the pending range (updated)
+ * @param offset current scan position (advanced past the extent)
+ * @returns true if the pending range is not empty and is terminated by a
+ *          zeroed extent or by the end of the data, i.e. it should be
+ *          written out by the caller
+ */
+bool calc_sparse_extent(const bufferptr &bp, size_t sparse_size,
+                        uint64_t length, size_t *write_offset,
+                        size_t *write_length, size_t *offset) {
+  // Clamp against the remaining bytes instead of testing
+  // *offset + sparse_size, which can wrap around for a huge sparse_size.
+  size_t extent_size;
+  if (sparse_size > length - *offset) {
+    extent_size = length - *offset;
+  } else {
+    extent_size = sparse_size;
+  }
+
+  bufferptr extent(bp, *offset, extent_size);
+  *offset += extent_size;
+
+  bool extent_is_zero = extent.is_zero();
+  if (!extent_is_zero) {
+    *write_length += extent_size;
+  }
+  if (extent_is_zero && *write_length == 0) {
+    *write_offset += extent_size;
+  }
+
+  if ((extent_is_zero || *offset == length) && *write_length != 0) {
+    return true;
+  }
+  return false;
+}
+
 } // anonymous namespace
 
 static int snap_read_header(cls_method_context_t hctx, bufferlist& bl)
@@ -7273,6 +7319,110 @@ int namespace_list(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
   return 0;
 }
 
+/**
+ * Reclaim space for zeroed extents
+ *
+ * Input:
+ * @param sparse_size minimal zeroed block to sparse
+ * @param remove_empty boolean, true if the object should be removed if empty
+ *
+ * Output:
+ * @returns -ENOENT if the object does not exist or has been removed
+ * @returns -EINVAL if the input cannot be decoded, or if sparse_size is 0
+ *          and the object contains non-zero data
+ * @returns 0 on success, negative error code on failure
+ */
+int sparsify(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
+{
+  size_t sparse_size;
+  bool remove_empty;
+  try {
+    auto iter = in->cbegin();
+    decode(sparse_size, iter);
+    decode(remove_empty, iter);
+  } catch (const buffer::error &err) {
+    return -EINVAL;
+  }
+
+  int r = check_exists(hctx);
+  if (r < 0) {
+    return r;
+  }
+
+  bufferlist bl;
+  r = cls_cxx_read(hctx, 0, 0, &bl);
+  if (r < 0) {
+    CLS_ERR("failed to read data off of disk: %s", cpp_strerror(r).c_str());
+    return r;
+  }
+
+  if (bl.is_zero()) {
+    if (remove_empty) {
+      CLS_LOG(20, "remove");
+      r = cls_cxx_remove(hctx);
+      if (r < 0) {
+        CLS_ERR("remove failed: %s", cpp_strerror(r).c_str());
+        return r;
+      }
+    } else if (bl.length() > 0) {
+      CLS_LOG(20, "truncate");
+      bufferlist write_bl;
+      r = cls_cxx_replace(hctx, 0, 0, &write_bl);
+      if (r < 0) {
+        CLS_ERR("truncate failed: %s", cpp_strerror(r).c_str());
+        return r;
+      }
+    } else {
+      CLS_LOG(20, "skip empty");
+    }
+    return 0;
+  }
+
+  if (sparse_size == 0) {
+    // a zero extent size would never advance the scan below
+    CLS_ERR("invalid sparse_size: 0");
+    return -EINVAL;
+  }
+
+  // coalesce the data into one contiguous buffer addressable by offset
+  bl.rebuild(buffer::ptr_node::create(bl.length()));
+  size_t write_offset = 0;
+  size_t write_length = 0;
+  size_t offset = 0;
+  size_t length = bl.length();
+  const auto& ptr = bl.front();
+  bool replace = true;
+  while (offset < length) {
+    if (calc_sparse_extent(ptr, sparse_size, length, &write_offset,
+                           &write_length, &offset)) {
+      if (write_offset == 0 && write_length == length) {
+        CLS_LOG(20, "nothing to do");
+        return 0;
+      }
+      CLS_LOG(20, "write%s %zu~%zu", (replace ? "(replace)" : ""),
+              write_offset, write_length);
+      bufferlist write_bl;
+      write_bl.push_back(buffer::ptr_node::create(ptr, write_offset,
+                                                  write_length));
+      // replace only once: subsequent ranges are added with plain writes
+      if (replace) {
+        r = cls_cxx_replace(hctx, write_offset, write_length, &write_bl);
+        replace = false;
+      } else {
+        r = cls_cxx_write(hctx, write_offset, write_length, &write_bl);
+      }
+      if (r < 0) {
+        CLS_ERR("write failed: %s", cpp_strerror(r).c_str());
+        return r;
+      }
+      write_offset = offset;
+      write_length = 0;
+    }
+  }
+
+  return 0;
+}
+
 CLS_INIT(rbd)
 {
   CLS_LOG(20, "Loaded rbd class!");
@@ -7315,7 +7465,6 @@ CLS_INIT(rbd)
   cls_method_handle_t h_snapshot_rename;
   cls_method_handle_t h_snapshot_trash_add;
   cls_method_handle_t h_get_all_features;
-  cls_method_handle_t h_copyup;
   cls_method_handle_t h_get_id;
   cls_method_handle_t h_set_id;
   cls_method_handle_t h_set_modify_timestamp;
@@ -7347,7 +7496,6 @@ CLS_INIT(rbd)
   cls_method_handle_t h_migration_set_state;
   cls_method_handle_t h_migration_get;
   cls_method_handle_t h_migration_remove;
-  cls_method_handle_t h_assert_snapc_seq;
   cls_method_handle_t h_old_snapshots_list;
   cls_method_handle_t h_old_snapshot_add;
   cls_method_handle_t h_old_snapshot_remove;
@@ -7402,6 +7550,9 @@ CLS_INIT(rbd)
   cls_method_handle_t h_namespace_add;
   cls_method_handle_t h_namespace_remove;
   cls_method_handle_t h_namespace_list;
+  cls_method_handle_t h_copyup;
+  cls_method_handle_t h_assert_snapc_seq;
+  cls_method_handle_t h_sparsify;
 
   cls_register("rbd", &h_class);
   cls_register_cxx_method(h_class, "create",
@@ -7451,9 +7602,6 @@ CLS_INIT(rbd)
   cls_register_cxx_method(h_class, "get_all_features",
                           CLS_METHOD_RD,
                           get_all_features, &h_get_all_features);
-  cls_register_cxx_method(h_class, "copyup",
-                          CLS_METHOD_RD | CLS_METHOD_WR,
-                          copyup, &h_copyup);
 
   // NOTE: deprecate v1 parent APIs after mimic EOLed
   cls_register_cxx_method(h_class, "get_parent",
@@ -7549,10 +7697,6 @@ CLS_INIT(rbd)
   cls_register_cxx_method(h_class, "migration_remove",
                           CLS_METHOD_RD | CLS_METHOD_WR,
                           migration_remove, &h_migration_remove);
-  cls_register_cxx_method(h_class, "assert_snapc_seq",
-                          CLS_METHOD_RD | CLS_METHOD_WR,
-                          assert_snapc_seq,
-                          &h_assert_snapc_seq);
 
   cls_register_cxx_method(h_class, "set_modify_timestamp",
                           CLS_METHOD_RD | CLS_METHOD_WR,
@@ -7792,4 +7936,16 @@ CLS_INIT(rbd)
                           namespace_remove, &h_namespace_remove);
   cls_register_cxx_method(h_class, "namespace_list", CLS_METHOD_RD,
                           namespace_list, &h_namespace_list);
+
+  /* data object methods */
+  cls_register_cxx_method(h_class, "copyup",
+                          CLS_METHOD_RD | CLS_METHOD_WR,
+                          copyup, &h_copyup);
+  cls_register_cxx_method(h_class, "assert_snapc_seq",
+                          CLS_METHOD_RD | CLS_METHOD_WR,
+                          assert_snapc_seq,
+                          &h_assert_snapc_seq);
+  cls_register_cxx_method(h_class, "sparsify",
+                          CLS_METHOD_RD | CLS_METHOD_WR,
+                          sparsify, &h_sparsify);
 }
diff --git a/src/cls/rbd/cls_rbd_client.cc b/src/cls/rbd/cls_rbd_client.cc
index 78bd6f013d598..ab5305909b02e 100644
--- a/src/cls/rbd/cls_rbd_client.cc
+++ b/src/cls/rbd/cls_rbd_client.cc
@@ -2783,5 +2783,23 @@ int namespace_list(librados::IoCtx *ioctx,
   return namespace_list_finish(&iter, entries);
 }
 
+void sparsify(librados::ObjectWriteOperation *op, size_t sparse_size,
+              bool remove_empty)
+{
+  bufferlist bl;
+  encode(sparse_size, bl);
+  encode(remove_empty, bl);
+  op->exec("rbd", "sparsify", bl);
+}
+
+int sparsify(librados::IoCtx *ioctx, const std::string &oid, size_t sparse_size,
+             bool remove_empty)
+{
+  librados::ObjectWriteOperation op;
+  sparsify(&op, sparse_size, remove_empty);
+
+  return ioctx->operate(oid, &op);
+}
+
 } // namespace cls_client
 } // namespace librbd
diff --git a/src/cls/rbd/cls_rbd_client.h b/src/cls/rbd/cls_rbd_client.h
index 833d58bad9303..48125173bffa8 100644
--- a/src/cls/rbd/cls_rbd_client.h
+++ b/src/cls/rbd/cls_rbd_client.h
@@ -183,9 +183,6 @@ int get_all_features_finish(bufferlist::const_iterator *it,
 int get_all_features(librados::IoCtx *ioctx, const std::string &oid,
                      uint64_t *all_features);
 
-int copyup(librados::IoCtx *ioctx, const std::string &oid,
-           bufferlist data);
-
 /// NOTE: remove protection after clone v1 is retired
 void get_protection_status_start(librados::ObjectReadOperation *op,
                                  snapid_t snap_id);
@@ -293,13 +290,6 @@ int migration_get(librados::IoCtx *ioctx, const std::string &oid,
 int migration_remove(librados::IoCtx *ioctx, const std::string &oid);
 void migration_remove(librados::ObjectWriteOperation *op);
 
-int assert_snapc_seq(librados::IoCtx *ioctx, const std::string &oid,
-                     uint64_t snapc_seq,
-                     cls::rbd::AssertSnapcSeqState state);
-void assert_snapc_seq(librados::ObjectWriteOperation *op,
-                      uint64_t snapc_seq,
-                      cls::rbd::AssertSnapcSeqState state);
-
 // operations on rbd_id objects
 void get_id_start(librados::ObjectReadOperation *op);
 int get_id_finish(bufferlist::const_iterator *it, std::string *id);
@@ -604,6 +594,22 @@ int namespace_list(librados::IoCtx *ioctx,
                    const std::string &start, uint64_t max_return,
                    std::list<std::string> *entries);
 
+// operations on data objects
+int assert_snapc_seq(librados::IoCtx *ioctx, const std::string &oid,
+                     uint64_t snapc_seq,
+                     cls::rbd::AssertSnapcSeqState state);
+void assert_snapc_seq(librados::ObjectWriteOperation *op,
+                      uint64_t snapc_seq,
+                      cls::rbd::AssertSnapcSeqState state);
+
+int copyup(librados::IoCtx *ioctx, const std::string &oid,
+           bufferlist data);
+
+void sparsify(librados::ObjectWriteOperation *op, size_t sparse_size,
+              bool remove_empty);
+int sparsify(librados::IoCtx *ioctx, const std::string &oid, size_t sparse_size,
+             bool remove_empty);
+
 } // namespace cls_client
 } // namespace librbd
 
diff --git a/src/test/cls_rbd/test_cls_rbd.cc b/src/test/cls_rbd/test_cls_rbd.cc
index 756d64d31beb8..43d30b717b56d 100644
--- a/src/test/cls_rbd/test_cls_rbd.cc
+++ b/src/test/cls_rbd/test_cls_rbd.cc
@@ -65,6 +65,28 @@ static char *random_buf(size_t len)
   return b;
 }
 
+static bool is_sparse_read_supported(librados::IoCtx &ioctx,
+                                     const std::string &oid) {
+  EXPECT_EQ(0, ioctx.create(oid, true));
+  bufferlist inbl;
+  inbl.append(std::string(1, 'X'));
+  EXPECT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 1));
+  EXPECT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 3));
+
+  std::map<uint64_t, uint64_t> m;
+  bufferlist outbl;
+  int r = ioctx.sparse_read(oid, m, outbl, 4, 0);
+  ioctx.remove(oid);
+
+  int expected_r = 2;
+  std::map<uint64_t, uint64_t> expected_m = {{1, 1}, {3, 1}};
+  bufferlist expected_outbl;
+  expected_outbl.append(std::string(2, 'X'));
+
+  return (r == expected_r && m == expected_m &&
+          outbl.contents_equal(expected_outbl));
+}
+
 class TestClsRbd : public ::testing::Test {
 public:
 
@@ -3009,3 +3031,114 @@ TEST_F(TestClsRbd, assert_snapc_seq)
 
   ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(snapc_seq));
 }
+
+TEST_F(TestClsRbd, sparsify)
+{
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+  string oid = get_temp_image_name();
+  ioctx.remove(oid);
+
+  bool sparse_read_supported = is_sparse_read_supported(ioctx, oid);
+
+  // test sparsify on a non-existent object
+
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, false));
+  uint64_t size;
+  ASSERT_EQ(-ENOENT, ioctx.stat(oid, &size, NULL));
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(-ENOENT, ioctx.stat(oid, &size, NULL));
+
+  // test sparsify on an empty object
+
+  ASSERT_EQ(0, ioctx.create(oid, true));
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, false));
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, false));
+
+  // test sparsify on a zeroed object
+
+  bufferlist inbl;
+  inbl.append(std::string(4096, '\0'));
+  ASSERT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 0));
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, false));
+  std::map<uint64_t, uint64_t> m;
+  bufferlist outbl;
+  std::map<uint64_t, uint64_t> expected_m = {{0, 0}};
+  bufferlist expected_outbl;
+  if (sparse_read_supported) {
+    expected_m = {};
+  }
+  ASSERT_EQ((int)expected_m.size(),
+            ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+  ASSERT_EQ(m, expected_m);
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 0));
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, true));
+
+  // test sparsify on an object with zeroes
+
+  inbl.append(std::string(4096, '1'));
+  inbl.append(std::string(4096, '\0'));
+  inbl.append(std::string(4096, '2'));
+  inbl.append(std::string(4096, '\0'));
+  ASSERT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 0));
+
+  // try to sparsify with sparse_size too large
+
+  ASSERT_EQ(0, sparsify(&ioctx, oid, inbl.length(), true));
+  expected_m = {{0, inbl.length()}};
+  expected_outbl = inbl;
+  ASSERT_EQ((int)expected_m.size(),
+            ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+  ASSERT_EQ(m, expected_m);
+  ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+
+  // a zero sparse_size must be rejected instead of looping forever
+
+  ASSERT_EQ(-EINVAL, sparsify(&ioctx, oid, 0, true));
+
+  // sparsify with small sparse_size
+
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  outbl.clear();
+  ASSERT_EQ((int)(inbl.length() - 4096),
+            ioctx.read(oid, outbl, inbl.length(), 0));
+  outbl.append(std::string(4096, '\0'));
+  ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+  if (sparse_read_supported) {
+    expected_m = {{4096 * 1, 4096}, {4096 * 3, 4096}};
+    expected_outbl.clear();
+    expected_outbl.append(std::string(4096, '1'));
+    expected_outbl.append(std::string(4096, '2'));
+  } else {
+    expected_m = {{0, 4 * 4096}};
+    expected_outbl.clear();
+    expected_outbl.append(std::string(4096, '\0'));
+    expected_outbl.append(std::string(4096, '1'));
+    expected_outbl.append(std::string(4096, '\0'));
+    expected_outbl.append(std::string(4096, '2'));
+  }
+  m.clear();
+  outbl.clear();
+  ASSERT_EQ((int)expected_m.size(),
+            ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+  ASSERT_EQ(m, expected_m);
+  ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+
+  // test it is the same after yet another sparsify
+
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  m.clear();
+  outbl.clear();
+  ASSERT_EQ((int)expected_m.size(),
+            ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+  ASSERT_EQ(m, expected_m);
+  ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+
+  ASSERT_EQ(0, ioctx.remove(oid));
+  ioctx.close();
+}
-- 
2.39.5