return features;
}
+/**
+ * Examine the next chunk of the object data and either grow the pending
+ * write extent (non-zero chunk) or skip over it (zeroed chunk).
+ *
+ * The chunk starts at *offset and is sparse_size bytes long, shortened so
+ * that it never runs past length. *offset is always advanced past the chunk.
+ *
+ * @param bp            buffer holding the object data
+ * @param sparse_size   size of the chunk tested for zeroes; must be non-zero,
+ *                      otherwise *offset does not advance
+ * @param length        number of bytes of bp to scan in total
+ * @param write_offset  [in,out] start of the pending write extent; moved
+ *                      forward over zeroed chunks while nothing is pending
+ * @param write_length  [in,out] length of the pending write extent; grown by
+ *                      every non-zero chunk
+ * @param offset        [in,out] current scan position; must be <= length
+ * @returns true if a non-empty pending extent is complete (a zeroed chunk or
+ *          the end of the data was reached), false otherwise
+ */
+bool calc_sparse_extent(const bufferptr &bp, size_t sparse_size,
+                        uint64_t length, size_t *write_offset,
+                        size_t *write_length, size_t *offset) {
+  // Compare against the bytes that are left instead of evaluating
+  // *offset + sparse_size: that sum wraps around for a huge sparse_size and
+  // would then select an extent reaching past the end of the data.
+  const uint64_t remaining = length - *offset;
+  size_t extent_size;
+  if (sparse_size > remaining) {
+    extent_size = remaining;
+  } else {
+    extent_size = sparse_size;
+  }
+
+  // Sub-range of bp covering just this chunk.
+  bufferptr extent(bp, *offset, extent_size);
+  *offset += extent_size;
+
+  bool extent_is_zero = extent.is_zero();
+  if (!extent_is_zero) {
+    *write_length += extent_size;
+  }
+  if (extent_is_zero && *write_length == 0) {
+    // Still inside a hole with nothing pending: the next extent can only
+    // start after this chunk.
+    *write_offset += extent_size;
+  }
+
+  // The pending extent ends at the first zeroed chunk or at the end of data.
+  if ((extent_is_zero || *offset == length) && *write_length != 0) {
+    return true;
+  }
+  return false;
+}
+
} // anonymous namespace
static int snap_read_header(cls_method_context_t hctx, bufferlist& bl)
return 0;
}
+/**
+ * Reclaim space for zeroed extents
+ *
+ * Input:
+ * @param sparse_size minimal zeroed block to sparse (must be non-zero)
+ * @param remove_empty boolean, true if the object should be removed if empty
+ *
+ * Output:
+ * @returns -EINVAL if the input cannot be decoded or sparse_size is zero
+ * @returns -ENOENT if the object does not exist or has been removed
+ * @returns 0 on success, negative error code on failure
+ */
+int sparsify(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
+{
+  size_t sparse_size;
+  bool remove_empty;
+  try {
+    auto iter = in->cbegin();
+    decode(sparse_size, iter);
+    decode(remove_empty, iter);
+  } catch (const buffer::error &err) {
+    return -EINVAL;
+  }
+
+  // With a zero sparse_size calc_sparse_extent() yields empty chunks and
+  // never advances the scan offset, so the loop below would not terminate.
+  if (sparse_size == 0) {
+    CLS_ERR("sparse_size must be non-zero");
+    return -EINVAL;
+  }
+
+  int r = check_exists(hctx);
+  if (r < 0) {
+    return r;
+  }
+
+  // Offset 0 / length 0: the code below treats the result as the complete
+  // object data.
+  bufferlist bl;
+  r = cls_cxx_read(hctx, 0, 0, &bl);
+  if (r < 0) {
+    CLS_ERR("failed to read data off of disk: %s", cpp_strerror(r).c_str());
+    return r;
+  }
+
+  // Object holds no non-zero data (this includes a zero-length object).
+  if (bl.is_zero()) {
+    if (remove_empty) {
+      CLS_LOG(20, "remove");
+      r = cls_cxx_remove(hctx);
+      if (r < 0) {
+        CLS_ERR("remove failed: %s", cpp_strerror(r).c_str());
+        return r;
+      }
+    } else if (bl.length() > 0) {
+      // Replace the zeroed content with an empty buffer.
+      CLS_LOG(20, "truncate");
+      bufferlist write_bl;
+      r = cls_cxx_replace(hctx, 0, 0, &write_bl);
+      if (r < 0) {
+        CLS_ERR("truncate failed: %s", cpp_strerror(r).c_str());
+        return r;
+      }
+    } else {
+      CLS_LOG(20, "skip empty");
+    }
+    return 0;
+  }
+
+  // Rebuild into one buffer of the full length; the scan below indexes
+  // bl.front() with offsets up to bl.length().
+  bl.rebuild(buffer::ptr_node::create(bl.length()));
+  size_t write_offset = 0;
+  size_t write_length = 0;
+  size_t offset = 0;
+  size_t length = bl.length();
+  const auto& ptr = bl.front();
+  // The first extent is written with cls_cxx_replace(), later ones with
+  // cls_cxx_write().
+  bool replace = true;
+  while (offset < length) {
+    if (calc_sparse_extent(ptr, sparse_size, length, &write_offset,
+                           &write_length, &offset)) {
+      // A single extent covering the whole object means there are no holes.
+      if (write_offset == 0 && write_length == length) {
+        CLS_LOG(20, "nothing to do");
+        return 0;
+      }
+      // Cast explicitly: the variables are size_t, the format is PRIu64.
+      CLS_LOG(20, "write%s %" PRIu64 "~%" PRIu64, (replace ? "(replace)" : ""),
+              static_cast<uint64_t>(write_offset),
+              static_cast<uint64_t>(write_length));
+      bufferlist write_bl;
+      write_bl.push_back(buffer::ptr_node::create(ptr, write_offset,
+                                                  write_length));
+      if (replace) {
+        r = cls_cxx_replace(hctx, write_offset, write_length, &write_bl);
+        replace = false;
+      } else {
+        r = cls_cxx_write(hctx, write_offset, write_length, &write_bl);
+      }
+      if (r < 0) {
+        CLS_ERR("write failed: %s", cpp_strerror(r).c_str());
+        return r;
+      }
+      // Start collecting the next extent at the current scan position.
+      write_offset = offset;
+      write_length = 0;
+    }
+  }
+
+  return 0;
+}
+
CLS_INIT(rbd)
{
CLS_LOG(20, "Loaded rbd class!");
cls_method_handle_t h_snapshot_rename;
cls_method_handle_t h_snapshot_trash_add;
cls_method_handle_t h_get_all_features;
- cls_method_handle_t h_copyup;
cls_method_handle_t h_get_id;
cls_method_handle_t h_set_id;
cls_method_handle_t h_set_modify_timestamp;
cls_method_handle_t h_migration_set_state;
cls_method_handle_t h_migration_get;
cls_method_handle_t h_migration_remove;
- cls_method_handle_t h_assert_snapc_seq;
cls_method_handle_t h_old_snapshots_list;
cls_method_handle_t h_old_snapshot_add;
cls_method_handle_t h_old_snapshot_remove;
cls_method_handle_t h_namespace_add;
cls_method_handle_t h_namespace_remove;
cls_method_handle_t h_namespace_list;
+ cls_method_handle_t h_copyup;
+ cls_method_handle_t h_assert_snapc_seq;
+ cls_method_handle_t h_sparsify;
cls_register("rbd", &h_class);
cls_register_cxx_method(h_class, "create",
cls_register_cxx_method(h_class, "get_all_features",
CLS_METHOD_RD,
get_all_features, &h_get_all_features);
- cls_register_cxx_method(h_class, "copyup",
- CLS_METHOD_RD | CLS_METHOD_WR,
- copyup, &h_copyup);
// NOTE: deprecate v1 parent APIs after mimic EOLed
cls_register_cxx_method(h_class, "get_parent",
cls_register_cxx_method(h_class, "migration_remove",
CLS_METHOD_RD | CLS_METHOD_WR,
migration_remove, &h_migration_remove);
- cls_register_cxx_method(h_class, "assert_snapc_seq",
- CLS_METHOD_RD | CLS_METHOD_WR,
- assert_snapc_seq,
- &h_assert_snapc_seq);
cls_register_cxx_method(h_class, "set_modify_timestamp",
CLS_METHOD_RD | CLS_METHOD_WR,
namespace_remove, &h_namespace_remove);
cls_register_cxx_method(h_class, "namespace_list", CLS_METHOD_RD,
namespace_list, &h_namespace_list);
+
+ /* data object methods */
+ cls_register_cxx_method(h_class, "copyup",
+ CLS_METHOD_RD | CLS_METHOD_WR,
+ copyup, &h_copyup);
+ cls_register_cxx_method(h_class, "assert_snapc_seq",
+ CLS_METHOD_RD | CLS_METHOD_WR,
+ assert_snapc_seq,
+ &h_assert_snapc_seq);
+ cls_register_cxx_method(h_class, "sparsify",
+ CLS_METHOD_RD | CLS_METHOD_WR,
+ sparsify, &h_sparsify);
}
return namespace_list_finish(&iter, entries);
}
+// Append an "rbd"/"sparsify" class method call to the given write operation.
+// The payload is sparse_size followed by remove_empty, in that order.
+void sparsify(librados::ObjectWriteOperation *op, size_t sparse_size,
+              bool remove_empty)
+{
+  bufferlist payload;
+  encode(sparse_size, payload);
+  encode(remove_empty, payload);
+
+  op->exec("rbd", "sparsify", payload);
+}
+
+// Build a write operation holding a single sparsify call, run it against
+// oid and return the result of IoCtx::operate().
+int sparsify(librados::IoCtx *ioctx, const std::string &oid, size_t sparse_size,
+             bool remove_empty)
+{
+  librados::ObjectWriteOperation write_op;
+  sparsify(&write_op, sparse_size, remove_empty);
+
+  const int r = ioctx->operate(oid, &write_op);
+  return r;
+}
+
} // namespace cls_client
} // namespace librbd
int get_all_features(librados::IoCtx *ioctx, const std::string &oid,
uint64_t *all_features);
-int copyup(librados::IoCtx *ioctx, const std::string &oid,
- bufferlist data);
-
/// NOTE: remove protection after clone v1 is retired
void get_protection_status_start(librados::ObjectReadOperation *op,
snapid_t snap_id);
int migration_remove(librados::IoCtx *ioctx, const std::string &oid);
void migration_remove(librados::ObjectWriteOperation *op);
-int assert_snapc_seq(librados::IoCtx *ioctx, const std::string &oid,
- uint64_t snapc_seq,
- cls::rbd::AssertSnapcSeqState state);
-void assert_snapc_seq(librados::ObjectWriteOperation *op,
- uint64_t snapc_seq,
- cls::rbd::AssertSnapcSeqState state);
-
// operations on rbd_id objects
void get_id_start(librados::ObjectReadOperation *op);
int get_id_finish(bufferlist::const_iterator *it, std::string *id);
const std::string &start, uint64_t max_return,
std::list<std::string> *entries);
+// operations on data objects
+int assert_snapc_seq(librados::IoCtx *ioctx, const std::string &oid,
+ uint64_t snapc_seq,
+ cls::rbd::AssertSnapcSeqState state);
+void assert_snapc_seq(librados::ObjectWriteOperation *op,
+ uint64_t snapc_seq,
+ cls::rbd::AssertSnapcSeqState state);
+
+int copyup(librados::IoCtx *ioctx, const std::string &oid,
+ bufferlist data);
+
+void sparsify(librados::ObjectWriteOperation *op, size_t sparse_size,
+ bool remove_empty);
+int sparsify(librados::IoCtx *ioctx, const std::string &oid, size_t sparse_size,
+ bool remove_empty);
+
} // namespace cls_client
} // namespace librbd
return b;
}
+// Probe whether sparse_read() reports holes for objects reachable through
+// ioctx. A scratch object named oid gets one 'X' byte at offset 1 and one at
+// offset 3 and is removed again after the read. Returns true only if
+// sparse_read() over the first 4 bytes yields exactly the extents {1,1} and
+// {3,1} together with the two data bytes.
+static bool is_sparse_read_supported(librados::IoCtx &ioctx,
+                                     const std::string &oid) {
+  EXPECT_EQ(0, ioctx.create(oid, true));
+  bufferlist probe_bl;
+  probe_bl.append(std::string(1, 'X'));
+  EXPECT_EQ(0, ioctx.write(oid, probe_bl, probe_bl.length(), 1));
+  EXPECT_EQ(0, ioctx.write(oid, probe_bl, probe_bl.length(), 3));
+
+  std::map<uint64_t, uint64_t> extents;
+  bufferlist data;
+  int r = ioctx.sparse_read(oid, extents, data, 4, 0);
+  // Clean up the scratch object; the result is ignored.
+  ioctx.remove(oid);
+
+  std::map<uint64_t, uint64_t> want_extents = {{1, 1}, {3, 1}};
+  bufferlist want_data;
+  want_data.append(std::string(2, 'X'));
+
+  // Two extents are expected, so sparse_read() must have returned 2.
+  if (r != 2) {
+    return false;
+  }
+  if (!(extents == want_extents)) {
+    return false;
+  }
+  return data.contents_equal(want_data);
+}
+
class TestClsRbd : public ::testing::Test {
public:
ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(snapc_seq));
}
+// Checks sparsify() on a missing, an empty, an all-zero and a partly-zero object.
+TEST_F(TestClsRbd, sparsify)
+{
+ librados::IoCtx ioctx;
+ ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+ // remove any leftover object under the test oid; the result is ignored
+ string oid = get_temp_image_name();
+ ioctx.remove(oid);
+ // the sparse_read() expectations below depend on whether holes are reported
+ bool sparse_read_supported = is_sparse_read_supported(ioctx, oid);
+
+ // test sparsify on a non-existent object: it must fail with -ENOENT and
+ // must not create the object, whatever the value of remove_empty
+ ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, false));
+ uint64_t size;
+ ASSERT_EQ(-ENOENT, ioctx.stat(oid, &size, NULL));
+ ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, true));
+ ASSERT_EQ(-ENOENT, ioctx.stat(oid, &size, NULL));
+
+ // test sparsify on an empty object: it is kept when remove_empty is false
+ // and removed when it is true (the last call then sees -ENOENT)
+ ASSERT_EQ(0, ioctx.create(oid, true));
+ ASSERT_EQ(0, sparsify(&ioctx, oid, 16, false));
+ ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+ ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, false));
+
+ // test sparsify on a zeroed object: without remove_empty no data extent
+ // is left but the object stays; with remove_empty the object is removed
+ bufferlist inbl;
+ inbl.append(std::string(4096, '\0'));
+ ASSERT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 0));
+ ASSERT_EQ(0, sparsify(&ioctx, oid, 16, false));
+ std::map<uint64_t, uint64_t> m;
+ bufferlist outbl;
+ std::map<uint64_t, uint64_t> expected_m = {{0, 0}};
+ bufferlist expected_outbl;
+ if (sparse_read_supported) {
+ expected_m = {};
+ }
+ ASSERT_EQ((int)expected_m.size(),
+ ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+ ASSERT_EQ(m, expected_m);
+ ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+ ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, true));
+ ASSERT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 0));
+ ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+ ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, true));
+
+ // test sparsify on an object with zeroes: inbl already holds 4096 zero
+ // bytes, so the layout is 5 x 4096 bytes: zeroes, '1', zeroes, '2', zeroes
+ inbl.append(std::string(4096, '1'));
+ inbl.append(std::string(4096, '\0'));
+ inbl.append(std::string(4096, '2'));
+ inbl.append(std::string(4096, '\0'));
+ ASSERT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 0));
+
+ // try to sparsify with sparse_size too large: the whole object is a single
+ // non-zero chunk, so its extent map and content must stay unchanged
+ ASSERT_EQ(0, sparsify(&ioctx, oid, inbl.length(), true));
+ expected_m = {{0, inbl.length()}};
+ expected_outbl = inbl;
+ ASSERT_EQ((int)expected_m.size(),
+ ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+ ASSERT_EQ(m, expected_m);
+ ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+
+ // sparsify with small sparse_size: a plain read is expected to return the
+ // data without the trailing 4096 zeroes, which are re-appended to compare
+ ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+ outbl.clear();
+ ASSERT_EQ((int)(inbl.length() - 4096),
+ ioctx.read(oid, outbl, inbl.length(), 0));
+ outbl.append(std::string(4096, '\0'));
+ ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+ if (sparse_read_supported) {
+ expected_m = {{4096 * 1, 4096}, {4096 * 3, 4096}};
+ expected_outbl.clear();
+ expected_outbl.append(std::string(4096, '1'));
+ expected_outbl.append(std::string(4096, '2'));
+ } else {
+ expected_m = {{0, 4 * 4096}};
+ expected_outbl.clear();
+ expected_outbl.append(std::string(4096, '\0'));
+ expected_outbl.append(std::string(4096, '1'));
+ expected_outbl.append(std::string(4096, '\0'));
+ expected_outbl.append(std::string(4096, '2'));
+ }
+ m.clear();
+ outbl.clear();
+ ASSERT_EQ((int)expected_m.size(),
+ ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+ ASSERT_EQ(m, expected_m);
+ ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+
+ // test it is the same after yet another sparsify: a second run must not
+ // change the extent map or the data returned by sparse_read()
+ ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+ m.clear();
+ outbl.clear();
+ ASSERT_EQ((int)expected_m.size(),
+ ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+ ASSERT_EQ(m, expected_m);
+ ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+
+ ASSERT_EQ(0, ioctx.remove(oid));
+ ioctx.close();
+}