git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
cls/rbd: add sparsify method
author Mykola Golub <mgolub@suse.com>
Wed, 30 Jan 2019 13:12:04 +0000 (13:12 +0000)
committer Mykola Golub <mgolub@suse.com>
Fri, 15 Feb 2019 11:12:00 +0000 (11:12 +0000)
Signed-off-by: Mykola Golub <mgolub@suse.com>
src/cls/rbd/cls_rbd.cc
src/cls/rbd/cls_rbd_client.cc
src/cls/rbd/cls_rbd_client.h
src/test/cls_rbd/test_cls_rbd.cc

index 92e59ff166d7d6ed36eda4e8a9a1abb6065ff65f..a6a767195d0c237aa8c9a8a0e32a1607fdade480 100644 (file)
@@ -77,6 +77,33 @@ uint64_t get_encode_features(cls_method_context_t hctx) {
   return features;
 }
 
+/**
+ * Examine the next extent of @p bp and update the pending write range.
+ *
+ * The extent starts at *offset and is sparse_size bytes long, clamped so it
+ * does not run past @p length. *offset is always advanced past the extent.
+ * A non-zero extent grows *write_length; a zeroed extent seen while nothing
+ * is pending (*write_length == 0) moves *write_offset forward instead.
+ *
+ * @returns true if *write_length is non-zero and either the extent just
+ *          examined is zeroed or the end of the data was reached, i.e. the
+ *          range *write_offset~*write_length is complete; false otherwise
+ */
+bool calc_sparse_extent(const bufferptr &bp, size_t sparse_size,
+                        uint64_t length, size_t *write_offset,
+                        size_t *write_length, size_t *offset) {
+  // the last extent may be shorter than sparse_size
+  const size_t extent_size =
+    (*offset + sparse_size > length) ? length - *offset : sparse_size;
+
+  const bool extent_is_zero = bufferptr(bp, *offset, extent_size).is_zero();
+  *offset += extent_size;
+
+  if (extent_is_zero) {
+    if (*write_length == 0) {
+      // no data pending yet: skip over the zeroed extent
+      *write_offset += extent_size;
+    }
+  } else {
+    *write_length += extent_size;
+  }
+
+  return (extent_is_zero || *offset == length) && *write_length != 0;
+}
+
 } // anonymous namespace
 
 static int snap_read_header(cls_method_context_t hctx, bufferlist& bl)
@@ -7273,6 +7300,100 @@ int namespace_list(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
   return 0;
 }
 
+/**
+ * Reclaim space for zeroed extents
+ *
+ * The whole object is read. If it is empty or contains only zeroes it is
+ * removed (remove_empty) or, when it has a non-zero length, replaced with an
+ * empty buffer. Otherwise the data is scanned in sparse_size steps and every
+ * run of extents holding non-zero data is written back: the first run with
+ * cls_cxx_replace(), the following ones with cls_cxx_write(). If the only
+ * run covers the whole object nothing is written.
+ *
+ * Input:
+ * @param sparse_size minimal zeroed block to sparse
+ * @param remove_empty boolean, true if the object should be removed if empty
+ *
+ * Output:
+ * @returns -ENOENT if the object does not exist or has been removed
+ * @returns -EINVAL if the input cannot be decoded, or if sparse_size is zero
+ *          and the object contains non-zero data
+ * @returns 0 on success, negative error code on failure
+ */
+int sparsify(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
+{
+  size_t sparse_size;
+  bool remove_empty;
+  try {
+    auto iter = in->cbegin();
+    decode(sparse_size, iter);
+    decode(remove_empty, iter);
+  } catch (const buffer::error &err) {
+    return -EINVAL;
+  }
+
+  int r = check_exists(hctx);
+  if (r < 0) {
+    return r;
+  }
+
+  bufferlist bl;
+  r = cls_cxx_read(hctx, 0, 0, &bl);
+  if (r < 0) {
+    CLS_ERR("failed to read data off of disk: %s", cpp_strerror(r).c_str());
+    return r;
+  }
+
+  if (bl.is_zero()) {
+    if (remove_empty) {
+      CLS_LOG(20, "remove");
+      r = cls_cxx_remove(hctx);
+      if (r < 0) {
+        CLS_ERR("remove failed: %s", cpp_strerror(r).c_str());
+        return r;
+      }
+    } else if (bl.length() > 0) {
+      CLS_LOG(20, "truncate");
+      bufferlist write_bl;
+      r = cls_cxx_replace(hctx, 0, 0, &write_bl);
+      if (r < 0) {
+        CLS_ERR("truncate failed: %s", cpp_strerror(r).c_str());
+        return r;
+      }
+    } else {
+      CLS_LOG(20, "skip empty");
+    }
+    return 0;
+  }
+
+  // calc_sparse_extent() advances the scan by sparse_size bytes per call, so
+  // with a zero sparse_size the loop below would never make progress and
+  // would spin forever. Reject it here; the all-zero path above does not
+  // depend on sparse_size and is left unaffected.
+  if (sparse_size == 0) {
+    CLS_ERR("invalid sparse_size: %zu", sparse_size);
+    return -EINVAL;
+  }
+
+  // make the data contiguous so extents can be addressed through one ptr
+  bl.rebuild(buffer::ptr_node::create(bl.length()));
+  size_t write_offset = 0;
+  size_t write_length = 0;
+  size_t offset = 0;
+  size_t length = bl.length();
+  const auto& ptr = bl.front();
+  bool replace = true;
+  while (offset < length) {
+    if (calc_sparse_extent(ptr, sparse_size, length, &write_offset,
+                           &write_length, &offset)) {
+      if (write_offset == 0 && write_length == length) {
+        // the data run covers the whole object: no zeroed extent to reclaim
+        CLS_LOG(20, "nothing to do");
+        return 0;
+      }
+      // write_offset/write_length are size_t, so print them with %zu
+      CLS_LOG(20, "write%s %zu~%zu", (replace ? "(replace)" : ""),
+              write_offset, write_length);
+      bufferlist write_bl;
+      write_bl.push_back(buffer::ptr_node::create(ptr, write_offset,
+                                                  write_length));
+      if (replace) {
+        // only the first data run goes through cls_cxx_replace()
+        r = cls_cxx_replace(hctx, write_offset, write_length, &write_bl);
+        replace = false;
+      } else {
+        r = cls_cxx_write(hctx, write_offset, write_length, &write_bl);
+      }
+      if (r < 0) {
+        CLS_ERR("write failed: %s", cpp_strerror(r).c_str());
+        return r;
+      }
+      // start accumulating the next data run right after this extent
+      write_offset = offset;
+      write_length = 0;
+    }
+  }
+
+  return 0;
+}
+
 CLS_INIT(rbd)
 {
   CLS_LOG(20, "Loaded rbd class!");
@@ -7315,7 +7436,6 @@ CLS_INIT(rbd)
   cls_method_handle_t h_snapshot_rename;
   cls_method_handle_t h_snapshot_trash_add;
   cls_method_handle_t h_get_all_features;
-  cls_method_handle_t h_copyup;
   cls_method_handle_t h_get_id;
   cls_method_handle_t h_set_id;
   cls_method_handle_t h_set_modify_timestamp;
@@ -7347,7 +7467,6 @@ CLS_INIT(rbd)
   cls_method_handle_t h_migration_set_state;
   cls_method_handle_t h_migration_get;
   cls_method_handle_t h_migration_remove;
-  cls_method_handle_t h_assert_snapc_seq;
   cls_method_handle_t h_old_snapshots_list;
   cls_method_handle_t h_old_snapshot_add;
   cls_method_handle_t h_old_snapshot_remove;
@@ -7402,6 +7521,9 @@ CLS_INIT(rbd)
   cls_method_handle_t h_namespace_add;
   cls_method_handle_t h_namespace_remove;
   cls_method_handle_t h_namespace_list;
+  cls_method_handle_t h_copyup;
+  cls_method_handle_t h_assert_snapc_seq;
+  cls_method_handle_t h_sparsify;
 
   cls_register("rbd", &h_class);
   cls_register_cxx_method(h_class, "create",
@@ -7451,9 +7573,6 @@ CLS_INIT(rbd)
   cls_register_cxx_method(h_class, "get_all_features",
                          CLS_METHOD_RD,
                          get_all_features, &h_get_all_features);
-  cls_register_cxx_method(h_class, "copyup",
-                         CLS_METHOD_RD | CLS_METHOD_WR,
-                         copyup, &h_copyup);
 
   // NOTE: deprecate v1 parent APIs after mimic EOLed
   cls_register_cxx_method(h_class, "get_parent",
@@ -7549,10 +7668,6 @@ CLS_INIT(rbd)
   cls_register_cxx_method(h_class, "migration_remove",
                           CLS_METHOD_RD | CLS_METHOD_WR,
                           migration_remove, &h_migration_remove);
-  cls_register_cxx_method(h_class, "assert_snapc_seq",
-                          CLS_METHOD_RD | CLS_METHOD_WR,
-                          assert_snapc_seq,
-                          &h_assert_snapc_seq);
 
   cls_register_cxx_method(h_class, "set_modify_timestamp",
                          CLS_METHOD_RD | CLS_METHOD_WR,
@@ -7792,4 +7907,16 @@ CLS_INIT(rbd)
                           namespace_remove, &h_namespace_remove);
   cls_register_cxx_method(h_class, "namespace_list", CLS_METHOD_RD,
                           namespace_list, &h_namespace_list);
+
+  /* data object methods */
+  cls_register_cxx_method(h_class, "copyup",
+                         CLS_METHOD_RD | CLS_METHOD_WR,
+                         copyup, &h_copyup);
+  cls_register_cxx_method(h_class, "assert_snapc_seq",
+                          CLS_METHOD_RD | CLS_METHOD_WR,
+                          assert_snapc_seq,
+                          &h_assert_snapc_seq);
+  cls_register_cxx_method(h_class, "sparsify",
+                         CLS_METHOD_RD | CLS_METHOD_WR,
+                         sparsify, &h_sparsify);
 }
index 78bd6f013d5983b94fb16e766bfce5e55931d1a3..ab5305909b02ec32c320fefe9a120b0897895846 100644 (file)
@@ -2783,5 +2783,23 @@ int namespace_list(librados::IoCtx *ioctx,
   return namespace_list_finish(&iter, entries);
 }
 
+/// Add a call of the "rbd" class method "sparsify" to @p op. The method
+/// input is the encoded @p sparse_size followed by the encoded
+/// @p remove_empty flag.
+void sparsify(librados::ObjectWriteOperation *op, size_t sparse_size,
+              bool remove_empty)
+{
+  bufferlist in_bl;
+  encode(sparse_size, in_bl);
+  encode(remove_empty, in_bl);
+
+  op->exec("rbd", "sparsify", in_bl);
+}
+
+/// Build a write operation carrying the "sparsify" call and run it against
+/// @p oid via ioctx->operate().
+/// @returns the result of ioctx->operate()
+int sparsify(librados::IoCtx *ioctx, const std::string &oid, size_t sparse_size,
+             bool remove_empty)
+{
+  librados::ObjectWriteOperation sparsify_op;
+  sparsify(&sparsify_op, sparse_size, remove_empty);
+  return ioctx->operate(oid, &sparsify_op);
+}
+
 } // namespace cls_client
 } // namespace librbd
index 833d58bad930342d3dd4b94a5209f1e5fefb096d..48125173bffa8fbf9b67563c69cc2c8755fd2f19 100644 (file)
@@ -183,9 +183,6 @@ int get_all_features_finish(bufferlist::const_iterator *it,
 int get_all_features(librados::IoCtx *ioctx, const std::string &oid,
                      uint64_t *all_features);
 
-int copyup(librados::IoCtx *ioctx, const std::string &oid,
-           bufferlist data);
-
 /// NOTE: remove protection after clone v1 is retired
 void get_protection_status_start(librados::ObjectReadOperation *op,
                                  snapid_t snap_id);
@@ -293,13 +290,6 @@ int migration_get(librados::IoCtx *ioctx, const std::string &oid,
 int migration_remove(librados::IoCtx *ioctx, const std::string &oid);
 void migration_remove(librados::ObjectWriteOperation *op);
 
-int assert_snapc_seq(librados::IoCtx *ioctx, const std::string &oid,
-                     uint64_t snapc_seq,
-                     cls::rbd::AssertSnapcSeqState state);
-void assert_snapc_seq(librados::ObjectWriteOperation *op,
-                      uint64_t snapc_seq,
-                      cls::rbd::AssertSnapcSeqState state);
-
 // operations on rbd_id objects
 void get_id_start(librados::ObjectReadOperation *op);
 int get_id_finish(bufferlist::const_iterator *it, std::string *id);
@@ -604,6 +594,22 @@ int namespace_list(librados::IoCtx *ioctx,
                    const std::string &start, uint64_t max_return,
                    std::list<std::string> *entries);
 
+// operations on data objects
+int assert_snapc_seq(librados::IoCtx *ioctx, const std::string &oid,
+                     uint64_t snapc_seq,
+                     cls::rbd::AssertSnapcSeqState state);
+void assert_snapc_seq(librados::ObjectWriteOperation *op,
+                      uint64_t snapc_seq,
+                      cls::rbd::AssertSnapcSeqState state);
+
+int copyup(librados::IoCtx *ioctx, const std::string &oid,
+           bufferlist data);
+
+void sparsify(librados::ObjectWriteOperation *op, size_t sparse_size,
+              bool remove_empty);
+int sparsify(librados::IoCtx *ioctx, const std::string &oid, size_t sparse_size,
+             bool remove_empty);
+
 } // namespace cls_client
 } // namespace librbd
 
index 756d64d31beb8af7ad9515beb08af0d18905acc7..43d30b717b56d8e6ab71ecfb0e20e9bfc6ba5afc 100644 (file)
@@ -65,6 +65,28 @@ static char *random_buf(size_t len)
   return b;
 }
 
+// Probe whether sparse reads through @p ioctx report holes: create @p oid,
+// write a single 'X' at offsets 1 and 3, sparse-read the first four bytes and
+// check that exactly the extents {1,1} and {3,1} with data "XX" come back.
+// The probe object is removed before the result is evaluated.
+static bool is_sparse_read_supported(librados::IoCtx &ioctx,
+                                     const std::string &oid) {
+  EXPECT_EQ(0, ioctx.create(oid, true));
+
+  bufferlist byte_bl;
+  byte_bl.append(std::string(1, 'X'));
+  EXPECT_EQ(0, ioctx.write(oid, byte_bl, byte_bl.length(), 1));
+  EXPECT_EQ(0, ioctx.write(oid, byte_bl, byte_bl.length(), 3));
+
+  std::map<uint64_t, uint64_t> extents;
+  bufferlist data_bl;
+  const int r = ioctx.sparse_read(oid, extents, data_bl, 4, 0);
+  ioctx.remove(oid);
+
+  const std::map<uint64_t, uint64_t> expected_extents = {{1, 1}, {3, 1}};
+  bufferlist expected_data_bl;
+  expected_data_bl.append(std::string(2, 'X'));
+
+  // two extents are expected, hence a return value of 2
+  if (r != 2 || extents != expected_extents) {
+    return false;
+  }
+  return data_bl.contents_equal(expected_data_bl);
+}
+
 class TestClsRbd : public ::testing::Test {
 public:
 
@@ -3009,3 +3031,110 @@ TEST_F(TestClsRbd, assert_snapc_seq)
 
   ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(snapc_seq));
 }
+
+TEST_F(TestClsRbd, sparsify)
+{
+  librados::IoCtx ioctx;
+  ASSERT_EQ(0, _rados.ioctx_create(_pool_name.c_str(), ioctx));
+
+  string oid = get_temp_image_name();
+  ioctx.remove(oid); // result not checked
+
+  bool sparse_read_supported = is_sparse_read_supported(ioctx, oid);
+
+  // sparsify on a non-existent object: must return -ENOENT and not create it
+
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, false));
+  uint64_t size;
+  ASSERT_EQ(-ENOENT, ioctx.stat(oid, &size, NULL));
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(-ENOENT, ioctx.stat(oid, &size, NULL));
+
+  // sparsify on an empty object: kept unless remove_empty, which removes it
+
+  ASSERT_EQ(0, ioctx.create(oid, true));
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, false));
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, false));
+
+  // sparsify on an all-zero object: data is dropped, or the object removed
+
+  bufferlist inbl;
+  inbl.append(std::string(4096, '\0'));
+  ASSERT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 0));
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, false));
+  std::map<uint64_t, uint64_t> m;
+  bufferlist outbl;
+  std::map<uint64_t, uint64_t> expected_m = {{0, 0}}; // no sparse read support
+  bufferlist expected_outbl;
+  if (sparse_read_supported) {
+    expected_m = {};
+  }
+  ASSERT_EQ((int)expected_m.size(),
+            ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+  ASSERT_EQ(m, expected_m);
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 0));
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  ASSERT_EQ(-ENOENT, sparsify(&ioctx, oid, 16, true));
+
+  // sparsify on an object mixing 4K zeroed and 4K non-zero blocks
+
+  inbl.append(std::string(4096, '1'));
+  inbl.append(std::string(4096, '\0'));
+  inbl.append(std::string(4096, '2'));
+  inbl.append(std::string(4096, '\0'));
+  ASSERT_EQ(0, ioctx.write(oid, inbl, inbl.length(), 0));
+
+  // sparse_size equal to the object size: the object must stay unchanged
+
+  ASSERT_EQ(0, sparsify(&ioctx, oid, inbl.length(), true));
+  expected_m = {{0, inbl.length()}};
+  expected_outbl = inbl;
+  ASSERT_EQ((int)expected_m.size(),
+            ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+  ASSERT_EQ(m, expected_m);
+  ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+
+  // small sparse_size: the object becomes 4K shorter, read data is unchanged
+
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  outbl.clear();
+  ASSERT_EQ((int)(inbl.length() - 4096),
+            ioctx.read(oid, outbl, inbl.length(), 0));
+  outbl.append(std::string(4096, '\0')); // pad the missing tail for comparison
+  ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+  if (sparse_read_supported) {
+    expected_m = {{4096 * 1, 4096}, {4096 * 3, 4096}};
+    expected_outbl.clear();
+    expected_outbl.append(std::string(4096, '1'));
+    expected_outbl.append(std::string(4096, '2'));
+  } else {
+    expected_m = {{0, 4 * 4096}};
+    expected_outbl.clear();
+    expected_outbl.append(std::string(4096, '\0'));
+    expected_outbl.append(std::string(4096, '1'));
+    expected_outbl.append(std::string(4096, '\0'));
+    expected_outbl.append(std::string(4096, '2'));
+  }
+  m.clear();
+  outbl.clear();
+  ASSERT_EQ((int)expected_m.size(),
+            ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+  ASSERT_EQ(m, expected_m);
+  ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+
+  // a second sparsify must leave the extent map and data unchanged
+
+  ASSERT_EQ(0, sparsify(&ioctx, oid, 16, true));
+  m.clear();
+  outbl.clear();
+  ASSERT_EQ((int)expected_m.size(),
+            ioctx.sparse_read(oid, m, outbl, inbl.length(), 0));
+  ASSERT_EQ(m, expected_m);
+  ASSERT_TRUE(outbl.contents_equal(expected_outbl));
+
+  ASSERT_EQ(0, ioctx.remove(oid));
+  ioctx.close();
+}