osd, test: do not increment refcount if a previous snapshot have a same chunk

author myoungwon oh <ohmyoungwon@gmail.com>

Wed, 1 Jul 2020 13:57:21 +0000 (22:57 +0900)

committer myoungwon oh <ohmyoungwon@gmail.com>

Tue, 18 Aug 2020 16:38:05 +0000 (01:38 +0900)
author myoungwon oh <ohmyoungwon@gmail.com>
Wed, 1 Jul 2020 13:57:21 +0000 (22:57 +0900)
committer myoungwon oh <ohmyoungwon@gmail.com>
Tue, 18 Aug 2020 16:38:05 +0000 (01:38 +0900)
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc

index 4ed45365f0e5edfe56a1c09d5a1a9ea7d7b72718..09974ee05789ecb9db59df1a7d480327dc07a02c 100644 (file)
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -2510,6 +2510,11 @@ int PrimaryLogPG::start_manifest_flush(OpRequestRef op, ObjectContextRef obc, bo
      return r;
    }
  
+  // all clean
+  if (manifest_fop->io_tids.empty()) {
+    return 0;
+  }
+
    flush_ops[obc->obs.oi.soid] = manifest_fop;
    return -EINPROGRESS;
  }
@@ -2555,6 +2560,20 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush
      unsigned flags = CEPH_OSD_FLAG_IGNORE_CACHE | CEPH_OSD_FLAG_IGNORE_OVERLAY |
                      CEPH_OSD_FLAG_RWORDERED;
      tgt_length = chunk_data.length();
+
+    /* 
+     * TODO:
+     * set_chunk will not imply that flush eventually re-write 
+     * the chunk if it becomes overwritten. So, we need to remove this part
+     * entirely and rework the dedup procedure based on the following scenarios.
+     * 
+     * 1. An external agent runs a CDC and explicitly sends set-chunk commands for 
+     * each chunk it chooses to dedup.
+     * 2. The osd internally runs a CDC on the extents of the object that are not yet 
+     * dedup'd and performs the dedup directly.
+     *
+     */
+
      if (is_dedup_chunk(obc->obs.oi, iter->second)) {
        pg_pool_t::fingerprint_t fp_algo = pool.info.get_fingerprint_type();
        object_t fp_oid = [&fp_algo, &chunk_data]() -> string {
@@ -2572,6 +2591,18 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush
        }();
        tgt_soid.oid = fp_oid;
        iter->second.oid = tgt_soid;
+      // skip if the same content exists in prev snap at same offset
+      if (obc->ssc->snapset.clones.size()) {
+       ObjectContextRef cobc = get_prev_clone_obc(obc);
+       if (cobc) {
+         auto c = cobc->obs.oi.manifest.chunk_map.find(iter->first);
+         if (c != cobc->obs.oi.manifest.chunk_map.end()) {
+           if (iter->second == cobc->obs.oi.manifest.chunk_map[iter->first]) {
+             continue;
+           }
+         }
+       }
+      }
        {
         bufferlist t;
         cls_cas_chunk_create_or_get_ref_op get_call;
diff --git a/src/test/librados/tier_cxx.cc b/src/test/librados/tier_cxx.cc

index e9a3f5d1ff4b1a65f588d8788cf275ad7db3304e..7f4bdb3cba02341a7ba19d83e7b8420bfa17a6b9 100644 (file)
--- a/src/test/librados/tier_cxx.cc
+++ b/src/test/librados/tier_cxx.cc
@@ -106,6 +106,51 @@ void manifest_set_chunk(Rados& cluster, librados::IoCtx& src_ioctx,
    completion->release();
  }
  
+#include "common/ceph_crypto.h"
+using ceph::crypto::SHA1;
+#include "rgw/rgw_common.h"
+
+void check_fp_oid_refcount(librados::IoCtx& ioctx, std::string foid, uint64_t count,
+                          std::string fp_algo = NULL)
+{
+  bufferlist t;
+  int size = foid.length();
+  if (fp_algo == "sha1") {
+    unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
+    char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
+    SHA1 sha1_gen;
+    sha1_gen.Update((const unsigned char *)foid.c_str(), size);
+    sha1_gen.Final(fingerprint);
+    buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
+    ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
+  } else if (!fp_algo.empty()) {
+    ceph_assert(0 == "unrecognized fingerprint algorithm");
+  }
+
+  chunk_refs_t refs;
+  try {
+    auto iter = t.cbegin();
+    decode(refs, iter);
+  } catch (buffer::error& err) {
+    ASSERT_TRUE(0);
+  }
+  ASSERT_EQ(count, refs.count());
+}
+
+// Test helper: issue a tier-flush read op on `oid` and expect `expect_ret` back.
+void do_manifest_flush(librados::Rados& cluster, librados::IoCtx& ioctx,
+                      std::string oid, int expect_ret)
+{
+  ObjectReadOperation flush_op;
+  flush_op.tier_flush();
+  librados::AioCompletion *flush_done = cluster.aio_create_completion();
+  ASSERT_EQ(0, ioctx.aio_operate(oid, flush_done, &flush_op,
+                                 librados::OPERATION_IGNORE_CACHE, NULL));
+  flush_done->wait_for_complete();
+  ASSERT_EQ(expect_ret, flush_done->get_return_value());
+  flush_done->release();
+}
+
  class LibRadosTwoPoolsPP : public RadosTestPP
  {
  public:
@@ -4662,6 +4707,104 @@ TEST_F(LibRadosTwoPoolsPP, ManifestCheckRefcountWhenModification) {
    }
  }
  
+TEST_F(LibRadosTwoPoolsPP, ManifestFlushDupCount) {
+  // skip test if the cluster's required osd release is older than octopus
+  if (_get_required_osd_release(cluster) < "octopus") {
+    cout << "cluster is not yet octopus, skipping test" << std::endl;
+    return;
+  }
+
+  bufferlist inbl;
+  ASSERT_EQ(0, cluster.mon_command(
+       set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
+       inbl, NULL, NULL));
+  cluster.wait_for_latest_osdmap();
+
+  // write identical 10-byte content to "foo" (ioctx) and "bar" (cache_ioctx)
+  {
+    bufferlist bl;
+    bl.append("there hiHI");
+    ObjectWriteOperation op;
+    op.write_full(bl);
+    ASSERT_EQ(0, ioctx.operate("foo", &op));
+  }
+  {
+    bufferlist bl;
+    bl.append("there hiHI");
+    ObjectWriteOperation op;
+    op.write_full(bl);
+    ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
+  }
+
+  // wait for maps to settle
+  cluster.wait_for_latest_osdmap();
+
+  // set-chunk (dedup)
+  manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "bar", "foo");
+  // set-chunk (dedup)
+  manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "bar", "foo");
+  // set-chunk (dedup)
+  manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "bar", "foo");
+
+  // foo head: [er] [hi] [HI]
+  // dirty the chunks: overwrite bytes 0-7 of foo
+  {
+    bufferlist bl;
+    bl.append("There hi");
+    ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
+  }
+
+  // create a snapshot, clone
+  vector<uint64_t> my_snaps(1);
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
+       my_snaps));
+
+  // dirty the chunks again
+  // foo head: [bb] [hi] [HI]
+  {
+    bufferlist bl;
+    bl.append("Thbbe hi");
+    ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
+  }
+
+  // and another snapshot; my_snaps is kept newest-first ([0] new, [1] old)
+  my_snaps.resize(2);
+  my_snaps[1] = my_snaps[0];
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
+  ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
+       my_snaps));
+
+  // dirty the chunks once more, rewriting the same content
+  // foo head: [bb] [hi] [HI]
+  {
+    bufferlist bl;
+    bl.append("Thbbe hi");
+    ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
+  }
+
+  // foo snap[1]: [er] [hi] [HI]
+  // foo snap[0]: [bb] [hi] [HI]
+  // foo head   : [bb] [hi] [HI]
+
+  // flush on oldest snap
+  ioctx.snap_set_read(my_snaps[1]);
+  do_manifest_flush(cluster, ioctx, "foo", 0);
+
+  // flush on newer snap
+  ioctx.snap_set_read(my_snaps[0]);
+  do_manifest_flush(cluster, ioctx, "foo", 0);
+
+  ioctx.snap_set_read(librados::SNAP_HEAD);  // finally, flush on head
+  do_manifest_flush(cluster, ioctx, "foo", 0);
+
+  // "hi" sits at the same offset in all three versions: expect a single ref
+  check_fp_oid_refcount(cache_ioctx, "hi", 1u, "sha1");
+
+  // "bb" is shared by snap[0] and head at the same offset: expect a single ref
+  check_fp_oid_refcount(cache_ioctx, "bb", 1u, "sha1");
+}
+
  class LibRadosTwoPoolsECPP : public RadosTestECPP
  {
  public:
author	myoungwon oh <ohmyoungwon@gmail.com>
	Wed, 1 Jul 2020 13:57:21 +0000 (22:57 +0900)
committer	myoungwon oh <ohmyoungwon@gmail.com>
	Tue, 18 Aug 2020 16:38:05 +0000 (01:38 +0900)
src/osd/PrimaryLogPG.cc		patch \| blob \| history
src/test/librados/tier_cxx.cc		patch \| blob \| history