From: myoungwon oh Date: Wed, 1 Jul 2020 13:57:21 +0000 (+0900) Subject: osd, test: do not increment refcount if a previous snapshot have a same chunk X-Git-Tag: v16.1.0~1248^2~18 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0945fa65447ba6d118cf806200622f9c440b618f;p=ceph.git osd, test: do not increment refcount if a previous snapshot have a same chunk When flush is started, check a previous snapshot and make the target chunk's state clean if there is the same chunk in the previous snapshot Signed-off-by: Myoungwon Oh --- diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 4ed45365f0e..09974ee0578 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2510,6 +2510,11 @@ int PrimaryLogPG::start_manifest_flush(OpRequestRef op, ObjectContextRef obc, bo return r; } + // all clean + if (manifest_fop->io_tids.empty()) { + return 0; + } + flush_ops[obc->obs.oi.soid] = manifest_fop; return -EINPROGRESS; } @@ -2555,6 +2560,20 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush unsigned flags = CEPH_OSD_FLAG_IGNORE_CACHE | CEPH_OSD_FLAG_IGNORE_OVERLAY | CEPH_OSD_FLAG_RWORDERED; tgt_length = chunk_data.length(); + + /* + * TODO: + * set_chunk will not imply that flush eventually re-writes + * the chunk if it becomes overwritten. So, we need to remove this part + * entirely and rework the dedup procedure based on the following scenarios. + * + * 1. An external agent runs a CDC and explicitly sends set-chunk commands for + * each chunk it chooses to dedup. + * 2. The osd internally runs a CDC on the extents of the object that are not yet + * dedup'd and performs the dedup directly. 
+ * + */ + if (is_dedup_chunk(obc->obs.oi, iter->second)) { pg_pool_t::fingerprint_t fp_algo = pool.info.get_fingerprint_type(); object_t fp_oid = [&fp_algo, &chunk_data]() -> string { @@ -2572,6 +2591,18 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush }(); tgt_soid.oid = fp_oid; iter->second.oid = tgt_soid; + // skip if the same content exists in prev snap at same offset + if (obc->ssc->snapset.clones.size()) { + ObjectContextRef cobc = get_prev_clone_obc(obc); + if (cobc) { + auto c = cobc->obs.oi.manifest.chunk_map.find(iter->first); + if (c != cobc->obs.oi.manifest.chunk_map.end()) { + if (iter->second == cobc->obs.oi.manifest.chunk_map[iter->first]) { + continue; + } + } + } + } { bufferlist t; cls_cas_chunk_create_or_get_ref_op get_call; diff --git a/src/test/librados/tier_cxx.cc b/src/test/librados/tier_cxx.cc index e9a3f5d1ff4..7f4bdb3cba0 100644 --- a/src/test/librados/tier_cxx.cc +++ b/src/test/librados/tier_cxx.cc @@ -106,6 +106,51 @@ void manifest_set_chunk(Rados& cluster, librados::IoCtx& src_ioctx, completion->release(); } +#include "common/ceph_crypto.h" +using ceph::crypto::SHA1; +#include "rgw/rgw_common.h" + +void check_fp_oid_refcount(librados::IoCtx& ioctx, std::string foid, uint64_t count, + std::string fp_algo = NULL) +{ + bufferlist t; + int size = foid.length(); + if (fp_algo == "sha1") { + unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1]; + char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; + SHA1 sha1_gen; + sha1_gen.Update((const unsigned char *)foid.c_str(), size); + sha1_gen.Final(fingerprint); + buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str); + ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t); + } else if (!fp_algo.empty()) { + ceph_assert(0 == "unrecognized fingerprint algorithm"); + } + + chunk_refs_t refs; + try { + auto iter = t.cbegin(); + decode(refs, iter); + } catch (buffer::error& err) { + ASSERT_TRUE(0); + } + ASSERT_EQ(count, refs.count()); +} + +void 
do_manifest_flush(librados::Rados& cluster, librados::IoCtx& ioctx, + std::string oid, int expect_ret) +{ + ObjectReadOperation op; + op.tier_flush(); + librados::AioCompletion *completion = cluster.aio_create_completion(); + ASSERT_EQ(0, ioctx.aio_operate( + oid, completion, &op, + librados::OPERATION_IGNORE_CACHE, NULL)); + completion->wait_for_complete(); + ASSERT_EQ(expect_ret, completion->get_return_value()); + completion->release(); +} + class LibRadosTwoPoolsPP : public RadosTestPP { public: @@ -4662,6 +4707,104 @@ TEST_F(LibRadosTwoPoolsPP, ManifestCheckRefcountWhenModification) { } } +TEST_F(LibRadosTwoPoolsPP, ManifestFlushDupCount) { + // skip test if not yet octopus + if (_get_required_osd_release(cluster) < "octopus") { + cout << "cluster is not yet octopus, skipping test" << std::endl; + return; + } + + bufferlist inbl; + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(pool_name, "fingerprint_algorithm", "sha1"), + inbl, NULL, NULL)); + cluster.wait_for_latest_osdmap(); + + // create object + { + bufferlist bl; + bl.append("there hiHI"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, ioctx.operate("foo", &op)); + } + { + bufferlist bl; + bl.append("there hiHI"); + ObjectWriteOperation op; + op.write_full(bl); + ASSERT_EQ(0, cache_ioctx.operate("bar", &op)); + } + + // wait for maps to settle + cluster.wait_for_latest_osdmap(); + + // set-chunk (dedup) + manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "bar", "foo"); + // set-chunk (dedup) + manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "bar", "foo"); + // set-chunk (dedup) + manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "bar", "foo"); + + // foo head: [er] [hi] [HI] + // make dirty chunks + { + bufferlist bl; + bl.append("There hi"); + ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0)); + } + + // create a snapshot, clone + vector<uint64_t> my_snaps(1); + ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0])); + ASSERT_EQ(0, 
ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], + my_snaps)); + + // make dirty chunks + // foo head: [bb] [hi] [HI] + { + bufferlist bl; + bl.append("Thbbe hi"); + ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0)); + } + + // and another + my_snaps.resize(2); + my_snaps[1] = my_snaps[0]; + ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0])); + ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0], + my_snaps)); + + // make dirty chunks + // foo head: [bb] [hi] [HI] + { + bufferlist bl; + bl.append("Thbbe hi"); + ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0)); + } + + // foo snap[1]: [er] [hi] [HI] + // foo snap[0]: [bb] [hi] [HI] + // foo head : [bb] [hi] [HI] + + // flush on oldest snap + ioctx.snap_set_read(my_snaps[1]); + do_manifest_flush(cluster, ioctx, "foo", 0); + + // flush on the newer snap + ioctx.snap_set_read(my_snaps[0]); + do_manifest_flush(cluster, ioctx, "foo", 0); + + ioctx.snap_set_read(librados::SNAP_HEAD); + do_manifest_flush(cluster, ioctx, "foo", 0); + + // check chunk's refcount + check_fp_oid_refcount(cache_ioctx, "hi", 1u, "sha1"); + + // check chunk's refcount + check_fp_oid_refcount(cache_ioctx, "bb", 1u, "sha1"); +} + class LibRadosTwoPoolsECPP : public RadosTestECPP { public: