From cee688c49ef46ef24a868d86ffb9d71c7082a771 Mon Sep 17 00:00:00 2001 From: myoungwon oh Date: Thu, 19 Nov 2020 17:46:05 +0900 Subject: [PATCH] osd,test: remove cdc_window_size entirely To avoid miscalculation caused by boundary mismatch, we prefer to read whole object. Signed-off-by: Myoungwon Oh --- src/mon/MonCommands.h | 4 +- src/mon/OSDMonitor.cc | 10 +--- src/osd/PrimaryLogPG.cc | 99 ++++++++++++++++------------------- src/osd/PrimaryLogPG.h | 2 +- src/osd/osd_types.cc | 9 ++-- src/osd/osd_types.h | 9 +--- src/test/librados/tier_cxx.cc | 27 +--------- 7 files changed, 54 insertions(+), 106 deletions(-) diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 93d315b63495f..83f007477c0d8 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -1090,11 +1090,11 @@ COMMAND("osd pool rename " "rename to ", "osd", "rw") COMMAND("osd pool get " "name=pool,type=CephPoolname " - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_window_size|dedup_cdc_chunk_size", + "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_objects|target_max_bytes|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|erasure_code_profile|min_read_recency_for_promote|all|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size", "get pool parameter ", "osd", "r") COMMAND("osd pool set " "name=pool,type=CephPoolname " - "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_window_size|dedup_cdc_chunk_size " + "name=var,type=CephChoices,strings=size|min_size|pg_num|pgp_num|pgp_num_actual|crush_rule|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites|fingerprint_algorithm|pg_autoscale_mode|pg_autoscale_bias|pg_num_min|target_size_bytes|target_size_ratio|dedup_tier|dedup_chunk_algorithm|dedup_cdc_chunk_size " "name=val,type=CephString " "name=yes_i_really_mean_it,type=CephBool,req=false", "set pool parameter to ", "osd", "rw") diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 3fe9cf37c9e7b..c74b209814b08 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -5373,7 +5373,7 @@ namespace { COMPRESSION_MAX_BLOB_SIZE, COMPRESSION_MIN_BLOB_SIZE, CSUM_TYPE, CSUM_MAX_BLOCK, CSUM_MIN_BLOCK, FINGERPRINT_ALGORITHM, PG_AUTOSCALE_MODE, PG_NUM_MIN, TARGET_SIZE_BYTES, TARGET_SIZE_RATIO, - PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM, DEDUP_CDC_WINDOW_SIZE, + PG_AUTOSCALE_BIAS, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM, DEDUP_CDC_CHUNK_SIZE }; std::set @@ -6111,7 +6111,6 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) {"pg_autoscale_bias", PG_AUTOSCALE_BIAS}, {"dedup_tier", DEDUP_TIER}, {"dedup_chunk_algorithm", DEDUP_CHUNK_ALGORITHM}, - {"dedup_cdc_window_size", DEDUP_CDC_WINDOW_SIZE}, {"dedup_cdc_chunk_size", DEDUP_CDC_CHUNK_SIZE}, }; @@ -6331,7 +6330,6 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) case PG_AUTOSCALE_BIAS: case DEDUP_TIER: case DEDUP_CHUNK_ALGORITHM: - case DEDUP_CDC_WINDOW_SIZE: case DEDUP_CDC_CHUNK_SIZE: pool_opts_t::key_t key = pool_opts_t::get_opt_desc(i->first).key; if (p->opts.is_set(key)) { @@ -6492,7 +6490,6 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) case PG_AUTOSCALE_BIAS: case DEDUP_TIER: case DEDUP_CHUNK_ALGORITHM: - case DEDUP_CDC_WINDOW_SIZE: case DEDUP_CDC_CHUNK_SIZE: for (i = ALL_CHOICES.begin(); i != ALL_CHOICES.end(); ++i) { if (i->second == *it) @@ -8691,11 +8688,6 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap, return -EINVAL; } } - } else if (var == "dedup_cdc_window_size") { - if (interr.length()) { - ss << "error parsing int value '" << val << "': " << interr; - return -EINVAL; - } } else if (var == "dedup_cdc_chunk_size") { if (interr.length()) { ss << "error parsing int value '" << val << "': " << interr; diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index a5841b29c6ad0..4adff0a769b28 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -10025,7 +10025,6 @@ struct C_Flush : public Context { int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc) { bufferlist bl; - uint64_t cur_off = 0; const object_info_t& oi = obc->obs.oi; const hobject_t& soid = oi.soid; @@ -10045,49 +10044,45 @@ int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc) */ ManifestOpRef mop(std::make_shared(nullptr, 0)); - while (cur_off < oi.size) { - // cdc - std::map chunks; - int r = do_cdc(oi, cur_off, mop->new_manifest.chunk_map, chunks); - if (r < 0) { - return r; - } - if (!chunks.size()) { - break; - } + // cdc + std::map chunks; + int r = do_cdc(oi, mop->new_manifest.chunk_map, chunks); + if (r < 0) { + return r; + } + if (!chunks.size()) { + return 0; + } - // chunks issued here are different with chunk_map newly generated - // because the same chunks in previous snap will not be issued - // So, we need two data structures; the first is the issued chunk list to track - // issued operations, and the second is the new chunk_map to update chunk_map after - // all operations are finished - object_ref_delta_t refs; - ObjectContextRef obc_l, obc_g; - get_adjacent_clones(obc, obc_l, obc_g); - // skip if the same content exits in prev snap at same offset - mop->new_manifest.calc_refs_to_inc_on_set( - obc_l ? &(obc_l->obs.oi.manifest) : nullptr, - obc_g ? &(obc_g->obs.oi.manifest) : nullptr, - refs, - chunks.begin()->first); // to avoid unnecessary search + // chunks issued here are different with chunk_map newly generated + // because the same chunks in previous snap will not be issued + // So, we need two data structures; the first is the issued chunk list to track + // issued operations, and the second is the new chunk_map to update chunk_map after + // all operations are finished + object_ref_delta_t refs; + ObjectContextRef obc_l, obc_g; + get_adjacent_clones(obc, obc_l, obc_g); + // skip if the same content exits in prev snap at same offset + mop->new_manifest.calc_refs_to_inc_on_set( + obc_l ? &(obc_l->obs.oi.manifest) : nullptr, + obc_g ? &(obc_g->obs.oi.manifest) : nullptr, + refs); - for (auto p : chunks) { - hobject_t target = mop->new_manifest.chunk_map[p.first].oid; - if (refs.find(target) == refs.end()) { - continue; - } - C_SetDedupChunks *fin = new C_SetDedupChunks(this, soid, get_last_peering_reset(), p.first); - ceph_tid_t tid = refcount_manifest(soid, target, refcount_t::CREATE_OR_GET_REF, - fin, move(chunks[p.first])); - mop->chunks[target] = make_pair(p.first, p.second.length()); - mop->num_chunks++; - mop->tids[p.first] = tid; - fin->tid = tid; - dout(10) << __func__ << " oid: " << soid << " tid: " << tid - << " target: " << target << " offset: " << p.first - << " length: " << p.second.length() << dendl; + for (auto p : chunks) { + hobject_t target = mop->new_manifest.chunk_map[p.first].oid; + if (refs.find(target) == refs.end()) { + continue; } - cur_off += r; + C_SetDedupChunks *fin = new C_SetDedupChunks(this, soid, get_last_peering_reset(), p.first); + ceph_tid_t tid = refcount_manifest(soid, target, refcount_t::CREATE_OR_GET_REF, + fin, move(chunks[p.first])); + mop->chunks[target] = make_pair(p.first, p.second.length()); + mop->num_chunks++; + mop->tids[p.first] = tid; + fin->tid = tid; + dout(10) << __func__ << " oid: " << soid << " tid: " << tid + << " target: " << target << " offset: " << p.first + << " length: " << p.second.length() << dendl; } if (mop->tids.size()) { @@ -10101,14 +10096,12 @@ int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc) return -EINPROGRESS; } -int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off, +int PrimaryLogPG::do_cdc(const object_info_t& oi, std::map& chunk_map, std::map& chunks) { - uint64_t cur_off = off; string chunk_algo = pool.info.get_dedup_chunk_algorithm_name(); int64_t chunk_size = pool.info.get_dedup_cdc_chunk_size(); - uint64_t max_window_size = static_cast(pool.info.get_dedup_cdc_window_size()); uint64_t total_length = 0; std::unique_ptr cdc = CDC::create(chunk_algo, cbits(chunk_size)-1); @@ -10117,9 +10110,6 @@ int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off, return -EINVAL; } - if (cur_off >= oi.size) { - return -ERANGE; - } bufferlist bl; /** * We disable EC pool as a base tier of distributed dedup. @@ -10128,10 +10118,10 @@ int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off, * As s result, we leave this as a future work. */ int r = pgbackend->objects_read_sync( - oi.soid, cur_off, max_window_size, 0, &bl); + oi.soid, 0, oi.size, 0, &bl); if (r < 0) { - dout(0) << __func__ << " read fail " << " offset: " << cur_off - << " len: " << max_window_size << " r: " << r << dendl; + dout(0) << __func__ << " read fail " << oi.soid + << " len: " << oi.size << " r: " << r << dendl; return r; } if (bl.length() == 0) { @@ -10140,8 +10130,8 @@ int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off, } dout(10) << __func__ << " oid: " << oi.soid << " len: " << bl.length() - << " oi.size: " << oi.size << " window_size: " << max_window_size - << " chunk_size: " << chunk_size << dendl; + << " oi.size: " << oi.size + << " chunk_size: " << chunk_size << dendl; vector> cdc_chunks; cdc->calc_chunks(bl, &cdc_chunks); @@ -10149,11 +10139,10 @@ int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off, // get fingerprint for (auto p : cdc_chunks) { bufferlist chunk; - uint64_t object_offset = off + p.first; chunk.substr_of(bl, p.first, p.second); hobject_t target = get_fpoid_from_chunk(oi.soid, chunk); - chunks[object_offset] = move(chunk); - chunk_map[object_offset] = chunk_info_t(0, p.second, target); + chunks[p.first] = move(chunk); + chunk_map[p.first] = chunk_info_t(0, p.second, target); total_length += p.second; } return total_length; diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 80b373b5b3a34..8ce17ba437e84 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -1512,7 +1512,7 @@ protected: ObjectContextRef& _l, ObjectContextRef& _g); bool inc_refcount_by_set(OpContext* ctx, object_manifest_t& tgt, OSDOp& osd_op); - int do_cdc(const object_info_t& oi, uint64_t off, std::map& chunk_map, + int do_cdc(const object_info_t& oi, std::map& chunk_map, std::map& chunks); int start_dedup(OpRequestRef op, ObjectContextRef obc); hobject_t get_fpoid_from_chunk(const hobject_t soid, bufferlist& chunk); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index c598d3f2a0b7f..677730ca7b855 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -1360,9 +1360,7 @@ static opt_mapping_t opt_mapping = boost::assign::map_list_of ("dedup_chunk_algorithm", pool_opts_t::opt_desc_t( pool_opts_t::DEDUP_CHUNK_ALGORITHM, pool_opts_t::STR)) ("dedup_cdc_chunk_size", pool_opts_t::opt_desc_t( - pool_opts_t::DEDUP_CDC_CHUNK_SIZE, pool_opts_t::INT)) - ("dedup_cdc_window_size", pool_opts_t::opt_desc_t( - pool_opts_t::DEDUP_CDC_WINDOW_SIZE, pool_opts_t::INT)); + pool_opts_t::DEDUP_CDC_CHUNK_SIZE, pool_opts_t::INT)); bool pool_opts_t::is_opt_name(const std::string& name) { @@ -5928,11 +5926,10 @@ std::ostream& operator<<(std::ostream& out, const object_ref_delta_t & ci) void object_manifest_t::calc_refs_to_inc_on_set( const object_manifest_t* _g, const object_manifest_t* _l, - object_ref_delta_t &refs, - uint64_t start) const + object_ref_delta_t &refs) const { /* avoid to increment the same reference on adjacent clones */ - auto iter = start ? chunk_map.find(start) : chunk_map.begin(); + auto iter = chunk_map.begin(); auto find_chunk = [](decltype(iter) &i, const object_manifest_t* cur) -> bool { if (cur) { diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 1feeb0a3408be..2883227ecdd37 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -1062,7 +1062,6 @@ public: READ_LEASE_INTERVAL, DEDUP_TIER, DEDUP_CHUNK_ALGORITHM, - DEDUP_CDC_WINDOW_SIZE, DEDUP_CDC_CHUNK_SIZE, }; @@ -1594,11 +1593,6 @@ public: opts.get(pool_opts_t::DEDUP_CDC_CHUNK_SIZE, &chunk_size); return chunk_size; } - int64_t get_dedup_cdc_window_size() const { - int64_t window_size; - opts.get(pool_opts_t::DEDUP_CDC_WINDOW_SIZE, &window_size); - return window_size; - } /// application -> key/value metadata std::map> application_metadata; @@ -5687,8 +5681,7 @@ struct object_manifest_t { void calc_refs_to_inc_on_set( const object_manifest_t* g, ///< [in] manifest for clone > *this const object_manifest_t* l, ///< [in] manifest for clone < *this - object_ref_delta_t &delta, ///< [out] set of refs to drop - uint64_t start = 0 ///< [in] start position + object_ref_delta_t &delta ///< [out] set of refs to drop ) const; /** diff --git a/src/test/librados/tier_cxx.cc b/src/test/librados/tier_cxx.cc index 49573b9890e6a..5dd09f1d2c50d 100644 --- a/src/test/librados/tier_cxx.cc +++ b/src/test/librados/tier_cxx.cc @@ -4664,9 +4664,6 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) { ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"), inbl, NULL, NULL)); - ASSERT_EQ(0, cluster.mon_command( - set_pool_str(cache_pool_name, "dedup_cdc_window_size", 8192), - inbl, NULL, NULL)); ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), inbl, NULL, NULL)); @@ -4739,9 +4736,6 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) { ASSERT_EQ(test_bl[1], chunk[1]); } - ASSERT_EQ(0, cluster.mon_command( - set_pool_str(cache_pool_name, "dedup_cdc_window_size", 512), - inbl, NULL, NULL)); ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 512), inbl, NULL, NULL)); @@ -4769,11 +4763,9 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) { cdc = CDC::create("fastcdc", cbits(512)-1); chunks.clear(); - bufferlist chunk_target; - chunk_target.substr_of(gbl, 512, 512); // calculate based on window size - cdc->calc_chunks(chunk_target, &chunks); + cdc->calc_chunks(gbl, &chunks); bufferlist chunk_512; - chunk_512.substr_of(gbl, chunks[0].first + 512, chunks[0].second); + chunk_512.substr_of(gbl, chunks[3].first, chunks[3].second); { unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0}; char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0}; @@ -4792,9 +4784,6 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) { ASSERT_EQ(test_bl[1], chunk_512[1]); } - ASSERT_EQ(0, cluster.mon_command( - set_pool_str(cache_pool_name, "dedup_cdc_window_size", 16384), - inbl, NULL, NULL)); ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 16384), inbl, NULL, NULL)); @@ -4843,9 +4832,6 @@ TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) { } // less than object size - ASSERT_EQ(0, cluster.mon_command( - set_pool_str(cache_pool_name, "dedup_cdc_window_size", 4096), - inbl, NULL, NULL)); ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), inbl, NULL, NULL)); @@ -4933,9 +4919,6 @@ TEST_F(LibRadosTwoPoolsPP, ManifestFlushSnap) { ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"), inbl, NULL, NULL)); - ASSERT_EQ(0, cluster.mon_command( - set_pool_str(cache_pool_name, "dedup_cdc_window_size", 8192), - inbl, NULL, NULL)); ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), inbl, NULL, NULL)); @@ -5118,9 +5101,6 @@ TEST_F(LibRadosTwoPoolsPP, ManifestFlushDupCount) { ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"), inbl, NULL, NULL)); - ASSERT_EQ(0, cluster.mon_command( - set_pool_str(cache_pool_name, "dedup_cdc_window_size", 8192), - inbl, NULL, NULL)); ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), inbl, NULL, NULL)); @@ -5344,9 +5324,6 @@ TEST_F(LibRadosTwoPoolsPP, TierFlushDuringFlush) { ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"), inbl, NULL, NULL)); - ASSERT_EQ(0, cluster.mon_command( - set_pool_str(cache_pool_name, "dedup_cdc_window_size", 8192), - inbl, NULL, NULL)); ASSERT_EQ(0, cluster.mon_command( set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024), inbl, NULL, NULL)); -- 2.39.5