From: myoungwon oh Date: Tue, 17 Nov 2020 04:41:58 +0000 (+0900) Subject: osd: make start_dedup() and do_cdc() simple with removing unnecessary copy X-Git-Tag: v16.1.0~374^2~11 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=84ce5e83d9e48b35ffd87091cb572bdbbb0e20aa;p=ceph.git osd: make start_dedup() and do_cdc() simple with removing unnecessary copy Signed-off-by: Myoungwon Oh --- diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 12b8f7d19001..73b59d05a31f 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -10031,8 +10031,8 @@ int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc) while (cur_off < oi.size) { // cdc - vector<pair<uint64_t, uint64_t>> chunks; - int r = do_cdc(oi, bl, chunks); + std::map<uint64_t, bufferlist> chunks; + int r = do_cdc(oi, cur_off, mop->new_manifest.chunk_map, chunks); if (r < 0) { return r; } @@ -10040,40 +10040,32 @@ int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc) break; } - // get fingerprint + // chunks issued here are different with chunk_map newly generated + // because the same chunks in previous snap will not be issued + // So, we need two data structures; the first is the issued chunk list to track + // issued operations, and the second is the new chunk_map to update chunk_map after + // all operations are finished + object_ref_delta_t refs; + ObjectContextRef obc_l, obc_g; + get_adjacent_clones(obc, obc_l, obc_g); + // skip if the same content exits in prev snap at same offset + mop->new_manifest.calc_refs_to_inc_on_set( + obc_l ? &(obc_l->obs.oi.manifest) : nullptr, + obc_g ? 
&(obc_g->obs.oi.manifest) : nullptr, + refs, + chunks.begin()->first); // to avoid unnecessary search + for (auto p : chunks) { - bufferlist chunk; - chunk.substr_of(bl, p.first, p.second); - hobject_t target = get_fpoid_from_chunk(soid, chunk); - - chunk_info_t chunk_info(0, p.second, target); - // chunks issued here are different with chunk_map newly generated - // because the same chunks in previous snap will not be issued - // So, we need two data structures; the first is the issued chunk list to track - // issued operations, and the second is the new chunk_map to update chunk_map after - // all operations are finished - mop->new_chunk_map[p.first] = chunk_info; - // skip if the same content exits in prev snap at same offset - object_ref_delta_t refs; - ObjectContextRef obc_l, obc_g; - object_manifest_t set_chunk; - set_chunk.chunk_map[p.first] = chunk_info; - get_adjacent_clones(obc, obc_l, obc_g); - set_chunk.calc_refs_to_inc_on_set( - obc_l ? &(obc_l->obs.oi.manifest) : nullptr, - obc_g ? 
&(obc_g->obs.oi.manifest) : nullptr, - refs); - if (refs.is_empty()) { - dout(15) << " found same chunk " << refs << dendl; + hobject_t target = mop->new_manifest.chunk_map[p.first].oid; + if (refs.find(target) == refs.end()) { continue; - } - + } // make a create_or_get_ref op bufferlist t; ObjectOperation obj_op; cls_cas_chunk_create_or_get_ref_op get_call; get_call.source = soid.get_head(); - get_call.data = chunk; + get_call.data = chunks[p.first]; ::encode(get_call, t); obj_op.call("cas", "chunk_create_or_get_ref", t); @@ -10088,14 +10080,13 @@ int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc) flags, new C_OnFinisher(fin, osd->get_objecter_finisher(get_pg_shard()))); fin->tid = tid; mop->tids[p.first] = tid; - mop->chunks[target] = make_pair(p.first, p.second); + mop->chunks[target] = make_pair(p.first, p.second.length()); mop->num_chunks++; dout(10) << __func__ << " oid: " << soid << " tid: " << tid << " target: " << target << " offset: " << p.first - << " length: " << p.second << dendl; + << " length: " << p.second.length() << dendl; } - - cur_off += bl.length(); + cur_off += r; } if (mop->tids.size()) { @@ -10109,46 +10100,62 @@ int PrimaryLogPG::start_dedup(OpRequestRef op, ObjectContextRef obc) return -EINPROGRESS; } -int PrimaryLogPG::do_cdc(const object_info_t& oi, bufferlist& bl, vector<pair<uint64_t, uint64_t>>& chunks) +int PrimaryLogPG::do_cdc(const object_info_t& oi, uint64_t off, + std::map<uint64_t, chunk_info_t>& chunk_map, + std::map<uint64_t, bufferlist>& chunks) { - uint64_t cur_off = 0; + uint64_t cur_off = off; string chunk_algo = pool.info.get_dedup_chunk_algorithm_name(); int64_t chunk_size = pool.info.get_dedup_cdc_chunk_size(); uint64_t max_window_size = static_cast<uint64_t>(pool.info.get_dedup_cdc_window_size()); + uint64_t total_length = 0; std::unique_ptr<CDC> cdc = CDC::create(chunk_algo, cbits(chunk_size)-1); if (!cdc) { dout(0) << __func__ << " unrecognized chunk-algorithm " << dendl; return -EINVAL; } - while (cur_off < oi.size && cur_off < max_window_size) { - bufferlist chunk_data; - /** - * 
We disable EC pool as a base tier of distributed dedup. - * The reason why we disallow erasure code pool here is that the EC pool does not support objects_read_sync(). - * Therefore, we should change the current implementation totally to make EC pool compatible. - * As s result, we leave this as a future work. - */ - int r = pgbackend->objects_read_sync( - oi.soid, cur_off, max_window_size, 0, &chunk_data); - if (r < 0) { - dout(0) << __func__ << " read fail " << " offset: " << cur_off - << " len: " << max_window_size << " r: " << r << dendl; - return r; - } - if (chunk_data.length() == 0) { - dout(0) << __func__ << " got 0 byte during chunking " << dendl; - return r; - } - bl.append(chunk_data); - cur_off += r; + + if (cur_off >= oi.size) { + return -ERANGE; + } + bufferlist bl; + /** + * We disable EC pool as a base tier of distributed dedup. + * The reason why we disallow erasure code pool here is that the EC pool does not support objects_read_sync(). + * Therefore, we should change the current implementation totally to make EC pool compatible. + * As s result, we leave this as a future work. 
+ */ + int r = pgbackend->objects_read_sync( + oi.soid, cur_off, max_window_size, 0, &bl); + if (r < 0) { + dout(0) << __func__ << " read fail " << " offset: " << cur_off + << " len: " << max_window_size << " r: " << r << dendl; + return r; + } + if (bl.length() == 0) { + dout(0) << __func__ << " got 0 byte during chunking " << dendl; + return r; } dout(10) << __func__ << " oid: " << oi.soid << " len: " << bl.length() << " oi.size: " << oi.size << " window_size: " << max_window_size << " chunk_size: " << chunk_size << dendl; - cdc->calc_chunks(bl, &chunks); - return 0; + + vector<pair<uint64_t, uint64_t>> cdc_chunks; + cdc->calc_chunks(bl, &cdc_chunks); + + // get fingerprint + for (auto p : cdc_chunks) { + bufferlist chunk; + uint64_t object_offset = off + p.first; + chunk.substr_of(bl, p.first, p.second); + hobject_t target = get_fpoid_from_chunk(oi.soid, chunk); + chunks[object_offset] = move(chunk); + chunk_map[object_offset] = chunk_info_t(0, p.second, target); + total_length += p.second; + } + return total_length; } hobject_t PrimaryLogPG::get_fpoid_from_chunk(const hobject_t soid, bufferlist& chunk) @@ -10275,7 +10282,7 @@ void PrimaryLogPG::finish_set_dedup(hobject_t oid, int r, ceph_tid_t tid, uint64 ctx->new_obs.oi.manifest.chunk_map.clear(); // set new references - ctx->new_obs.oi.manifest.chunk_map = mop->new_chunk_map; + ctx->new_obs.oi.manifest.chunk_map = mop->new_manifest.chunk_map; finish_ctx(ctx.get(), pg_log_entry_t::CLEAN); simple_opc_submit(std::move(ctx)); diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 956474a96672..a2191b3548fe 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -265,7 +265,7 @@ public: std::map<uint64_t, ceph_tid_t> tids; std::map<hobject_t, pair<uint64_t, uint64_t>> chunks; uint64_t num_chunks = 0; - std::map<uint64_t, chunk_info_t> new_chunk_map; + object_manifest_t new_manifest; ManifestOp(RefCountCallback* cb, ceph_tid_t tid) @@ -1511,7 +1511,8 @@ protected: ObjectContextRef& _l, ObjectContextRef& _g); bool inc_refcount_by_set(OpContext* ctx, object_manifest_t& tgt, OSDOp& osd_op); - int 
do_cdc(const object_info_t& oi, bufferlist& bl, vector<pair<uint64_t, uint64_t>>& chunks); + int do_cdc(const object_info_t& oi, uint64_t off, std::map<uint64_t, chunk_info_t>& chunk_map, + std::map<uint64_t, bufferlist>& chunks); int start_dedup(OpRequestRef op, ObjectContextRef obc); hobject_t get_fpoid_from_chunk(const hobject_t soid, bufferlist& chunk); void finish_set_dedup(hobject_t oid, int r, ceph_tid_t tid, uint64_t offset); diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 5e3f7abdba44..c598d3f2a0b7 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -5928,10 +5928,11 @@ std::ostream& operator<<(std::ostream& out, const object_ref_delta_t & ci) void object_manifest_t::calc_refs_to_inc_on_set( const object_manifest_t* _g, const object_manifest_t* _l, - object_ref_delta_t &refs) const + object_ref_delta_t &refs, + uint64_t start) const { /* avoid to increment the same reference on adjacent clones */ - auto iter = chunk_map.begin(); + auto iter = start ? chunk_map.find(start) : chunk_map.begin(); auto find_chunk = [](decltype(iter) &i, const object_manifest_t* cur) -> bool { if (cur) { diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c82b0d7d6149..1feeb0a3408b 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -5542,6 +5542,7 @@ public: auto begin() const { return ref_delta.begin(); } auto end() const { return ref_delta.end(); } + auto find(hobject_t &key) const { return ref_delta.find(key); } bool operator==(const object_ref_delta_t &rhs) const { return ref_delta == rhs.ref_delta; @@ -5686,7 +5687,8 @@ struct object_manifest_t { void calc_refs_to_inc_on_set( const object_manifest_t* g, ///< [in] manifest for clone > *this const object_manifest_t* l, ///< [in] manifest for clone < *this - object_ref_delta_t &delta ///< [out] set of refs to drop + object_ref_delta_t &delta, ///< [out] set of refs to drop + uint64_t start = 0 ///< [in] start position ) const; /**