From: myoungwon oh Date: Wed, 24 Jul 2019 16:37:37 +0000 (+0900) Subject: osd: add refcounting chunks for snapshotted manifest object X-Git-Tag: v16.1.0~1738^2~12 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=51719f689525ab07e54e159fb7e80baceccbf1d7;p=ceph.git osd: add refcounting chunks for snapshotted manifest object To prevent removing a manifest object in use (has the reference count), the chunk maps in clone is introduced. Signed-off-by: Myoungwon Oh --- diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index 9738568bad57..1746ea8f1ec0 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2588,6 +2588,7 @@ int PrimaryLogPG::do_manifest_flush(OpRequestRef op, ObjectContextRef obc, Flush }(); bufferlist in; if (fp_oid != tgt_soid.oid) { + // TODO: don't retain reference to dirty extents // decrement old chunk's reference count ObjectOperation dec_op; cls_cas_chunk_put_ref_op put_call; @@ -3457,24 +3458,95 @@ void PrimaryLogPG::cancel_manifest_ops(bool requeue, vector *tids) } } -void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, object_locator_t oloc, hobject_t soid, - SnapContext snapc, bool get, RefCountCallback *cb, uint64_t offset) +void PrimaryLogPG::dec_refcount_non_intersection(ObjectContextRef obc, const object_info_t& oi, + set intersection_set) +{ + for (auto c : oi.manifest.chunk_map) { + auto iter = intersection_set.find(c.first); + if (intersection_set.end() == iter) { + dout(10) << __func__ << " need dereference. " << " offset: " << c.first + << " length: " << c.second.length << " oid: " << c.second.oid << dendl; + object_locator_t target_oloc(c.second.oid); + refcount_manifest(obc, obc->obs.oi.soid, target_oloc, c.second.oid, + SnapContext(), refcount_t::DECREMENT_REF, NULL); + } + } +} + +void PrimaryLogPG::dec_all_refcount_head_manifest(object_info_t& oi, OpContext* ctx) +{ + SnapSetContext* ssc = ctx->obc->ssc; + ceph_assert(oi.has_manifest()); + ceph_assert(oi.soid.is_head()); + // has snapshot + if (ssc && ssc->snapset.clones.size() > 0) { + dout(15) << __func__ << " has snapset " << dendl; + interval_set &newest_overlap = + ssc->snapset.clone_overlap.rbegin()->second; + set refs; + ceph_assert(oi.manifest.is_chunked()); + ceph_assert(!oi.manifest.is_redirect()); + + hobject_t clone_oid = oi.soid; + clone_oid.snap = ssc->snapset.clone_overlap.rbegin()->first; + ObjectContextRef cobc = get_object_context(clone_oid, false, NULL); + if (!cobc) { + dout(0) << __func__ << ": Can not find clone obc " << clone_oid << dendl;; + return; + } + object_info_t& coi = cobc->obs.oi; + oi.manifest.build_intersection_set(coi.manifest.chunk_map, refs, &newest_overlap); + + ctx->register_on_commit( + [oi, ctx, this, refs](){ + dec_refcount_non_intersection(ctx->obc, oi, refs); + }); + return; + } + + // no snapshot + if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) && oi.manifest.is_redirect()) { + ctx->register_on_commit( + [oi, ctx, this](){ + object_locator_t target_oloc(oi.manifest.redirect_target); + refcount_manifest(ctx->obc, oi.soid, target_oloc, oi.manifest.redirect_target, + SnapContext(), refcount_t::DECREMENT_REF, NULL); + }); + } else if (oi.manifest.is_chunked()) { + ctx->register_on_commit( + [oi, ctx, this](){ + for (auto p : oi.manifest.chunk_map) { + if (p.second.has_reference()) { + object_locator_t target_oloc(p.second.oid); + refcount_manifest(ctx->obc, oi.soid, target_oloc, p.second.oid, + SnapContext(), refcount_t::DECREMENT_REF, NULL); + } + } + }); + } else { + ceph_abort_msg("unrecognized manifest type"); + } +} + +void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, hobject_t src_soid, object_locator_t oloc, + hobject_t tgt_soid, SnapContext snapc, refcount_t type, RefCountCallback* cb) { unsigned flags = CEPH_OSD_FLAG_IGNORE_CACHE | CEPH_OSD_FLAG_IGNORE_OVERLAY | CEPH_OSD_FLAG_RWORDERED; - dout(10) << __func__ << " Start refcount for " << soid << dendl; + dout(10) << __func__ << " Start refcount from " << src_soid + << " to " << tgt_soid << dendl; ObjectOperation obj_op; bufferlist in; - if (get) { + if (type == refcount_t::INCREMENT_REF) { cls_cas_chunk_get_ref_op call; - call.source = obc->obs.oi.soid; + call.source = src_soid; ::encode(call, in); obj_op.call("cas", "chunk_get_ref", in); - } else { + } else if (type == refcount_t::DECREMENT_REF) { cls_cas_chunk_put_ref_op call; - call.source = obc->obs.oi.soid; + call.source = src_soid; ::encode(call, in); obj_op.call("cas", "chunk_put_ref", in); } @@ -3487,7 +3559,7 @@ void PrimaryLogPG::refcount_manifest(ObjectContextRef obc, object_locator_t oloc } auto tid = osd->objecter->mutate( - soid.oid, oloc, obj_op, snapc, + tgt_soid.oid, oloc, obj_op, snapc, ceph::real_clock::from_ceph_timespec(obc->obs.oi.mtime), flags, c); if (cb) { @@ -4449,8 +4521,31 @@ int PrimaryLogPG::trim_object( ctx->delta_stats.num_object_clones--; if (coi.is_cache_pinned()) ctx->delta_stats.num_objects_pinned--; - if (coi.has_manifest()) + if (coi.has_manifest()) { + set refs; + for (auto p : snapset.clones) { + hobject_t clone_oid = coid; + if (clone_oid.snap == p) { + continue; + } + clone_oid.snap = p; + ObjectContextRef cobc = get_object_context(clone_oid, false, NULL); + if (!cobc) { + close_op_ctx(ctx.release()); + dout(0) << __func__ << ": Can not find obc " << coid << dendl;; + return -ENOENT; + } + + // check if the references is still used + object_info_t& oi = cobc->obs.oi; + if (oi.has_manifest()) { + coi.manifest.build_intersection_set(oi.manifest.chunk_map, refs, NULL); + } + } + + dec_refcount_non_intersection(head_obc, coi, refs); ctx->delta_stats.num_objects_manifest--; + } obc->obs.exists = false; snapset.clones.erase(p); @@ -4549,8 +4644,10 @@ int PrimaryLogPG::trim_object( if (oi.is_cache_pinned()) { ctx->delta_stats.num_objects_pinned--; } - if (coi.has_manifest()) + if (oi.has_manifest()) { ctx->delta_stats.num_objects_manifest--; + dec_all_refcount_head_manifest(oi, ctx.get()); + } head_obc->obs.exists = false; head_obc->obs.oi = object_info_t(head_oid); t->remove(head_oid); @@ -6637,27 +6734,6 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) result = 0; tracepoint(osd, do_osd_op_pre_delete, soid.oid.name.c_str(), soid.snap.val); { - if (oi.has_manifest()) { - if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) && oi.manifest.is_redirect()) { - ctx->register_on_commit( - [oi, ctx, this](){ - object_locator_t target_oloc(oi.manifest.redirect_target); - refcount_manifest(ctx->obc, target_oloc, oi.manifest.redirect_target, - SnapContext(), false, NULL, 0); - }); - } else if (oi.manifest.is_chunked()) { - ctx->register_on_commit( - [oi, ctx, this](){ - for (auto p : oi.manifest.chunk_map) { - if (p.second.has_reference()) { - object_locator_t target_oloc(p.second.oid); - refcount_manifest(ctx->obc, target_oloc, p.second.oid, - SnapContext(), false, NULL, p.first); - } - } - }); - } - } result = _delete_oid(ctx, false, ctx->ignore_cache); } break; @@ -6847,8 +6923,8 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) ctx->op_finishers[ctx->current_osd_subop_num].reset( new SetManifestFinisher(osd_op)); RefCountCallback *fin = new RefCountCallback(ctx, osd_op); - refcount_manifest(ctx->obc, target_oloc, target, SnapContext(), - true, fin, 0); + refcount_manifest(ctx->obc, ctx->obc->obs.oi.soid, target_oloc, target, + SnapContext(), refcount_t::INCREMENT_REF, fin); result = -EINPROGRESS; } else { // finish @@ -6918,6 +6994,10 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) result = -EOPNOTSUPP; break; } + if (soid.has_snapset()) { + result = -EOPNOTSUPP; + break; + } object_locator_t tgt_oloc; uint64_t src_offset, src_length, tgt_offset; @@ -6972,8 +7052,8 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) ctx->op_finishers[ctx->current_osd_subop_num].reset( new SetManifestFinisher(osd_op)); RefCountCallback *fin = new RefCountCallback(ctx, osd_op); - refcount_manifest(ctx->obc, tgt_oloc, target, SnapContext(), - true, fin, src_offset); + refcount_manifest(ctx->obc, ctx->obc->obs.oi.soid, tgt_oloc, target, + SnapContext(), refcount_t::INCREMENT_REF, fin); result = -EINPROGRESS; } else { if (op_finisher) { @@ -6985,7 +7065,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) chunk_info.set_flag(chunk_info_t::FLAG_MISSING); chunk_info.oid = target; chunk_info.offset = tgt_offset; - chunk_info.length= src_length; + chunk_info.length = src_length; oi.manifest.chunk_map[src_offset] = chunk_info; if (!oi.has_manifest() && !oi.manifest.is_chunked()) ctx->delta_stats.num_objects_manifest++; @@ -7037,15 +7117,14 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) if (obs.oi.manifest.is_chunked()) { src_hoid = obs.oi.soid; - cb = new PromoteManifestCallback(ctx->obc, this, ctx); } else if (obs.oi.manifest.is_redirect()) { object_locator_t src_oloc(obs.oi.manifest.redirect_target); my_oloc = src_oloc; src_hoid = obs.oi.manifest.redirect_target; - cb = new PromoteManifestCallback(ctx->obc, this, ctx); } else { ceph_abort_msg("unrecognized manifest type"); } + cb = new PromoteManifestCallback(ctx->obc, this, ctx); ctx->op_finishers[ctx->current_osd_subop_num].reset( new PromoteFinisher(cb)); unsigned flags = CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY | @@ -7130,29 +7209,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector& ops) break; } - if (oi.manifest.is_redirect()) { - if ((oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE)) { - ctx->register_on_commit( - [oi, ctx, this](){ - object_locator_t target_oloc(oi.manifest.redirect_target); - refcount_manifest(ctx->obc, target_oloc, oi.manifest.redirect_target, - SnapContext(), false, NULL, 0); - }); - } - } else if (oi.manifest.is_chunked()) { - ctx->register_on_commit( - [oi, ctx, this](){ - for (auto p : oi.manifest.chunk_map) { - if (p.second.flags & chunk_info_t::FLAG_HAS_REFERENCE) { - object_locator_t target_oloc(p.second.oid); - refcount_manifest(ctx->obc, target_oloc, p.second.oid, - SnapContext(), false, NULL, p.first); - } - } - }); - } else { - ceph_abort_msg("unrecognized manifest type"); - } + dec_all_refcount_head_manifest(oi, ctx); oi.clear_flag(object_info_t::FLAG_MANIFEST); oi.manifest = object_manifest_t(); @@ -7935,6 +7992,10 @@ inline int PrimaryLogPG::_delete_oid( dout(20) << __func__ << " setting whiteout on " << soid << dendl; oi.set_flag(object_info_t::FLAG_WHITEOUT); ctx->delta_stats.num_whiteouts++; + if (oi.has_manifest()) { + ctx->delta_stats.num_objects_manifest--; + dec_all_refcount_head_manifest(oi, ctx); + } t->create(soid); osd->logger->inc(l_osd_tier_whiteout); return 0; @@ -7954,6 +8015,7 @@ inline int PrimaryLogPG::_delete_oid( } if (oi.has_manifest()) { ctx->delta_stats.num_objects_manifest--; + dec_all_refcount_head_manifest(oi, ctx); } obs.exists = false; return 0; @@ -8205,6 +8267,20 @@ void PrimaryLogPG::make_writeable(OpContext *ctx) if (pool.info.is_erasure()) ctx->clone_obc->attr_cache = ctx->obc->attr_cache; snap_oi = &ctx->clone_obc->obs.oi; + if (ctx->obc->obs.oi.has_manifest()) { + if ((ctx->obc->obs.oi.flags & object_info_t::FLAG_REDIRECT_HAS_REFERENCE) && + ctx->obc->obs.oi.manifest.is_redirect()) { + snap_oi->set_flag(object_info_t::FLAG_MANIFEST); + snap_oi->manifest.type = object_manifest_t::TYPE_REDIRECT; + snap_oi->manifest.redirect_target = ctx->obc->obs.oi.manifest.redirect_target; + } else if (ctx->obc->obs.oi.manifest.is_chunked()) { + snap_oi->set_flag(object_info_t::FLAG_MANIFEST); + snap_oi->manifest.type = object_manifest_t::TYPE_CHUNKED; + snap_oi->manifest.chunk_map = ctx->obc->obs.oi.manifest.chunk_map; + } else { + ceph_abort_msg("unrecognized manifest type"); + } + } bool got = ctx->lock_manager.get_write_greedy( coid, ctx->clone_obc, @@ -9135,10 +9211,6 @@ void PrimaryLogPG::_copy_some_manifest(ObjectContextRef obc, CopyOpRef cop, uint ObjectOperation op; op.dup(sub_cop->chunk_ops); - dout(20) << __func__ << " tgt_oid: " << soid.oid << " tgt_offset: " - << manifest->chunk_map[iter->first].offset - << " length: " << length << " pool id: " << oloc.pool << dendl; - if (cop->results.user_version) { op.assert_version(cop->results.user_version); } else { @@ -9161,6 +9233,12 @@ void PrimaryLogPG::_copy_some_manifest(ObjectContextRef obc, CopyOpRef cop, uint sub_cop->results.user_version ? NULL : &sub_cop->results.user_version); fin->tid = tid; sub_cop->objecter_tid = tid; + + dout(20) << __func__ << " tgt_oid: " << soid.oid << " tgt_offset: " + << manifest->chunk_map[iter->first].offset + << " length: " << length << " pool id: " << oloc.pool + << " tid: " << tid << dendl; + if (last_offset < iter->first) { break; } diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index ebc36e811709..74cf9f5d4bf6 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -1476,6 +1476,10 @@ protected: friend struct C_ProxyWrite_Commit; // -- chunkop -- + enum class refcount_t { + INCREMENT_REF, + DECREMENT_REF, + }; void do_proxy_chunked_op(OpRequestRef op, const hobject_t& missing_oid, ObjectContextRef obc, bool write_ordered); void do_proxy_chunked_read(OpRequestRef op, ObjectContextRef obc, int op_index, @@ -1494,9 +1498,12 @@ protected: uint64_t last_offset); void handle_manifest_flush(hobject_t oid, ceph_tid_t tid, int r, uint64_t offset, uint64_t last_offset, epoch_t lpr); - void cancel_manifest_ops(bool requeue, std::vector *tids); - void refcount_manifest(ObjectContextRef obc, object_locator_t oloc, hobject_t soid, - SnapContext snapc, bool get, RefCountCallback *cb, uint64_t offset); + void cancel_manifest_ops(bool requeue, vector *tids); + void refcount_manifest(ObjectContextRef obc, hobject_t src_soid, object_locator_t oloc, + hobject_t tgt_soid, SnapContext snapc, refcount_t type, RefCountCallback* cb); + void dec_all_refcount_head_manifest(object_info_t& oi, OpContext* ctx); + void dec_refcount_non_intersection(ObjectContextRef obc, const object_info_t& oi, + set intersection_set); friend struct C_ProxyChunkRead; friend class PromoteManifestCallback; diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 557a6b1a06c7..3bdd70095c7b 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -5810,6 +5810,23 @@ void chunk_info_t::dump(Formatter *f) const f->dump_unsigned("flags", flags); } + +bool chunk_info_t::operator==(const chunk_info_t& cit) const +{ + if (has_fingerprint()) { + if (oid.oid.name == cit.oid.oid.name) { + return true; + } + } else { + if (offset == cit.offset && length == cit.length && + oid.oid.name == cit.oid.oid.name) { + return true; + } + + } + return false; +} + ostream& operator<<(ostream& out, const chunk_info_t& ci) { return out << "(len: " << ci.length << " oid: " << ci.oid @@ -5819,6 +5836,25 @@ ostream& operator<<(ostream& out, const chunk_info_t& ci) // -- object_manifest_t -- +void object_manifest_t::build_intersection_set(std::map& map, + set& intersection, interval_set* check_intersection) +{ + for (auto p : chunk_map) { + if (check_intersection) { + if (check_intersection->intersects(p.first, p.second.length)) { + intersection.insert(p.first); + continue; + } + } + + for (auto c : map) { + if (p.second == c.second) { + intersection.insert(p.first); + } + } + } +} + void object_manifest_t::encode(ceph::buffer::list& bl) const { ENCODE_START(1, 1, bl); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 630ff3d07aa0..c7814bfd887e 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -5485,6 +5485,7 @@ struct chunk_info_t { void decode(ceph::buffer::list::const_iterator &bl); void dump(ceph::Formatter *f) const; friend std::ostream& operator<<(std::ostream& out, const chunk_info_t& ci); + bool operator==(const chunk_info_t& cit) const; }; WRITE_CLASS_ENCODER(chunk_info_t) std::ostream& operator<<(std::ostream& out, const chunk_info_t& ci); @@ -5529,6 +5530,8 @@ struct object_manifest_t { redirect_target = hobject_t(); chunk_map.clear(); } + void build_intersection_set(std::map& map, + set& intersection, interval_set* check_intersection); static void generate_test_instances(std::list& o); void encode(ceph::buffer::list &bl) const; void decode(ceph::buffer::list::const_iterator &bl);