From: Ronen Friedman
Date: Sun, 15 Nov 2020 20:13:52 +0000 (+0200)
Subject: osd: Extracting scrub code from PrimaryLogPG (a derivative of 'PG')
X-Git-Tag: v16.1.0~270^2~3
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5f6a9015acc6a554c903928fd4b987b75e2f3c73;p=ceph.git

osd: Extracting scrub code from PrimaryLogPG (a derivative of 'PG')
into a PrimaryLogScrub - a derivative of PgScrubber.

Signed-off-by: Ronen Friedman
---

diff --git a/src/osd/CMakeLists.txt b/src/osd/CMakeLists.txt
index d3b6c21c79e..0d0ca63b347 100644
--- a/src/osd/CMakeLists.txt
+++ b/src/osd/CMakeLists.txt
@@ -13,6 +13,7 @@ set(osd_srcs OSD.cc pg_scrubber.cc scrub_machine.cc + PrimaryLogScrub.cc Watch.cc ClassHandler.cc PG.cc
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 9119b297938..a2403cc07d5 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -170,6 +170,7 @@ class PG : public DoutPrefixProvider, public PeeringState::PeeringListener { friend struct NamedState; friend class PeeringState; friend class PgScrubber; + friend class PrimaryLogScrub; friend class Scrub::ReplicaReservations; friend class Scrub::LocalReservation; // dout()-only friendship friend class Scrub::ReservedByRemotePrimary; // dout()-only friendship
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index e8038a6e955..2df61761b0d 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -21,6 +21,7 @@ #include "pg_scrubber.h" #include "PrimaryLogPG.h" #include "OSD.h" +#include "PrimaryLogScrub.h" #include "OpRequest.h" #include "ScrubStore.h" #include "Session.h"
@@ -1609,6 +1610,53 @@ int PrimaryLogPG::do_scrub_ls(const MOSDOp *m, OSDOp *osd_op) return r; } + +/** + * Releases locks + * + * @param manager [in] manager with locks to release + */ +void PrimaryLogPG::release_object_locks( + ObcLockManager &lock_manager) { + std::list<std::pair<ObjectContextRef, std::list<OpRequestRef> > > to_req; + bool requeue_recovery = false; + bool requeue_snaptrim = false; + lock_manager.put_locks( + &to_req, + &requeue_recovery, + &requeue_snaptrim); + if (requeue_recovery) + queue_recovery(); + if (requeue_snaptrim) + snap_trimmer_machine.process_event(TrimWriteUnblocked()); + + if (!to_req.empty()) { + // requeue at front of scrub blocking queue if we are blocked by scrub + for (auto &&p: to_req) { + if (m_scrubber->write_blocked_by_scrub(p.first->obs.oi.soid.get_head())) { + for (auto& op : p.second) { + op->mark_delayed("waiting for scrub"); + } + + waiting_for_scrub.splice( + waiting_for_scrub.begin(), + p.second, + p.second.begin(), + p.second.end()); + } else if (is_laggy()) { + for (auto& op : p.second) { + op->mark_delayed("waiting for readable"); + } + waiting_for_readable.splice( + waiting_for_readable.begin(), + p.second, + p.second.begin(), + p.second.end()); + } else { + requeue_ops(p.second); + } + } + } } PrimaryLogPG::PrimaryLogPG(OSDService *o, OSDMapRef curmap,
@@ -1628,8 +1676,7 @@ PrimaryLogPG::PrimaryLogPG(OSDService *o, OSDMapRef curmap, pgbackend->get_is_recoverable_predicate()); snap_trimmer_machine.initiate(); - m_scrubber = make_unique<PgScrubber>(this); // *not* the final code - // next commit: m_scrubber = make_unique<PrimaryLogScrub>(this); + m_scrubber = make_unique<PrimaryLogScrub>(this); } void PrimaryLogPG::get_src_oloc(const object_t& oid, const object_locator_t& oloc, object_locator_t& src_oloc)
@@ -14929,507 +14976,6 @@ bool PrimaryLogPG::_range_available_for_scrub(const hobject_t& begin, return true; } -static bool doing_clones(const std::optional<SnapSet> &snapset, - const vector<snapid_t>::reverse_iterator &curclone) { - return snapset && curclone != snapset->clones.rend(); -} -
-void PrimaryLogPG::log_missing(unsigned missing, - const std::optional &head, - LogChannelRef clog, - const spg_t &pgid, - const char *func, - const char *mode, - bool allow_incomplete_clones) -{ - ceph_assert(head); - if (allow_incomplete_clones) { - dout(20) << func << " " << mode << " " << pgid << " " << *head - << " skipped " << missing << " clone(s) in cache tier" << dendl; - } else { - clog->info() << mode << " " << pgid << " " << *head - << " : " << missing << " missing clone(s)"; - } -} - -unsigned PrimaryLogPG::process_clones_to(const std::optional &head, - const std::optional &snapset, - LogChannelRef clog, - const spg_t &pgid, - const char *mode, - bool allow_incomplete_clones, - std::optional target, - vector::reverse_iterator *curclone, - inconsistent_snapset_wrapper &e) -{ - ceph_assert(head); - ceph_assert(snapset); - unsigned missing = 0; - - // NOTE: clones are in descending order, thus **curclone > target test here - hobject_t next_clone(*head); - while(doing_clones(snapset, *curclone) && (!target || **curclone > *target)) { - ++missing; - // it is okay to be missing one or more clones in a cache tier. - // skip higher-numbered clones in the list. - if (!allow_incomplete_clones) { - next_clone.snap = **curclone; - clog->error() << mode << " " << pgid << " " << *head - << " : expected clone " << next_clone << " " << missing - << " missing"; - ++scrubber.shallow_errors; - e.set_clone_missing(next_clone.snap); - } - // Clones are descending - ++(*curclone); - } - return missing; -} - -/* - * Validate consistency of the object info and snap sets. - * - * We are sort of comparing 2 lists. The main loop is on objmap.objects. But - * the comparison of the objects is against multiple snapset.clones. There are - * multiple clone lists and in between lists we expect head. - * - * Example - * - * objects expected - * ======= ======= - * obj1 snap 1 head, unexpected obj1 snap 1 - * obj2 head head, match - * [SnapSet clones 6 4 2 1] - * obj2 snap 7 obj2 snap 6, unexpected obj2 snap 7 - * obj2 snap 6 obj2 snap 6, match - * obj2 snap 4 obj2 snap 4, match - * obj3 head obj2 snap 2 (expected), obj2 snap 1 (expected), match - * [Snapset clones 3 1] - * obj3 snap 3 obj3 snap 3 match - * obj3 snap 1 obj3 snap 1 match - * obj4 head head, match - * [Snapset clones 4] - * EOL obj4 snap 4, (expected) - */ -void PrimaryLogPG::scrub_snapshot_metadata( - ScrubMap &scrubmap, - const map, - std::optional>> &missing_digest) -{ - dout(10) << __func__ << dendl; - - bool repair = state_test(PG_STATE_REPAIR); - bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); - const char *mode = (repair ? "repair": (deep_scrub ? "deep-scrub" : "scrub")); - std::optional all_clones; // Unspecified snapid_t or std::nullopt - - // traverse in reverse order. - std::optional head; - std::optional snapset; // If initialized so will head (above) - vector::reverse_iterator curclone; // Defined only if snapset initialized - unsigned missing = 0; - inconsistent_snapset_wrapper soid_error, head_error; - unsigned soid_error_count = 0; - - for (map::reverse_iterator - p = scrubmap.objects.rbegin(); p != scrubmap.objects.rend(); ++p) { - const hobject_t& soid = p->first; - ceph_assert(!soid.is_snapdir()); - soid_error = inconsistent_snapset_wrapper{soid}; - object_stat_sum_t stat; - std::optional oi; - - stat.num_objects++; - - if (soid.nspace == cct->_conf->osd_hit_set_namespace) - stat.num_objects_hit_set_archive++; - - if (soid.is_snap()) { - // it's a clone - stat.num_object_clones++; - } - - // basic checks. 
- if (p->second.attrs.count(OI_ATTR) == 0) { - oi = std::nullopt; - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : no '" << OI_ATTR << "' attr"; - ++scrubber.shallow_errors; - soid_error.set_info_missing(); - } else { - bufferlist bv; - bv.push_back(p->second.attrs[OI_ATTR]); - try { - oi = object_info_t(); // Initialize optional<> before decode into it - oi->decode(bv); - } catch (ceph::buffer::error& e) { - oi = std::nullopt; - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : can't decode '" << OI_ATTR << "' attr " << e.what(); - ++scrubber.shallow_errors; - soid_error.set_info_corrupted(); - soid_error.set_info_missing(); // Not available too - } - } - - if (oi) { - if (pgbackend->be_get_ondisk_size(oi->size) != p->second.size) { - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : on disk size (" << p->second.size - << ") does not match object info size (" - << oi->size << ") adjusted for ondisk to (" - << pgbackend->be_get_ondisk_size(oi->size) - << ")"; - soid_error.set_size_mismatch(); - ++scrubber.shallow_errors; - } - - dout(20) << mode << " " << soid << " " << *oi << dendl; - - // A clone num_bytes will be added later when we have snapset - if (!soid.is_snap()) { - stat.num_bytes += oi->size; - } - if (soid.nspace == cct->_conf->osd_hit_set_namespace) - stat.num_bytes_hit_set_archive += oi->size; - - if (oi->is_dirty()) - ++stat.num_objects_dirty; - if (oi->is_whiteout()) - ++stat.num_whiteouts; - if (oi->is_omap()) - ++stat.num_objects_omap; - if (oi->is_cache_pinned()) - ++stat.num_objects_pinned; - if (oi->has_manifest()) - ++stat.num_objects_manifest; - } - - // Check for any problems while processing clones - if (doing_clones(snapset, curclone)) { - std::optional target; - // Expecting an object with snap for current head - if (soid.has_snapset() || soid.get_head() != head->get_head()) { - - dout(10) << __func__ << " " << mode << " " << info.pgid << " new object " - << soid << " while processing " << *head << dendl; - - target = all_clones; - } else { - ceph_assert(soid.is_snap()); - target = soid.snap; - } - - // Log any clones we were expecting to be there up to target - // This will set missing, but will be a no-op if snap.soid == *curclone. - missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode, - pool.info.allow_incomplete_clones(), target, &curclone, - head_error); - } - bool expected; - // Check doing_clones() again in case we ran process_clones_to() - if (doing_clones(snapset, curclone)) { - // A head would have processed all clones above - // or all greater than *curclone. - ceph_assert(soid.is_snap() && *curclone <= soid.snap); - - // After processing above clone snap should match the expected curclone - expected = (*curclone == soid.snap); - } else { - // If we aren't doing clones any longer, then expecting head - expected = soid.has_snapset(); - } - if (!expected) { - // If we couldn't read the head's snapset, just ignore clones - if (head && !snapset) { - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : clone ignored due to missing snapset"; - } else { - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : is an unexpected clone"; - } - ++scrubber.shallow_errors; - soid_error.set_headless(); - scrubber.store->add_snap_error(pool.id, soid_error); - ++soid_error_count; - if (head && soid.get_head() == head->get_head()) - head_error.set_clone(soid.snap); - continue; - } - - // new snapset? 
- if (soid.has_snapset()) { - - if (missing) { - log_missing(missing, head, osd->clog, info.pgid, __func__, mode, - pool.info.allow_incomplete_clones()); - } - - // Save previous head error information - if (head && (head_error.errors || soid_error_count)) - scrubber.store->add_snap_error(pool.id, head_error); - // Set this as a new head object - head = soid; - missing = 0; - head_error = soid_error; - soid_error_count = 0; - - dout(20) << __func__ << " " << mode << " new head " << head << dendl; - - if (p->second.attrs.count(SS_ATTR) == 0) { - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : no '" << SS_ATTR << "' attr"; - ++scrubber.shallow_errors; - snapset = std::nullopt; - head_error.set_snapset_missing(); - } else { - bufferlist bl; - bl.push_back(p->second.attrs[SS_ATTR]); - auto blp = bl.cbegin(); - try { - snapset = SnapSet(); // Initialize optional<> before decoding into it - decode(*snapset, blp); - head_error.ss_bl.push_back(p->second.attrs[SS_ATTR]); - } catch (ceph::buffer::error& e) { - snapset = std::nullopt; - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : can't decode '" << SS_ATTR << "' attr " << e.what(); - ++scrubber.shallow_errors; - head_error.set_snapset_corrupted(); - } - } - - if (snapset) { - // what will be next? - curclone = snapset->clones.rbegin(); - - if (!snapset->clones.empty()) { - dout(20) << " snapset " << *snapset << dendl; - if (snapset->seq == 0) { - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : snaps.seq not set"; - ++scrubber.shallow_errors; - head_error.set_snapset_error(); - } - } - } - } else { - ceph_assert(soid.is_snap()); - ceph_assert(head); - ceph_assert(snapset); - ceph_assert(soid.snap == *curclone); - - dout(20) << __func__ << " " << mode << " matched clone " << soid << dendl; - - if (snapset->clone_size.count(soid.snap) == 0) { - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : is missing in clone_size"; - ++scrubber.shallow_errors; - soid_error.set_size_mismatch(); - } else { - if (oi && oi->size != snapset->clone_size[soid.snap]) { - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : size " << oi->size << " != clone_size " - << snapset->clone_size[*curclone]; - ++scrubber.shallow_errors; - soid_error.set_size_mismatch(); - } - - if (snapset->clone_overlap.count(soid.snap) == 0) { - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : is missing in clone_overlap"; - ++scrubber.shallow_errors; - soid_error.set_size_mismatch(); - } else { - // This checking is based on get_clone_bytes(). The first 2 asserts - // can't happen because we know we have a clone_size and - // a clone_overlap. Now we check that the interval_set won't - // cause the last assert. - uint64_t size = snapset->clone_size.find(soid.snap)->second; - const interval_set &overlap = - snapset->clone_overlap.find(soid.snap)->second; - bool bad_interval_set = false; - for (interval_set::const_iterator i = overlap.begin(); - i != overlap.end(); ++i) { - if (size < i.get_len()) { - bad_interval_set = true; - break; - } - size -= i.get_len(); - } - - if (bad_interval_set) { - osd->clog->error() << mode << " " << info.pgid << " " << soid - << " : bad interval_set in clone_overlap"; - ++scrubber.shallow_errors; - soid_error.set_size_mismatch(); - } else { - stat.num_bytes += snapset->get_clone_bytes(soid.snap); - } - } - } - - // what's next? 
- ++curclone; - if (soid_error.errors) { - scrubber.store->add_snap_error(pool.id, soid_error); - ++soid_error_count; - } - } - - scrub_cstat.add(stat); - } - - if (doing_clones(snapset, curclone)) { - dout(10) << __func__ << " " << mode << " " << info.pgid - << " No more objects while processing " << *head << dendl; - - missing += process_clones_to(head, snapset, osd->clog, info.pgid, mode, - pool.info.allow_incomplete_clones(), all_clones, &curclone, - head_error); - } - // There could be missing found by the test above or even - // before dropping out of the loop for the last head. - if (missing) { - log_missing(missing, head, osd->clog, info.pgid, __func__, - mode, pool.info.allow_incomplete_clones()); - } - if (head && (head_error.errors || soid_error_count)) - scrubber.store->add_snap_error(pool.id, head_error); - - for (auto p = missing_digest.begin(); p != missing_digest.end(); ++p) { - ceph_assert(!p->first.is_snapdir()); - dout(10) << __func__ << " recording digests for " << p->first << dendl; - ObjectContextRef obc = get_object_context(p->first, false); - if (!obc) { - osd->clog->error() << info.pgid << " " << mode - << " cannot get object context for object " - << p->first; - continue; - } else if (obc->obs.oi.soid != p->first) { - osd->clog->error() << info.pgid << " " << mode - << " " << p->first - << " : object has a valid oi attr with a mismatched name, " - << " obc->obs.oi.soid: " << obc->obs.oi.soid; - continue; - } - OpContextUPtr ctx = simple_opc_create(obc); - ctx->at_version = get_next_version(); - ctx->mtime = utime_t(); // do not update mtime - if (p->second.first) { - ctx->new_obs.oi.set_data_digest(*p->second.first); - } else { - ctx->new_obs.oi.clear_data_digest(); - } - if (p->second.second) { - ctx->new_obs.oi.set_omap_digest(*p->second.second); - } else { - ctx->new_obs.oi.clear_omap_digest(); - } - finish_ctx(ctx.get(), pg_log_entry_t::MODIFY); - - ctx->register_on_success( - [this]() { - dout(20) << "updating scrub digest" << dendl; - if (--scrubber.num_digest_updates_pending == 0) { - requeue_scrub(); - } - }); - - simple_opc_submit(std::move(ctx)); - ++scrubber.num_digest_updates_pending; - } - - dout(10) << __func__ << " (" << mode << ") finish" << dendl; -} - -void PrimaryLogPG::_scrub_clear_state() -{ - scrub_cstat = object_stat_collection_t(); -} - -void PrimaryLogPG::_scrub_finish() -{ - bool repair = state_test(PG_STATE_REPAIR); - bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); - const char *mode = (repair ? "repair": (deep_scrub ? 
"deep-scrub" : "scrub")); - - if (info.stats.stats_invalid) { - recovery_state.update_stats( - [=](auto &history, auto &stats) { - stats.stats = scrub_cstat; - stats.stats_invalid = false; - return false; - }); - - if (agent_state) - agent_choose_mode(); - } - - dout(10) << mode << " got " - << scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, " - << scrub_cstat.sum.num_object_clones << "/" << info.stats.stats.sum.num_object_clones << " clones, " - << scrub_cstat.sum.num_objects_dirty << "/" << info.stats.stats.sum.num_objects_dirty << " dirty, " - << scrub_cstat.sum.num_objects_omap << "/" << info.stats.stats.sum.num_objects_omap << " omap, " - << scrub_cstat.sum.num_objects_pinned << "/" << info.stats.stats.sum.num_objects_pinned << " pinned, " - << scrub_cstat.sum.num_objects_hit_set_archive << "/" << info.stats.stats.sum.num_objects_hit_set_archive << " hit_set_archive, " - << scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes << " bytes, " - << scrub_cstat.sum.num_objects_manifest << "/" << info.stats.stats.sum.num_objects_manifest << " manifest objects, " - << scrub_cstat.sum.num_bytes_hit_set_archive << "/" << info.stats.stats.sum.num_bytes_hit_set_archive << " hit_set_archive bytes." - << dendl; - - if (scrub_cstat.sum.num_objects != info.stats.stats.sum.num_objects || - scrub_cstat.sum.num_object_clones != info.stats.stats.sum.num_object_clones || - (scrub_cstat.sum.num_objects_dirty != info.stats.stats.sum.num_objects_dirty && - !info.stats.dirty_stats_invalid) || - (scrub_cstat.sum.num_objects_omap != info.stats.stats.sum.num_objects_omap && - !info.stats.omap_stats_invalid) || - (scrub_cstat.sum.num_objects_pinned != info.stats.stats.sum.num_objects_pinned && - !info.stats.pin_stats_invalid) || - (scrub_cstat.sum.num_objects_hit_set_archive != info.stats.stats.sum.num_objects_hit_set_archive && - !info.stats.hitset_stats_invalid) || - (scrub_cstat.sum.num_bytes_hit_set_archive != info.stats.stats.sum.num_bytes_hit_set_archive && - !info.stats.hitset_bytes_stats_invalid) || - (scrub_cstat.sum.num_objects_manifest != info.stats.stats.sum.num_objects_manifest && - !info.stats.manifest_stats_invalid) || - scrub_cstat.sum.num_whiteouts != info.stats.stats.sum.num_whiteouts || - scrub_cstat.sum.num_bytes != info.stats.stats.sum.num_bytes) { - osd->clog->error() << info.pgid << " " << mode - << " : stat mismatch, got " - << scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, " - << scrub_cstat.sum.num_object_clones << "/" << info.stats.stats.sum.num_object_clones << " clones, " - << scrub_cstat.sum.num_objects_dirty << "/" << info.stats.stats.sum.num_objects_dirty << " dirty, " - << scrub_cstat.sum.num_objects_omap << "/" << info.stats.stats.sum.num_objects_omap << " omap, " - << scrub_cstat.sum.num_objects_pinned << "/" << info.stats.stats.sum.num_objects_pinned << " pinned, " - << scrub_cstat.sum.num_objects_hit_set_archive << "/" << info.stats.stats.sum.num_objects_hit_set_archive << " hit_set_archive, " - << scrub_cstat.sum.num_whiteouts << "/" << info.stats.stats.sum.num_whiteouts << " whiteouts, " - << scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes << " bytes, " - << scrub_cstat.sum.num_objects_manifest << "/" << info.stats.stats.sum.num_objects_manifest << " manifest objects, " - << scrub_cstat.sum.num_bytes_hit_set_archive << "/" << info.stats.stats.sum.num_bytes_hit_set_archive << " hit_set_archive bytes."; - ++scrubber.shallow_errors; - - if (repair) { - ++scrubber.fixed; 
- recovery_state.update_stats( - [this](auto &history, auto &stats) { - stats.stats = scrub_cstat; - stats.dirty_stats_invalid = false; - stats.omap_stats_invalid = false; - stats.hitset_stats_invalid = false; - stats.hitset_bytes_stats_invalid = false; - stats.pin_stats_invalid = false; - stats.manifest_stats_invalid = false; - return false; - }); - publish_stats_to_osd(); - recovery_state.share_pg_info(); - } - } - // Clear object context cache to get repair information - if (repair) - object_contexts.clear(); -} int PrimaryLogPG::rep_repair_primary_object(const hobject_t& soid, OpContext *ctx) { @@ -15491,6 +15037,13 @@ void PrimaryLogPG::SnapTrimmer::log_exit(const char *state_name, utime_t enter_t ldout(pg->cct, 20) << "exit " << state_name << dendl; } +bool PrimaryLogPG::SnapTrimmer::permit_trim() { + return + pg->is_clean() && + !pg->m_scrubber->is_scrub_active() && + !pg->snap_trimq.empty(); +} + /*---SnapTrimmer states---*/ #undef dout_prefix #define dout_prefix (context< SnapTrimmer >().pg->gen_prefix(*_dout) \ diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 211f8ede522..5a8c9f405d6 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -58,6 +58,7 @@ struct inconsistent_snapset_wrapper; class PrimaryLogPG : public PG, public PGBackend::Listener { friend class OSD; friend class Watch; + friend class PrimaryLogScrub; public: MEMPOOL_CLASS_HELPERS(); @@ -367,8 +368,8 @@ public: return gen_prefix(out); } - const std::map> - &get_missing_loc_shards() const override { + const HobjToShardSetMapping& get_missing_loc_shards() const override + { return recovery_state.get_missing_loc().get_missing_locs(); } const std::map &get_shard_missing() const override { @@ -1391,14 +1392,6 @@ protected: // -- scrub -- bool _range_available_for_scrub( const hobject_t &begin, const hobject_t &end) override; - void scrub_snapshot_metadata( - ScrubMap &map, - const std::map, - std::optional>> &missing_digest) override; - void _scrub_clear_state() override; - void _scrub_finish() override; - object_stat_collection_t scrub_cstat; void _split_into(pg_t child_pgid, PG *child, unsigned split_bits) override; @@ -1554,22 +1547,6 @@ private: pool.info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri); return pri > 0 ? 
pri : cct->_conf->osd_recovery_op_priority; } - void log_missing(unsigned missing, - const std::optional &head, - LogChannelRef clog, - const spg_t &pgid, - const char *func, - const char *mode, - bool allow_incomplete_clones); - unsigned process_clones_to(const std::optional &head, - const std::optional &snapset, - LogChannelRef clog, - const spg_t &pgid, - const char *mode, - bool allow_incomplete_clones, - std::optional target, - std::vector::reverse_iterator *curclone, - inconsistent_snapset_wrapper &snap_error); public: coll_t get_coll() { @@ -1623,12 +1600,7 @@ private: explicit SnapTrimmer(PrimaryLogPG *pg) : pg(pg) {} void log_enter(const char *state_name); void log_exit(const char *state_name, utime_t duration); - bool permit_trim() { - return - pg->is_clean() && - !pg->scrubber.active && - !pg->snap_trimq.empty(); - } + bool permit_trim(); bool can_trim() { return permit_trim() && diff --git a/src/osd/PrimaryLogScrub.cc b/src/osd/PrimaryLogScrub.cc new file mode 100644 index 00000000000..6cafa256a5e --- /dev/null +++ b/src/osd/PrimaryLogScrub.cc @@ -0,0 +1,611 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "PrimaryLogScrub.h" + +#include "common/scrub_types.h" + +#include "PeeringState.h" +#include "PrimaryLogPG.h" +#include "scrub_machine.h" + +#define dout_context (m_pg->cct) +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix _prefix(_dout, this->m_pg) + +template static ostream& _prefix(std::ostream* _dout, T* t) +{ + return t->gen_prefix(*_dout) << " PrimaryLog scrubber pg(" << t->pg_id << ") "; +} + +using namespace Scrub; +using Scrub::ScrubMachine; + +bool PrimaryLogScrub::get_store_errors(const scrub_ls_arg_t& arg, + scrub_ls_result_t& res_inout) const +{ + if (!m_store) { + return false; + } + + if (arg.get_snapsets) { + res_inout.vals = + m_store->get_snap_errors(m_pg->get_pgid().pool(), arg.start_after, arg.max_return); + } else { + res_inout.vals = m_store->get_object_errors(m_pg->get_pgid().pool(), arg.start_after, + arg.max_return); + } + return true; +} + +void PrimaryLogScrub::_scrub_finish() +{ + auto& info = m_pg->info; ///< a temporary alias + + dout(10) << __func__ + << " info stats: " << (info.stats.stats_invalid ? "invalid" : "valid") + << dendl; + + bool repair = state_test(PG_STATE_REPAIR); + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char* mode = (repair ? "repair" : (deep_scrub ? 
"deep-scrub" : "scrub")); + + if (info.stats.stats_invalid) { + m_pl_pg->recovery_state.update_stats([=](auto& history, auto& stats) { + stats.stats = m_scrub_cstat; + stats.stats_invalid = false; + return false; + }); + + if (m_pl_pg->agent_state) + m_pl_pg->agent_choose_mode(); + } + + dout(10) << mode << " got " << m_scrub_cstat.sum.num_objects << "/" + << info.stats.stats.sum.num_objects << " objects, " + << m_scrub_cstat.sum.num_object_clones << "/" + << info.stats.stats.sum.num_object_clones << " clones, " + << m_scrub_cstat.sum.num_objects_dirty << "/" + << info.stats.stats.sum.num_objects_dirty << " dirty, " + << m_scrub_cstat.sum.num_objects_omap << "/" + << info.stats.stats.sum.num_objects_omap << " omap, " + << m_scrub_cstat.sum.num_objects_pinned << "/" + << info.stats.stats.sum.num_objects_pinned << " pinned, " + << m_scrub_cstat.sum.num_objects_hit_set_archive << "/" + << info.stats.stats.sum.num_objects_hit_set_archive << " hit_set_archive, " + << m_scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes + << " bytes, " << m_scrub_cstat.sum.num_objects_manifest << "/" + << info.stats.stats.sum.num_objects_manifest << " manifest objects, " + << m_scrub_cstat.sum.num_bytes_hit_set_archive << "/" + << info.stats.stats.sum.num_bytes_hit_set_archive << " hit_set_archive bytes." + << dendl; + + if (m_scrub_cstat.sum.num_objects != info.stats.stats.sum.num_objects || + m_scrub_cstat.sum.num_object_clones != info.stats.stats.sum.num_object_clones || + (m_scrub_cstat.sum.num_objects_dirty != info.stats.stats.sum.num_objects_dirty && + !info.stats.dirty_stats_invalid) || + (m_scrub_cstat.sum.num_objects_omap != info.stats.stats.sum.num_objects_omap && + !info.stats.omap_stats_invalid) || + (m_scrub_cstat.sum.num_objects_pinned != info.stats.stats.sum.num_objects_pinned && + !info.stats.pin_stats_invalid) || + (m_scrub_cstat.sum.num_objects_hit_set_archive != + info.stats.stats.sum.num_objects_hit_set_archive && + !info.stats.hitset_stats_invalid) || + (m_scrub_cstat.sum.num_bytes_hit_set_archive != + info.stats.stats.sum.num_bytes_hit_set_archive && + !info.stats.hitset_bytes_stats_invalid) || + (m_scrub_cstat.sum.num_objects_manifest != + info.stats.stats.sum.num_objects_manifest && + !info.stats.manifest_stats_invalid) || + m_scrub_cstat.sum.num_whiteouts != info.stats.stats.sum.num_whiteouts || + m_scrub_cstat.sum.num_bytes != info.stats.stats.sum.num_bytes) { + m_osds->clog->error() << info.pgid << " " << mode << " : stat mismatch, got " + << m_scrub_cstat.sum.num_objects << "/" + << info.stats.stats.sum.num_objects << " objects, " + << m_scrub_cstat.sum.num_object_clones << "/" + << info.stats.stats.sum.num_object_clones << " clones, " + << m_scrub_cstat.sum.num_objects_dirty << "/" + << info.stats.stats.sum.num_objects_dirty << " dirty, " + << m_scrub_cstat.sum.num_objects_omap << "/" + << info.stats.stats.sum.num_objects_omap << " omap, " + << m_scrub_cstat.sum.num_objects_pinned << "/" + << info.stats.stats.sum.num_objects_pinned << " pinned, " + << m_scrub_cstat.sum.num_objects_hit_set_archive << "/" + << info.stats.stats.sum.num_objects_hit_set_archive + << " hit_set_archive, " << m_scrub_cstat.sum.num_whiteouts + << "/" << info.stats.stats.sum.num_whiteouts << " whiteouts, " + << m_scrub_cstat.sum.num_bytes << "/" + << info.stats.stats.sum.num_bytes << " bytes, " + << m_scrub_cstat.sum.num_objects_manifest << "/" + << info.stats.stats.sum.num_objects_manifest + << " manifest objects, " + << m_scrub_cstat.sum.num_bytes_hit_set_archive << "/" + << 
info.stats.stats.sum.num_bytes_hit_set_archive + << " hit_set_archive bytes."; + ++m_shallow_errors; + + if (repair) { + ++m_fixed_count; + m_pl_pg->recovery_state.update_stats([this](auto& history, auto& stats) { + stats.stats = m_scrub_cstat; + stats.dirty_stats_invalid = false; + stats.omap_stats_invalid = false; + stats.hitset_stats_invalid = false; + stats.hitset_bytes_stats_invalid = false; + stats.pin_stats_invalid = false; + stats.manifest_stats_invalid = false; + return false; + }); + m_pl_pg->publish_stats_to_osd(); + m_pl_pg->recovery_state.share_pg_info(); + } + } + // Clear object context cache to get repair information + if (repair) + m_pl_pg->object_contexts.clear(); +} + +static bool doing_clones(const std::optional& snapset, + const vector::reverse_iterator& curclone) +{ + return snapset && curclone != snapset->clones.rend(); +} + +void PrimaryLogScrub::log_missing(int missing, + const std::optional& head, + LogChannelRef clog, + const spg_t& pgid, + const char* func, + const char* mode, + bool allow_incomplete_clones) +{ + ceph_assert(head); + if (allow_incomplete_clones) { + dout(20) << func << " " << mode << " " << pgid << " " << *head << " skipped " + << missing << " clone(s) in cache tier" << dendl; + } else { + clog->info() << mode << " " << pgid << " " << *head << " : " << missing + << " missing clone(s)"; + } +} + +int PrimaryLogScrub::process_clones_to(const std::optional& head, + const std::optional& snapset, + LogChannelRef clog, + const spg_t& pgid, + const char* mode, + bool allow_incomplete_clones, + std::optional target, + vector::reverse_iterator* curclone, + inconsistent_snapset_wrapper& e) +{ + ceph_assert(head); + ceph_assert(snapset); + int missing_count = 0; + + // NOTE: clones are in descending order, thus **curclone > target test here + hobject_t next_clone(*head); + while (doing_clones(snapset, *curclone) && (!target || **curclone > *target)) { + + ++missing_count; + // it is okay to be missing one or more clones in a cache tier. + // skip higher-numbered clones in the list. + if (!allow_incomplete_clones) { + next_clone.snap = **curclone; + clog->error() << mode << " " << pgid << " " << *head << " : expected clone " + << next_clone << " " << m_missing << " missing"; + ++m_shallow_errors; + e.set_clone_missing(next_clone.snap); + } + // Clones are descending + ++(*curclone); + } + return missing_count; +} + +/* + * Validate consistency of the object info and snap sets. + * + * We are sort of comparing 2 lists. The main loop is on objmap.objects. But + * the comparison of the objects is against multiple snapset.clones. There are + * multiple clone lists and in between lists we expect head. 
+ * + * Example + * + * objects expected + * ======= ======= + * obj1 snap 1 head, unexpected obj1 snap 1 + * obj2 head head, match + * [SnapSet clones 6 4 2 1] + * obj2 snap 7 obj2 snap 6, unexpected obj2 snap 7 + * obj2 snap 6 obj2 snap 6, match + * obj2 snap 4 obj2 snap 4, match + * obj3 head obj2 snap 2 (expected), obj2 snap 1 (expected), match + * [Snapset clones 3 1] + * obj3 snap 3 obj3 snap 3 match + * obj3 snap 1 obj3 snap 1 match + * obj4 head head, match + * [Snapset clones 4] + * EOL obj4 snap 4, (expected) + */ +void PrimaryLogScrub::scrub_snapshot_metadata(ScrubMap& scrubmap, + const missing_map_t& missing_digest) +{ + dout(10) << __func__ << " num stat obj " << m_pl_pg->info.stats.stats.sum.num_objects + << dendl; + + auto& info = m_pl_pg->info; + const PGPool& pool = m_pl_pg->pool; + bool allow_incomplete_clones = pool.info.allow_incomplete_clones(); + + bool repair = state_test(PG_STATE_REPAIR); + bool deep_scrub = state_test(PG_STATE_DEEP_SCRUB); + const char* mode = (repair ? "repair" : (deep_scrub ? "deep-scrub" : "scrub")); + + std::optional all_clones; // Unspecified snapid_t or std::nullopt + + // traverse in reverse order. + std::optional head; + std::optional snapset; // If initialized so will head (above) + vector::reverse_iterator curclone; // Defined only if snapset initialized + int missing = 0; + inconsistent_snapset_wrapper soid_error, head_error; + int soid_error_count = 0; + + for (auto p = scrubmap.objects.rbegin(); p != scrubmap.objects.rend(); ++p) { + + const hobject_t& soid = p->first; + ceph_assert(!soid.is_snapdir()); + soid_error = inconsistent_snapset_wrapper{soid}; + object_stat_sum_t stat; + std::optional oi; + + stat.num_objects++; + + if (soid.nspace == m_pl_pg->cct->_conf->osd_hit_set_namespace) + stat.num_objects_hit_set_archive++; + + if (soid.is_snap()) { + // it's a clone + stat.num_object_clones++; + } + + // basic checks. 
+ if (p->second.attrs.count(OI_ATTR) == 0) { + oi = std::nullopt; + m_osds->clog->error() << mode << " " << info.pgid << " " << soid << " : no '" + << OI_ATTR << "' attr"; + ++m_shallow_errors; + soid_error.set_info_missing(); + } else { + bufferlist bv; + bv.push_back(p->second.attrs[OI_ATTR]); + try { + oi = object_info_t(); // Initialize optional<> before decode into it + oi->decode(bv); + } catch (ceph::buffer::error& e) { + oi = std::nullopt; + m_osds->clog->error() << mode << " " << info.pgid << " " << soid + << " : can't decode '" << OI_ATTR << "' attr " << e.what(); + ++m_shallow_errors; + soid_error.set_info_corrupted(); + soid_error.set_info_missing(); // Not available too + } + } + + if (oi) { + if (m_pl_pg->pgbackend->be_get_ondisk_size(oi->size) != p->second.size) { + m_osds->clog->error() << mode << " " << info.pgid << " " << soid + << " : on disk size (" << p->second.size + << ") does not match object info size (" << oi->size + << ") adjusted for ondisk to (" + << m_pl_pg->pgbackend->be_get_ondisk_size(oi->size) << ")"; + soid_error.set_size_mismatch(); + ++m_shallow_errors; + } + + dout(20) << mode << " " << soid << " " << *oi << dendl; + + // A clone num_bytes will be added later when we have snapset + if (!soid.is_snap()) { + stat.num_bytes += oi->size; + } + if (soid.nspace == m_pl_pg->cct->_conf->osd_hit_set_namespace) + stat.num_bytes_hit_set_archive += oi->size; + + if (oi->is_dirty()) + ++stat.num_objects_dirty; + if (oi->is_whiteout()) + ++stat.num_whiteouts; + if (oi->is_omap()) + ++stat.num_objects_omap; + if (oi->is_cache_pinned()) + ++stat.num_objects_pinned; + if (oi->has_manifest()) + ++stat.num_objects_manifest; + } + + // Check for any problems while processing clones + if (doing_clones(snapset, curclone)) { + std::optional target; + // Expecting an object with snap for current head + if (soid.has_snapset() || soid.get_head() != head->get_head()) { + + dout(10) << __func__ << " " << mode << " " << info.pgid << " new object " << soid + << " while processing " << *head << dendl; + + target = all_clones; + } else { + ceph_assert(soid.is_snap()); + target = soid.snap; + } + + // Log any clones we were expecting to be there up to target + // This will set missing, but will be a no-op if snap.soid == *curclone. + missing += + process_clones_to(head, snapset, m_osds->clog, info.pgid, mode, + allow_incomplete_clones, target, &curclone, head_error); + } + + bool expected; + // Check doing_clones() again in case we ran process_clones_to() + if (doing_clones(snapset, curclone)) { + // A head would have processed all clones above + // or all greater than *curclone. + ceph_assert(soid.is_snap() && *curclone <= soid.snap); + + // After processing above clone snap should match the expected curclone + expected = (*curclone == soid.snap); + } else { + // If we aren't doing clones any longer, then expecting head + expected = soid.has_snapset(); + } + if (!expected) { + // If we couldn't read the head's snapset, just ignore clones + if (head && !snapset) { + m_osds->clog->error() << mode << " " << info.pgid << " " << soid + << " : clone ignored due to missing snapset"; + } else { + m_osds->clog->error() << mode << " " << info.pgid << " " << soid + << " : is an unexpected clone"; + } + ++m_shallow_errors; + soid_error.set_headless(); + m_store->add_snap_error(pool.id, soid_error); + ++soid_error_count; + if (head && soid.get_head() == head->get_head()) + head_error.set_clone(soid.snap); + continue; + } + + // new snapset? 
+ if (soid.has_snapset()) { + + if (missing) { + log_missing(missing, head, m_osds->clog, info.pgid, __func__, mode, + pool.info.allow_incomplete_clones()); + } + + // Save previous head error information + if (head && (head_error.errors || soid_error_count)) + m_store->add_snap_error(pool.id, head_error); + // Set this as a new head object + head = soid; + missing = 0; + head_error = soid_error; + soid_error_count = 0; + + dout(20) << __func__ << " " << mode << " new head " << head << dendl; + + if (p->second.attrs.count(SS_ATTR) == 0) { + m_osds->clog->error() << mode << " " << info.pgid << " " << soid << " : no '" + << SS_ATTR << "' attr"; + ++m_shallow_errors; + snapset = std::nullopt; + head_error.set_snapset_missing(); + } else { + bufferlist bl; + bl.push_back(p->second.attrs[SS_ATTR]); + auto blp = bl.cbegin(); + try { + snapset = SnapSet(); // Initialize optional<> before decoding into it + decode(*snapset, blp); + head_error.ss_bl.push_back(p->second.attrs[SS_ATTR]); + } catch (ceph::buffer::error& e) { + snapset = std::nullopt; + m_osds->clog->error() + << mode << " " << info.pgid << " " << soid << " : can't decode '" << SS_ATTR + << "' attr " << e.what(); + ++m_shallow_errors; + head_error.set_snapset_corrupted(); + } + } + + if (snapset) { + // what will be next? + curclone = snapset->clones.rbegin(); + + if (!snapset->clones.empty()) { + dout(20) << " snapset " << *snapset << dendl; + if (snapset->seq == 0) { + m_osds->clog->error() + << mode << " " << info.pgid << " " << soid << " : snaps.seq not set"; + ++m_shallow_errors; + head_error.set_snapset_error(); + } + } + } + } else { + ceph_assert(soid.is_snap()); + ceph_assert(head); + ceph_assert(snapset); + ceph_assert(soid.snap == *curclone); + + dout(20) << __func__ << " " << mode << " matched clone " << soid << dendl; + + if (snapset->clone_size.count(soid.snap) == 0) { + m_osds->clog->error() << mode << " " << info.pgid << " " << soid + << " : is missing in clone_size"; + ++m_shallow_errors; + soid_error.set_size_mismatch(); + } else { + if (oi && oi->size != snapset->clone_size[soid.snap]) { + m_osds->clog->error() + << mode << " " << info.pgid << " " << soid << " : size " << oi->size + << " != clone_size " << snapset->clone_size[*curclone]; + ++m_shallow_errors; + soid_error.set_size_mismatch(); + } + + if (snapset->clone_overlap.count(soid.snap) == 0) { + m_osds->clog->error() << mode << " " << info.pgid << " " << soid + << " : is missing in clone_overlap"; + ++m_shallow_errors; + soid_error.set_size_mismatch(); + } else { + // This checking is based on get_clone_bytes(). The first 2 asserts + // can't happen because we know we have a clone_size and + // a clone_overlap. Now we check that the interval_set won't + // cause the last assert. + uint64_t size = snapset->clone_size.find(soid.snap)->second; + const interval_set& overlap = + snapset->clone_overlap.find(soid.snap)->second; + bool bad_interval_set = false; + for (interval_set::const_iterator i = overlap.begin(); + i != overlap.end(); ++i) { + if (size < i.get_len()) { + bad_interval_set = true; + break; + } + size -= i.get_len(); + } + + if (bad_interval_set) { + m_osds->clog->error() << mode << " " << info.pgid << " " << soid + << " : bad interval_set in clone_overlap"; + ++m_shallow_errors; + soid_error.set_size_mismatch(); + } else { + stat.num_bytes += snapset->get_clone_bytes(soid.snap); + } + } + } + + // what's next? 
+ ++curclone; + if (soid_error.errors) { + m_store->add_snap_error(pool.id, soid_error); + ++soid_error_count; + } + } + m_scrub_cstat.add(stat); + } + + if (doing_clones(snapset, curclone)) { + dout(10) << __func__ << " " << mode << " " << info.pgid + << " No more objects while processing " << *head << dendl; + + missing += + process_clones_to(head, snapset, m_osds->clog, info.pgid, mode, + allow_incomplete_clones, all_clones, &curclone, head_error); + } + + // There could be missing found by the test above or even + // before dropping out of the loop for the last head. + if (missing) { + log_missing(missing, head, m_osds->clog, info.pgid, __func__, mode, + allow_incomplete_clones); + } + if (head && (head_error.errors || soid_error_count)) + m_store->add_snap_error(pool.id, head_error); + + dout(20) << __func__ << " - " << missing << " (" << missing_digest.size() << ") missing" + << dendl; + for (auto p = missing_digest.begin(); p != missing_digest.end(); ++p) { + + ceph_assert(!p->first.is_snapdir()); + dout(10) << __func__ << " recording digests for " << p->first << dendl; + + ObjectContextRef obc = m_pl_pg->get_object_context(p->first, false); + if (!obc) { + m_osds->clog->error() << info.pgid << " " << mode + << " cannot get object context for object " << p->first; + continue; + } + if (obc->obs.oi.soid != p->first) { + m_osds->clog->error() << info.pgid << " " << mode << " " << p->first + << " : object has a valid oi attr with a mismatched name, " + << " obc->obs.oi.soid: " << obc->obs.oi.soid; + continue; + } + PrimaryLogPG::OpContextUPtr ctx = m_pl_pg->simple_opc_create(obc); + ctx->at_version = m_pl_pg->get_next_version(); + ctx->mtime = utime_t(); // do not update mtime + if (p->second.first) { + ctx->new_obs.oi.set_data_digest(*p->second.first); + } else { + ctx->new_obs.oi.clear_data_digest(); + } + if (p->second.second) { + ctx->new_obs.oi.set_omap_digest(*p->second.second); + } else { + ctx->new_obs.oi.clear_omap_digest(); + } + m_pl_pg->finish_ctx(ctx.get(), pg_log_entry_t::MODIFY); + + ctx->register_on_success([this]() { + dout(20) << "updating scrub digest " << num_digest_updates_pending << dendl; + if (--num_digest_updates_pending <= 0) { + m_osds->queue_scrub_digest_update(m_pl_pg, m_pl_pg->is_scrub_blocking_ops()); + } + }); + + ++num_digest_updates_pending; + m_pl_pg->simple_opc_submit(std::move(ctx)); + } + + dout(10) << __func__ << " (" << mode << ") finish" << dendl; +} + +PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) + : PgScrubber{pg}, m_pl_pg{pg} +{} + +void PrimaryLogScrub::_scrub_clear_state() +{ + dout(7) << __func__ << " - pg(" << m_pl_pg->pg_id << dendl; + m_scrub_cstat = object_stat_collection_t(); +} + +void PrimaryLogScrub::stats_of_handled_objects(const object_stat_sum_t& delta_stats, + const hobject_t& soid) +{ + // We scrub objects in hobject_t order, so objects before m_start have already been + // scrubbed and their stats have already been added to the scrubber. Objects after that + // point haven't been included in the scrubber's stats accounting yet, so they will be + // included when the scrubber gets to that object. + dout(15) << __func__ << " soid: " << soid << " scrub is active? 
" << is_scrub_active() + << dendl; + if (is_primary() && is_scrub_active()) { + if (soid < m_start) { + dout(20) << __func__ << " " << soid << " < [" << m_start << "," << m_end << ")" + << dendl; + m_scrub_cstat.add(delta_stats); + } else { + dout(20) << __func__ << " " << soid << " >= [" << m_start << "," << m_end << ")" + << dendl; + } + } +} + +bool PrimaryLogScrub::should_requeue_blocked_ops(eversion_t last_recovery_applied) const +{ + if (!is_scrub_active()) { + // just verify that things indeed are quiet + ceph_assert(m_start == m_end); + return false; + } + + return last_recovery_applied >= m_subset_last_update; +} diff --git a/src/osd/PrimaryLogScrub.h b/src/osd/PrimaryLogScrub.h new file mode 100644 index 00000000000..d24fa092065 --- /dev/null +++ b/src/osd/PrimaryLogScrub.h @@ -0,0 +1,82 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +#pragma once + +// the './' includes are marked this way to affect clang-format +#include "./pg_scrubber.h" + +#include +#include +#include + +#include "debug.h" + +#include "common/errno.h" +#include "common/scrub_types.h" +#include "messages/MOSDOp.h" +#include "messages/MOSDRepScrub.h" +#include "messages/MOSDRepScrubMap.h" +#include "messages/MOSDScrub.h" +#include "messages/MOSDScrubReserve.h" + +#include "OSD.h" +#include "scrub_machine.h" + +class PrimaryLogPG; + +/** + * The derivative of PgScrubber that is used by PrimaryLogPG. + */ +class PrimaryLogScrub : public PgScrubber { + public: + explicit PrimaryLogScrub(PrimaryLogPG* pg); + + void _scrub_finish() final; + + bool get_store_errors(const scrub_ls_arg_t& arg, + scrub_ls_result_t& res_inout) const final; + + /** + * should we requeue blocked ops? + * Yes - if our 'subset_last_applied' is less up-to-date than the + * new recovery_state.get_last_update_applied(). + * (used by PrimaryLogPG::op_applied()) + */ + [[nodiscard]] bool should_requeue_blocked_ops( + eversion_t last_recovery_applied) const final; + + void stats_of_handled_objects(const object_stat_sum_t& delta_stats, + const hobject_t& soid) final; + + private: + // we know our PG is actually a PrimaryLogPG. Let's alias the pointer to that object: + PrimaryLogPG* const m_pl_pg; + + /** + * Validate consistency of the object info and snap sets. + */ + void scrub_snapshot_metadata(ScrubMap& map, const missing_map_t& missing_digest) final; + + void log_missing(int missing, + const std::optional& head, + LogChannelRef clog, + const spg_t& pgid, + const char* func, + const char* mode, + bool allow_incomplete_clones); + + int process_clones_to(const std::optional& head, + const std::optional& snapset, + LogChannelRef clog, + const spg_t& pgid, + const char* mode, + bool allow_incomplete_clones, + std::optional target, + std::vector::reverse_iterator* curclone, + inconsistent_snapset_wrapper& snap_error); + + + // handle our part in stats collection + object_stat_collection_t m_scrub_cstat; + void _scrub_clear_state() final; // which just clears the stats +}; diff --git a/src/osd/pg_scrubber.cc b/src/osd/pg_scrubber.cc index bcabad41f73..2afa689aed4 100644 --- a/src/osd/pg_scrubber.cc +++ b/src/osd/pg_scrubber.cc @@ -325,7 +325,7 @@ bool PgScrubber::is_scrub_registered() const void PgScrubber::reg_next_scrub(const requested_scrub_t& request_flags) { if (!is_primary()) { - dout(20) << __func__ << ": not a primary!" << dendl; + // normal. No warning is required. 
return; } @@ -375,18 +375,6 @@ void PgScrubber::unreg_next_scrub() } } -/// debug/development temporary code: -void PgScrubber::debug_dump_reservations(std::string_view header_txt) const -{ - std::string format; - auto f = Formatter::create(format, "json-pretty", "json-pretty"); - m_osds->dump_scrub_reservations(f); - std::stringstream o; - f->flush(o); - dout(20) << header_txt << o.str() << dendl; - delete f; -} - void PgScrubber::scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type, requested_scrub_t& req_flags) @@ -396,8 +384,6 @@ void PgScrubber::scrub_requested(scrub_level_t scrub_level, << " prev stamp: " << m_scrub_reg_stamp << " " << is_scrub_registered() << dendl; - debug_dump_reservations(" before_unreg "); - unreg_next_scrub(); req_flags.must_scrub = true; @@ -409,24 +395,18 @@ void PgScrubber::scrub_requested(scrub_level_t scrub_level, req_flags.req_scrub = true; dout(20) << __func__ << " pg(" << m_pg_id << ") planned:" << req_flags << dendl; - debug_dump_reservations(" before_reg "); reg_next_scrub(req_flags); - - debug_dump_reservations(" after_reg "); } void PgScrubber::request_rescrubbing(requested_scrub_t& req_flags) { dout(10) << __func__ << " existing-" << m_scrub_reg_stamp << " ## " << is_scrub_registered() << dendl; - debug_dump_reservations(" auto-scrub before "); unreg_next_scrub(); req_flags.need_auto = true; reg_next_scrub(req_flags); - - debug_dump_reservations(" auto-scrub after "); } bool PgScrubber::reserve_local() @@ -469,7 +449,6 @@ void PgScrubber::set_subset_last_update(eversion_t e) * - setting tentative range based on conf and divisor * - requesting a partial list of elements from the backend; * - handling some head/clones issues - * - ... * * The selected range is set directly into 'm_start' and 'm_end' */ @@ -500,8 +479,6 @@ bool PgScrubber::select_range() int max_idx = std::max(min_idx, m_pg->get_cct()->_conf->osd_scrub_chunk_max / preemption_data.chunk_divisor()); - // why mixing 'int' and int64_t? RRR - dout(10) << __func__ << " Min: " << min_idx << " Max: " << max_idx << " Div: " << preemption_data.chunk_divisor() << dendl; @@ -560,10 +537,9 @@ bool PgScrubber::write_blocked_by_scrub(const hobject_t& soid) return false; } - dout(10) << __func__ << " " << soid << " can preempt? " - << preemption_data.is_preemptable() << dendl; - dout(10) << __func__ << " " << soid << " already? " << preemption_data.was_preempted() - << dendl; + dout(20) << __func__ << " " << soid << " can preempt? " + << preemption_data.is_preemptable() << " already preempted? " + << preemption_data.was_preempted() << dendl; if (preemption_data.is_preemptable()) { @@ -1189,8 +1165,6 @@ void PgScrubber::scrub_compare_maps() if (m_pg->recovery_state.get_acting().size() > 1) { - // RRR add a comment here - dout(10) << __func__ << " comparing replica scrub maps" << dendl; // Map from object with errors to good peer @@ -1395,8 +1369,7 @@ void PgScrubber::clear_scrub_reservations() void PgScrubber::message_all_replicas(int32_t opcode, std::string_view op_text) { - ceph_assert(m_pg->recovery_state.get_backfill_targets() - .empty()); // RRR ask: (the code was copied as is) Why checking here? 
+ ceph_assert(m_pg->recovery_state.get_backfill_targets().empty()); std::vector> messages; messages.reserve(m_pg->get_actingset().size()); diff --git a/src/osd/pg_scrubber.h b/src/osd/pg_scrubber.h index 0a390ce18e5..e3936691578 100644 --- a/src/osd/pg_scrubber.h +++ b/src/osd/pg_scrubber.h @@ -674,7 +674,4 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener { }; preemption_data_t preemption_data; - - // debug/development temporary code: - void debug_dump_reservations(std::string_view header_txt) const; };
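
For readers who want the shape of this refactor without tracing the full diff: PrimaryLogPG now constructs a PrimaryLogScrub (a PgScrubber derivative) in its constructor, and the PrimaryLogPG-specific scrub hooks that used to live in PrimaryLogPG (_scrub_finish(), _scrub_clear_state(), scrub_snapshot_metadata(), stats_of_handled_objects(), get_store_errors()) are now overrides in PrimaryLogScrub that reach back into the PG through the cached m_pl_pg pointer, enabled by the new 'friend class PrimaryLogScrub' declarations in PG and PrimaryLogPG. The following is a minimal, self-contained sketch of that ownership and override pattern only; apart from the names quoted above, every type and member in it is a simplified stand-in invented for illustration, not an actual Ceph declaration.

#include <memory>
#include <iostream>

// Simplified stand-ins; only the ownership/override pattern from this commit
// is illustrated. None of these are the real Ceph classes.
struct ScrubberIF {                        // plays the role of ScrubPgIF
  virtual ~ScrubberIF() = default;
  virtual void scrub_finish() = 0;
};

struct BasePG;                             // plays the role of PG

struct BaseScrubber : public ScrubberIF {  // plays the role of PgScrubber
  explicit BaseScrubber(BasePG* pg) : m_pg{pg} {}
  void scrub_finish() override { std::cout << "generic scrub wrap-up\n"; }
 protected:
  BasePG* m_pg;                            // generic PG view, like PgScrubber's m_pg
};

struct BasePG {
  std::unique_ptr<ScrubberIF> m_scrubber;  // like PG::m_scrubber
  virtual ~BasePG() = default;
};

struct ReplicatedPG;                       // plays the role of PrimaryLogPG

struct DerivedScrubber : public BaseScrubber {  // plays the role of PrimaryLogScrub
  explicit DerivedScrubber(ReplicatedPG* pg);
  void scrub_finish() override;            // needs state only the concrete PG type has
 private:
  ReplicatedPG* const m_pl_pg;             // cached concrete pointer, like m_pl_pg
};

struct ReplicatedPG : public BasePG {
  ReplicatedPG() {
    // mirrors: m_scrubber = make_unique<PrimaryLogScrub>(this);
    m_scrubber = std::make_unique<DerivedScrubber>(this);
  }
  void publish_stats() { std::cout << "publishing PG stats\n"; }
};

DerivedScrubber::DerivedScrubber(ReplicatedPG* pg) : BaseScrubber{pg}, m_pl_pg{pg} {}

void DerivedScrubber::scrub_finish() {
  BaseScrubber::scrub_finish();            // generic part stays in the base class
  m_pl_pg->publish_stats();                // PG-type-specific work lives in the derivative
}

int main() {
  ReplicatedPG pg;
  pg.m_scrubber->scrub_finish();           // dispatches to the derived scrubber
}

The design point of the split: PgScrubber stays free of PrimaryLogPG dependencies and remains usable for any PG flavour, while everything that needs PrimaryLogPG internals (the m_scrub_cstat accounting, the object-context cache, recovery_state stat updates) moves into the derivative and is reached via m_pl_pg and the added friend declarations.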