From 7e2ba75f07d12b6d8e925d4291e3d3c5168d80cb Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Sat, 30 Apr 2022 13:43:58 +0000 Subject: [PATCH] osd/scrub: reformat scrub files to 80 cols Reformatting the OSD scrub code files to match styleguide. Specifically: - force 80-cols lines; and - (sadly) force 'use tabs' (replacing each 8 indentation blanks with a tab). clang-format version used: 13 Configuration file used is detailed in PR comment. Signed-off-by: Ronen Friedman --- src/osd/scrubber/PrimaryLogScrub.cc | 114 ++-- src/osd/scrubber/PrimaryLogScrub.h | 5 +- src/osd/scrubber/ScrubStore.h | 40 +- src/osd/scrubber/osd_scrub_sched.cc | 98 ++-- src/osd/scrubber/osd_scrub_sched.h | 17 +- src/osd/scrubber/pg_scrubber.cc | 779 ++++++++++++++----------- src/osd/scrubber/pg_scrubber.h | 238 ++++---- src/osd/scrubber/scrub_machine.cc | 62 +- src/osd/scrubber/scrub_machine.h | 191 +++--- src/osd/scrubber/scrub_machine_lstnr.h | 13 +- src/osd/scrubber_common.h | 67 ++- 11 files changed, 947 insertions(+), 677 deletions(-) diff --git a/src/osd/scrubber/PrimaryLogScrub.cc b/src/osd/scrubber/PrimaryLogScrub.cc index 2e895a04169c..fb610fde74c8 100644 --- a/src/osd/scrubber/PrimaryLogScrub.cc +++ b/src/osd/scrubber/PrimaryLogScrub.cc @@ -6,10 +6,10 @@ #include #include "common/scrub_types.h" -#include "osd/osd_types_fmt.h" - #include "osd/PeeringState.h" #include "osd/PrimaryLogPG.h" +#include "osd/osd_types_fmt.h" + #include "scrub_machine.h" #define dout_context (m_osds->cct) @@ -33,10 +33,12 @@ bool PrimaryLogScrub::get_store_errors(const scrub_ls_arg_t& arg, } if (arg.get_snapsets) { - res_inout.vals = - m_store->get_snap_errors(m_pg->get_pgid().pool(), arg.start_after, arg.max_return); + res_inout.vals = m_store->get_snap_errors(m_pg->get_pgid().pool(), + arg.start_after, + arg.max_return); } else { - res_inout.vals = m_store->get_object_errors(m_pg->get_pgid().pool(), arg.start_after, + res_inout.vals = m_store->get_object_errors(m_pg->get_pgid().pool(), + 
arg.start_after, arg.max_return); } return true; @@ -49,23 +51,23 @@ void PrimaryLogScrub::submit_digest_fixes(const digests_fixes_t& fixes) // encounter previous-chunk digest updates after starting a new chunk num_digest_updates_pending = fixes.size(); dout(10) << __func__ - << ": num_digest_updates_pending: " << num_digest_updates_pending - << dendl; + << ": num_digest_updates_pending: " << num_digest_updates_pending + << dendl; for (auto& [obj, dgs] : fixes) { ObjectContextRef obc = m_pl_pg->get_object_context(obj, false); if (!obc) { m_osds->clog->error() << m_pg_id << " " << m_mode_desc - << " cannot get object context for object " << obj; + << " cannot get object context for object " << obj; num_digest_updates_pending--; continue; } if (obc->obs.oi.soid != obj) { m_osds->clog->error() - << m_pg_id << " " << m_mode_desc << " " << obj - << " : object has a valid oi attr with a mismatched name, " - << " obc->obs.oi.soid: " << obc->obs.oi.soid; + << m_pg_id << " " << m_mode_desc << " " << obj + << " : object has a valid oi attr with a mismatched name, " + << " obc->obs.oi.soid: " << obc->obs.oi.soid; num_digest_updates_pending--; continue; } @@ -88,9 +90,9 @@ void PrimaryLogScrub::submit_digest_fixes(const digests_fixes_t& fixes) ctx->register_on_success([this]() { if ((num_digest_updates_pending >= 1) && - (--num_digest_updates_pending == 0)) { - m_osds->queue_scrub_digest_update(m_pl_pg, - m_pl_pg->is_scrub_blocking_ops()); + (--num_digest_updates_pending == 0)) { + m_osds->queue_scrub_digest_update(m_pl_pg, + m_pl_pg->is_scrub_blocking_ops()); } }); @@ -110,10 +112,9 @@ void PrimaryLogScrub::_scrub_finish() { auto& info = m_pg->get_pg_info(ScrubberPasskey{}); ///< a temporary alias - dout(10) << __func__ - << " info stats: " << (info.stats.stats_invalid ? "invalid" : "valid") - << " m_is_repair: " << m_is_repair - << dendl; + dout(10) << __func__ << " info stats: " + << (info.stats.stats_invalid ? 
"invalid" : "valid") + << " m_is_repair: " << m_is_repair << dendl; if (info.stats.stats_invalid) { m_pl_pg->recovery_state.update_stats([=](auto& history, auto& stats) { @@ -138,21 +139,26 @@ void PrimaryLogScrub::_scrub_finish() << m_scrub_cstat.sum.num_objects_pinned << "/" << info.stats.stats.sum.num_objects_pinned << " pinned, " << m_scrub_cstat.sum.num_objects_hit_set_archive << "/" - << info.stats.stats.sum.num_objects_hit_set_archive << " hit_set_archive, " - << m_scrub_cstat.sum.num_bytes << "/" << info.stats.stats.sum.num_bytes - << " bytes, " << m_scrub_cstat.sum.num_objects_manifest << "/" + << info.stats.stats.sum.num_objects_hit_set_archive + << " hit_set_archive, " << m_scrub_cstat.sum.num_bytes << "/" + << info.stats.stats.sum.num_bytes << " bytes, " + << m_scrub_cstat.sum.num_objects_manifest << "/" << info.stats.stats.sum.num_objects_manifest << " manifest objects, " << m_scrub_cstat.sum.num_bytes_hit_set_archive << "/" - << info.stats.stats.sum.num_bytes_hit_set_archive << " hit_set_archive bytes." - << dendl; + << info.stats.stats.sum.num_bytes_hit_set_archive + << " hit_set_archive bytes." 
<< dendl; if (m_scrub_cstat.sum.num_objects != info.stats.stats.sum.num_objects || - m_scrub_cstat.sum.num_object_clones != info.stats.stats.sum.num_object_clones || - (m_scrub_cstat.sum.num_objects_dirty != info.stats.stats.sum.num_objects_dirty && + m_scrub_cstat.sum.num_object_clones != + info.stats.stats.sum.num_object_clones || + (m_scrub_cstat.sum.num_objects_dirty != + info.stats.stats.sum.num_objects_dirty && !info.stats.dirty_stats_invalid) || - (m_scrub_cstat.sum.num_objects_omap != info.stats.stats.sum.num_objects_omap && + (m_scrub_cstat.sum.num_objects_omap != + info.stats.stats.sum.num_objects_omap && !info.stats.omap_stats_invalid) || - (m_scrub_cstat.sum.num_objects_pinned != info.stats.stats.sum.num_objects_pinned && + (m_scrub_cstat.sum.num_objects_pinned != + info.stats.stats.sum.num_objects_pinned && !info.stats.pin_stats_invalid) || (m_scrub_cstat.sum.num_objects_hit_set_archive != info.stats.stats.sum.num_objects_hit_set_archive && @@ -166,23 +172,27 @@ void PrimaryLogScrub::_scrub_finish() m_scrub_cstat.sum.num_whiteouts != info.stats.stats.sum.num_whiteouts || m_scrub_cstat.sum.num_bytes != info.stats.stats.sum.num_bytes) { - m_osds->clog->error() << info.pgid << " " << m_mode_desc << " : stat mismatch, got " + m_osds->clog->error() << info.pgid << " " << m_mode_desc + << " : stat mismatch, got " << m_scrub_cstat.sum.num_objects << "/" << info.stats.stats.sum.num_objects << " objects, " << m_scrub_cstat.sum.num_object_clones << "/" - << info.stats.stats.sum.num_object_clones << " clones, " - << m_scrub_cstat.sum.num_objects_dirty << "/" - << info.stats.stats.sum.num_objects_dirty << " dirty, " - << m_scrub_cstat.sum.num_objects_omap << "/" - << info.stats.stats.sum.num_objects_omap << " omap, " - << m_scrub_cstat.sum.num_objects_pinned << "/" - << info.stats.stats.sum.num_objects_pinned << " pinned, " - << m_scrub_cstat.sum.num_objects_hit_set_archive << "/" + << info.stats.stats.sum.num_object_clones + << " clones, " << 
m_scrub_cstat.sum.num_objects_dirty + << "/" << info.stats.stats.sum.num_objects_dirty + << " dirty, " << m_scrub_cstat.sum.num_objects_omap + << "/" << info.stats.stats.sum.num_objects_omap + << " omap, " << m_scrub_cstat.sum.num_objects_pinned + << "/" << info.stats.stats.sum.num_objects_pinned + << " pinned, " + << m_scrub_cstat.sum.num_objects_hit_set_archive + << "/" << info.stats.stats.sum.num_objects_hit_set_archive - << " hit_set_archive, " << m_scrub_cstat.sum.num_whiteouts - << "/" << info.stats.stats.sum.num_whiteouts << " whiteouts, " - << m_scrub_cstat.sum.num_bytes << "/" - << info.stats.stats.sum.num_bytes << " bytes, " + << " hit_set_archive, " + << m_scrub_cstat.sum.num_whiteouts << "/" + << info.stats.stats.sum.num_whiteouts + << " whiteouts, " << m_scrub_cstat.sum.num_bytes + << "/" << info.stats.stats.sum.num_bytes << " bytes, " << m_scrub_cstat.sum.num_objects_manifest << "/" << info.stats.stats.sum.num_objects_manifest << " manifest objects, " @@ -212,7 +222,8 @@ void PrimaryLogScrub::_scrub_finish() m_pl_pg->object_contexts.clear(); } -PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) : PgScrubber{pg}, m_pl_pg{pg} {} +PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) : PgScrubber{pg}, m_pl_pg{pg} +{} void PrimaryLogScrub::_scrub_clear_state() { @@ -220,22 +231,27 @@ void PrimaryLogScrub::_scrub_clear_state() m_scrub_cstat = object_stat_collection_t(); } -void PrimaryLogScrub::stats_of_handled_objects(const object_stat_sum_t& delta_stats, - const hobject_t& soid) +void PrimaryLogScrub::stats_of_handled_objects( + const object_stat_sum_t& delta_stats, + const hobject_t& soid) { - // We scrub objects in hobject_t order, so objects before m_start have already been - // scrubbed and their stats have already been added to the scrubber. Objects after that - // point haven't been included in the scrubber's stats accounting yet, so they will be - // included when the scrubber gets to that object. 
+ // We scrub objects in hobject_t order, so objects before m_start have already + // been scrubbed and their stats have already been added to the scrubber. + // Objects after that point haven't been included in the scrubber's stats + // accounting yet, so they will be included when the scrubber gets to that + // object. if (is_primary() && is_scrub_active()) { if (soid < m_start) { - dout(20) << fmt::format("{} {} < [{},{})", __func__, soid, m_start, m_end) << dendl; + dout(20) << fmt::format("{} {} < [{},{})", __func__, soid, m_start, m_end) + << dendl; m_scrub_cstat.add(delta_stats); } else { - dout(25) << fmt::format("{} {} >= [{},{})", __func__, soid, m_start, m_end) << dendl; + dout(25) + << fmt::format("{} {} >= [{},{})", __func__, soid, m_start, m_end) + << dendl; } } } diff --git a/src/osd/scrubber/PrimaryLogScrub.h b/src/osd/scrubber/PrimaryLogScrub.h index 90d1a49adcc0..5dfec2f7f5aa 100644 --- a/src/osd/scrubber/PrimaryLogScrub.h +++ b/src/osd/scrubber/PrimaryLogScrub.h @@ -14,8 +14,8 @@ #include "messages/MOSDRepScrubMap.h" #include "messages/MOSDScrub.h" #include "messages/MOSDScrubReserve.h" - #include "osd/OSD.h" + #include "scrub_machine.h" class PrimaryLogPG; @@ -42,7 +42,8 @@ class PrimaryLogScrub : public PgScrubber { void submit_digest_fixes(const digests_fixes_t& fixes) final; private: - // we know our PG is actually a PrimaryLogPG. Let's alias the pointer to that object: + // we know our PG is actually a PrimaryLogPG. 
Let's alias the pointer to that + // object: PrimaryLogPG* const m_pl_pg; // handle our part in stats collection diff --git a/src/osd/scrubber/ScrubStore.h b/src/osd/scrubber/ScrubStore.h index f3b5b5d98ddb..567badf608b6 100644 --- a/src/osd/scrubber/ScrubStore.h +++ b/src/osd/scrubber/ScrubStore.h @@ -4,11 +4,11 @@ #ifndef CEPH_SCRUB_RESULT_H #define CEPH_SCRUB_RESULT_H -#include "osd/SnapMapper.h" // for OSDriver #include "common/map_cacher.hpp" +#include "osd/SnapMapper.h" // for OSDriver namespace librados { - struct object_id_t; +struct object_id_t; } struct inconsistent_obj_wrapper; @@ -17,7 +17,7 @@ struct inconsistent_snapset_wrapper; namespace Scrub { class Store { -public: + public: ~Store(); static Store* create(ObjectStore* store, ObjectStore::Transaction* t, @@ -31,19 +31,25 @@ public: void add_error(int64_t pool, const inconsistent_snapset_wrapper& e); bool empty() const; - void flush(ObjectStore::Transaction *); - void cleanup(ObjectStore::Transaction *); - std::vector get_snap_errors(int64_t pool, - const librados::object_id_t& start, - uint64_t max_return) const; - std::vector get_object_errors(int64_t pool, - const librados::object_id_t& start, - uint64_t max_return) const; -private: + void flush(ObjectStore::Transaction*); + void cleanup(ObjectStore::Transaction*); + + std::vector get_snap_errors( + int64_t pool, + const librados::object_id_t& start, + uint64_t max_return) const; + + std::vector get_object_errors( + int64_t pool, + const librados::object_id_t& start, + uint64_t max_return) const; + + private: Store(const coll_t& coll, const ghobject_t& oid, ObjectStore* store); - std::vector get_errors(const std::string& start, const std::string& end, - uint64_t max_return) const; -private: + std::vector get_errors(const std::string& start, + const std::string& end, + uint64_t max_return) const; + private: const coll_t coll; const ghobject_t hoid; // a temp object holding mappings from seq-id to inconsistencies found in @@ -52,6 +58,6 @@ private: 
mutable MapCacher::MapCacher backend; std::map results; }; -} +} // namespace Scrub -#endif // CEPH_SCRUB_RESULT_H +#endif // CEPH_SCRUB_RESULT_H diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc index e34cdc9eff03..225f6011e7a1 100644 --- a/src/osd/scrubber/osd_scrub_sched.cc +++ b/src/osd/scrubber/osd_scrub_sched.cc @@ -19,15 +19,18 @@ using namespace ::std::literals; #define dout_prefix *_dout << "osd." << whoami << " " ScrubQueue::ScrubJob::ScrubJob(CephContext* cct, const spg_t& pg, int node_id) - : RefCountedObject{cct}, pgid{pg}, whoami{node_id}, cct{cct} + : RefCountedObject{cct} + , pgid{pg} + , whoami{node_id} + , cct{cct} {} // debug usage only ostream& operator<<(ostream& out, const ScrubQueue::ScrubJob& sjob) { out << sjob.pgid << ", " << sjob.schedule.scheduled_at - << " dead: " << sjob.schedule.deadline << " - " << sjob.registration_state() - << " / failure: " << sjob.resources_failure + << " dead: " << sjob.schedule.deadline << " - " + << sjob.registration_state() << " / failure: " << sjob.resources_failure << " / pen. t.o.: " << sjob.penalty_timeout << " / queue state: " << ScrubQueue::qu_state_text(sjob.state); @@ -64,7 +67,8 @@ std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is, return fmt::format("queued for {}scrub", (is_deep_expected ? "deep " : "")); } - return fmt::format("{}scrub scheduled @ {}", (is_deep_expected ? "deep " : ""), + return fmt::format("{}scrub scheduled @ {}", + (is_deep_expected ? 
"deep " : ""), schedule.scheduled_at); } @@ -80,7 +84,8 @@ std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is, ScrubQueue::ScrubQueue(CephContext* cct, OSDService& osds) - : cct{cct}, osd_service{osds} + : cct{cct} + , osd_service{osds} { // initialize the daily loadavg with current 15min loadavg if (double loadavgs[3]; getloadavg(loadavgs, 3) == 3) { @@ -128,8 +133,9 @@ void ScrubQueue::remove_from_osd_queue(ScrubJobRef scrub_job) << dendl; qu_state_t expected_state{qu_state_t::registered}; - auto ret = scrub_job->state.compare_exchange_strong(expected_state, - qu_state_t::unregistering); + auto ret = + scrub_job->state.compare_exchange_strong(expected_state, + qu_state_t::unregistering); if (ret) { @@ -141,7 +147,8 @@ void ScrubQueue::remove_from_osd_queue(ScrubJobRef scrub_job) // job wasn't in state 'registered' coming in dout(5) << "removing pg[" << scrub_job->pgid - << "] failed. State was: " << qu_state_text(expected_state) << dendl; + << "] failed. State was: " << qu_state_text(expected_state) + << dendl; } } @@ -299,8 +306,8 @@ std::string_view ScrubQueue::qu_state_text(qu_state_t st) Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub( Scrub::ScrubPreconds& preconds) { - dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / " << penalized.size() - << dendl; + dout(10) << " reg./pen. 
sizes: " << to_scrub.size() << " / " + << penalized.size() << dendl; utime_t now_is = ceph_clock_now(); @@ -322,7 +329,8 @@ Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub( restore_penalized = false; // remove the 'updated' flag from all entries - std::for_each(to_scrub.begin(), to_scrub.end(), + std::for_each(to_scrub.begin(), + to_scrub.end(), [](const auto& jobref) -> void { jobref->updated = false; }); // add failed scrub attempts to the penalized list @@ -343,8 +351,8 @@ Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub( // - we will try the penalized if (res == Scrub::schedule_result_t::none_ready && !penalized_copy.empty()) { res = select_from_group(penalized_copy, preconds, now_is); - dout(10) << "tried the penalized. Res: " << ScrubQueue::attempt_res_text(res) - << dendl; + dout(10) << "tried the penalized. Res: " + << ScrubQueue::attempt_res_text(res) << dendl; restore_penalized = true; } @@ -379,8 +387,9 @@ struct cmp_sched_time_t { } // namespace // called under lock -ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(ScrubQContainer& group, - utime_t time_now) +ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs( + ScrubQContainer& group, + utime_t time_now) { rm_unregistered_jobs(group); @@ -388,7 +397,9 @@ ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(ScrubQContainer& group ScrubQueue::ScrubQContainer ripes; ripes.reserve(group.size()); - std::copy_if(group.begin(), group.end(), std::back_inserter(ripes), + std::copy_if(group.begin(), + group.end(), + std::back_inserter(ripes), [time_now](const auto& jobref) -> bool { return jobref->schedule.scheduled_at <= time_now; }); @@ -408,7 +419,9 @@ ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(ScrubQContainer& group // not holding jobs_lock. 'group' is a copy of the actual list. 
Scrub::schedule_result_t ScrubQueue::select_from_group( - ScrubQContainer& group, const Scrub::ScrubPreconds& preconds, utime_t now_is) + ScrubQContainer& group, + const Scrub::ScrubPreconds& preconds, + utime_t now_is) { dout(15) << "jobs #: " << group.size() << dendl; @@ -429,8 +442,9 @@ Scrub::schedule_result_t ScrubQueue::select_from_group( // we have a candidate to scrub. We turn to the OSD to verify that the PG // configuration allows the specified type of scrub, and to initiate the // scrub. - switch (osd_service.initiate_a_scrub(candidate->pgid, - preconds.allow_requested_repair_only)) { + switch ( + osd_service.initiate_a_scrub(candidate->pgid, + preconds.allow_requested_repair_only)) { case Scrub::schedule_result_t::scrub_initiated: // the happy path. We are done @@ -544,8 +558,9 @@ bool ScrubQueue::scrub_load_below_threshold() const // allow scrub if below daily avg and currently decreasing if (loadavgs[0] < daily_loadavg && loadavgs[0] < loadavgs[2]) { - dout(20) << "loadavg " << loadavgs[0] << " < daily_loadavg " << daily_loadavg - << " and < 15m avg " << loadavgs[2] << " = yes" << dendl; + dout(20) << "loadavg " << loadavgs[0] << " < daily_loadavg " + << daily_loadavg << " and < 15m avg " << loadavgs[2] << " = yes" + << dendl; return true; } @@ -575,7 +590,9 @@ void ScrubQueue::scan_penalized(bool forgive_all, utime_t time_now) } else { auto forgiven_last = std::partition( - penalized.begin(), penalized.end(), [time_now](const auto& e) { + penalized.begin(), + penalized.end(), + [time_now](const auto& e) { return (*e).updated || ((*e).penalty_timeout <= time_now); }); @@ -599,9 +616,9 @@ bool ScrubQueue::scrub_time_permit(utime_t now) const time_t tt = now.sec(); localtime_r(&tt, &bdt); - bool day_permit = - isbetween_modulo(cct->_conf->osd_scrub_begin_week_day, - cct->_conf->osd_scrub_end_week_day, bdt.tm_wday); + bool day_permit = isbetween_modulo(cct->_conf->osd_scrub_begin_week_day, + cct->_conf->osd_scrub_end_week_day, + bdt.tm_wday); if 
(!day_permit) { dout(20) << "should run between week day " << cct->_conf->osd_scrub_begin_week_day << " - " @@ -610,9 +627,9 @@ bool ScrubQueue::scrub_time_permit(utime_t now) const return false; } - bool time_permit = - isbetween_modulo(cct->_conf->osd_scrub_begin_hour, - cct->_conf->osd_scrub_end_hour, bdt.tm_hour); + bool time_permit = isbetween_modulo(cct->_conf->osd_scrub_begin_hour, + cct->_conf->osd_scrub_end_hour, + bdt.tm_hour); dout(20) << "should run between " << cct->_conf->osd_scrub_begin_hour << " - " << cct->_conf->osd_scrub_end_hour << " now (" << bdt.tm_hour << ") = " << (time_permit ? "yes" : "no") << dendl; @@ -625,7 +642,8 @@ void ScrubQueue::ScrubJob::dump(ceph::Formatter* f) const f->dump_stream("pgid") << pgid; f->dump_stream("sched_time") << schedule.scheduled_at; f->dump_stream("deadline") << schedule.deadline; - f->dump_bool("forced", schedule.scheduled_at == PgScrubber::scrub_must_stamp()); + f->dump_bool("forced", + schedule.scheduled_at == PgScrubber::scrub_must_stamp()); f->close_section(); } @@ -636,10 +654,12 @@ void ScrubQueue::dump_scrubs(ceph::Formatter* f) const f->open_array_section("scrubs"); - std::for_each(to_scrub.cbegin(), to_scrub.cend(), - [&f](const ScrubJobRef& j) { j->dump(f); }); + std::for_each(to_scrub.cbegin(), to_scrub.cend(), [&f](const ScrubJobRef& j) { + j->dump(f); + }); - std::for_each(penalized.cbegin(), penalized.cend(), + std::for_each(penalized.cbegin(), + penalized.cend(), [&f](const ScrubJobRef& j) { j->dump(f); }); f->close_section(); @@ -653,9 +673,13 @@ ScrubQueue::ScrubQContainer ScrubQueue::list_registered_jobs() const std::lock_guard lck{jobs_lock}; - std::copy_if(to_scrub.begin(), to_scrub.end(), std::back_inserter(all_jobs), + std::copy_if(to_scrub.begin(), + to_scrub.end(), + std::back_inserter(all_jobs), registered_job); - std::copy_if(penalized.begin(), penalized.end(), std::back_inserter(all_jobs), + std::copy_if(penalized.begin(), + penalized.end(), + std::back_inserter(all_jobs), 
registered_job); return all_jobs; @@ -709,9 +733,9 @@ bool ScrubQueue::inc_scrubs_remote() std::lock_guard lck{resource_lock}; if (scrubs_local + scrubs_remote < cct->_conf->osd_max_scrubs) { - dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote + 1) << " (max " - << cct->_conf->osd_max_scrubs << ", local " << scrubs_local << ")" - << dendl; + dout(20) << ": " << scrubs_remote << " -> " << (scrubs_remote + 1) + << " (max " << cct->_conf->osd_max_scrubs << ", local " + << scrubs_local << ")" << dendl; ++scrubs_remote; return true; } diff --git a/src/osd/scrubber/osd_scrub_sched.h b/src/osd/scrubber/osd_scrub_sched.h index 98309eb47fb1..c15df6b55fe3 100644 --- a/src/osd/scrubber/osd_scrub_sched.h +++ b/src/osd/scrubber/osd_scrub_sched.h @@ -178,9 +178,9 @@ class ScrubQueue { struct ScrubJob final : public RefCountedObject { /** - * a time scheduled for scrub, and a deadline: The scrub could be delayed if - * system load is too high (but not if after the deadline),or if trying to - * scrub out of scrub hours. + * a time scheduled for scrub, and a deadline: The scrub could be delayed + * if system load is too high (but not if after the deadline),or if trying + * to scrub out of scrub hours. */ scrub_schedule_t schedule; @@ -354,8 +354,8 @@ class ScrubQueue { * (read - with higher value) configuration element * (osd_scrub_extended_sleep). 
*/ - double scrub_sleep_time( - bool must_scrub) const; /// \todo (future) return milliseconds + double scrub_sleep_time(bool must_scrub) const; /// \todo (future) return + /// milliseconds /** * called every heartbeat to update the "daily" load average @@ -450,7 +450,8 @@ class ScrubQueue { */ void move_failed_pgs(utime_t now_is); - Scrub::schedule_result_t select_from_group(ScrubQContainer& group, - const Scrub::ScrubPreconds& preconds, - utime_t now_is); + Scrub::schedule_result_t select_from_group( + ScrubQContainer& group, + const Scrub::ScrubPreconds& preconds, + utime_t now_is); }; diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 5dcc17b8138c..c4739e640c8d 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -1,7 +1,7 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=2 sw=2 smarttab -#include "./pg_scrubber.h" // the '.' notation used to affect clang-format order +#include "./pg_scrubber.h" // '.' notation used to affect clang-format order #include #include @@ -15,10 +15,10 @@ #include "messages/MOSDRepScrubMap.h" #include "messages/MOSDScrub.h" #include "messages/MOSDScrubReserve.h" - #include "osd/OSD.h" #include "osd/PG.h" #include "osd/osd_types_fmt.h" + #include "ScrubStore.h" #include "scrub_backend.h" #include "scrub_machine.h" @@ -83,8 +83,8 @@ ostream& operator<<(ostream& out, const requested_scrub_t& sf) /* * if the incoming message is from a previous interval, it must mean - * PrimaryLogPG::on_change() was called when that interval ended. We can safely discard - * the stale message. + * PrimaryLogPG::on_change() was called when that interval ended. We can safely + * discard the stale message. */ bool PgScrubber::check_interval(epoch_t epoch_to_verify) { @@ -94,8 +94,8 @@ bool PgScrubber::check_interval(epoch_t epoch_to_verify) bool PgScrubber::is_message_relevant(epoch_t epoch_to_verify) { if (!m_active) { - // not scrubbing. 
We can assume that the scrub was already terminated, and we - // can silently discard the incoming event. + // not scrubbing. We can assume that the scrub was already terminated, and + // we can silently discard the incoming event. return false; } @@ -163,27 +163,31 @@ bool PgScrubber::should_abort() const * a note re the checks performed before sending scrub-initiating messages: * * For those ('StartScrub', 'AfterRepairScrub') scrub-initiation messages that - * possibly were in the queue while the PG changed state and became unavailable for - * scrubbing: + * possibly were in the queue while the PG changed state and became unavailable + * for scrubbing: * - * The check_interval() catches all major changes to the PG. As for the other conditions - * we may check (and see is_message_relevant() above): + * The check_interval() catches all major changes to the PG. As for the other + * conditions we may check (and see is_message_relevant() above): * * - we are not 'active' yet, so must not check against is_active(), and: * - * - the 'abort' flags were just verified (when the triggering message was queued). As - * those are only modified in human speeds - they need not be queried again. + * - the 'abort' flags were just verified (when the triggering message was + * queued). As those are only modified in human speeds - they need not be + * queried again. * - * Some of the considerations above are also relevant to the replica-side initiation + * Some of the considerations above are also relevant to the replica-side + * initiation * ('StartReplica' & 'StartReplicaNoWait'). 
*/ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued) { dout(15) << __func__ << " epoch: " << epoch_queued << dendl; - // we may have lost our Primary status while the message languished in the queue + // we may have lost our Primary status while the message languished in the + // queue if (check_interval(epoch_queued)) { - dout(10) << "scrubber event -->> StartScrub epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> StartScrub epoch: " << epoch_queued + << dendl; reset_epoch(epoch_queued); m_fsm->process_event(StartScrub{}); dout(10) << "scrubber event --<< StartScrub" << dendl; @@ -197,9 +201,11 @@ void PgScrubber::initiate_regular_scrub(epoch_t epoch_queued) void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued) { dout(15) << __func__ << " epoch: " << epoch_queued << dendl; - // we may have lost our Primary status while the message languished in the queue + // we may have lost our Primary status while the message languished in the + // queue if (check_interval(epoch_queued)) { - dout(10) << "scrubber event -->> AfterRepairScrub epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> AfterRepairScrub epoch: " << epoch_queued + << dendl; reset_epoch(epoch_queued); m_fsm->process_event(AfterRepairScrub{}); dout(10) << "scrubber event --<< AfterRepairScrub" << dendl; @@ -212,7 +218,8 @@ void PgScrubber::initiate_scrub_after_repair(epoch_t epoch_queued) void PgScrubber::send_scrub_unblock(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (is_message_relevant(epoch_queued)) { m_fsm->process_event(Unblocked{}); } @@ -221,14 +228,16 @@ void PgScrubber::send_scrub_unblock(epoch_t epoch_queued) void PgScrubber::send_scrub_resched(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << 
"scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (is_message_relevant(epoch_queued)) { m_fsm->process_event(InternalSchedScrub{}); } dout(10) << "scrubber event --<< " << __func__ << dendl; } -void PgScrubber::send_start_replica(epoch_t epoch_queued, Scrub::act_token_t token) +void PgScrubber::send_start_replica(epoch_t epoch_queued, + Scrub::act_token_t token) { dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << " token: " << token << dendl; @@ -250,7 +259,8 @@ void PgScrubber::send_start_replica(epoch_t epoch_queued, Scrub::act_token_t tok dout(10) << "scrubber event --<< " << __func__ << dendl; } -void PgScrubber::send_sched_replica(epoch_t epoch_queued, Scrub::act_token_t token) +void PgScrubber::send_sched_replica(epoch_t epoch_queued, + Scrub::act_token_t token) { dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << " token: " << token << dendl; @@ -263,7 +273,8 @@ void PgScrubber::send_sched_replica(epoch_t epoch_queued, Scrub::act_token_t tok void PgScrubber::active_pushes_notification(epoch_t epoch_queued) { // note: Primary only - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (is_message_relevant(epoch_queued)) { m_fsm->process_event(ActivePushesUpd{}); } @@ -273,7 +284,8 @@ void PgScrubber::active_pushes_notification(epoch_t epoch_queued) void PgScrubber::update_applied_notification(epoch_t epoch_queued) { // note: Primary only - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (is_message_relevant(epoch_queued)) { m_fsm->process_event(UpdatesApplied{}); } @@ -283,7 +295,8 @@ void PgScrubber::update_applied_notification(epoch_t epoch_queued) void PgScrubber::digest_update_notification(epoch_t epoch_queued) 
{ // note: Primary only - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (is_message_relevant(epoch_queued)) { m_fsm->process_event(DigestUpdate{}); } @@ -292,7 +305,8 @@ void PgScrubber::digest_update_notification(epoch_t epoch_queued) void PgScrubber::send_local_map_done(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (is_message_relevant(epoch_queued)) { m_fsm->process_event(Scrub::IntLocalMapDone{}); } @@ -301,7 +315,8 @@ void PgScrubber::send_local_map_done(epoch_t epoch_queued) void PgScrubber::send_replica_maps_ready(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (is_message_relevant(epoch_queued)) { m_fsm->process_event(GotReplicas{}); } @@ -310,7 +325,8 @@ void PgScrubber::send_replica_maps_ready(epoch_t epoch_queued) void PgScrubber::send_replica_pushes_upd(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (check_interval(epoch_queued)) { m_fsm->process_event(ReplicaPushesUpd{}); } @@ -319,7 +335,8 @@ void PgScrubber::send_replica_pushes_upd(epoch_t epoch_queued) void PgScrubber::send_remotes_reserved(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; // note: scrub is not active yet if (check_interval(epoch_queued)) { m_fsm->process_event(RemotesReserved{}); @@ -329,7 +346,8 @@ void 
PgScrubber::send_remotes_reserved(epoch_t epoch_queued) void PgScrubber::send_reservation_failure(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (check_interval(epoch_queued)) { // do not check for 'active'! m_fsm->process_event(ReservationFailure{}); } @@ -338,7 +356,8 @@ void PgScrubber::send_reservation_failure(epoch_t epoch_queued) void PgScrubber::send_full_reset(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; m_fsm->process_event(Scrub::FullReset{}); @@ -347,7 +366,8 @@ void PgScrubber::send_full_reset(epoch_t epoch_queued) void PgScrubber::send_chunk_free(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (check_interval(epoch_queued)) { m_fsm->process_event(Scrub::SelectedChunkFree{}); } @@ -356,7 +376,8 @@ void PgScrubber::send_chunk_free(epoch_t epoch_queued) void PgScrubber::send_chunk_busy(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (check_interval(epoch_queued)) { m_fsm->process_event(Scrub::ChunkIsBusy{}); } @@ -365,7 +386,8 @@ void PgScrubber::send_chunk_busy(epoch_t epoch_queued) void PgScrubber::send_get_next_chunk(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; if (is_message_relevant(epoch_queued)) { m_fsm->process_event(Scrub::NextChunk{}); } @@ -374,7 +396,8 @@ void 
PgScrubber::send_get_next_chunk(epoch_t epoch_queued) void PgScrubber::send_scrub_is_finished(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; // can't check for "active" @@ -385,7 +408,8 @@ void PgScrubber::send_scrub_is_finished(epoch_t epoch_queued) void PgScrubber::send_maps_compared(epoch_t epoch_queued) { - dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued << dendl; + dout(10) << "scrubber event -->> " << __func__ << " epoch: " << epoch_queued + << dendl; m_fsm->process_event(Scrub::MapsCompared{}); @@ -401,7 +425,8 @@ bool PgScrubber::is_reserving() const void PgScrubber::reset_epoch(epoch_t epoch_queued) { - dout(10) << __func__ << " state deep? " << state_test(PG_STATE_DEEP_SCRUB) << dendl; + dout(10) << __func__ << " state deep? " << state_test(PG_STATE_DEEP_SCRUB) + << dendl; m_fsm->assert_not_active(); m_epoch_start = epoch_queued; @@ -410,23 +435,27 @@ void PgScrubber::reset_epoch(epoch_t epoch_queued) update_op_mode_text(); } -unsigned int PgScrubber::scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const +unsigned int PgScrubber::scrub_requeue_priority( + Scrub::scrub_prio_t with_priority) const { unsigned int qu_priority = m_flags.priority; if (with_priority == Scrub::scrub_prio_t::high_priority) { qu_priority = - std::max(qu_priority, (unsigned int)m_pg->get_cct()->_conf->osd_client_op_priority); + std::max(qu_priority, + (unsigned int)m_pg->get_cct()->_conf->osd_client_op_priority); } return qu_priority; } -unsigned int PgScrubber::scrub_requeue_priority(Scrub::scrub_prio_t with_priority, - unsigned int suggested_priority) const +unsigned int PgScrubber::scrub_requeue_priority( + Scrub::scrub_prio_t with_priority, + unsigned int suggested_priority) const { if (with_priority == Scrub::scrub_prio_t::high_priority) { - suggested_priority = std::max( - 
suggested_priority, (unsigned int)m_pg->get_cct()->_conf->osd_client_op_priority); + suggested_priority = + std::max(suggested_priority, + (unsigned int)m_pg->get_cct()->_conf->osd_client_op_priority); } return suggested_priority; } @@ -464,16 +493,17 @@ void PgScrubber::rm_from_osd_scrubbing() void PgScrubber::on_primary_change(const requested_scrub_t& request_flags) { dout(10) << __func__ << (is_primary() ? " Primary " : " Replica ") - << " flags: " << request_flags << dendl; + << " flags: " << request_flags << dendl; if (!m_scrub_job) { return; } - dout(15) << __func__ << " scrub-job state: " << m_scrub_job->state_desc() << dendl; + dout(15) << __func__ << " scrub-job state: " << m_scrub_job->state_desc() + << dendl; if (is_primary()) { - auto suggested = determine_scrub_time(request_flags); + auto suggested = determine_scrub_time(request_flags); m_osds->get_scrub_services().register_with_osd(m_scrub_job, suggested); } else { m_osds->get_scrub_services().remove_from_osd_queue(m_scrub_job); @@ -482,7 +512,8 @@ void PgScrubber::on_primary_change(const requested_scrub_t& request_flags) dout(15) << __func__ << " done " << registration_state() << dendl; } -void PgScrubber::on_maybe_registration_change(const requested_scrub_t& request_flags) +void PgScrubber::on_maybe_registration_change( + const requested_scrub_t& request_flags) { dout(10) << __func__ << (is_primary() ? " Primary " : " Replica/other ") << registration_state() << " flags: " << request_flags << dendl; @@ -497,7 +528,7 @@ void PgScrubber::update_scrub_job(const requested_scrub_t& request_flags) { // verify that the 'in_q' status matches our "Primariority" - if (m_scrub_job && is_primary() && !m_scrub_job->in_queues) { + if (m_scrub_job && is_primary() && !m_scrub_job->in_queues) { dout(1) << __func__ << " !!! primary but not scheduled! 
" << dendl; } } @@ -510,13 +541,13 @@ void PgScrubber::update_scrub_job(const requested_scrub_t& request_flags) dout(15) << __func__ << " done " << registration_state() << dendl; } -ScrubQueue::sched_params_t -PgScrubber::determine_scrub_time(const requested_scrub_t& request_flags) const +ScrubQueue::sched_params_t PgScrubber::determine_scrub_time( + const requested_scrub_t& request_flags) const { ScrubQueue::sched_params_t res; if (!is_primary()) { - return res; // with ok_to_scrub set to 'false' + return res; // with ok_to_scrub set to 'false' } if (request_flags.must_scrub || request_flags.need_auto) { @@ -527,7 +558,7 @@ PgScrubber::determine_scrub_time(const requested_scrub_t& request_flags) const // we do not need the interval data in this case } else if (m_pg->info.stats.stats_invalid && - m_pg->get_cct()->_conf->osd_scrub_invalid_stats) { + m_pg->get_cct()->_conf->osd_scrub_invalid_stats) { res.proposed_time = ceph_clock_now(); res.is_must = ScrubQueue::must_scrub_t::mandatory; @@ -539,12 +570,12 @@ PgScrubber::determine_scrub_time(const requested_scrub_t& request_flags) const m_pg->get_pool().info.opts.value_or(pool_opts_t::SCRUB_MAX_INTERVAL, 0.0); } - dout(15) << __func__ << " suggested: " << res.proposed_time << " hist: " - << m_pg->info.history.last_scrub_stamp << " v:" << m_pg->info.stats.stats_invalid - << " / " << m_pg->cct->_conf->osd_scrub_invalid_stats << " must:" - << (res.is_must==ScrubQueue::must_scrub_t::mandatory ? "y" : "n" ) - << " pool min: " << res.min_interval - << dendl; + dout(15) << __func__ << " suggested: " << res.proposed_time + << " hist: " << m_pg->info.history.last_scrub_stamp + << " v:" << m_pg->info.stats.stats_invalid << " / " + << m_pg->cct->_conf->osd_scrub_invalid_stats << " must:" + << (res.is_must == ScrubQueue::must_scrub_t::mandatory ? 
"y" : "n") + << " pool min: " << res.min_interval << dendl; return res; } @@ -552,21 +583,23 @@ void PgScrubber::scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type, requested_scrub_t& req_flags) { - dout(10) << __func__ << (scrub_level == scrub_level_t::deep ? " deep " : " shallow ") - << (scrub_type == scrub_type_t::do_repair ? " repair-scrub " : " not-repair ") + dout(10) << __func__ + << (scrub_level == scrub_level_t::deep ? " deep " : " shallow ") + << (scrub_type == scrub_type_t::do_repair ? " repair-scrub " + : " not-repair ") << " prev stamp: " << m_scrub_job->get_sched_time() - << " registered? " << registration_state() - << dendl; + << " registered? " << registration_state() << dendl; req_flags.must_scrub = true; - req_flags.must_deep_scrub = - (scrub_level == scrub_level_t::deep) || (scrub_type == scrub_type_t::do_repair); + req_flags.must_deep_scrub = (scrub_level == scrub_level_t::deep) || + (scrub_type == scrub_type_t::do_repair); req_flags.must_repair = (scrub_type == scrub_type_t::do_repair); // User might intervene, so clear this req_flags.need_auto = false; req_flags.req_scrub = true; - dout(20) << __func__ << " pg(" << m_pg_id << ") planned:" << req_flags << dendl; + dout(20) << __func__ << " pg(" << m_pg_id << ") planned:" << req_flags + << dendl; update_scrub_job(req_flags); m_pg->publish_stats_to_osd(); @@ -618,7 +651,8 @@ void PgScrubber::on_applied_when_primary(const eversion_t& applied_version) { // we are only interested in updates if we are the Primary, and in state // WaitLastUpdate - if (m_fsm->is_accepting_updates() && (applied_version >= m_subset_last_update)) { + if (m_fsm->is_accepting_updates() && + (applied_version >= m_subset_last_update)) { m_osds->queue_scrub_applied_update(m_pg, m_pg->is_scrub_blocking_ops()); dout(15) << __func__ << " update: " << applied_version << " vs. 
required: " << m_subset_last_update << dendl; @@ -653,11 +687,15 @@ bool PgScrubber::select_range() * left end of the range if we are a tier because they may legitimately * not exist (see _scrub). */ - int min_idx = static_cast(std::max( - 3, m_pg->get_cct()->_conf->osd_scrub_chunk_min / (int)preemption_data.chunk_divisor())); + int min_idx = static_cast( + std::max(3, + m_pg->get_cct()->_conf->osd_scrub_chunk_min / + (int)preemption_data.chunk_divisor())); - int max_idx = static_cast(std::max(min_idx, m_pg->get_cct()->_conf->osd_scrub_chunk_max / - (int)preemption_data.chunk_divisor())); + int max_idx = static_cast( + std::max(min_idx, + m_pg->get_cct()->_conf->osd_scrub_chunk_max / + (int)preemption_data.chunk_divisor())); dout(10) << __func__ << " Min: " << min_idx << " Max: " << max_idx << " Div: " << preemption_data.chunk_divisor() << dendl; @@ -665,7 +703,10 @@ bool PgScrubber::select_range() hobject_t start = m_start; hobject_t candidate_end; std::vector objects; - int ret = m_pg->get_pgbackend()->objects_list_partial(start, min_idx, max_idx, &objects, + int ret = m_pg->get_pgbackend()->objects_list_partial(start, + min_idx, + max_idx, + &objects, &candidate_end); ceph_assert(ret >= 0); @@ -706,8 +747,8 @@ bool PgScrubber::select_range() if (m_end > m_max_end) m_max_end = m_end; - dout(15) << __func__ << " range selected: " << m_start << " //// " << m_end << " //// " - << m_max_end << dendl; + dout(15) << __func__ << " range selected: " << m_start << " //// " << m_end + << " //// " << m_max_end << dendl; // debug: be 'blocked' if told so by the 'pg scrub_debug block' asok command if (m_debug_blockrange > 0) { @@ -761,7 +802,8 @@ bool PgScrubber::write_blocked_by_scrub(const hobject_t& soid) return true; } -bool PgScrubber::range_intersects_scrub(const hobject_t& start, const hobject_t& end) +bool PgScrubber::range_intersects_scrub(const hobject_t& start, + const hobject_t& end) { // does [start, end] intersect [scrubber.start, scrubber.m_max_end) return 
(start < m_max_end && end >= m_start); @@ -769,7 +811,9 @@ bool PgScrubber::range_intersects_scrub(const hobject_t& start, const hobject_t& Scrub::BlockedRangeWarning PgScrubber::acquire_blocked_alarm() { - return std::make_unique(m_osds, ceph::timespan{300s}, m_pg_id); + return std::make_unique(m_osds, + ceph::timespan{300s}, + m_pg_id); } /** @@ -810,13 +854,15 @@ void PgScrubber::add_delayed_scheduling() if (!pg) { lgeneric_subdout(g_ceph_context, osd, 10) << "scrub_requeue_callback: Could not find " - << "PG " << pgid << " can't complete scrub requeue after sleep" << dendl; + << "PG " << pgid << " can't complete scrub requeue after sleep" + << dendl; return; } scrbr->m_needs_sleep = true; lgeneric_dout(scrbr->get_pg_cct(), 7) << "scrub_requeue_callback: slept for " - << ceph_clock_now() - scrbr->m_sleep_started_at << ", re-queuing scrub" << dendl; + << ceph_clock_now() - scrbr->m_sleep_started_at << ", re-queuing scrub" + << dendl; scrbr->m_sleep_started_at = utime_t{}; osds->queue_for_scrub_resched(&(*pg), Scrub::scrub_prio_t::low_priority); @@ -835,9 +881,11 @@ void PgScrubber::add_delayed_scheduling() eversion_t PgScrubber::search_log_for_updates() const { auto& projected = m_pg->projected_log.log; - auto pi = find_if( - projected.crbegin(), projected.crend(), - [this](const auto& e) -> bool { return e.soid >= m_start && e.soid < m_end; }); + auto pi = find_if(projected.crbegin(), + projected.crend(), + [this](const auto& e) -> bool { + return e.soid >= m_start && e.soid < m_end; + }); if (pi != projected.crend()) return pi->version; @@ -858,9 +906,8 @@ eversion_t PgScrubber::search_log_for_updates() const void PgScrubber::get_replicas_maps(bool replica_can_preempt) { dout(10) << __func__ << " started in epoch/interval: " << m_epoch_start << "/" - << m_interval_start - << " pg same_interval_since: " << m_pg->info.history.same_interval_since - << dendl; + << m_interval_start << " pg same_interval_since: " + << m_pg->info.history.same_interval_since << dendl; 
m_primary_scrubmap_pos.reset(); @@ -871,7 +918,11 @@ void PgScrubber::get_replicas_maps(bool replica_can_preempt) continue; m_maps_status.mark_replica_map_request(i); - _request_scrub_map(i, m_subset_last_update, m_start, m_end, m_is_deep, + _request_scrub_map(i, + m_subset_last_update, + m_start, + m_end, + m_is_deep, replica_can_preempt); } @@ -905,9 +956,11 @@ std::string PgScrubber::dump_awaited_maps() const void PgScrubber::update_op_mode_text() { auto visible_repair = state_test(PG_STATE_REPAIR); - m_mode_desc = (visible_repair ? "repair" : (m_is_deep ? "deep-scrub" : "scrub")); + m_mode_desc = + (visible_repair ? "repair" : (m_is_deep ? "deep-scrub" : "scrub")); - dout(10) << __func__ << ": repair: visible: " << (visible_repair ? "true" : "false") + dout(10) << __func__ + << ": repair: visible: " << (visible_repair ? "true" : "false") << ", internal: " << (m_is_repair ? "true" : "false") << ". Displayed: " << m_mode_desc << dendl; } @@ -923,10 +976,16 @@ void PgScrubber::_request_scrub_map(pg_shard_t replica, dout(10) << __func__ << " scrubmap from osd." << replica << (deep ? " deep" : " shallow") << dendl; - auto repscrubop = - new MOSDRepScrub(spg_t(m_pg->info.pgid.pgid, replica.shard), version, - get_osdmap_epoch(), m_pg->get_last_peering_reset(), start, end, deep, - allow_preemption, m_flags.priority, m_pg->ops_blocked_by_scrub()); + auto repscrubop = new MOSDRepScrub(spg_t(m_pg->info.pgid.pgid, replica.shard), + version, + get_osdmap_epoch(), + m_pg->get_last_peering_reset(), + start, + end, + deep, + allow_preemption, + m_flags.priority, + m_pg->ops_blocked_by_scrub()); // default priority. We want the replica-scrub processed prior to any recovery // or client io messages (we are holding a lock!) 
@@ -940,7 +999,8 @@ void PgScrubber::cleanup_store(ObjectStore::Transaction* t) struct OnComplete : Context { std::unique_ptr store; - explicit OnComplete(std::unique_ptr&& store) : store(std::move(store)) + explicit OnComplete(std::unique_ptr&& store) + : store(std::move(store)) {} void finish(int) override {} }; @@ -1002,13 +1062,13 @@ int PgScrubber::build_primary_map_chunk() { epoch_t map_building_since = m_pg->get_osdmap_epoch(); dout(20) << __func__ << ": initiated at epoch " << map_building_since - << dendl; + << dendl; auto ret = build_scrub_map_chunk(m_be->get_primary_scrubmap(), - m_primary_scrubmap_pos, - m_start, - m_end, - m_is_deep); + m_primary_scrubmap_pos, + m_start, + m_end, + m_is_deep); if (ret == -EINPROGRESS) { // reschedule another round of asking the backend to collect the scrub data @@ -1021,29 +1081,36 @@ int PgScrubber::build_primary_map_chunk() int PgScrubber::build_replica_map_chunk() { dout(10) << __func__ << " interval start: " << m_interval_start - << " current token: " << m_current_token << " epoch: " << m_epoch_start - << " deep: " << m_is_deep << dendl; + << " current token: " << m_current_token + << " epoch: " << m_epoch_start << " deep: " << m_is_deep << dendl; ceph_assert(m_be); - auto ret = build_scrub_map_chunk(replica_scrubmap, replica_scrubmap_pos, m_start, m_end, + auto ret = build_scrub_map_chunk(replica_scrubmap, + replica_scrubmap_pos, + m_start, + m_end, m_is_deep); switch (ret) { case -EINPROGRESS: // must wait for the backend to finish. No external event source. - // (note: previous version used low priority here. Now switched to using the - // priority of the original message) - m_osds->queue_for_rep_scrub_resched(m_pg, m_replica_request_priority, - m_flags.priority, m_current_token); + // (note: previous version used low priority here. 
Now switched to using + // the priority of the original message) + m_osds->queue_for_rep_scrub_resched(m_pg, + m_replica_request_priority, + m_flags.priority, + m_current_token); break; case 0: { // finished! - auto required_fixes = m_be->replica_clean_meta( - replica_scrubmap, m_end.is_max(), m_start, *this); + auto required_fixes = m_be->replica_clean_meta(replica_scrubmap, + m_end.is_max(), + m_start, + *this); // actuate snap-mapper changes: apply_snap_mapper_fixes(required_fixes); @@ -1063,8 +1130,8 @@ int PgScrubber::build_replica_map_chunk() // negative retval: build_scrub_map_chunk() signalled an error // Pre-Pacific code ignored this option, treating it as a success. // \todo Add an error flag in the returning message. - dout(1) << "Error! Aborting. ActiveReplica::react(SchedReplica) Ret: " << ret - << dendl; + dout(1) << "Error! Aborting. ActiveReplica::react(SchedReplica) Ret: " + << ret << dendl; replica_handling_done(); // only in debug mode for now: assert(false && "backend error"); @@ -1074,8 +1141,11 @@ int PgScrubber::build_replica_map_chunk() return ret; } -int PgScrubber::build_scrub_map_chunk( - ScrubMap& map, ScrubMapBuilder& pos, hobject_t start, hobject_t end, bool deep) +int PgScrubber::build_scrub_map_chunk(ScrubMap& map, + ScrubMapBuilder& pos, + hobject_t start, + hobject_t end, + bool deep) { dout(10) << __func__ << " [" << start << "," << end << ") " << " pos " << pos << " Deep: " << deep << dendl; @@ -1088,14 +1158,17 @@ int PgScrubber::build_scrub_map_chunk( // objects vector rollback_obs; - pos.ret = - m_pg->get_pgbackend()->objects_list_range(start, end, &pos.ls, &rollback_obs); + pos.ret = m_pg->get_pgbackend()->objects_list_range(start, + end, + &pos.ls, + &rollback_obs); dout(10) << __func__ << " while pos empty " << pos.ret << dendl; if (pos.ret < 0) { dout(5) << "objects_list_range error: " << pos.ret << dendl; return pos.ret; } - dout(10) << __func__ << " pos.ls.empty()? " << (pos.ls.empty() ? 
"+" : "-") << dendl; + dout(10) << __func__ << " pos.ls.empty()? " << (pos.ls.empty() ? "+" : "-") + << dendl; if (pos.ls.empty()) { break; } @@ -1120,7 +1193,8 @@ int PgScrubber::build_scrub_map_chunk( ceph_assert(pos.done()); m_be->repair_oinfo_oid(map); - dout(20) << __func__ << " done, got " << map.objects.size() << " items" << dendl; + dout(20) << __func__ << " done, got " << map.objects.size() << " items" + << dendl; return 0; } @@ -1167,23 +1241,30 @@ void PgScrubber::apply_snap_mapper_fixes( // must remove the existing snap-set before inserting the correct one if (auto r = m_pg->snap_mapper.remove_oid(hoid, &t_drv); r < 0) { - derr << __func__ << ": remove_oid returned " << cpp_strerror(r) - << dendl; - ceph_abort(); + derr << __func__ << ": remove_oid returned " << cpp_strerror(r) + << dendl; + ceph_abort(); } m_osds->clog->error() << fmt::format( - "osd.{} found snap mapper error on pg {} oid {} snaps in mapper: {}, " - "oi: " - "{} ...repaired", - m_pg_whoami, m_pg_id, hoid, bogus_snaps, snaps); + "osd.{} found snap mapper error on pg {} oid {} snaps in mapper: {}, " + "oi: " + "{} ...repaired", + m_pg_whoami, + m_pg_id, + hoid, + bogus_snaps, + snaps); } else { m_osds->clog->error() << fmt::format( - "osd.{} found snap mapper error on pg {} oid {} snaps missing in " - "mapper, should be: {} ...repaired", - m_pg_whoami, m_pg_id, hoid, snaps); + "osd.{} found snap mapper error on pg {} oid {} snaps missing in " + "mapper, should be: {} ...repaired", + m_pg_whoami, + m_pg_id, + hoid, + snaps); } // now - insert the correct snap-set @@ -1203,9 +1284,9 @@ void PgScrubber::apply_snap_mapper_fixes( t.register_on_applied_sync(new C_SafeCond(my_lock, my_cond, &done, &e)); if (e = m_pg->osd->store->queue_transaction(m_pg->ch, std::move(t)); - e != 0) { + e != 0) { derr << __func__ << ": queue_transaction got " << cpp_strerror(e) - << dendl; + << dendl; } else { std::unique_lock l{my_lock}; my_cond.wait(l, [&done] { return done; }); @@ -1257,8 +1338,8 @@ void 
PgScrubber::replica_scrub_op(OpRequestRef op) op->mark_started(); auto msg = op->get_req(); dout(10) << __func__ << " pg:" << m_pg->pg_id - << " Msg: map_epoch:" << msg->map_epoch - << " min_epoch:" << msg->min_epoch << " deep?" << msg->deep << dendl; + << " Msg: map_epoch:" << msg->map_epoch + << " min_epoch:" << msg->min_epoch << " deep?" << msg->deep << dendl; // are we still processing a previous scrub-map request without noticing that // the interval changed? won't see it here, but rather at the reservation @@ -1266,8 +1347,8 @@ void PgScrubber::replica_scrub_op(OpRequestRef op) if (msg->map_epoch < m_pg->info.history.same_interval_since) { dout(10) << "replica_scrub_op discarding old replica_scrub from " - << msg->map_epoch << " < " - << m_pg->info.history.same_interval_since << dendl; + << msg->map_epoch << " < " + << m_pg->info.history.same_interval_since << dendl; // is there a general sync issue? are we holding a stale reservation? // not checking now - assuming we will actively react to interval change. @@ -1290,7 +1371,8 @@ void PgScrubber::replica_scrub_op(OpRequestRef op) scrub_clear_state(); m_osds->clog->warn() << fmt::format( - "{}: after a reset. Now handling the new OP", __func__); + "{}: after a reset. Now handling the new OP", + __func__); } // make sure the FSM is at NotActive m_fsm->assert_not_active(); @@ -1305,18 +1387,20 @@ void PgScrubber::replica_scrub_op(OpRequestRef op) m_is_deep = msg->deep; m_interval_start = m_pg->info.history.same_interval_since; m_replica_request_priority = msg->high_priority - ? Scrub::scrub_prio_t::high_priority - : Scrub::scrub_prio_t::low_priority; + ? Scrub::scrub_prio_t::high_priority + : Scrub::scrub_prio_t::low_priority; m_flags.priority = msg->priority ? msg->priority : m_pg->get_scrub_priority(); preemption_data.reset(); preemption_data.force_preemptability(msg->allow_preemption); - replica_scrubmap_pos.reset(); // needed? RRR + replica_scrubmap_pos.reset(); // needed? 
RRR set_queued_or_active(); - m_osds->queue_for_rep_scrub(m_pg, m_replica_request_priority, - m_flags.priority, m_current_token); + m_osds->queue_for_rep_scrub(m_pg, + m_replica_request_priority, + m_flags.priority, + m_current_token); } void PgScrubber::set_op_parameters(requested_scrub_t& request) @@ -1345,8 +1429,9 @@ void PgScrubber::set_op_parameters(requested_scrub_t& request) // m_is_repair is set for either 'must_repair' or 'repair-on-the-go' (i.e. // deep-scrub with the auto_repair configuration flag set). m_is_repair value // determines the scrubber behavior. - // PG_STATE_REPAIR, on the other hand, is only used for status reports (inc. the - // PG status as appearing in the logs). + // + // PG_STATE_REPAIR, on the other hand, is only used for status reports (inc. + // the PG status as appearing in the logs). m_is_repair = request.must_repair || m_flags.auto_repair; if (request.must_repair) { state_set(PG_STATE_REPAIR); @@ -1364,9 +1449,10 @@ ScrubMachineListener::MsgAndEpoch PgScrubber::prep_replica_map_msg( { dout(10) << __func__ << " min epoch:" << m_replica_min_epoch << dendl; - auto reply = - make_message(spg_t(m_pg->info.pgid.pgid, m_pg->get_primary().shard), - m_replica_min_epoch, m_pg_whoami); + auto reply = make_message( + spg_t(m_pg->info.pgid.pgid, m_pg->get_primary().shard), + m_replica_min_epoch, + m_pg_whoami); reply->preempted = (was_preempted == PreemptionNoted::preempted); ::encode(replica_scrubmap, reply->get_data()); @@ -1376,28 +1462,36 @@ ScrubMachineListener::MsgAndEpoch PgScrubber::prep_replica_map_msg( void PgScrubber::send_replica_map(const MsgAndEpoch& preprepared) { - m_pg->send_cluster_message(m_pg->get_primary().osd, preprepared.m_msg, - preprepared.m_epoch, false); + m_pg->send_cluster_message(m_pg->get_primary().osd, + preprepared.m_msg, + preprepared.m_epoch, + false); } void PgScrubber::send_preempted_replica() { - auto reply = - make_message(spg_t{m_pg->info.pgid.pgid, m_pg->get_primary().shard}, - m_replica_min_epoch, 
m_pg_whoami); + auto reply = make_message( + spg_t{m_pg->info.pgid.pgid, m_pg->get_primary().shard}, + m_replica_min_epoch, + m_pg_whoami); reply->preempted = true; - ::encode(replica_scrubmap, reply->get_data()); // skipping this crashes the scrubber - m_pg->send_cluster_message(m_pg->get_primary().osd, reply, m_replica_min_epoch, false); + ::encode(replica_scrubmap, + reply->get_data()); // skipping this crashes the scrubber + m_pg->send_cluster_message(m_pg->get_primary().osd, + reply, + m_replica_min_epoch, + false); } /* - * - if the replica lets us know it was interrupted, we mark the chunk as interrupted. - * The state-machine will react to that when all replica maps are received. - * - when all maps are received, we signal the FSM with the GotReplicas event (see - * scrub_send_replmaps_ready()). Note that due to the no-reentrancy limitations of the - * FSM, we do not 'process' the event directly. Instead - it is queued for the OSD to - * handle. + * - if the replica lets us know it was interrupted, we mark the chunk as + * interrupted. The state-machine will react to that when all replica maps are + * received. + * - when all maps are received, we signal the FSM with the GotReplicas event + * (see scrub_send_replmaps_ready()). Note that due to the no-reentrancy + * limitations of the FSM, we do not 'process' the event directly. Instead - it + * is queued for the OSD to handle. */ void PgScrubber::map_from_replica(OpRequestRef op) { @@ -1416,9 +1510,9 @@ void PgScrubber::map_from_replica(OpRequestRef op) auto [is_ok, err_txt] = m_maps_status.mark_arriving_map(m->from); if (!is_ok) { - // previously an unexpected map was triggering an assert. Now, as scrubs can be - // aborted at any time, the chances of this happening have increased, and aborting is - // not justified + // previously an unexpected map was triggering an assert. 
Now, as scrubs can + // be aborted at any time, the chances of this happening have increased, and + // aborting is not justified dout(1) << __func__ << err_txt << " from OSD " << m->from << dendl; return; } @@ -1442,18 +1536,20 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op) /* * if we are currently holding a reservation, then: - * either (1) we, the scrubber, did not yet notice an interval change. The remembered - * reservation epoch is from before our interval, and we can silently discard the - * reservation (no message is required). + * either (1) we, the scrubber, did not yet notice an interval change. The + * remembered reservation epoch is from before our interval, and we can + * silently discard the reservation (no message is required). * or: - * (2) the interval hasn't changed, but the same Primary that (we think) holds the - * lock just sent us a new request. Note that we know it's the same Primary, as - * otherwise the interval would have changed. + * + * (2) the interval hasn't changed, but the same Primary that (we think) + * holds the lock just sent us a new request. Note that we know it's the + * same Primary, as otherwise the interval would have changed. + * * Ostensibly we can discard & redo the reservation. But then we - * will be temporarily releasing the OSD resource - and might not be able to grab it - * again. Thus, we simply treat this as a successful new request - * (but mark the fact that if there is a previous request from the primary to - * scrub a specific chunk - that request is now defunct). + * will be temporarily releasing the OSD resource - and might not be able + * to grab it again. Thus, we simply treat this as a successful new request + * (but mark the fact that if there is a previous request from the primary + * to scrub a specific chunk - that request is now defunct). 
*/ if (m_remote_osd_resource.has_value() && m_remote_osd_resource->is_stale()) { @@ -1473,11 +1569,12 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op) dout(10) << __func__ << " already reserved." << dendl; /* - * it might well be that we did not yet finish handling the latest scrub-op from - * our primary. This happens, for example, if 'noscrub' was set via a command, then - * reset. The primary in this scenario will remain in the same interval, but we do need - * to reset our internal state (otherwise - the first renewed 'give me your scrub map' - * from the primary will see us in active state, crashing the OSD). + * it might well be that we did not yet finish handling the latest scrub-op + * from our primary. This happens, for example, if 'noscrub' was set via a + * command, then reset. The primary in this scenario will remain in the + * same interval, but we do need to reset our internal state (otherwise - + * the first renewed 'give me your scrub map' from the primary will see us + * in active state, crashing the OSD). */ advance_token(); granted = true; @@ -1497,8 +1594,10 @@ void PgScrubber::handle_scrub_reserve_request(OpRequestRef op) dout(10) << __func__ << " reserved? " << (granted ? "yes" : "no") << dendl; Message* reply = new MOSDScrubReserve( - spg_t(m_pg->info.pgid.pgid, m_pg->get_primary().shard), request_ep, - granted ? MOSDScrubReserve::GRANT : MOSDScrubReserve::REJECT, m_pg_whoami); + spg_t(m_pg->info.pgid.pgid, m_pg->get_primary().shard), + request_ep, + granted ? 
MOSDScrubReserve::GRANT : MOSDScrubReserve::REJECT, + m_pg_whoami); m_osds->send_message_osd_cluster(reply, op->get_req()->get_connection()); } @@ -1511,8 +1610,8 @@ void PgScrubber::handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) if (m_reservations.has_value()) { m_reservations->handle_reserve_grant(op, from); } else { - derr << __func__ << ": received unsolicited reservation grant from osd " << from - << " (" << op << ")" << dendl; + derr << __func__ << ": received unsolicited reservation grant from osd " + << from << " (" << op << ")" << dendl; } } @@ -1533,8 +1632,8 @@ void PgScrubber::handle_scrub_reserve_release(OpRequestRef op) op->mark_started(); /* - * this specific scrub session has terminated. All incoming events carrying the old - * tag will be discarded. + * this specific scrub session has terminated. All incoming events carrying + * the old tag will be discarded. */ advance_token(); m_remote_osd_resource.reset(); @@ -1570,9 +1669,11 @@ void PgScrubber::message_all_replicas(int32_t opcode, std::string_view op_text) if (p == m_pg_whoami) continue; - dout(10) << "scrub requesting " << op_text << " from osd." << p << " Epoch: " << epch - << dendl; - Message* m = new MOSDScrubReserve(spg_t(m_pg->info.pgid.pgid, p.shard), epch, opcode, + dout(10) << "scrub requesting " << op_text << " from osd." << p + << " Epoch: " << epch << dendl; + Message* m = new MOSDScrubReserve(spg_t(m_pg->info.pgid.pgid, p.shard), + epch, + opcode, m_pg_whoami); messages.push_back(std::make_pair(p.osd, m)); } @@ -1618,8 +1719,8 @@ bool PgScrubber::is_queued_or_active() const */ void PgScrubber::scrub_finish() { - dout(10) << __func__ << " before flags: " << m_flags - << ". repair state: " << (state_test(PG_STATE_REPAIR) ? "repair" : "no-repair") + dout(10) << __func__ << " before flags: " << m_flags << ". repair state: " + << (state_test(PG_STATE_REPAIR) ? "repair" : "no-repair") << ". 
deep_scrub_on_error: " << m_flags.deep_scrub_on_error << dendl; ceph_assert(m_pg->is_locked()); @@ -1631,7 +1732,7 @@ void PgScrubber::scrub_finish() // we would like to cancel auto-repair if (m_is_repair && m_flags.auto_repair && m_be->authoritative_peers_count() > - static_cast(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) { + static_cast(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) { dout(10) << __func__ << " undoing the repair" << dendl; state_clear(PG_STATE_REPAIR); // not expected to be set, anyway @@ -1646,7 +1747,7 @@ void PgScrubber::scrub_finish() bool do_auto_scrub = false; if (m_flags.deep_scrub_on_error && m_be->authoritative_peers_count() && m_be->authoritative_peers_count() <= - static_cast(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) { + static_cast(m_pg->cct->_conf->osd_scrub_auto_repair_num_errors)) { ceph_assert(!m_is_deep); do_auto_scrub = true; dout(15) << __func__ << " Try to auto repair after scrub errors" << dendl; @@ -1663,10 +1764,10 @@ void PgScrubber::scrub_finish() if (m_be->authoritative_peers_count()) { auto err_msg = fmt::format("{} {} {} missing, {} inconsistent objects", - m_pg->info.pgid, - m_mode_desc, - m_be->m_missing.size(), - m_be->m_inconsistent.size()); + m_pg->info.pgid, + m_mode_desc, + m_be->m_missing.size(), + m_be->m_inconsistent.size()); dout(2) << err_msg << dendl; m_osds->clog->error() << fmt::to_string(err_msg); @@ -1708,9 +1809,11 @@ void PgScrubber::scrub_finish() // Since we don't know which errors were fixed, we can only clear them // when every one has been fixed. if (m_is_repair) { - dout(15) << fmt::format("{}: {} errors. {} errors fixed", __func__, - m_shallow_errors + m_deep_errors, m_fixed_count) - << dendl; + dout(15) << fmt::format("{}: {} errors. 
{} errors fixed", + __func__, + m_shallow_errors + m_deep_errors, + m_fixed_count) + << dendl; if (m_fixed_count == m_shallow_errors + m_deep_errors) { ceph_assert(m_is_deep); @@ -1722,12 +1825,11 @@ void PgScrubber::scrub_finish() // Deep scrub in order to get corrected error counts m_pg->scrub_after_recovery = true; - m_planned_scrub.req_scrub = - m_planned_scrub.req_scrub || m_flags.required; + m_planned_scrub.req_scrub = m_planned_scrub.req_scrub || m_flags.required; dout(20) << __func__ << " Current 'required': " << m_flags.required - << " Planned 'req_scrub': " << m_planned_scrub.req_scrub - << dendl; + << " Planned 'req_scrub': " << m_planned_scrub.req_scrub + << dendl; } else if (m_shallow_errors || m_deep_errors) { @@ -1735,7 +1837,7 @@ void PgScrubber::scrub_finish() // possible. state_set(PG_STATE_FAILED_REPAIR); dout(10) << __func__ << " " << (m_shallow_errors + m_deep_errors) - << " error(s) present with no repair possible" << dendl; + << " error(s) present with no repair possible" << dendl; } } @@ -1744,55 +1846,55 @@ void PgScrubber::scrub_finish() ObjectStore::Transaction t; m_pg->recovery_state.update_stats( [this](auto& history, auto& stats) { - dout(10) << "m_pg->recovery_state.update_stats() errors:" - << m_shallow_errors << "/" << m_deep_errors << " deep? 
" - << m_is_deep << dendl; - utime_t now = ceph_clock_now(); - history.last_scrub = m_pg->recovery_state.get_info().last_update; - history.last_scrub_stamp = now; - if (m_is_deep) { - history.last_deep_scrub = m_pg->recovery_state.get_info().last_update; - history.last_deep_scrub_stamp = now; - } - - if (m_is_deep) { - if ((m_shallow_errors == 0) && (m_deep_errors == 0)) { - history.last_clean_scrub_stamp = now; - } - stats.stats.sum.num_shallow_scrub_errors = m_shallow_errors; - stats.stats.sum.num_deep_scrub_errors = m_deep_errors; - auto omap_stats = m_be->this_scrub_omapstats(); - stats.stats.sum.num_large_omap_objects = - omap_stats.large_omap_objects; - stats.stats.sum.num_omap_bytes = omap_stats.omap_bytes; - stats.stats.sum.num_omap_keys = omap_stats.omap_keys; - dout(19) << "scrub_finish shard " << m_pg_whoami - << " num_omap_bytes = " << stats.stats.sum.num_omap_bytes - << " num_omap_keys = " << stats.stats.sum.num_omap_keys - << dendl; - } else { - stats.stats.sum.num_shallow_scrub_errors = m_shallow_errors; - // XXX: last_clean_scrub_stamp doesn't mean the pg is not inconsistent - // because of deep-scrub errors - if (m_shallow_errors == 0) { - history.last_clean_scrub_stamp = now; - } - } - - stats.stats.sum.num_scrub_errors = - stats.stats.sum.num_shallow_scrub_errors + - stats.stats.sum.num_deep_scrub_errors; - - if (m_flags.check_repair) { - m_flags.check_repair = false; - if (m_pg->info.stats.stats.sum.num_scrub_errors) { - state_set(PG_STATE_FAILED_REPAIR); - dout(10) << "scrub_finish " - << m_pg->info.stats.stats.sum.num_scrub_errors - << " error(s) still present after re-scrub" << dendl; - } - } - return true; + dout(10) << "m_pg->recovery_state.update_stats() errors:" + << m_shallow_errors << "/" << m_deep_errors << " deep? 
" + << m_is_deep << dendl; + utime_t now = ceph_clock_now(); + history.last_scrub = m_pg->recovery_state.get_info().last_update; + history.last_scrub_stamp = now; + if (m_is_deep) { + history.last_deep_scrub = m_pg->recovery_state.get_info().last_update; + history.last_deep_scrub_stamp = now; + } + + if (m_is_deep) { + if ((m_shallow_errors == 0) && (m_deep_errors == 0)) { + history.last_clean_scrub_stamp = now; + } + stats.stats.sum.num_shallow_scrub_errors = m_shallow_errors; + stats.stats.sum.num_deep_scrub_errors = m_deep_errors; + auto omap_stats = m_be->this_scrub_omapstats(); + stats.stats.sum.num_large_omap_objects = + omap_stats.large_omap_objects; + stats.stats.sum.num_omap_bytes = omap_stats.omap_bytes; + stats.stats.sum.num_omap_keys = omap_stats.omap_keys; + dout(19) << "scrub_finish shard " << m_pg_whoami + << " num_omap_bytes = " << stats.stats.sum.num_omap_bytes + << " num_omap_keys = " << stats.stats.sum.num_omap_keys + << dendl; + } else { + stats.stats.sum.num_shallow_scrub_errors = m_shallow_errors; + // XXX: last_clean_scrub_stamp doesn't mean the pg is not inconsistent + // because of deep-scrub errors + if (m_shallow_errors == 0) { + history.last_clean_scrub_stamp = now; + } + } + + stats.stats.sum.num_scrub_errors = + stats.stats.sum.num_shallow_scrub_errors + + stats.stats.sum.num_deep_scrub_errors; + + if (m_flags.check_repair) { + m_flags.check_repair = false; + if (m_pg->info.stats.stats.sum.num_scrub_errors) { + state_set(PG_STATE_FAILED_REPAIR); + dout(10) << "scrub_finish " + << m_pg->info.stats.stats.sum.num_scrub_errors + << " error(s) still present after re-scrub" << dendl; + } + } + return true; }, &t); int tr = m_osds->store->queue_transaction(m_pg->ch, std::move(t), nullptr); @@ -1800,8 +1902,10 @@ void PgScrubber::scrub_finish() } if (has_error) { - m_pg->queue_peering_event(PGPeeringEventRef(std::make_shared( - get_osdmap_epoch(), get_osdmap_epoch(), PeeringState::DoRecovery()))); + m_pg->queue_peering_event(PGPeeringEventRef( 
+ std::make_shared(get_osdmap_epoch(), + get_osdmap_epoch(), + PeeringState::DoRecovery()))); } else { m_is_repair = false; state_clear(PG_STATE_REPAIR); @@ -1825,8 +1929,8 @@ void PgScrubber::on_digest_updates() { dout(10) << __func__ << " #pending: " << num_digest_updates_pending << " " << (m_end.is_max() ? " " : " ") - << (is_queued_or_active() ? "" : " ** not marked as scrubbing **") - << dendl; + << (is_queued_or_active() ? "" : " ** not marked as scrubbing **") + << dendl; if (num_digest_updates_pending > 0) { // do nothing for now. We will be called again when new updates arrive @@ -1866,10 +1970,11 @@ void PgScrubber::dump_scrubber(ceph::Formatter* f, f->dump_stream("scrub_reg_stamp") << m_scrub_job->get_sched_time(); - // note that we are repeating logic that is coded elsewhere (currently PG.cc). - // This is not optimal. - bool deep_expected = (ceph_clock_now() >= m_pg->next_deepscrub_interval()) || - request_flags.must_deep_scrub || request_flags.need_auto; + // note that we are repeating logic that is coded elsewhere (currently + // PG.cc). This is not optimal. 
+ bool deep_expected = + (ceph_clock_now() >= m_pg->next_deepscrub_interval()) || + request_flags.must_deep_scrub || request_flags.need_auto; auto sched_state = m_scrub_job->scheduling_state(ceph_clock_now(), deep_expected); f->dump_string("schedule", sched_state); @@ -1877,7 +1982,7 @@ void PgScrubber::dump_scrubber(ceph::Formatter* f, if (m_publish_sessions) { f->dump_int("test_sequence", - m_sessions_counter); // an ever-increasing number used by tests + m_sessions_counter); // an ever-increasing number used by tests } f->close_section(); @@ -1938,41 +2043,40 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const } if (m_scrub_job->state != ScrubQueue::qu_state_t::registered) { return pg_scrubbing_status_t{utime_t{}, - 0, - pg_scrub_sched_status_t::not_queued, - false, - scrub_level_t::shallow, - false}; + 0, + pg_scrub_sched_status_t::not_queued, + false, + scrub_level_t::shallow, + false}; } // Will next scrub surely be a deep one? note that deep-scrub might be // selected even if we report a regular scrub here. bool deep_expected = (now_is >= m_pg->next_deepscrub_interval()) || - m_planned_scrub.must_deep_scrub || - m_planned_scrub.need_auto; + m_planned_scrub.must_deep_scrub || + m_planned_scrub.need_auto; scrub_level_t expected_level = deep_expected ? scrub_level_t::deep : scrub_level_t::shallow; - bool periodic = !m_planned_scrub.must_scrub && - !m_planned_scrub.need_auto && - !m_planned_scrub.must_deep_scrub; + bool periodic = !m_planned_scrub.must_scrub && !m_planned_scrub.need_auto && + !m_planned_scrub.must_deep_scrub; // are we ripe for scrubbing? if (now_is > m_scrub_job->schedule.scheduled_at) { // we are waiting for our turn at the OSD. 
return pg_scrubbing_status_t{m_scrub_job->schedule.scheduled_at, - 0, - pg_scrub_sched_status_t::queued, - false, - expected_level, - periodic}; + 0, + pg_scrub_sched_status_t::queued, + false, + expected_level, + periodic}; } return pg_scrubbing_status_t{m_scrub_job->schedule.scheduled_at, - 0, - pg_scrub_sched_status_t::scheduled, - false, - expected_level, - periodic}; + 0, + pg_scrub_sched_status_t::scheduled, + false, + expected_level, + periodic}; } void PgScrubber::handle_query_state(ceph::Formatter* f) @@ -2020,7 +2124,8 @@ PgScrubber::PgScrubber(PG* pg) m_fsm = std::make_unique(m_pg, this); m_fsm->initiate(); - m_scrub_job = ceph::make_ref(m_osds->cct, m_pg->pg_id, + m_scrub_job = ceph::make_ref(m_osds->cct, + m_pg->pg_id, m_osds->get_nodeid()); } @@ -2034,7 +2139,7 @@ void PgScrubber::set_scrub_duration() utime_t stamp = ceph_clock_now(); utime_t duration = stamp - scrub_begin_stamp; m_pg->recovery_state.update_stats([=](auto& history, auto& stats) { - stats.last_scrub_duration = ceill(duration.to_msec()/1000.0); + stats.last_scrub_duration = ceill(duration.to_msec() / 1000.0); stats.scrub_duration = double(duration); return true; }); @@ -2153,9 +2258,10 @@ void PgScrubber::advance_token() dout(10) << __func__ << " was: " << m_current_token << dendl; m_current_token++; - // when advance_token() is called, it is assumed that no scrubbing takes place. - // We will, though, verify that. And if we are actually still handling a stale request - - // both our internal state and the FSM state will be cleared. + // when advance_token() is called, it is assumed that no scrubbing takes + // place. We will, though, verify that. And if we are actually still handling + // a stale request - both our internal state and the FSM state will be + // cleared. 
replica_handling_done(); m_fsm->process_event(FullReset{}); } @@ -2165,8 +2271,8 @@ bool PgScrubber::is_token_current(Scrub::act_token_t received_token) if (received_token == 0 || received_token == m_current_token) { return true; } - dout(5) << __func__ << " obsolete token (" << received_token - << " vs current " << m_current_token << dendl; + dout(5) << __func__ << " obsolete token (" << received_token << " vs current " + << m_current_token << dendl; return false; } @@ -2209,7 +2315,8 @@ int PgScrubber::asok_debug(std::string_view cmd, dout(10) << __func__ << " cmd: " << cmd << " param: " << param << dendl; if (cmd == "block") { - // set a flag that will cause the next 'select_range' to report a blocked object + // set a flag that will cause the next 'select_range' to report a blocked + // object m_debug_blockrange = 1; } else if (cmd == "unblock") { @@ -2225,12 +2332,13 @@ int PgScrubber::asok_debug(std::string_view cmd, } else if (param == "block") { if (cmd == "set") { - // set a flag that will cause the next 'select_range' to report a blocked object - m_debug_blockrange = 1; + // set a flag that will cause the next 'select_range' to report a + // blocked object + m_debug_blockrange = 1; } else { - // send an 'unblock' event, as if a blocked range was freed - m_debug_blockrange = 0; - m_fsm->process_event(Unblocked{}); + // send an 'unblock' event, as if a blocked range was freed + m_debug_blockrange = 0; + m_fsm->process_event(Unblocked{}); } } } @@ -2251,8 +2359,8 @@ void PgScrubber::preemption_data_t::reset() m_preemptable = false; m_preempted = false; - m_left = - static_cast(m_pg->cct->_conf.get_val("osd_scrub_max_preemptions")); + m_left = static_cast( + m_pg->cct->_conf.get_val("osd_scrub_max_preemptions")); m_size_divisor = 1; } @@ -2262,12 +2370,16 @@ namespace Scrub { void ReplicaReservations::release_replica(pg_shard_t peer, epoch_t epoch) { - auto m = new MOSDScrubReserve(spg_t(m_pg_info.pgid.pgid, peer.shard), epoch, - MOSDScrubReserve::RELEASE, 
m_pg->pg_whoami); + auto m = new MOSDScrubReserve(spg_t(m_pg_info.pgid.pgid, peer.shard), + epoch, + MOSDScrubReserve::RELEASE, + m_pg->pg_whoami); m_osds->send_message_osd_cluster(peer.osd, m, epoch); } -ReplicaReservations::ReplicaReservations(PG* pg, pg_shard_t whoami, ScrubQueue::ScrubJobRef scrubjob) +ReplicaReservations::ReplicaReservations(PG* pg, + pg_shard_t whoami, + ScrubQueue::ScrubJobRef scrubjob) : m_pg{pg} , m_acting_set{pg->get_actingset()} , m_osds{m_pg->get_pg_osd(ScrubberPasskey())} @@ -2294,8 +2406,10 @@ ReplicaReservations::ReplicaReservations(PG* pg, pg_shard_t whoami, ScrubQueue:: for (auto p : m_acting_set) { if (p == whoami) continue; - auto m = new MOSDScrubReserve(spg_t(m_pg_info.pgid.pgid, p.shard), epoch, - MOSDScrubReserve::REQUEST, m_pg->pg_whoami); + auto m = new MOSDScrubReserve(spg_t(m_pg_info.pgid.pgid, p.shard), + epoch, + MOSDScrubReserve::REQUEST, + m_pg->pg_whoami); m_osds->send_message_osd_cluster(p.osd, m, epoch); m_waited_for_peers.push_back(p); dout(10) << __func__ << ": reserve " << p.osd << dendl; @@ -2318,18 +2432,20 @@ void ReplicaReservations::discard_all() { dout(10) << __func__ << ": " << m_reserved_peers << dendl; - m_had_rejections = true; // preventing late-coming responses from triggering events + m_had_rejections = true; // preventing late-coming responses from triggering + // events m_reserved_peers.clear(); m_waited_for_peers.clear(); } ReplicaReservations::~ReplicaReservations() { - m_had_rejections = true; // preventing late-coming responses from triggering events + m_had_rejections = true; // preventing late-coming responses from triggering + // events - // send un-reserve messages to all reserved replicas. We do not wait for answer (there - // wouldn't be one). Other incoming messages will be discarded on the way, by our - // owner. + // send un-reserve messages to all reserved replicas. We do not wait for + // answer (there wouldn't be one). 
Other incoming messages will be discarded + // on the way, by our owner. epoch_t epoch = m_pg->get_osdmap_epoch(); for (auto& p : m_reserved_peers) { @@ -2337,9 +2453,9 @@ ReplicaReservations::~ReplicaReservations() } m_reserved_peers.clear(); - // note: the release will follow on the heels of the request. When tried otherwise, - // grants that followed a reject arrived after the whole scrub machine-state was - // reset, causing leaked reservations. + // note: the release will follow on the heels of the request. When tried + // otherwise, grants that followed a reject arrived after the whole scrub + // machine-state was reset, causing leaked reservations. for (auto& p : m_waited_for_peers) { release_replica(p, epoch); } @@ -2347,8 +2463,8 @@ ReplicaReservations::~ReplicaReservations() } /** - * @ATTN we would not reach here if the ReplicaReservation object managed by the - * scrubber was reset. + * @ATTN we would not reach here if the ReplicaReservation object managed by + * the scrubber was reset. */ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) { @@ -2356,7 +2472,8 @@ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) op->mark_started(); { - // reduce the amount of extra release messages. Not a must, but the log is cleaner + // reduce the amount of extra release messages. Not a must, but the log is + // cleaner auto w = find(m_waited_for_peers.begin(), m_waited_for_peers.end(), from); if (w != m_waited_for_peers.end()) m_waited_for_peers.erase(w); @@ -2365,14 +2482,16 @@ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) // are we forced to reject the reservation? 
if (m_had_rejections) { - dout(10) << __func__ << ": rejecting late-coming reservation from " - << from << dendl; + dout(10) << __func__ << ": rejecting late-coming reservation from " << from + << dendl; release_replica(from, m_pg->get_osdmap_epoch()); - } else if (std::find(m_reserved_peers.begin(), m_reserved_peers.end(), from) != - m_reserved_peers.end()) { + } else if (std::find(m_reserved_peers.begin(), + m_reserved_peers.end(), + from) != m_reserved_peers.end()) { - dout(10) << __func__ << ": already had osd." << from << " reserved" << dendl; + dout(10) << __func__ << ": already had osd." << from << " reserved" + << dendl; } else { @@ -2385,14 +2504,16 @@ void ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) } } -void ReplicaReservations::handle_reserve_reject(OpRequestRef op, pg_shard_t from) +void ReplicaReservations::handle_reserve_reject(OpRequestRef op, + pg_shard_t from) { dout(10) << __func__ << ": rejected by " << from << dendl; dout(15) << __func__ << ": " << *op->get_req() << dendl; op->mark_started(); { - // reduce the amount of extra release messages. Not a must, but the log is cleaner + // reduce the amount of extra release messages. 
Not a must, but the log is + // cleaner auto w = find(m_waited_for_peers.begin(), m_waited_for_peers.end(), from); if (w != m_waited_for_peers.end()) m_waited_for_peers.erase(w); @@ -2401,17 +2522,20 @@ void ReplicaReservations::handle_reserve_reject(OpRequestRef op, pg_shard_t from if (m_had_rejections) { // our failure was already handled when the first rejection arrived - dout(15) << __func__ << ": ignoring late-coming rejection from " - << from << dendl; + dout(15) << __func__ << ": ignoring late-coming rejection from " << from + << dendl; - } else if (std::find(m_reserved_peers.begin(), m_reserved_peers.end(), from) != - m_reserved_peers.end()) { + } else if (std::find(m_reserved_peers.begin(), + m_reserved_peers.end(), + from) != m_reserved_peers.end()) { - dout(10) << __func__ << ": already had osd." << from << " reserved" << dendl; + dout(10) << __func__ << ": already had osd." << from << " reserved" + << dendl; } else { - dout(10) << __func__ << ": osd." << from << " scrub reserve = fail" << dendl; + dout(10) << __func__ << ": osd." << from << " scrub reserve = fail" + << dendl; m_had_rejections = true; // preventing any additional notifications send_reject(); } @@ -2425,8 +2549,7 @@ std::ostream& ReplicaReservations::gen_prefix(std::ostream& out) const // ///////////////////// LocalReservation ////////////////////////////////// // note: no dout()s in LocalReservation functions. Client logs interactions. 
-LocalReservation::LocalReservation(OSDService* osds) - : m_osds{osds} +LocalReservation::LocalReservation(OSDService* osds) : m_osds{osds} { if (m_osds->get_scrub_services().inc_scrubs_local()) { // the failure is signalled by not having m_holding_local_reservation set @@ -2459,7 +2582,8 @@ ReservedByRemotePrimary::ReservedByRemotePrimary(const PgScrubber* scrubber, return; } - dout(20) << __func__ << ": scrub resources reserved at Primary request" << dendl; + dout(20) << __func__ << ": scrub resources reserved at Primary request" + << dendl; m_reserved_by_remote_primary = true; } @@ -2486,7 +2610,8 @@ std::ostream& ReservedByRemotePrimary::gen_prefix(std::ostream& out) const auto MapsCollectionStatus::mark_arriving_map(pg_shard_t from) -> std::tuple { - auto fe = std::find(m_maps_awaited_for.begin(), m_maps_awaited_for.end(), from); + auto fe = + std::find(m_maps_awaited_for.begin(), m_maps_awaited_for.end(), from); if (fe != m_maps_awaited_for.end()) { // we are indeed waiting for a map from this replica m_maps_awaited_for.erase(fe); @@ -2524,7 +2649,9 @@ ostream& operator<<(ostream& out, const MapsCollectionStatus& sf) // ///////////////////// blocked_range_t /////////////////////////////// -blocked_range_t::blocked_range_t(OSDService* osds, ceph::timespan waittime, spg_t pg_id) +blocked_range_t::blocked_range_t(OSDService* osds, + ceph::timespan waittime, + spg_t pg_id) : m_osds{osds} { auto now_is = std::chrono::system_clock::now(); @@ -2533,9 +2660,11 @@ blocked_range_t::blocked_range_t(OSDService* osds, ceph::timespan waittime, spg_ char buf[50]; strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S", std::localtime(&now_c)); lgeneric_subdout(g_ceph_context, osd, 10) - << "PgScrubber: " << pg_id << " blocked on an object for too long (since " << buf - << ")" << dendl; - osds->clog->warn() << "osd." 
<< osds->whoami << " PgScrubber: " << pg_id << " blocked on an object for too long (since " << buf << ")"; + << "PgScrubber: " << pg_id << " blocked on an object for too long (since " + << buf << ")" << dendl; + osds->clog->warn() << "osd." << osds->whoami << " PgScrubber: " << pg_id + << " blocked on an object for too long (since " << buf + << ")"; return; }); diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index f03a1f841246..6bbb61835a76 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -90,14 +90,16 @@ struct BuildMap; /** * Reserving/freeing scrub resources at the replicas. * - * When constructed - sends reservation requests to the acting_set. - * A rejection triggers a "couldn't acquire the replicas' scrub resources" event. - * All previous requests, whether already granted or not, are explicitly released. + * When constructed - sends reservation requests to the acting_set. + * A rejection triggers a "couldn't acquire the replicas' scrub resources" + * event. All previous requests, whether already granted or not, are explicitly + * released. * - * A note re performance: I've measured a few container alternatives for - * m_reserved_peers, with its specific usage pattern. Std::set is extremely slow, as - * expected. flat_set is only slightly better. Surprisingly - std::vector (with no - * sorting) is better than boost::small_vec. And for std::vector: no need to pre-reserve. + * A note re performance: I've measured a few container alternatives for + * m_reserved_peers, with its specific usage pattern. Std::set is extremely + * slow, as expected. flat_set is only slightly better. Surprisingly - + * std::vector (with no sorting) is better than boost::small_vec. And for + * std::vector: no need to pre-reserve. 
*/ class ReplicaReservations { using OrigSet = decltype(std::declval().get_actingset()); @@ -110,7 +112,7 @@ class ReplicaReservations { bool m_had_rejections{false}; int m_pending{-1}; const pg_info_t& m_pg_info; - ScrubQueue::ScrubJobRef m_scrub_job; ///< a ref to this PG's scrub job + ScrubQueue::ScrubJobRef m_scrub_job; ///< a ref to this PG's scrub job void release_replica(pg_shard_t peer, epoch_t epoch); @@ -125,12 +127,15 @@ class ReplicaReservations { /** * quietly discard all knowledge about existing reservations. No messages * are sent to peers. - * To be used upon interval change, as we know the the running scrub is no longer - * relevant, and that the replicas had reset the reservations on their side. + * To be used upon interval change, as we know the running scrub is no + * longer relevant, and that the replicas had reset the reservations on + * their side. */ void discard_all(); - ReplicaReservations(PG* pg, pg_shard_t whoami, ScrubQueue::ScrubJobRef scrubjob); + ReplicaReservations(PG* pg, + pg_shard_t whoami, + ScrubQueue::ScrubJobRef scrubjob); ~ReplicaReservations(); @@ -155,19 +160,26 @@ class LocalReservation { }; /** - * wraps the OSD resource we are using when reserved as a replica by a scrubbing primary. + * wraps the OSD resource we are using when reserved as a replica by a + * scrubbing primary.
*/ class ReservedByRemotePrimary { - const PgScrubber* m_scrubber; ///< we will be using its gen_prefix() + const PgScrubber* m_scrubber; ///< we will be using its gen_prefix() PG* m_pg; OSDService* m_osds; bool m_reserved_by_remote_primary{false}; const epoch_t m_reserved_at; public: - ReservedByRemotePrimary(const PgScrubber* scrubber, PG* pg, OSDService* osds, epoch_t epoch); + ReservedByRemotePrimary(const PgScrubber* scrubber, + PG* pg, + OSDService* osds, + epoch_t epoch); ~ReservedByRemotePrimary(); - [[nodiscard]] bool is_reserved() const { return m_reserved_by_remote_primary; } + [[nodiscard]] bool is_reserved() const + { + return m_reserved_by_remote_primary; + } /// compare the remembered reserved-at epoch to the current interval [[nodiscard]] bool is_stale() const; @@ -176,10 +188,10 @@ class ReservedByRemotePrimary { }; /** - * Once all replicas' scrub maps are received, we go on to compare the maps. That is - - * unless we we have not yet completed building our own scrub map. MapsCollectionStatus - * combines the status of waiting for both the local map and the replicas, without - * resorting to adding dummy entries into a list. + * Once all replicas' scrub maps are received, we go on to compare the maps. + * That is - unless we have not yet completed building our own scrub map. + * MapsCollectionStatus combines the status of waiting for both the local map + * and the replicas, without resorting to adding dummy entries into a list. */ class MapsCollectionStatus { @@ -202,7 +214,10 @@ class MapsCollectionStatus { /// @returns true if indeed waiting for this one.
Otherwise: an error string auto mark_arriving_map(pg_shard_t from) -> std::tuple; - [[nodiscard]] std::vector get_awaited() const { return m_maps_awaited_for; } + [[nodiscard]] std::vector get_awaited() const + { + return m_maps_awaited_for; + } void reset(); @@ -231,7 +246,8 @@ struct scrub_flags_t { */ bool auto_repair{false}; - /// this flag indicates that we are scrubbing post repair to verify everything is fixed + /// this flag indicates that we are scrubbing post repair to verify everything + /// is fixed bool check_repair{false}; /// checked at the end of the scrub, to possibly initiate a deep-scrub @@ -239,8 +255,8 @@ struct scrub_flags_t { /** * scrub must not be aborted. - * Set for explicitly requested scrubs, and for scrubs originated by the pairing - * process with the 'repair' flag set (in the RequestScrub event). + * Set for explicitly requested scrubs, and for scrubs originated by the + * peering process with the 'repair' flag set (in the RequestScrub event). */ bool required{false}; }; @@ -256,12 +272,12 @@ ostream& operator<<(ostream& out, const scrub_flags_t& sf); * the actual scrubbing code. */ class PgScrubber : public ScrubPgIF, - public ScrubMachineListener, - public SnapMapperAccessor { + public ScrubMachineListener, + public SnapMapperAccessor { public: explicit PgScrubber(PG* pg); - friend class ScrubBackend; // will be replaced by a limited interface + friend class ScrubBackend; // will be replaced by a limited interface // ------------------ the I/F exposed to the PG (ScrubPgIF) ------------- @@ -290,10 +306,11 @@ class PgScrubber : public ScrubPgIF, void send_replica_pushes_upd(epoch_t epoch_queued) final; /** - * The PG has updated its 'applied version'. It might be that we are waiting for this - * information: after selecting a range of objects to scrub, we've marked the latest - * version of these objects in m_subset_last_update. We will not start the map building - * before we know that the PG has reached this version.
+ * The PG has updated its 'applied version'. It might be that we are waiting + * for this information: after selecting a range of objects to scrub, we've + * marked the latest version of these objects in m_subset_last_update. We will + * not start the map building before we know that the PG has reached this + * version. */ void on_applied_when_primary(const eversion_t& applied_version) final; @@ -319,7 +336,8 @@ class PgScrubber : public ScrubPgIF, bool write_blocked_by_scrub(const hobject_t& soid) final; /// true if the given range intersects the scrub interval in any way - bool range_intersects_scrub(const hobject_t& start, const hobject_t& end) final; + bool range_intersects_scrub(const hobject_t& start, + const hobject_t& end) final; /** * we are a replica being asked by the Primary to reserve OSD resources for @@ -342,7 +360,8 @@ class PgScrubber : public ScrubPgIF, void on_primary_change(const requested_scrub_t& request_flags) final; - void on_maybe_registration_change(const requested_scrub_t& request_flags) final; + void on_maybe_registration_change( + const requested_scrub_t& request_flags) final; void scrub_requested(scrub_level_t scrub_level, scrub_type_t scrub_type, @@ -373,14 +392,17 @@ class PgScrubber : public ScrubPgIF, return m_replica_request_priority; }; - unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority, - unsigned int suggested_priority) const final; + unsigned int scrub_requeue_priority( + Scrub::scrub_prio_t with_priority, + unsigned int suggested_priority) const final; /// the version that refers to m_flags.priority - unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority) const final; + unsigned int scrub_requeue_priority( + Scrub::scrub_prio_t with_priority) const final; void add_callback(Context* context) final { m_callbacks.push_back(context); } - [[nodiscard]] bool are_callbacks_pending() const final // used for an assert in PG.cc + [[nodiscard]] bool are_callbacks_pending() const final // used for an 
assert + // in PG.cc { return !m_callbacks.empty(); } @@ -396,7 +418,7 @@ class PgScrubber : public ScrubPgIF, * add to scrub statistics, but only if the soid is below the scrub start */ void stats_of_handled_objects(const object_stat_sum_t& delta_stats, - const hobject_t& soid) override + const hobject_t& soid) override { ceph_assert(false); } @@ -404,8 +426,9 @@ class PgScrubber : public ScrubPgIF, /** * finalize the parameters of the initiated scrubbing session: * - * The "current scrub" flags (m_flags) are set from the 'planned_scrub' flag-set; - * PG_STATE_SCRUBBING, and possibly PG_STATE_DEEP_SCRUB & PG_STATE_REPAIR are set. + * The "current scrub" flags (m_flags) are set from the 'planned_scrub' + * flag-set; PG_STATE_SCRUBBING, and possibly PG_STATE_DEEP_SCRUB & + * PG_STATE_REPAIR are set. */ void set_op_parameters(requested_scrub_t& request) final; @@ -423,10 +446,14 @@ class PgScrubber : public ScrubPgIF, std::stringstream& ss) override; int m_debug_blockrange{0}; - // ------------------------------------------------------------------------------------------- - // the I/F used by the state-machine (i.e. the implementation of ScrubMachineListener) + // -------------------------------------------------------------------------- + // the I/F used by the state-machine (i.e. 
the implementation of + // ScrubMachineListener) - [[nodiscard]] bool is_primary() const final { return m_pg->recovery_state.is_primary(); } + [[nodiscard]] bool is_primary() const final + { + return m_pg->recovery_state.is_primary(); + } void select_range_n_notify() final; @@ -446,13 +473,13 @@ class PgScrubber : public ScrubPgIF, void on_replica_init() final; void replica_handling_done() final; - /// the version of 'scrub_clear_state()' that does not try to invoke FSM services - /// (thus can be called from FSM reactions) + /// the version of 'scrub_clear_state()' that does not try to invoke FSM + /// services (thus can be called from FSM reactions) void clear_pgscrub_state() final; /* - * Send an 'InternalSchedScrub' FSM event either immediately, or - if 'm_need_sleep' - * is asserted - after a configuration-dependent timeout. + * Send an 'InternalSchedScrub' FSM event either immediately, or - if + * 'm_need_sleep' is asserted - after a configuration-dependent timeout. */ void add_delayed_scheduling() final; @@ -462,10 +489,11 @@ class PgScrubber : public ScrubPgIF, void scrub_finish() final; - ScrubMachineListener::MsgAndEpoch - prep_replica_map_msg(Scrub::PreemptionNoted was_preempted) final; + ScrubMachineListener::MsgAndEpoch prep_replica_map_msg( + Scrub::PreemptionNoted was_preempted) final; - void send_replica_map(const ScrubMachineListener::MsgAndEpoch& preprepared) final; + void send_replica_map( + const ScrubMachineListener::MsgAndEpoch& preprepared) final; void send_preempted_replica() final; @@ -511,7 +539,8 @@ class PgScrubber : public ScrubPgIF, std::ostream& gen_prefix(std::ostream& out) const final; // fetching the snap-set for a given object (used by the scrub-backend) - int get_snaps(const hobject_t& hoid, std::set* snaps_set) const final + int get_snaps(const hobject_t& hoid, + std::set* snaps_set) const final { return m_pg->snap_mapper.get_snaps(hoid, snaps_set); } @@ -525,18 +554,20 @@ class PgScrubber : public ScrubPgIF, [[nodiscard]] bool 
is_scrub_registered() const; - /// the 'is-in-scheduling-queue' status, using relaxed-semantics access to the status + /// the 'is-in-scheduling-queue' status, using relaxed-semantics access to the + /// status std::string_view registration_state() const; virtual void _scrub_clear_state() {} - utime_t m_scrub_reg_stamp; ///< stamp we registered for - ScrubQueue::ScrubJobRef m_scrub_job; ///< the scrub-job used by the OSD to schedule us + utime_t m_scrub_reg_stamp; ///< stamp we registered for + ScrubQueue::ScrubJobRef m_scrub_job; ///< the scrub-job used by the OSD to + ///< schedule us ostream& show(ostream& out) const override; public: - // ------------------ the I/F used by the ScrubBackend (not named yet) ------------- + // ------------------ the I/F used by the ScrubBackend (not named yet) // note: the reason we must have these forwarders, is because of the // artificial PG vs. PrimaryLogPG distinction. Some of the services used @@ -594,21 +625,22 @@ class PgScrubber : public ScrubPgIF, * * It isn't if: * - (1) we are no longer 'actively scrubbing'; or - * - (2) the message is from an epoch prior to when we started the current scrub - * session; or + * - (2) the message is from an epoch prior to when we started the current + * scrub session; or * - (3) the message epoch is from a previous interval; or * - (4) the 'abort' configuration flags were set. * * For (1) & (2) - the incoming message is discarded, w/o further action. 
* - * For (3): (see check_interval() for a full description) if we have not reacted yet - * to this specific new interval, we do now: - * - replica reservations are silently discarded (we count on the replicas to notice - * the interval change and un-reserve themselves); + * For (3): (see check_interval() for a full description) if we have not + * reacted yet to this specific new interval, we do now: + * - replica reservations are silently discarded (we count on the replicas to + * notice the interval change and un-reserve themselves); * - the scrubbing is halted. * * For (4): the message will be discarded, but also: - * if this is the first time we've noticed the 'abort' request, we perform the abort. + * if this is the first time we've noticed the 'abort' request, we perform + * the abort. * * \returns should the incoming event be processed? */ @@ -631,18 +663,20 @@ class PgScrubber : public ScrubPgIF, epoch_t m_last_aborted{}; // last time we've noticed a request to abort - bool m_needs_sleep{true}; ///< should we sleep before being rescheduled? always - ///< 'true', unless we just got out of a sleep period + bool m_needs_sleep{true}; ///< should we sleep before being rescheduled? + ///< always 'true', unless we just got out of a + ///< sleep period utime_t m_sleep_started_at; - // 'optional', as 'ReplicaReservations' & 'LocalReservation' are 'RAII-designed' - // to guarantee un-reserving when deleted. + // 'optional', as 'ReplicaReservations' & 'LocalReservation' are + // 'RAII-designed' to guarantee un-reserving when deleted. 
std::optional m_reservations; std::optional m_local_osd_resource; - /// the 'remote' resource we, as a replica, grant our Primary when it is scrubbing + /// the 'remote' resource we, as a replica, grant our Primary when it is + /// scrubbing std::optional m_remote_osd_resource; void cleanup_on_finish(); // scrub_clear_state() as called for a Primary when @@ -656,7 +690,8 @@ class PgScrubber : public ScrubPgIF, */ virtual void _scrub_finish() {} - // common code used by build_primary_map_chunk() and build_replica_map_chunk(): + // common code used by build_primary_map_chunk() and + // build_replica_map_chunk(): int build_scrub_map_chunk(ScrubMap& map, // primary or replica? ScrubMapBuilder& pos, hobject_t start, @@ -668,45 +703,49 @@ class PgScrubber : public ScrubPgIF, OSDService* const m_osds; const pg_shard_t m_pg_whoami; ///< a local copy of m_pg->pg_whoami; - epoch_t m_interval_start{0}; ///< interval's 'from' of when scrubbing was first scheduled + epoch_t m_interval_start{0}; ///< interval's 'from' of when scrubbing was + ///< first scheduled /* - * the exact epoch when the scrubbing actually started (started here - cleared checks - * for no-scrub conf). Incoming events are verified against this, with stale events - * discarded. + * the exact epoch when the scrubbing actually started (started here - cleared + * checks for no-scrub conf). Incoming events are verified against this, with + * stale events discarded. */ epoch_t m_epoch_start{0}; ///< the actual epoch when scrubbing started /** - * (replica) a tag identifying a specific scrub "session". Incremented whenever the - * Primary releases the replica scrub resources. - * When the scrub session is terminated (even if the interval remains unchanged, as - * might happen following an asok no-scrub command), stale scrub-resched messages + * (replica) a tag identifying a specific scrub "session". Incremented + * whenever the Primary releases the replica scrub resources. 
When the scrub + * session is terminated (even if the interval remains unchanged, as might + * happen following an asok no-scrub command), stale scrub-resched messages * triggered by the backend will be discarded. */ Scrub::act_token_t m_current_token{1}; /** - * (primary/replica) a test aid. A counter that is incremented whenever a scrub starts, - * and again when it terminates. Exposed as part of the 'pg query' command, to be used - * by test scripts. + * (primary/replica) a test aid. A counter that is incremented whenever a + * scrub starts, and again when it terminates. Exposed as part of the 'pg + * query' command, to be used by test scripts. * - * @ATTN: not guaranteed to be accurate. To be only used for tests. This is why it - * is initialized to a meaningless number; + * @ATTN: not guaranteed to be accurate. To be only used for tests. This is + * why it is initialized to a meaningless number; */ - int32_t m_sessions_counter{(int32_t)((int64_t)(this) & 0x0000'0000'00ff'fff0)}; - bool m_publish_sessions{false}; //< will the counter be part of 'query' output? + int32_t m_sessions_counter{ + (int32_t)((int64_t)(this) & 0x0000'0000'00ff'fff0)}; + bool m_publish_sessions{false}; //< will the counter be part of 'query' + //output? scrub_flags_t m_flags; - /// a reference to the details of the next scrub (as requested and managed by the PG) + /// a reference to the details of the next scrub (as requested and managed by + /// the PG) requested_scrub_t& m_planned_scrub; bool m_active{false}; /** - * a flag designed to prevent the initiation of a second scrub on a PG for which scrubbing - * has been initiated. + * a flag designed to prevent the initiation of a second scrub on a PG for + * which scrubbing has been initiated. * * set once scrubbing was initiated (i.e. 
- even before the FSM event that * will trigger a state-change out of Inactive was handled), and only reset @@ -717,7 +756,8 @@ class PgScrubber : public ScrubPgIF, * - all the time from scrub_finish() calling update_stats() till the * FSM handles the 'finished' event * - * Compared with 'm_active', this flag is asserted earlier and remains ON for longer. + * Compared with 'm_active', this flag is asserted earlier and remains ON for + * longer. */ bool m_queued_or_active{false}; @@ -746,9 +786,9 @@ class PgScrubber : public ScrubPgIF, * 'm_is_deep' - is the running scrub a deep one? * * Note that most of the code directly checks PG_STATE_DEEP_SCRUB, which is - * primary-only (and is set earlier - when scheduling the scrub). 'm_is_deep' is - * meaningful both for the primary and the replicas, and is used as a parameter when - * building the scrub maps. + * primary-only (and is set earlier - when scheduling the scrub). 'm_is_deep' + * is meaningful both for the primary and the replicas, and is used as a + * parameter when building the scrub maps. */ bool m_is_deep{false}; @@ -770,15 +810,14 @@ class PgScrubber : public ScrubPgIF, * "scrub * * Note: based on PG_STATE_REPAIR, and not on m_is_repair. I.e. for - * auto_repair will show as "deep-scrub" and not as "repair" (until the first error - * is detected). + * auto_repair will show as "deep-scrub" and not as "repair" (until the first + * error is detected). */ std::string_view m_mode_desc; void update_op_mode_text(); -private: - + private: /** * initiate a deep-scrub after the current scrub ended with errors. */ @@ -838,19 +877,21 @@ private: std::unique_ptr m_be; /** - * we mark the request priority as it arrived. It influences the queuing priority - * when we wait for local updates + * we mark the request priority as it arrived. It influences the queuing + * priority when we wait for local updates */ Scrub::scrub_prio_t m_replica_request_priority; /** * the 'preemption' "state-machine". 
* Note: I was considering an orthogonal sub-machine implementation, but as - * the state diagram is extremely simple, the added complexity wasn't justified. + * the state diagram is extremely simple, the added complexity wasn't + * justified. */ class preemption_data_t : public Scrub::preemption_t { public: - explicit preemption_data_t(PG* pg); // the PG access is used for conf access (and logs) + explicit preemption_data_t(PG* pg); // the PG access is used for conf + // access (and logs) [[nodiscard]] bool is_preemptable() const final { return m_preemptable; } @@ -882,7 +923,8 @@ private: } } - /// used by a replica to set preemptability state according to the Primary's request + /// used by a replica to set preemptability state according to the Primary's + /// request void force_preemptability(bool is_allowed) { // note: no need to lock for a replica diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index a0e27e250d97..7cc789571aa2 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -1,8 +1,6 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#include "scrub_machine.h" - #include #include @@ -10,7 +8,9 @@ #include "osd/OSD.h" #include "osd/OpRequest.h" + #include "ScrubStore.h" +#include "scrub_machine.h" #define dout_context g_ceph_context #define dout_subsys ceph_subsys_osd @@ -44,9 +44,11 @@ std::string ScrubMachine::current_states_desc() const { std::string sts{"<"}; for (auto si = state_begin(); si != state_end(); ++si) { - const auto& siw{ *si }; // prevents a warning re side-effects + const auto& siw{*si}; // prevents a warning re side-effects // the '7' is the size of the 'scrub::' - sts += boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) + "/"; + sts += + boost::core::demangle(typeid(siw).name()).substr(7, std::string::npos) + + "/"; } return sts + ">"; } @@ -268,8 +270,9 @@ WaitPushes::WaitPushes(my_context ctx) 
: my_base(ctx) sc::result WaitPushes::react(const ActivePushesUpd&) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - dout(10) << "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: " - << scrbr->pending_active_pushes() << dendl; + dout(10) + << "WaitPushes::react(const ActivePushesUpd&) pending_active_pushes: " + << scrbr->pending_active_pushes() << dendl; if (!scrbr->pending_active_pushes()) { // done waiting @@ -328,8 +331,8 @@ BuildMap::BuildMap(my_context ctx) : my_base(ctx) dout(10) << " -- state -->> Act/BuildMap" << dendl; DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - // no need to check for an epoch change, as all possible flows that brought us here have - // a check_interval() verification of their final event. + // no need to check for an epoch change, as all possible flows that brought + // us here have a check_interval() verification of their final event. if (scrbr->get_preemptor().was_preempted()) { @@ -374,7 +377,7 @@ sc::result BuildMap::react(const IntLocalMapDone&) DrainReplMaps::DrainReplMaps(my_context ctx) : my_base(ctx) { dout(10) << "-- state -->> Act/DrainReplMaps" << dendl; - // we may have received all maps already. Send the event that will make us check. + // we may have got all maps already. Send the event that will make us check. 
post_event(GotReplicas{}); } @@ -388,7 +391,8 @@ sc::result DrainReplMaps::react(const GotReplicas&) return transit(); } - dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining incoming maps: " + dout(15) << "DrainReplMaps::react(const GotReplicas&): still draining " + "incoming maps: " << scrbr->dump_awaited_maps() << dendl; return discard_event(); } @@ -402,17 +406,18 @@ WaitReplicas::WaitReplicas(my_context ctx) : my_base(ctx) } /** - * note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in this state - * for a while even after we got all our maps, we must prevent are_all_maps_available() - * (actually - the code after the if()) from being called more than once. - * This is basically a separate state, but it's too transitory and artificial to justify - * the cost of a separate state. - - * (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns immediately - * after initiating the process. The actual termination of the maps comparing etc' is - * signalled via an event. As we share the code with "classic" OSD, here too - * maps_compare_n_cleanup() is responsible for signalling the completion of the - * processing. + * note: now that maps_compare_n_cleanup() is "futurized"(*), and we remain in + * this state for a while even after we got all our maps, we must prevent + * are_all_maps_available() (actually - the code after the if()) from being + * called more than once. + * This is basically a separate state, but it's too transitory and artificial + * to justify the cost of a separate state. + + * (*) "futurized" - in Crimson, the call to maps_compare_n_cleanup() returns + * immediately after initiating the process. The actual termination of the + * maps comparing etc' is signalled via an event. As we share the code with + * "classic" OSD, here too maps_compare_n_cleanup() is responsible for + * signalling the completion of the processing. 
*/ sc::result WaitReplicas::react(const GotReplicas&) { @@ -433,7 +438,8 @@ sc::result WaitReplicas::react(const GotReplicas&) } else { - // maps_compare_n_cleanup() will arrange for MapsCompared event to be sent: + // maps_compare_n_cleanup() will arrange for MapsCompared event to be + // sent: scrbr->maps_compare_n_cleanup(); return discard_event(); } @@ -445,7 +451,8 @@ sc::result WaitReplicas::react(const GotReplicas&) sc::result WaitReplicas::react(const DigestUpdate&) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - auto warn_msg = "WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event"s; + auto warn_msg = + "WaitReplicas::react(const DigestUpdate&): Unexpected DigestUpdate event"s; dout(10) << warn_msg << dendl; scrbr->log_cluster_warning(warn_msg); return discard_event(); @@ -488,9 +495,9 @@ sc::result WaitDigestUpdate::react(const ScrubFinished&) } ScrubMachine::ScrubMachine(PG* pg, ScrubMachineListener* pg_scrub) - : m_pg_id{pg->pg_id}, m_scrbr{pg_scrub} -{ -} + : m_pg_id{pg->pg_id} + , m_scrbr{pg_scrub} +{} ScrubMachine::~ScrubMachine() = default; @@ -538,7 +545,8 @@ ActiveReplica::ActiveReplica(my_context ctx) : my_base(ctx) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases dout(10) << "-- state -->> ActiveReplica" << dendl; - scrbr->on_replica_init(); // as we might have skipped ReplicaWaitUpdates + // and as we might have skipped ReplicaWaitUpdates: + scrbr->on_replica_init(); post_event(SchedReplica{}); } diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index 95aff5c6b1c7..fdd625090423 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -16,9 +16,9 @@ #include "common/version.h" #include "include/Context.h" +#include "osd/scrubber_common.h" #include "scrub_machine_lstnr.h" -#include "osd/scrubber_common.h" class PG; // holding a pointer to that one - just for testing class PgScrubber; @@ -51,17 +51,21 @@ void on_event_discard(std::string_view nm); std::string_view 
print() const { return #E; } \ }; -MEV(RemotesReserved) ///< all replicas have granted our reserve request +/// all replicas have granted our reserve request +MEV(RemotesReserved) -MEV(ReservationFailure) ///< a reservation request has failed +/// a reservation request has failed +MEV(ReservationFailure) -MEV(StartScrub) ///< initiate a new scrubbing session (relevant if we are a Primary) +/// initiate a new scrubbing session (relevant if we are a Primary) +MEV(StartScrub) -MEV(AfterRepairScrub) ///< initiate a new scrubbing session. Only triggered at Recovery - ///< completion. +/// initiate a new scrubbing session. Only triggered at Recovery completion +MEV(AfterRepairScrub) -MEV(Unblocked) ///< triggered when the PG unblocked an object that was marked for - ///< scrubbing. Via the PGScrubUnblocked op +/// triggered when the PG unblocked an object that was marked for scrubbing. +/// Via the PGScrubUnblocked op +MEV(Unblocked) MEV(InternalSchedScrub) @@ -69,48 +73,63 @@ MEV(SelectedChunkFree) MEV(ChunkIsBusy) -MEV(ActivePushesUpd) ///< Update to active_pushes. 'active_pushes' represents recovery - ///< that is in-flight to the local ObjectStore +/// Update to active_pushes. 'active_pushes' represents recovery that +/// is in-flight to the local ObjectStore +MEV(ActivePushesUpd) -MEV(UpdatesApplied) ///< (Primary only) all updates are committed +/// (Primary only) all updates are committed +MEV(UpdatesApplied) -MEV(InternalAllUpdates) ///< the internal counterpart of UpdatesApplied +/// the internal counterpart of UpdatesApplied +MEV(InternalAllUpdates) -MEV(GotReplicas) ///< got a map from a replica +/// got a map from a replica +MEV(GotReplicas) -MEV(IntBmPreempted) ///< internal - BuildMap preempted. Required, as detected within the - ///< ctor +/// internal - BuildMap preempted. Required, as detected within the ctor +MEV(IntBmPreempted) MEV(InternalError) MEV(IntLocalMapDone) -MEV(DigestUpdate) ///< external. called upon success of a MODIFY op. 
See - ///< scrub_snapshot_metadata() +/// external. called upon success of a MODIFY op. See +/// scrub_snapshot_metadata() +MEV(DigestUpdate) -MEV(MapsCompared) ///< maps_compare_n_cleanup() transactions are done +/// maps_compare_n_cleanup() transactions are done +MEV(MapsCompared) -MEV(StartReplica) ///< initiating replica scrub. +/// initiating replica scrub +MEV(StartReplica) -MEV(StartReplicaNoWait) ///< 'start replica' when there are no pending updates +/// 'start replica' when there are no pending updates +MEV(StartReplicaNoWait) MEV(SchedReplica) -MEV(ReplicaPushesUpd) ///< Update to active_pushes. 'active_pushes' represents recovery - ///< that is in-flight to the local ObjectStore +/// Update to active_pushes. 'active_pushes' represents recovery +/// that is in-flight to the local ObjectStore +MEV(ReplicaPushesUpd) -MEV(FullReset) ///< guarantee that the FSM is in the quiescent state (i.e. NotActive) +/// guarantee that the FSM is in the quiescent state (i.e. NotActive) +MEV(FullReset) -MEV(NextChunk) ///< finished handling this chunk. Go get the next one +/// finished handling this chunk. Go get the next one +MEV(NextChunk) -MEV(ScrubFinished) ///< all chunks handled +/// all chunks handled +MEV(ScrubFinished) +// +// STATES +// struct NotActive; ///< the quiescent state. No active scrubbing. struct ReservingReplicas; ///< securing scrub resources from replicas' OSDs struct ActiveScrubbing; ///< the active state for a Primary. A sub-machine. -struct ReplicaWaitUpdates; ///< an active state for a replica. Waiting for all active - ///< operations to finish. +struct ReplicaWaitUpdates; ///< an active state for a replica. Waiting for all + ///< active operations to finish. struct ActiveReplica; ///< an active state for a replica. @@ -135,27 +154,30 @@ class ScrubMachine : public sc::state_machine { /** * The Scrubber's base (quiescent) state. * Scrubbing is triggered by one of the following events: - * - (standard scenario for a Primary): 'StartScrub'. 
Initiates the OSDs resources - * reservation process. Will be issued by PG::scrub(), following a + * + * - (standard scenario for a Primary): 'StartScrub'. Initiates the OSDs + * resources reservation process. Will be issued by PG::scrub(), following a * queued "PGScrub" op. - * - a special end-of-recovery Primary scrub event ('AfterRepairScrub') that is - * not required to reserve resources. - * - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by an incoming - * MOSDRepScrub message. * - * note (20.8.21): originally, AfterRepairScrub was triggering a scrub without waiting - * for replica resources to be acquired. But once replicas started using the - * resource-request to identify and tag the scrub session, this bypass cannot be - * supported anymore. + * - a special end-of-recovery Primary scrub event ('AfterRepairScrub'). + * + * - (for a replica) 'StartReplica' or 'StartReplicaNoWait', triggered by + * an incoming MOSDRepScrub message. + * + * note (20.8.21): originally, AfterRepairScrub was triggering a scrub without + * waiting for replica resources to be acquired. But once replicas started + * using the resource-request to identify and tag the scrub session, this + * bypass cannot be supported anymore. 
*/ struct NotActive : sc::state { explicit NotActive(my_context ctx); - using reactions = mpl::list, - // a scrubbing that was initiated at recovery completion - sc::custom_reaction, - sc::transition, - sc::transition>; + using reactions = + mpl::list, + // a scrubbing that was initiated at recovery completion: + sc::custom_reaction, + sc::transition, + sc::transition>; sc::result react(const StartScrub&); sc::result react(const AfterRepairScrub&); }; @@ -178,26 +200,35 @@ struct ReservingReplicas : sc::state { // the "active" sub-states -struct RangeBlocked; ///< the objects range is blocked -struct PendingTimer; ///< either delaying the scrub by some time and requeuing, or just - ///< requeue -struct NewChunk; ///< select a chunk to scrub, and verify its availability +/// the objects range is blocked +struct RangeBlocked; + +/// either delaying the scrub by some time and requeuing, or just requeue +struct PendingTimer; + +/// select a chunk to scrub, and verify its availability +struct NewChunk; + struct WaitPushes; struct WaitLastUpdate; struct BuildMap; -struct DrainReplMaps; ///< a problem during BuildMap. Wait for all replicas to report, - ///< then restart. -struct WaitReplicas; ///< wait for all replicas to report + +/// a problem during BuildMap. Wait for all replicas to report, then restart. +struct DrainReplMaps; + +/// wait for all replicas to report +struct WaitReplicas; + struct WaitDigestUpdate; -struct ActiveScrubbing : sc::state { +struct ActiveScrubbing + : sc::state { explicit ActiveScrubbing(my_context ctx); ~ActiveScrubbing(); - using reactions = mpl::list< - sc::custom_reaction, - sc::custom_reaction>; + using reactions = mpl::list, + sc::custom_reaction>; sc::result react(const FullReset&); sc::result react(const InternalError&); @@ -231,9 +262,10 @@ struct NewChunk : sc::state { * initiate the update process for this chunk * * Wait fo 'active_pushes' to clear. 
- * 'active_pushes' represents recovery that is in-flight to the local Objectstore, hence - * scrub waits until the correct data is readable (in-flight data to the Objectstore is - * not readable until written to disk, termed 'applied' here) + * 'active_pushes' represents recovery that is in-flight to the local + * Objectstore, hence scrub waits until the correct data is readable + * (in-flight data to the Objectstore is not readable until written to + * disk, termed 'applied' here) */ struct WaitPushes : sc::state { @@ -250,10 +282,11 @@ struct WaitLastUpdate : sc::state { void on_new_updates(const UpdatesApplied&); - using reactions = mpl::list, - sc::in_state_reaction>; + using reactions = + mpl::list, + sc::in_state_reaction>; sc::result react(const InternalAllUpdates&); }; @@ -266,14 +299,12 @@ struct BuildMap : sc::state { // handled by our parent state; // - if preempted, we switch to DrainReplMaps, where we will wait for all // replicas to send their maps before acknowledging the preemption; - // - an interval change will be handled by the relevant 'send-event' functions, - // and will translated into a 'FullReset' event. - using reactions = - mpl::list, - sc::transition, // looping, waiting - // for the backend to - // finish - sc::custom_reaction>; + // - an interval change will be handled by the relevant 'send-event' + // functions, and will translated into a 'FullReset' event. 
+ using reactions = mpl::list, + // looping, waiting for the backend to finish: + sc::transition, + sc::custom_reaction>; sc::result react(const IntLocalMapDone&); }; @@ -285,8 +316,8 @@ struct DrainReplMaps : sc::state { explicit DrainReplMaps(my_context ctx); using reactions = - mpl::list // all replicas are accounted for - >; + // all replicas are accounted for: + mpl::list>; sc::result react(const GotReplicas&); }; @@ -294,11 +325,11 @@ struct DrainReplMaps : sc::state { struct WaitReplicas : sc::state { explicit WaitReplicas(my_context ctx); - using reactions = - mpl::list, // all replicas are accounted for - sc::transition, - sc::custom_reaction - >; + using reactions = mpl::list< + // all replicas are accounted for: + sc::custom_reaction, + sc::transition, + sc::custom_reaction>; sc::result react(const GotReplicas&); sc::result react(const DigestUpdate&); @@ -309,13 +340,13 @@ struct WaitDigestUpdate : sc::state { explicit WaitDigestUpdate(my_context ctx); using reactions = mpl::list, - sc::custom_reaction, - sc::transition>; + sc::custom_reaction, + sc::transition>; sc::result react(const DigestUpdate&); sc::result react(const ScrubFinished&); }; -// ----------------------------- the "replica active" states ----------------------- +// ----------------------------- the "replica active" states /* * Waiting for 'active_pushes' to complete @@ -326,8 +357,8 @@ struct WaitDigestUpdate : sc::state { */ struct ReplicaWaitUpdates : sc::state { explicit ReplicaWaitUpdates(my_context ctx); - using reactions = - mpl::list, sc::custom_reaction>; + using reactions = mpl::list, + sc::custom_reaction>; sc::result react(const ReplicaPushesUpd&); sc::result react(const FullReset&); diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h index 94dbed218b48..f4fc9e4fbe9b 100644 --- a/src/osd/scrubber/scrub_machine_lstnr.h +++ b/src/osd/scrubber/scrub_machine_lstnr.h @@ -7,7 +7,6 @@ */ #include "common/version.h" #include "include/Context.h" 
- #include "osd/osd_types.h" namespace Scrub { @@ -90,13 +89,13 @@ struct ScrubMachineListener { virtual void replica_handling_done() = 0; - /// the version of 'scrub_clear_state()' that does not try to invoke FSM services - /// (thus can be called from FSM reactions) + /// the version of 'scrub_clear_state()' that does not try to invoke FSM + /// services (thus can be called from FSM reactions) virtual void clear_pgscrub_state() = 0; /* - * Send an 'InternalSchedScrub' FSM event either immediately, or - if 'm_need_sleep' - * is asserted - after a configuration-dependent timeout. + * Send an 'InternalSchedScrub' FSM event either immediately, or - if + * 'm_need_sleep' is asserted - after a configuration-dependent timeout. */ virtual void add_delayed_scheduling() = 0; @@ -113,8 +112,8 @@ struct ScrubMachineListener { /** * Prepare a MOSDRepScrubMap message carrying the requested scrub map * @param was_preempted - were we preempted? - * @return the message, and the current value of 'm_replica_min_epoch' (which is - * used when sending the message, but will be overwritten before that). + * @return the message, and the current value of 'm_replica_min_epoch' (which + * is used when sending the message, but will be overwritten before that). */ [[nodiscard]] virtual MsgAndEpoch prep_replica_map_msg( Scrub::PreemptionNoted was_preempted) = 0; diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 2b4d370be11d..7825b4814ca6 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -52,8 +52,8 @@ struct requested_scrub_t { /** * scrub must not be aborted. - * Set for explicitly requested scrubs, and for scrubs originated by the pairing - * process with the 'repair' flag set (in the RequestScrub event). + * Set for explicitly requested scrubs, and for scrubs originated by the + * pairing process with the 'repair' flag set (in the RequestScrub event). * * Will be copied into the 'required' scrub flag upon scrub start. 
*/ @@ -64,14 +64,15 @@ struct requested_scrub_t { * - scrub_requested() with need_auto param set, which only happens in * - scrub_finish() - if deep_scrub_on_error is set, and we have errors * - * If set, will prevent the OSD from casually postponing our scrub. When scrubbing - * starts, will cause must_scrub, must_deep_scrub and auto_repair to be set. + * If set, will prevent the OSD from casually postponing our scrub. When + * scrubbing starts, will cause must_scrub, must_deep_scrub and auto_repair to + * be set. */ bool need_auto{false}; /** - * Set for scrub-after-recovery just before we initiate the recovery deep scrub, - * or if scrub_requested() was called with either need_auto ot repair. + * Set for scrub-after-recovery just before we initiate the recovery deep + * scrub, or if scrub_requested() was called with either need_auto ot repair. * Affects PG_STATE_DEEP_SCRUB. */ bool must_deep_scrub{false}; @@ -98,8 +99,8 @@ struct requested_scrub_t { bool must_repair{false}; /* - * the value of auto_repair is determined in sched_scrub() (once per scrub. previous - * value is not remembered). Set if + * the value of auto_repair is determined in sched_scrub() (once per scrub. + * previous value is not remembered). Set if * - allowed by configuration and backend, and * - must_scrub is not set (i.e. 
- this is a periodic scrub), * - time_for_deep was just set @@ -122,7 +123,10 @@ struct ScrubPgIF { virtual ~ScrubPgIF() = default; - friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s) { return s.show(out); } + friend std::ostream& operator<<(std::ostream& out, const ScrubPgIF& s) + { + return s.show(out); + } virtual std::ostream& show(std::ostream& out) const = 0; @@ -146,9 +150,11 @@ struct ScrubPgIF { virtual void send_replica_pushes_upd(epoch_t epoch_queued) = 0; - virtual void send_start_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0; + virtual void send_start_replica(epoch_t epoch_queued, + Scrub::act_token_t token) = 0; - virtual void send_sched_replica(epoch_t epoch_queued, Scrub::act_token_t token) = 0; + virtual void send_sched_replica(epoch_t epoch_queued, + Scrub::act_token_t token) = 0; virtual void send_full_reset(epoch_t epoch_queued) = 0; @@ -164,12 +170,14 @@ struct ScrubPgIF { virtual void send_maps_compared(epoch_t epoch_queued) = 0; - virtual void on_applied_when_primary(const eversion_t &applied_version) = 0; + virtual void on_applied_when_primary(const eversion_t& applied_version) = 0; // -------------------------------------------------- - [[nodiscard]] virtual bool are_callbacks_pending() - const = 0; // currently only used for an assert + [[nodiscard]] virtual bool are_callbacks_pending() const = 0; // currently + // only used + // for an + // assert /** * the scrubber is marked 'active': @@ -215,17 +223,19 @@ struct ScrubPgIF { const requested_scrub_t& request_flags) const = 0; /** - * Return true if soid is currently being scrubbed and pending IOs should block. - * May have a side effect of preempting an in-progress scrub -- will return false - * in that case. + * Return true if soid is currently being scrubbed and pending IOs should + * block. May have a side effect of preempting an in-progress scrub -- will + * return false in that case. 
* * @param soid object to check for ongoing scrub - * @return boolean whether a request on soid should block until scrub completion + * @return boolean whether a request on soid should block until scrub + * completion */ virtual bool write_blocked_by_scrub(const hobject_t& soid) = 0; /// Returns whether any objects in the range [begin, end] are being scrubbed - virtual bool range_intersects_scrub(const hobject_t& start, const hobject_t& end) = 0; + virtual bool range_intersects_scrub(const hobject_t& start, + const hobject_t& end) = 0; /// the op priority, taken from the primary's request message virtual Scrub::scrub_prio_t replica_op_priority() const = 0; @@ -233,8 +243,9 @@ struct ScrubPgIF { /// the priority of the on-going scrub (used when requeuing events) virtual unsigned int scrub_requeue_priority( Scrub::scrub_prio_t with_priority) const = 0; - virtual unsigned int scrub_requeue_priority(Scrub::scrub_prio_t with_priority, - unsigned int suggested_priority) const = 0; + virtual unsigned int scrub_requeue_priority( + Scrub::scrub_prio_t with_priority, + unsigned int suggested_priority) const = 0; virtual void add_callback(Context* context) = 0; @@ -243,8 +254,8 @@ struct ScrubPgIF { const hobject_t& soid) = 0; /** - * the version of 'scrub_clear_state()' that does not try to invoke FSM services - * (thus can be called from FSM reactions) + * the version of 'scrub_clear_state()' that does not try to invoke FSM + * services (thus can be called from FSM reactions) */ virtual void clear_pgscrub_state() = 0; @@ -255,8 +266,8 @@ struct ScrubPgIF { virtual void send_remotes_reserved(epoch_t epoch_queued) = 0; /** - * triggers the 'ReservationFailure' (at least one replica denied us the requested - * resources) state-machine event + * triggers the 'ReservationFailure' (at least one replica denied us the + * requested resources) state-machine event */ virtual void send_reservation_failure(epoch_t epoch_queued) = 0; @@ -309,7 +320,8 @@ struct ScrubPgIF { */ virtual 
void update_scrub_job(const requested_scrub_t& request_flags) = 0; - virtual void on_maybe_registration_change(const requested_scrub_t& request_flags) = 0; + virtual void on_maybe_registration_change( + const requested_scrub_t& request_flags) = 0; // on the replica: virtual void handle_scrub_reserve_request(OpRequestRef op) = 0; @@ -317,7 +329,8 @@ struct ScrubPgIF { // and on the primary: virtual void handle_scrub_reserve_grant(OpRequestRef op, pg_shard_t from) = 0; - virtual void handle_scrub_reserve_reject(OpRequestRef op, pg_shard_t from) = 0; + virtual void handle_scrub_reserve_reject(OpRequestRef op, + pg_shard_t from) = 0; virtual void rm_from_osd_scrubbing() = 0; -- 2.47.3