From: Ronen Friedman Date: Fri, 15 Sep 2023 14:03:09 +0000 (-0500) Subject: osd/scrub: move OSD::sched_scrub() to OsdScrub X-Git-Tag: v19.0.0~438^2~10 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=60deaa45f2a92bbd74b79229c7633b1ea4524272;p=ceph-ci.git osd/scrub: move OSD::sched_scrub() to OsdScrub ... (as OsdScrub::initiate_scrub()). The random backoff dice roller (scrub_random_backoff()) is moved as well. Signed-off-by: Ronen Friedman --- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 13076fb9aff..255976aba0e 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -6282,9 +6282,7 @@ void OSD::tick_without_osd_lock() } if (is_active()) { - if (!scrub_random_backoff()) { - sched_scrub(); - } + service.get_scrub_services().initiate_scrub(service.is_recovery_active()); service.promote_throttle_recalibrate(); resume_creating_pg(); bool need_send_beacon = false; @@ -7597,17 +7595,6 @@ void OSD::handle_fast_scrub(MOSDScrub2 *m) m->put(); } -bool OSD::scrub_random_backoff() -{ - if (random_bool_with_probability(cct->_conf->osd_scrub_backoff_ratio)) { - dout(20) - << "scrub_random_backoff lost coin flip, randomly backing off (ratio: " - << cct->_conf->osd_scrub_backoff_ratio << ")" << dendl; - return true; // backing off - } - return false; -} - MPGStats* OSD::collect_pg_stats() { dout(15) << __func__ << dendl; diff --git a/src/osd/OSD.h b/src/osd/OSD.h index 40368387159..338b390dde1 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -1864,9 +1864,7 @@ protected: // -- scrubbing -- - void sched_scrub(); void resched_all_scrubs(); - bool scrub_random_backoff(); // -- status reporting -- MPGStats *collect_pg_stats(); diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index 8c4740cc71c..3c03b489eb0 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -52,73 +52,85 @@ void OsdScrub::log_fwd(std::string_view text) dout(20) << text << dendl; } - -// ////////////////////////////////////////////////////////////////////////// // -// scrub initiation - OSD code temporarily moved here from OSD.cc - -// temporary dout() support for OSD members: -static ostream& _prefix(std::ostream* _dout, int whoami, epoch_t epoch) { - return *_dout << "osd." << whoami << " " << epoch << " "; +bool OsdScrub::scrub_random_backoff() const +{ + if (random_bool_with_probability(conf->osd_scrub_backoff_ratio)) { + dout(20) << fmt::format( + "lost coin flip, randomly backing off (ratio: {:.3f})", + conf->osd_scrub_backoff_ratio) + << dendl; + return true; // backing off + } + return false; } -#undef dout_prefix -#define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch()) -void OSD::sched_scrub() +void OsdScrub::initiate_scrub(bool is_recovery_active) { - auto& scrub_scheduler = service.get_scrub_services(); + if (scrub_random_backoff()) { + // dice-roll says we should not scrub now + return; + } - if (auto blocked_pgs = scrub_scheduler.get_blocked_pgs_count(); - blocked_pgs > 0) { + if (auto blocked_pgs = get_blocked_pgs_count(); blocked_pgs > 0) { // some PGs managed by this OSD were blocked by a locked object during // scrub. This means we might not have the resources needed to scrub now. dout(10) - << fmt::format( - "{}: PGs are blocked while scrubbing due to locked objects ({} PGs)", - __func__, - blocked_pgs) - << dendl; + << fmt::format( + "PGs are blocked while scrubbing due to locked objects ({} PGs)", + blocked_pgs) + << dendl; } // fail fast if no resources are available - if (!scrub_scheduler.can_inc_scrubs()) { - dout(20) << __func__ << ": OSD cannot inc scrubs" << dendl; + if (!m_resource_bookkeeper.can_inc_scrubs()) { + dout(20) << "too many scrubs already running on this OSD" << dendl; return; } // if there is a PG that is just now trying to reserve scrub replica resources - // we should wait and not initiate a new scrub - if (scrub_scheduler.is_reserving_now()) { - dout(20) << __func__ << ": scrub resources reservation in progress" << dendl; + if (is_reserving_now()) { + dout(10) << "scrub resources reservation in progress" << dendl; return; } Scrub::OSDRestrictions env_conditions; - if (service.is_recovery_active() && !cct->_conf->osd_scrub_during_recovery) { - if (!cct->_conf->osd_repair_during_recovery) { - dout(15) << __func__ << ": not scheduling scrubs due to active recovery" - << dendl; + if (is_recovery_active && !conf->osd_scrub_during_recovery) { + if (!conf->osd_repair_during_recovery) { + dout(15) << "not scheduling scrubs due to active recovery" << dendl; return; } - dout(10) << __func__ - << " will only schedule explicitly requested repair due to active recovery" - << dendl; + dout(10) << "will only schedule explicitly requested repair due to active " + "recovery" + << dendl; env_conditions.allow_requested_repair_only = true; } if (g_conf()->subsys.should_gather()) { - dout(20) << __func__ << " sched_scrub starts" << dendl; - auto all_jobs = scrub_scheduler.list_registered_jobs(); + dout(20) << "scrub scheduling (@tick) starts" << dendl; + auto all_jobs = list_registered_jobs(); for (const auto& sj : all_jobs) { - dout(20) << "sched_scrub scrub-queue jobs: " << *sj << dendl; + dout(20) << fmt::format("\tscrub-queue jobs: {}", *sj) << dendl; } } - auto was_started = scrub_scheduler.select_pg_and_scrub(env_conditions); - dout(20) << "sched_scrub done (" << ScrubQueue::attempt_res_text(was_started) - << ")" << dendl; + auto was_started = select_pg_and_scrub(env_conditions); + dout(20) << fmt::format( + "scrub scheduling done ({})", + ScrubQueue::attempt_res_text(was_started)) + << dendl; } +// ////////////////////////////////////////////////////////////////////////// // +// scrub initiation - OSD code temporarily moved here from OSD.cc + +// temporary dout() support for OSD members: +static ostream& _prefix(std::ostream* _dout, int whoami, epoch_t epoch) { + return *_dout << "osd." << whoami << " " << epoch << " "; +} +#undef dout_prefix +#define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch()) Scrub::schedule_result_t OSDService::initiate_a_scrub(spg_t pgid, bool allow_requested_repair_only) diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h index f69fecaef6a..23baecea745 100644 --- a/src/osd/scrubber/osd_scrub.h +++ b/src/osd/scrubber/osd_scrub.h @@ -34,6 +34,12 @@ class OsdScrub { // note: public, as accessed by the dout macros std::ostream& gen_prefix(std::ostream& out, std::string_view fn) const; + /** + * called periodically by the OSD to select the first scrub-eligible PG + * and scrub it. + */ + void initiate_scrub(bool active_recovery); + /** * logs a string at log level 20, using OsdScrub's prefix. * An aux function to be used by sub-objects. @@ -180,4 +186,11 @@ class OsdScrub { /// number of PGs stuck while scrubbing, waiting for objects int get_blocked_pgs_count() const; + + /** + * roll a dice to determine whether we should skip this tick, not trying to + * schedule a new scrub. + * \returns true with probability of osd_scrub_backoff_ratio. + */ + bool scrub_random_backoff() const; };