From 0bf1b29513a69773033b8965d01ad93538394ca3 Mon Sep 17 00:00:00 2001
From: Ronen Friedman
Date: Sun, 10 Sep 2023 13:50:35 -0500
Subject: [PATCH] osd/scrub: select_pg_and_scrub() moved into osd_scrub.cc

No code changes.

Signed-off-by: Ronen Friedman
---
 src/osd/scrubber/osd_scrub.cc       | 179 +++++++++++++++++++++++-----
 src/osd/scrubber/osd_scrub_sched.cc | 129 --------------------
 2 files changed, 151 insertions(+), 157 deletions(-)

diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc
index fe92fd63c7dbd..97f266d3dee47 100644
--- a/src/osd/scrubber/osd_scrub.cc
+++ b/src/osd/scrubber/osd_scrub.cc
@@ -12,39 +12,13 @@
 using namespace ::std::chrono;
 using namespace ::std::chrono_literals;
 using namespace ::std::literals;
 
-#define dout_subsys ceph_subsys_osd
-#undef dout_context
-#define dout_context (cct)
-#undef dout_prefix
-#define dout_prefix \
-  *_dout << "osd." << osd_service.get_nodeid() << " scrub-queue::" << __func__ \
-         << " "
-
-
-
-void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
-{
-  ceph_assert(f != nullptr);
-  std::lock_guard lck(jobs_lock);
-
-  f->open_array_section("scrubs");
-
-  std::for_each(to_scrub.cbegin(), to_scrub.cend(), [&f](const ScrubJobRef& j) {
-    j->dump(f);
-  });
-
-  std::for_each(penalized.cbegin(),
-                penalized.cend(),
-                [&f](const ScrubJobRef& j) { j->dump(f); });
-
-  f->close_section();
-}
 // //////////////////////////////////////////////////////////////////////////
 //
 // scrub initiation - OSD code temporarily moved here from OSD.cc
+#define dout_subsys ceph_subsys_osd
 #undef dout_context
-#define dout_context cct
+#define dout_context (cct)
 #define dout_subsys ceph_subsys_osd
 #undef dout_prefix
 #define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch())
@@ -176,6 +150,155 @@ void OSD::resched_all_scrubs()
          << " "
 
 
+void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
+{
+  ceph_assert(f != nullptr);
+  std::lock_guard lck(jobs_lock);
+
+  f->open_array_section("scrubs");
+
+  std::for_each(to_scrub.cbegin(), to_scrub.cend(), [&f](const ScrubJobRef& j) {
+    j->dump(f);
+  });
+
+  std::for_each(penalized.cbegin(),
+                penalized.cend(),
+                [&f](const ScrubJobRef& j) { j->dump(f); });
+
+  f->close_section();
+}
+
+/**
+ * a note regarding 'to_scrub_copy':
+ * 'to_scrub_copy' is a sorted set of all the ripe jobs from to_copy.
+ * As we usually expect to refer to only the first job in this set, we could
+ * consider an alternative implementation:
+ * - have collect_ripe_jobs() return the copied set without sorting it;
+ * - loop, performing:
+ *   - use std::min_element() to find a candidate;
+ *   - try that one. If not suitable, discard from 'to_scrub_copy'
+ */
+Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
+    Scrub::OSDRestrictions& preconds)
+{
+  dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / "
+           << penalized.size() << dendl;
+
+  utime_t now_is = time_now();
+
+  preconds.time_permit = scrub_time_permit(now_is);
+  preconds.load_is_low = scrub_load_below_threshold();
+  preconds.only_deadlined = !preconds.time_permit || !preconds.load_is_low;
+
+  // create a list of candidates (copying, as otherwise creating a deadlock):
+  // - possibly restore penalized
+  // - (if we didn't handle directly) remove invalid jobs
+  // - create a copy of the to_scrub (possibly up to first not-ripe)
+  // - same for the penalized (although that usually be a waste)
+  // unlock, then try the lists
+
+  std::unique_lock lck{jobs_lock};
+
+  // pardon all penalized jobs that have deadlined (or were updated)
+  scan_penalized(restore_penalized, now_is);
+  restore_penalized = false;
+
+  // remove the 'updated' flag from all entries
+  std::for_each(to_scrub.begin(),
+                to_scrub.end(),
+                [](const auto& jobref) -> void { jobref->updated = false; });
+
+  // add failed scrub attempts to the penalized list
+  move_failed_pgs(now_is);
+
+  // collect all valid & ripe jobs from the two lists. Note that we must copy,
+  // as when we use the lists we will not be holding jobs_lock (see
+  // explanation above)
+
+  auto to_scrub_copy = collect_ripe_jobs(to_scrub, now_is);
+  auto penalized_copy = collect_ripe_jobs(penalized, now_is);
+  lck.unlock();
+
+  // try the regular queue first
+  auto res = select_from_group(to_scrub_copy, preconds, now_is);
+
+  // in the sole scenario in which we've gone over all ripe jobs without success
+  // - we will try the penalized
+  if (res == Scrub::schedule_result_t::none_ready && !penalized_copy.empty()) {
+    res = select_from_group(penalized_copy, preconds, now_is);
+    dout(10) << "tried the penalized. Res: "
+             << ScrubQueue::attempt_res_text(res) << dendl;
+    restore_penalized = true;
+  }
+
+  dout(15) << dendl;
+  return res;
+}
+
+
+// not holding jobs_lock. 'group' is a copy of the actual list.
+Scrub::schedule_result_t ScrubQueue::select_from_group(
+    ScrubQContainer& group,
+    const Scrub::OSDRestrictions& preconds,
+    utime_t now_is)
+{
+  dout(15) << "jobs #: " << group.size() << dendl;
+
+  for (auto& candidate : group) {
+
+    // we expect the first job in the list to be a good candidate (if any)
+
+    dout(20) << "try initiating scrub for " << candidate->pgid << dendl;
+
+    if (preconds.only_deadlined && (candidate->schedule.deadline.is_zero() ||
+                                    candidate->schedule.deadline >= now_is)) {
+      dout(15) << " not scheduling scrub for " << candidate->pgid << " due to "
+               << (preconds.time_permit ? "high load" : "time not permitting")
+               << dendl;
+      continue;
+    }
+
+    // we have a candidate to scrub. We turn to the OSD to verify that the PG
+    // configuration allows the specified type of scrub, and to initiate the
+    // scrub.
+    switch (osd_service.initiate_a_scrub(
+        candidate->pgid, preconds.allow_requested_repair_only)) {
+
+      case Scrub::schedule_result_t::scrub_initiated:
+        // the happy path. We are done
+        dout(20) << " initiated for " << candidate->pgid << dendl;
+        return Scrub::schedule_result_t::scrub_initiated;
+
+      case Scrub::schedule_result_t::already_started:
+      case Scrub::schedule_result_t::preconditions:
+      case Scrub::schedule_result_t::bad_pg_state:
+        // continue with the next job
+        dout(20) << "failed (state/cond/started) " << candidate->pgid << dendl;
+        break;
+
+      case Scrub::schedule_result_t::no_such_pg:
+        // The pg is no longer there
+        dout(20) << "failed (no pg) " << candidate->pgid << dendl;
+        break;
+
+      case Scrub::schedule_result_t::no_local_resources:
+        // failure to secure local resources. No point in trying the other
+        // PGs at this time. Note that this is not the same as replica resources
+        // failure!
+        dout(20) << "failed (local) " << candidate->pgid << dendl;
+        return Scrub::schedule_result_t::no_local_resources;
+
+      case Scrub::schedule_result_t::none_ready:
+        // can't happen. Just for the compiler.
+        dout(5) << "failed !!! " << candidate->pgid << dendl;
+        return Scrub::schedule_result_t::none_ready;
    }
+  }
+
+  dout(20) << " returning 'none ready'" << dendl;
+  return Scrub::schedule_result_t::none_ready;
+}
+
 // //////////////////////////////////////////////////////////////////////////
 //
 // CPU load tracking and related
diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc
index b07b4385a58bc..9170e369ac47f 100644
--- a/src/osd/scrubber/osd_scrub_sched.cc
+++ b/src/osd/scrubber/osd_scrub_sched.cc
@@ -310,72 +310,6 @@ std::string_view ScrubQueue::qu_state_text(qu_state_t st)
 }
 // clang-format on
 
-/**
- * a note regarding 'to_scrub_copy':
- * 'to_scrub_copy' is a sorted set of all the ripe jobs from to_copy.
- * As we usually expect to refer to only the first job in this set, we could
- * consider an alternative implementation:
- * - have collect_ripe_jobs() return the copied set without sorting it;
- * - loop, performing:
- *   - use std::min_element() to find a candidate;
- *   - try that one. If not suitable, discard from 'to_scrub_copy'
- */
-Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
-    Scrub::OSDRestrictions& preconds)
-{
-  dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / "
-           << penalized.size() << dendl;
-
-  utime_t now_is = time_now();
-
-  preconds.time_permit = scrub_time_permit(now_is);
-  preconds.load_is_low = scrub_load_below_threshold();
-  preconds.only_deadlined = !preconds.time_permit || !preconds.load_is_low;
-
-  // create a list of candidates (copying, as otherwise creating a deadlock):
-  // - possibly restore penalized
-  // - (if we didn't handle directly) remove invalid jobs
-  // - create a copy of the to_scrub (possibly up to first not-ripe)
-  // - same for the penalized (although that usually be a waste)
-  // unlock, then try the lists
-
-  std::unique_lock lck{jobs_lock};
-
-  // pardon all penalized jobs that have deadlined (or were updated)
-  scan_penalized(restore_penalized, now_is);
-  restore_penalized = false;
-
-  // remove the 'updated' flag from all entries
-  std::for_each(to_scrub.begin(),
-                to_scrub.end(),
-                [](const auto& jobref) -> void { jobref->updated = false; });
-
-  // add failed scrub attempts to the penalized list
-  move_failed_pgs(now_is);
-
-  // collect all valid & ripe jobs from the two lists. Note that we must copy,
-  // as when we use the lists we will not be holding jobs_lock (see
-  // explanation above)
-
-  auto to_scrub_copy = collect_ripe_jobs(to_scrub, now_is);
-  auto penalized_copy = collect_ripe_jobs(penalized, now_is);
-  lck.unlock();
-
-  // try the regular queue first
-  auto res = select_from_group(to_scrub_copy, preconds, now_is);
-
-  // in the sole scenario in which we've gone over all ripe jobs without success
-  // - we will try the penalized
-  if (res == Scrub::schedule_result_t::none_ready && !penalized_copy.empty()) {
-    res = select_from_group(penalized_copy, preconds, now_is);
-    dout(10) << "tried the penalized. Res: "
-             << ScrubQueue::attempt_res_text(res) << dendl;
-    restore_penalized = true;
-  }
-
-  dout(15) << dendl;
-  return res;
-}
 
 // must be called under lock
 void ScrubQueue::rm_unregistered_jobs(ScrubQContainer& group)
@@ -434,69 +368,6 @@ ScrubQueue::ScrubQContainer ScrubQueue::collect_ripe_jobs(
   return ripes;
 }
 
-// not holding jobs_lock. 'group' is a copy of the actual list.
-Scrub::schedule_result_t ScrubQueue::select_from_group(
-  ScrubQContainer& group,
-  const Scrub::OSDRestrictions& preconds,
-  utime_t now_is)
-{
-  dout(15) << "jobs #: " << group.size() << dendl;
-
-  for (auto& candidate : group) {
-
-    // we expect the first job in the list to be a good candidate (if any)
-
-    dout(20) << "try initiating scrub for " << candidate->pgid << dendl;
-
-    if (preconds.only_deadlined && (candidate->schedule.deadline.is_zero() ||
-                                    candidate->schedule.deadline >= now_is)) {
-      dout(15) << " not scheduling scrub for " << candidate->pgid << " due to "
-               << (preconds.time_permit ? "high load" : "time not permitting")
-               << dendl;
-      continue;
-    }
-
-    // we have a candidate to scrub. We turn to the OSD to verify that the PG
-    // configuration allows the specified type of scrub, and to initiate the
-    // scrub.
-    switch (
-      osd_service.initiate_a_scrub(candidate->pgid,
-                                   preconds.allow_requested_repair_only)) {
-
-      case Scrub::schedule_result_t::scrub_initiated:
-        // the happy path. We are done
-        dout(20) << " initiated for " << candidate->pgid << dendl;
-        return Scrub::schedule_result_t::scrub_initiated;
-
-      case Scrub::schedule_result_t::already_started:
-      case Scrub::schedule_result_t::preconditions:
-      case Scrub::schedule_result_t::bad_pg_state:
-        // continue with the next job
-        dout(20) << "failed (state/cond/started) " << candidate->pgid << dendl;
-        break;
-
-      case Scrub::schedule_result_t::no_such_pg:
-        // The pg is no longer there
-        dout(20) << "failed (no pg) " << candidate->pgid << dendl;
-        break;
-
-      case Scrub::schedule_result_t::no_local_resources:
-        // failure to secure local resources. No point in trying the other
-        // PGs at this time. Note that this is not the same as replica resources
-        // failure!
-        dout(20) << "failed (local) " << candidate->pgid << dendl;
-        return Scrub::schedule_result_t::no_local_resources;
-
-      case Scrub::schedule_result_t::none_ready:
-        // can't happen. Just for the compiler.
-        dout(5) << "failed !!! " << candidate->pgid << dendl;
-        return Scrub::schedule_result_t::none_ready;
-    }
-  }
-
-  dout(20) << " returning 'none ready'" << dendl;
-  return Scrub::schedule_result_t::none_ready;
-}
 
 ScrubQueue::scrub_schedule_t ScrubQueue::adjust_target_time(
   const sched_params_t& times) const
-- 
2.39.5
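
A minimal sketch of the alternative selection loop suggested in the 'to_scrub_copy' note carried by this patch: keep the copied list of ripe jobs unsorted and repeatedly pick the most urgent candidate with std::min_element(), discarding candidates that cannot be scrubbed right now. Everything below (Job, try_result, try_to_initiate, select_one, the urgency field) is an invented stand-in for illustration; it is not the ScrubQueue API and not part of the commit.

#include <algorithm>
#include <iostream>
#include <vector>

// stand-in for a scheduled scrub job
struct Job {
  int pgid;        // placeholder for the PG identity
  double urgency;  // placeholder for the scheduling key (e.g. target time)
};

enum class try_result { initiated, skip_this_one, stop_trying };

// stand-in for osd_service.initiate_a_scrub(); here, even pgids "succeed"
static try_result try_to_initiate(const Job& j)
{
  return (j.pgid % 2 == 0) ? try_result::initiated : try_result::skip_this_one;
}

// pick-and-try loop over an *unsorted* copy of the ripe jobs
static bool select_one(std::vector<Job> ripe_copy)
{
  while (!ripe_copy.empty()) {
    // find the best candidate without sorting the whole container
    auto best = std::min_element(
        ripe_copy.begin(), ripe_copy.end(),
        [](const Job& a, const Job& b) { return a.urgency < b.urgency; });

    switch (try_to_initiate(*best)) {
      case try_result::initiated:
        std::cout << "scrubbing pg " << best->pgid << '\n';
        return true;
      case try_result::skip_this_one:
        ripe_copy.erase(best);  // discard this candidate, try the next best
        break;
      case try_result::stop_trying:
        return false;           // e.g. no local resources left
    }
  }
  return false;  // none of the ripe jobs could be started
}

int main()
{
  select_one({{7, 0.5}, {4, 1.5}, {2, 0.9}});
}

The trade-off versus the sorted copy used above is one O(n log n) sort against repeated O(n) scans; the scan-based loop only wins when the first candidate or two is almost always accepted.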
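
A second sketch, this time of the locking discipline the moved comments describe ("we must copy, as when we use the lists we will not be holding jobs_lock"): snapshot the queue while holding the lock, release it, and only then do the slow per-candidate work. ToyQueue and its members are invented for the example; the pattern, not the type, is what select_pg_and_scrub() relies on.

#include <algorithm>
#include <mutex>
#include <vector>

class ToyQueue {
 public:
  void add(int pg)
  {
    std::lock_guard g{jobs_lock};
    to_scrub.push_back(pg);
  }

  // return a copy of the queue taken under the lock; callers iterate the
  // copy without holding jobs_lock, so initiating work on a candidate
  // cannot deadlock against anything that also takes jobs_lock
  std::vector<int> snapshot() const
  {
    std::unique_lock lck{jobs_lock};
    std::vector<int> copy = to_scrub;  // copy while protected
    lck.unlock();                      // slow work happens unlocked
    std::sort(copy.begin(), copy.end());
    return copy;
  }

 private:
  mutable std::mutex jobs_lock;
  std::vector<int> to_scrub;
};

int main()
{
  ToyQueue q;
  q.add(3);
  q.add(1);
  return q.snapshot().front() == 1 ? 0 : 1;  // use the copy, lock not held
}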