From 162efeaeefd99cd9864536af8df192a2324aba75 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Mon, 9 Sep 2024 07:45:16 -0500 Subject: [PATCH] osd/scrub: complete on_mid_scrub_abort() refactoring Now - referring to the aborted target and the updated job's target of that urgency, to set the scheduling details of the latter. Signed-off-by: Ronen Friedman --- src/osd/scrubber/osd_scrub.cc | 4 ++- src/osd/scrubber/pg_scrubber.cc | 56 ++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index ec9f2bd2bbd..c67d2fca5fc 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -232,7 +232,9 @@ Scrub::schedule_result_t OsdScrub::initiate_a_scrub( const Scrub::SchedEntry& candidate, Scrub::OSDRestrictions restrictions) { - dout(20) << fmt::format("trying pg[{}]", candidate.pgid) << dendl; + dout(20) << fmt::format( + "trying pg[{}] (target:{})", candidate.pgid, candidate) + << dendl; // we have a candidate to scrub. We need some PG information to // know if scrubbing is allowed diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 05dd78baff3..442f55dbefd 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -2176,40 +2176,46 @@ void PgScrubber::on_mid_scrub_abort(Scrub::delay_cause_t issue) m_planned_scrub) << dendl; - // assuming we can still depend on the 'scrubbing' flag being set; - // Also on Queued&Active. - - // note again: this is not how merging should work in the final version: - // e.g. - the 'aborted_schedule' data should be passed thru the scrubber. - // In this current patchwork, for example, we are only guessing at - // the original value of 'must_deep_scrub'. - m_planned_scrub.must_repair = m_planned_scrub.must_repair || m_is_repair; - m_planned_scrub.need_auto = m_planned_scrub.need_auto || m_flags.auto_repair; - // copy the aborted target const auto aborted_target = *m_active_target; m_active_target.reset(); const auto scrub_clock_now = ceph_clock_now(); - update_targets(m_planned_scrub, scrub_clock_now); + auto& current_targ = m_scrub_job->get_target(aborted_target.level()); + ceph_assert(!current_targ.queued); - // we may have updated both targets. For sure - we took notice of any change - // that made any of the targets into a high-priority one. All that's left: - // delay the specific target that was aborted. + // merge the aborted target with the current one + auto& curr_sched = current_targ.sched_info.schedule; + auto& abrt_sched = aborted_target.sched_info.schedule; - auto& trgt = m_scrub_job->delay_on_failure(aborted_target.level(), issue, - scrub_clock_now); + current_targ.sched_info.urgency = + std::max(current_targ.urgency(), aborted_target.urgency()); + curr_sched.scheduled_at = + std::min(curr_sched.scheduled_at, abrt_sched.scheduled_at); + curr_sched.deadline = std::min(curr_sched.deadline, abrt_sched.deadline); + curr_sched.not_before = + std::min(curr_sched.not_before, abrt_sched.not_before); + + dout(10) << fmt::format( + "{}: merged target (before delay): {}", __func__, + current_targ) + << dendl; - /// \todo complete the merging of the deadline & target for non-hp targets -#ifdef NOT_YET - if (!aborted_target.is_high_priority()) { - std::ignore = aborted_target; + // affect a delay, as there was a failure mid-scrub + m_scrub_job->delay_on_failure(current_targ.level(), issue, scrub_clock_now); + + // reinstate both targets in the queue + m_osds->get_scrub_services().enqueue_target(current_targ); + current_targ.queued = true; + + // our sister target is off the queue, too: + auto& sister = m_scrub_job->get_target( + aborted_target.level() == scrub_level_t::deep ? scrub_level_t::shallow + : scrub_level_t::deep); + if (!sister.queued) { + m_osds->get_scrub_services().enqueue_target(sister); + sister.queued = true; } -#endif - ceph_assert(!trgt.queued); - ceph_assert(!m_scrub_job->is_queued()); - m_osds->get_scrub_services().enqueue_target(trgt); - trgt.queued = true; } -- 2.39.5