From 51a593e7e2d77eb2ccb7b08663d9855d4a603c5a Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Thu, 15 Aug 2024 07:51:15 -0500 Subject: [PATCH] osd/scrub: fix the conditions for auto-repair scrubs The conditions for auto-repair scrubs should have been changed when need_auto lost some of its setters. Also fix the rescheduling of repair scrubs when the last scrub ended with errors. Signed-off-by: Ronen Friedman --- qa/standalone/scrub/osd-recovery-scrub.sh | 2 +- src/osd/scrubber/osd_scrub.cc | 28 +++++++++++--------- src/osd/scrubber/pg_scrubber.cc | 21 +++++++++++---- src/osd/scrubber/scrub_job.cc | 5 ++++ src/osd/scrubber/scrub_job.h | 6 ++++- src/osd/scrubber_common.h | 31 +++++++++++++++-------- 6 files changed, 63 insertions(+), 30 deletions(-) diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh index 3d3121fe8d8..1f4319e3ad5 100755 --- a/qa/standalone/scrub/osd-recovery-scrub.sh +++ b/qa/standalone/scrub/osd-recovery-scrub.sh @@ -242,7 +242,7 @@ function TEST_recovery_scrub_2() { setup $dir || return 1 run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 run_mgr $dir x || return 1 - local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 " + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0.1 " ceph_osd_args+="--osd_scrub_backoff_ratio=0 " ceph_osd_args+="--osd_stats_update_period_not_scrubbing=3 " ceph_osd_args+="--osd_stats_update_period_scrubbing=2" diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index dfba0ee56f0..1708fc26b44 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -115,13 +115,15 @@ void OsdScrub::initiate_scrub(bool is_recovery_active) const auto env_restrictions = restrictions_on_scrubbing(is_recovery_active, scrub_time); - dout(10) << fmt::format("scrub scheduling (@tick) starts. " - "time now:{:s}, recovery is active?:{} restrictions:{}", - scrub_time, is_recovery_active, env_restrictions) + dout(10) << fmt::format( + "scrub scheduling (@tick) starts. " + "time now:{:s}, recovery is active?:{} restrictions:{}", + scrub_time, is_recovery_active, env_restrictions) << dendl; if (g_conf()->subsys.should_gather() && - !env_restrictions.high_priority_only) { + !env_restrictions.max_concurrency_reached && + !env_restrictions.random_backoff_active) { debug_log_all_jobs(); } @@ -165,7 +167,8 @@ bool OsdScrub::is_sched_target_eligible( ScrubJob::observes_max_concurrency(e.urgency)) { return false; } - if (!r.load_is_low && ScrubJob::observes_random_backoff(e.urgency)) { + if (r.random_backoff_active && + ScrubJob::observes_random_backoff(e.urgency)) { return false; } if (!r.time_permit && ScrubJob::observes_allowed_hours(e.urgency)) { @@ -174,7 +177,9 @@ bool OsdScrub::is_sched_target_eligible( if (!r.load_is_low && ScrubJob::observes_load_limit(e.urgency)) { return false; } - // recovery? + if (r.recovery_in_progress && ScrubJob::observes_recovery(e.urgency)) { + return false; + } return true; } @@ -190,13 +195,12 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing( if (!m_resource_bookkeeper.can_inc_scrubs()) { // our local OSD is already running too many scrubs dout(15) << "OSD cannot inc scrubs" << dendl; - env_conditions.high_priority_only = true; + env_conditions.max_concurrency_reached = true; } else if (scrub_random_backoff()) { // dice-roll says we should not scrub now - dout(15) << "Lost in dice. Only high priority scrubs allowed." - << dendl; - env_conditions.high_priority_only = true; + dout(15) << "Lost in dice. Only high priority scrubs allowed." << dendl; + env_conditions.random_backoff_active = true; } else if (is_recovery_active && !conf->osd_scrub_during_recovery) { if (conf->osd_repair_during_recovery) { @@ -207,9 +211,9 @@ Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing( env_conditions.allow_requested_repair_only = true; } else { - dout(15) << "recovery in progress. Only high priority scrubs allowed." + dout(15) << "recovery in progress. Operator-initiated scrubs only." << dendl; - env_conditions.high_priority_only = true; + env_conditions.recovery_in_progress = true; } } else { diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 7b58c2cfdfc..1d2f2dc09a1 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -710,12 +710,21 @@ bool PgScrubber::is_after_repair_required() const } +/** + * mark for a deep-scrub after the current scrub ended with errors. + * Note that no need to requeue the target, as it will be requeued + * when the scrub ends. + */ void PgScrubber::request_rescrubbing(requested_scrub_t& request_flags) { dout(10) << __func__ << " flags: " << request_flags << dendl; request_flags.need_auto = true; - update_scrub_job(delay_ready_t::no_delay); + auto& trgt = m_scrub_job->get_target(scrub_level_t::deep); + trgt.up_urgency_to(urgency_t::must_repair); + trgt.sched_info.schedule.scheduled_at = {0, 0}; + trgt.sched_info.schedule.not_before = ceph_clock_now(); + // no need to requeue, as scrub_finish() will do that. } @@ -1709,7 +1718,7 @@ void PgScrubber::set_op_parameters( m_epoch_start = m_pg->get_osdmap_epoch(); m_flags.check_repair = m_active_target->urgency() == urgency_t::after_repair; - m_flags.auto_repair = request.auto_repair || request.need_auto; + m_flags.auto_repair = false; m_flags.required = request.req_scrub || request.must_scrub; m_flags.priority = (request.must_scrub || request.need_auto) @@ -1733,6 +1742,9 @@ void PgScrubber::set_op_parameters( m_is_deep = m_active_target->sched_info.level == scrub_level_t::deep; if (m_is_deep) { state_set(PG_STATE_DEEP_SCRUB); + if (pg_cond.can_autorepair || request.auto_repair) { + m_flags.auto_repair = true; + } } else { ceph_assert(!request.must_deep_scrub); ceph_assert(!request.need_auto); @@ -2105,9 +2117,6 @@ void PgScrubber::scrub_finish() ceph_assert(tr == 0); } - // determine the next scrub time - update_scrub_job(delay_ready_t::delay_ready); - if (has_error) { m_pg->queue_peering_event(PGPeeringEventRef( std::make_shared(get_osdmap_epoch(), @@ -2123,6 +2132,8 @@ void PgScrubber::scrub_finish() if (do_auto_scrub) { request_rescrubbing(m_planned_scrub); } + // determine the next scrub time + update_scrub_job(delay_ready_t::delay_ready); if (m_pg->is_active() && m_pg->is_primary()) { m_pg->recovery_state.share_pg_info(); diff --git a/src/osd/scrubber/scrub_job.cc b/src/osd/scrubber/scrub_job.cc index 45a300aaafe..ee33ee06706 100644 --- a/src/osd/scrubber/scrub_job.cc +++ b/src/osd/scrubber/scrub_job.cc @@ -397,3 +397,8 @@ bool ScrubJob::observes_random_backoff(urgency_t urgency) { return urgency < urgency_t::after_repair; } + +bool ScrubJob::observes_recovery(urgency_t urgency) +{ + return urgency < urgency_t::operator_requested; +} diff --git a/src/osd/scrubber/scrub_job.h b/src/osd/scrubber/scrub_job.h index 256e0d2caa1..98a3e101f9b 100644 --- a/src/osd/scrubber/scrub_job.h +++ b/src/osd/scrubber/scrub_job.h @@ -344,6 +344,7 @@ class ScrubJob { * | noscrub | yes | no? | no | no | * | max-scrubs | yes | yes? | no | no | * | backoff | yes | no | no | no | + * | recovery | yes | no | no | no | * +------------+------------+--------------+----------+-------------+ */ @@ -362,7 +363,10 @@ class ScrubJob { static bool observes_max_concurrency(urgency_t urgency); - static bool observes_random_backoff(urgency_t urgency);}; + static bool observes_random_backoff(urgency_t urgency); + + static bool observes_recovery(urgency_t urgency); +}; } // namespace Scrub namespace std { diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index c14834db8aa..5d6fe290214 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -84,12 +84,21 @@ using act_token_t = uint32_t; /// (note: struct size should be kept small, as it is copied around) struct OSDRestrictions { /// high local OSD concurrency. Thus - only high priority scrubs are allowed - bool high_priority_only{false}; - bool allow_requested_repair_only{false}; - bool only_deadlined{false}; + bool max_concurrency_reached{false}; + + /// rolled a dice, and decided not to scrub in this tick + bool random_backoff_active{false}; + + /// the OSD is performing recovery & osd_repair_during_recovery is 'true' + bool allow_requested_repair_only:1{false}; + + /// the load is high, or the time is not right. For periodic scrubs, + /// only the overdue ones are allowed. + bool only_deadlined:1{false}; bool load_is_low:1{true}; bool time_permit:1{true}; - bool max_concurrency_reached:1{false}; + /// the OSD is performing a recovery, osd_scrub_during_recovery is 'false', + /// and so is osd_repair_during_recovery bool recovery_in_progress:1{false}; }; static_assert(sizeof(Scrub::OSDRestrictions) <= sizeof(uint32_t)); @@ -185,13 +194,13 @@ struct formatter { auto format(const Scrub::OSDRestrictions& conds, FormatContext& ctx) const { return fmt::format_to( - ctx.out(), - "priority-only:{},overdue-only:{},load:{},time:{},repair-only:{}", - conds.high_priority_only, - conds.only_deadlined, - conds.load_is_low ? "ok" : "high", - conds.time_permit ? "ok" : "no", - conds.allow_requested_repair_only); + ctx.out(), "<{}.{}.{}.{}.{}.{}>", + conds.max_concurrency_reached ? "max-scrubs" : "", + conds.random_backoff_active ? "backoff" : "", + conds.load_is_low ? "" : "high-load", + conds.time_permit ? "" : "time-restrict", + conds.recovery_in_progress ? "recovery" : "", + conds.allow_requested_repair_only ? "repair-only" : ""); } }; -- 2.39.5