From: Ronen Friedman Date: Sat, 15 Jun 2024 12:29:30 +0000 (-0500) Subject: osd/scrub: modify scrub registration implementation details X-Git-Tag: v20.0.0~1493^2~8 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=473177f8f99ff8c6952ffc411fdede95ba831a06;p=ceph.git osd/scrub: modify scrub registration implementation details following the change to the queue into holding a copy of the ScrubJob, the registration process - initiated by schedule_scrub_with_osd() - can now be simplified. adjust_target_time() is relocated as is. It will be refactored in the next commit. Signed-off-by: Ronen Friedman --- diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index 4ec948c7e05c5..32d1f9b19f2c0 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -49,7 +49,12 @@ OsdScrub::~OsdScrub() std::ostream& OsdScrub::gen_prefix(std::ostream& out, std::string_view fn) const { - return out << m_log_prefix << fn << ": "; + if (fn.starts_with("operator")) { + // it's a lambda, and __func__ is not available + return out << m_log_prefix; + } else { + return out << m_log_prefix << fn << ": "; + } } void OsdScrub::dump_scrubs(ceph::Formatter* f) const @@ -414,13 +419,6 @@ PerfCounters* OsdScrub::get_perf_counters(int pool_type, scrub_level_t level) // ////////////////////////////////////////////////////////////////////////// // // forwarders to the queue -void OsdScrub::update_job( - Scrub::ScrubJob& sjob, - const Scrub::sched_params_t& suggested, - bool reset_notbefore) -{ - m_queue.update_job(sjob, suggested, reset_notbefore); -} void OsdScrub::delay_on_failure( Scrub::ScrubJob& sjob, @@ -431,12 +429,9 @@ void OsdScrub::delay_on_failure( m_queue.delay_on_failure(sjob, delay, delay_cause, now_is); } - -void OsdScrub::register_with_osd( - Scrub::ScrubJob& sjob, - const Scrub::sched_params_t& suggested) +void OsdScrub::enqueue_target(const Scrub::ScrubJob& sjob) { - m_queue.register_with_osd(sjob, suggested); + m_queue.enqueue_target(sjob); } void OsdScrub::remove_from_osd_queue(spg_t pgid) diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h index a75fb0b5fc8eb..a00c28a11c3c8 100644 --- a/src/osd/scrubber/osd_scrub.h +++ b/src/osd/scrubber/osd_scrub.h @@ -76,45 +76,12 @@ class OsdScrub { void mark_pg_scrub_blocked(spg_t blocked_pg); void clear_pg_scrub_blocked(spg_t blocked_pg); - /** - * modify a scrub-job's scheduled time and deadline - * - * There are 3 argument combinations to consider: - * - 'must' is asserted, and the suggested time is 'scrub_must_stamp': - * the registration will be with "beginning of time" target, making the - * scrub-job eligible to immediate scrub (given that external conditions - * do not prevent scrubbing) - * - 'must' is asserted, and the suggested time is 'now': - * This happens if our stats are unknown. The results are similar to the - * previous scenario. - * - not a 'must': we take the suggested time as a basis, and add to it some - * configuration / random delays. - * ('must' is Scrub::sched_params_t.is_must) - * - * 'reset_notbefore' is used to reset the 'not_before' time to the updated - * 'scheduled_at' time. This is used whenever the scrub-job schedule is - * updated not as a result of a scrub attempt failure. - * - * locking: not using the jobs_lock - */ - void update_job( - Scrub::ScrubJob& sjob, - const Scrub::sched_params_t& suggested, - bool reset_notbefore); - /** * Add the scrub job to the list of jobs (i.e. list of PGs) to be periodically * scrubbed by the OSD. - * The registration is active as long as the PG exists and the OSD is its - * primary. - * - * See update_job() for the handling of the 'suggested' parameter. - * - * locking: might lock jobs_lock */ - void register_with_osd( - Scrub::ScrubJob& sjob, - const Scrub::sched_params_t& suggested); + void enqueue_target(const Scrub::ScrubJob& sjob); + /** * remove the pg from set of PGs to be scanned for scrubbing. diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc index 49114d8c18d24..827026870d357 100644 --- a/src/osd/scrubber/osd_scrub_sched.cc +++ b/src/osd/scrubber/osd_scrub_sched.cc @@ -70,33 +70,6 @@ void ScrubQueue::enqueue_target(const Scrub::ScrubJob& sjob) to_scrub.push_back(std::make_unique(sjob)); } -// (only for this commit) -void ScrubQueue::register_with_osd( - Scrub::ScrubJob& scrub_job, - const sched_params_t& suggested) -{ - update_job(scrub_job, suggested, true /* resets not_before */); - enqueue_target(scrub_job); - scrub_job.target_queued = true; - - dout(10) - << fmt::format( - "pg[{}] sched-state: {} (@{:s})", - scrub_job.pgid, scrub_job.state_desc(), scrub_job.get_sched_time()) - << dendl; -} - - -void ScrubQueue::update_job(Scrub::ScrubJob& scrub_job, - const sched_params_t& suggested, - bool reset_nb) -{ - // adjust the suggested scrub time according to OSD-wide status - auto adjusted = adjust_target_time(suggested); - scrub_job.high_priority = suggested.is_must == must_scrub_t::mandatory; - scrub_job.update_schedule(adjusted, reset_nb); -} - void ScrubQueue::delay_on_failure( Scrub::ScrubJob& sjob, @@ -189,48 +162,6 @@ void ScrubQueue::for_each_job( } -Scrub::scrub_schedule_t ScrubQueue::adjust_target_time( - const sched_params_t& times) const -{ - Scrub::scrub_schedule_t sched_n_dead{ - times.proposed_time, times.proposed_time, times.proposed_time}; - - if (times.is_must == Scrub::must_scrub_t::not_mandatory) { - // unless explicitly requested, postpone the scrub with a random delay - double scrub_min_interval = times.min_interval > 0 - ? times.min_interval - : conf()->osd_scrub_min_interval; - double scrub_max_interval = times.max_interval > 0 - ? times.max_interval - : conf()->osd_scrub_max_interval; - - sched_n_dead.scheduled_at += scrub_min_interval; - double r = rand() / (double)RAND_MAX; - sched_n_dead.scheduled_at += - scrub_min_interval * conf()->osd_scrub_interval_randomize_ratio * r; - - if (scrub_max_interval <= 0) { - sched_n_dead.deadline = utime_t{}; - } else { - sched_n_dead.deadline += scrub_max_interval; - } - // note: no specific job can be named in the log message - dout(20) << fmt::format( - "not-must. Was:{:s} {{min:{}/{} max:{}/{} ratio:{}}} " - "Adjusted:{:s} ({:s})", - times.proposed_time, fmt::group_digits(times.min_interval), - fmt::group_digits(conf()->osd_scrub_min_interval), - fmt::group_digits(times.max_interval), - fmt::group_digits(conf()->osd_scrub_max_interval), - conf()->osd_scrub_interval_randomize_ratio, - sched_n_dead.scheduled_at, sched_n_dead.deadline) - << dendl; - } - // else - no log needed. All relevant data will be logged by the caller - return sched_n_dead; -} - - void ScrubQueue::dump_scrubs(ceph::Formatter* f) const { ceph_assert(f != nullptr); diff --git a/src/osd/scrubber/osd_scrub_sched.h b/src/osd/scrubber/osd_scrub_sched.h index d44e567aa0fca..b506dd9c10956 100644 --- a/src/osd/scrubber/osd_scrub_sched.h +++ b/src/osd/scrubber/osd_scrub_sched.h @@ -100,7 +100,6 @@ ScrubQueue interfaces (main functions): <4> - manipulating a job's state: - - register_with_osd() - remove_from_osd_queue() - update_job() @@ -173,48 +172,12 @@ class ScrubQueue { */ std::set get_pgs(const EntryPred&) const; - /** - * Add the scrub job to the list of jobs (i.e. list of PGs) to be periodically - * scrubbed by the OSD. - * The registration is active as long as the PG exists and the OSD is its - * primary. - * - * See update_job() for the handling of the 'suggested' parameter. - * - * locking: might lock jobs_lock - */ - void register_with_osd(Scrub::ScrubJob& sjob, const sched_params_t& suggested); - /** * Add the scrub job to the list of jobs (i.e. list of PGs) to be periodically * scrubbed by the OSD. */ void enqueue_target(const Scrub::ScrubJob& sjob); - /** - * modify a scrub-job's scheduled time and deadline - * - * There are 3 argument combinations to consider: - * - 'must' is asserted, and the suggested time is 'scrub_must_stamp': - * the registration will be with "beginning of time" target, making the - * scrub-job eligible to immediate scrub (given that external conditions - * do not prevent scrubbing) - * - 'must' is asserted, and the suggested time is 'now': - * This happens if our stats are unknown. The results are similar to the - * previous scenario. - * - not a 'must': we take the suggested time as a basis, and add to it some - * configuration / random delays. - * ('must' is sched_params_t.is_must) - * - * 'reset_notbefore' is used to reset the 'not_before' time to the updated - * 'scheduled_at' time. This is used whenever the scrub-job schedule is - * updated not as a result of a scrub attempt failure. - */ - void update_job( - Scrub::ScrubJob& sjob, - const sched_params_t& suggested, - bool reset_notbefore); - void delay_on_failure( Scrub::ScrubJob& sjob, std::chrono::seconds delay, @@ -256,7 +219,7 @@ class ScrubQueue { #endif /** - * jobs_lock protects the job containers. + * jobs_lock protects the job container. * * Note that PG locks should not be acquired while holding jobs_lock. */ @@ -276,17 +239,6 @@ class ScrubQueue { */ std::atomic_int_fast16_t blocked_scrubs_cnt{0}; - /** - * If the scrub job was not explicitly requested, we postpone it by some - * random length of time. - * And if delaying the scrub - we calculate, based on pool parameters, a - * deadline we should scrub before. - * - * @return a pair of values: the determined scrub time, and the deadline - */ - Scrub::scrub_schedule_t adjust_target_time( - const Scrub::sched_params_t& recomputed_params) const; - protected: // used by the unit-tests /** * unit-tests will override this function to return a mock time diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 10802ac49631a..9a33dd193d997 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -552,13 +552,15 @@ void PgScrubber::schedule_scrub_with_osd() m_scrub_job->registered = true; auto suggested = determine_scrub_time(m_pg->get_pgpool().info.opts); - m_osds->get_scrub_services().register_with_osd(*m_scrub_job, suggested); + auto applicable_conf = populate_config_params(); + m_scrub_job->init_targets( + suggested, m_pg->info, applicable_conf, ceph_clock_now()); + + m_osds->get_scrub_services().enqueue_target(*m_scrub_job); dout(10) << fmt::format( - "{}: {} <{:.5}>&<{:.10}> --> <{:.5}>&<{:.14}>", - __func__, m_planned_scrub, - (is_primary() ? "Primary" : "Replica/other"), pre_reg, - pre_reg, registration_state(), m_scrub_job->state_desc()) + "{}: <{:.5}> --> <{:.5}>", __func__, + m_planned_scrub, pre_reg, registration_state()) << dendl; } @@ -604,7 +606,8 @@ void PgScrubber::update_scrub_job(const requested_scrub_t& request_flags) *m_scrub_job) << dendl; if (m_scrub_job->target_queued) { - m_osds->get_scrub_services().remove_from_osd_queue(*m_scrub_job); + m_osds->get_scrub_services().remove_from_osd_queue(m_pg_id); + m_scrub_job->target_queued = false; dout(20) << fmt::format( "{}: PG[{}] dequeuing for an update", __func__, m_pg_id) << dendl; @@ -612,8 +615,11 @@ void PgScrubber::update_scrub_job(const requested_scrub_t& request_flags) ceph_assert(m_pg->is_locked()); - auto suggested = determine_scrub_time(m_pg->get_pgpool().info.opts); - m_osds->get_scrub_services().update_job(*m_scrub_job, suggested, true); + const auto suggested = determine_scrub_time(m_pg->get_pgpool().info.opts); + const auto applicable_conf = populate_config_params(); + m_scrub_job->on_periods_change(suggested, applicable_conf, ceph_clock_now()); + m_osds->get_scrub_services().enqueue_target(*m_scrub_job); + m_scrub_job->target_queued = true; m_pg->publish_stats_to_osd(); dout(15) << __func__ << ": done " << registration_state() << dendl; diff --git a/src/osd/scrubber/scrub_job.cc b/src/osd/scrubber/scrub_job.cc index 236f4fe6f4c67..196a80d9b3110 100644 --- a/src/osd/scrubber/scrub_job.cc +++ b/src/osd/scrubber/scrub_job.cc @@ -43,6 +43,63 @@ ostream& operator<<(ostream& out, const ScrubJob& sjob) } } // namespace std + +Scrub::scrub_schedule_t ScrubJob::adjust_target_time( + const sched_params_t& times) const +{ + Scrub::scrub_schedule_t sched_n_dead{ + times.proposed_time, times.proposed_time, times.proposed_time}; + + const auto& conf = cct->_conf; + + if (times.is_must == Scrub::must_scrub_t::not_mandatory) { + // unless explicitly requested, postpone the scrub with a random delay + double scrub_min_interval = times.min_interval > 0 + ? times.min_interval + : conf->osd_scrub_min_interval; + double scrub_max_interval = times.max_interval > 0 + ? times.max_interval + : conf->osd_scrub_max_interval; + + sched_n_dead.scheduled_at += scrub_min_interval; + double r = rand() / (double)RAND_MAX; + sched_n_dead.scheduled_at += + scrub_min_interval * conf->osd_scrub_interval_randomize_ratio * r; + + if (scrub_max_interval <= 0) { + sched_n_dead.deadline = utime_t{}; + } else { + sched_n_dead.deadline += scrub_max_interval; + } + dout(20) << fmt::format( + "not-must. Was:{:s} {{min:{}/{} max:{}/{} ratio:{}}} " + "Adjusted:{:s} ({:s})", + times.proposed_time, fmt::group_digits(times.min_interval), + fmt::group_digits(conf->osd_scrub_min_interval), + fmt::group_digits(times.max_interval), + fmt::group_digits(conf->osd_scrub_max_interval), + conf->osd_scrub_interval_randomize_ratio, + sched_n_dead.scheduled_at, sched_n_dead.deadline) + << dendl; + } + // else - no log needed. All relevant data will be logged by the caller + return sched_n_dead; +} + + +// note: some parameters are unused in this commit. +void ScrubJob::init_targets( + const sched_params_t& suggested, + const pg_info_t& info, + const Scrub::sched_conf_t& aconf, + utime_t scrub_clock_now) +{ + auto adjusted = adjust_target_time(suggested); + high_priority = suggested.is_must == must_scrub_t::mandatory; + update_schedule(adjusted, true); +} + + void ScrubJob::update_schedule( const Scrub::scrub_schedule_t& adjusted, bool reset_failure_penalty) @@ -78,9 +135,13 @@ void ScrubJob::delay_on_failure( std::string ScrubJob::scheduling_state(utime_t now_is, bool is_deep_expected) const { - // if not in the OSD scheduling queues, not a candidate for scrubbing + // if not registered, not a candidate for scrubbing on this OSD (or at all) if (!registered) { - return "no scrub is scheduled"; + return "not registered for scrubbing"; + } + if (!target_queued) { + // if not currently queued - we are being scrubbed + return "scrubbing"; } // if the time has passed, we are surely in the queue diff --git a/src/osd/scrubber/scrub_job.h b/src/osd/scrubber/scrub_job.h index 423216f5224ec..bd09217f56ada 100644 --- a/src/osd/scrubber/scrub_job.h +++ b/src/osd/scrubber/scrub_job.h @@ -166,6 +166,15 @@ class ScrubJob { const scrub_schedule_t& adjusted, bool reset_failure_penalty); + /** + * If the scrub job was not explicitly requested, we postpone it by some + * random length of time. + * And if delaying the scrub - we calculate, based on pool parameters, a + * deadline we should scrub before. + */ + Scrub::scrub_schedule_t adjust_target_time( + const Scrub::sched_params_t& recomputed_params) const; + /** * push the 'not_before' time out by 'delay' seconds, so that this scrub target * would not be retried before 'delay' seconds have passed. @@ -175,6 +184,31 @@ class ScrubJob { delay_cause_t delay_cause, utime_t scrub_clock_now); + /** + * initial setting of the scheduling parameters of a newly registered + * PG. The scrub targets (in this stage of the refactoring - the whole + * scrub job) is initialized as for a regular periodic scrub. + */ + void init_targets( + const sched_params_t& suggested, + const pg_info_t& info, + const Scrub::sched_conf_t& aconf, + utime_t scrub_clock_now); + + /** + * recalculate the scheduling parameters for the periodic scrub targets. + * Used whenever the "external state" of the PG changes, e.g. when made + * primary - or indeed when the configuration changes. + * + * Does not modify ripe targets. + * (why? for example, a 'scrub pg' command following a 'deepscrub pg' + * would otherwise push the deep scrub to the future). + */ + void on_periods_change( + const sched_params_t& suggested, + const Scrub::sched_conf_t& aconf, + utime_t scrub_clock_now) {} + void dump(ceph::Formatter* f) const;