From 455f835c737a4b5268b32135ed0b7c26da9fc1ee Mon Sep 17 00:00:00 2001
From: Ronen Friedman
Date: Mon, 6 Nov 2023 03:40:27 -0600
Subject: [PATCH] osd/scrub: introduce populate_config_params() to compute the
 scrub-related configuration parameters from both cluster and pool options.

Signed-off-by: Ronen Friedman
---
 src/osd/scrubber/pg_scrubber.cc | 51 +++++++++++++++++++++++++
 src/osd/scrubber/pg_scrubber.h  |  6 +++
 src/osd/scrubber/scrub_job.h    | 68 +++++++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+)

diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc
index 0c9b0585088ef..babd8782e9cae 100644
--- a/src/osd/scrubber/pg_scrubber.cc
+++ b/src/osd/scrubber/pg_scrubber.cc
@@ -606,6 +606,57 @@ bool PgScrubber::reserve_local()
   return false;
 }
 
+Scrub::sched_conf_t PgScrubber::populate_config_params() const
+{
+  const pool_opts_t& pool_conf = m_pg->get_pgpool().info.opts;
+  auto& conf = get_pg_cct()->_conf;  // for brevity
+  Scrub::sched_conf_t configs;
+
+  // deep-scrub optimal interval
+  configs.deep_interval =
+      pool_conf.value_or(pool_opts_t::DEEP_SCRUB_INTERVAL, 0.0);
+  if (configs.deep_interval <= 0.0) {
+    configs.deep_interval = conf->osd_deep_scrub_interval;
+  }
+
+  // shallow-scrub interval
+  configs.shallow_interval =
+      pool_conf.value_or(pool_opts_t::SCRUB_MIN_INTERVAL, 0.0);
+  if (configs.shallow_interval <= 0.0) {
+    configs.shallow_interval = conf->osd_scrub_min_interval;
+  }
+
+  // the max allowed delay between scrubs.
+  // For deep scrubs - there is no equivalent of scrub_max_interval. Per the
+  // documentation, once deep_scrub_interval has passed, we are already
+  // "overdue", at least as far as the "ignore allowed load" window is
+  // concerned.
+
+  configs.max_deep = configs.deep_interval + configs.shallow_interval;
+
+  auto max_shallow = pool_conf.value_or(pool_opts_t::SCRUB_MAX_INTERVAL, 0.0);
+  if (max_shallow <= 0.0) {
+    max_shallow = conf->osd_scrub_max_interval;
+  }
+  if (max_shallow > 0.0) {
+    configs.max_shallow = max_shallow;
+    // otherwise - we're left with the default nullopt
+  }
+
+  // but seems like our tests require: \todo fix!
+  configs.max_deep =
+      std::max(configs.max_shallow.value_or(0.0), configs.deep_interval);
+
+  configs.interval_randomize_ratio = conf->osd_scrub_interval_randomize_ratio;
+  configs.deep_randomize_ratio = conf->osd_deep_scrub_randomize_ratio;
+  configs.mandatory_on_invalid = conf->osd_scrub_invalid_stats;
+
+  dout(15) << fmt::format("updated config:{}", configs) << dendl;
+  return configs;
+}
+
+
+
 // ----------------------------------------------------------------------------
 
 bool PgScrubber::has_pg_marked_new_updates() const
diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h
index ab82e2b1714cb..ca26e359578c4 100644
--- a/src/osd/scrubber/pg_scrubber.h
+++ b/src/osd/scrubber/pg_scrubber.h
@@ -769,6 +769,12 @@ class PgScrubber : public ScrubPgIF,
    */
   void request_rescrubbing(requested_scrub_t& req_flags);
 
+  /**
+   * combine cluster & pool configuration options into a single struct
+   * of scrub-related parameters.
+   */
+  Scrub::sched_conf_t populate_config_params() const;
+
   /*
    * Select a range of objects to scrub.
    *
diff --git a/src/osd/scrubber/scrub_job.h b/src/osd/scrubber/scrub_job.h
index c2391a7889296..57ff1400f9cb2 100644
--- a/src/osd/scrubber/scrub_job.h
+++ b/src/osd/scrubber/scrub_job.h
@@ -142,6 +142,59 @@ class ScrubJob final : public RefCountedObject {
 
 using ScrubJobRef = ceph::ref_t<ScrubJob>;
 using ScrubQContainer = std::vector<ScrubJobRef>;
+
+/**
+ * A collection of the configuration parameters (pool & OSD) that affect
+ * scrub scheduling.
+ */
+struct sched_conf_t {
+  /// the desired interval between shallow scrubs
+  double shallow_interval{0.0};
+
+  /// the desired interval between deep scrubs
+  double deep_interval{0.0};
+
+  /**
+   * the maximum interval between shallow scrubs, as determined by either the
+   * OSD or the pool configuration. Empty if no limit is configured.
+   */
+  std::optional<double> max_shallow;
+
+  /**
+   * the maximum interval between deep scrubs.
+   * For deep scrubs - there is no equivalent of scrub_max_interval. Per the
+   * documentation, once deep_scrub_interval has passed, we are already
+   * "overdue", at least as far as the "ignore allowed load" window is
+   * concerned. \todo based on users complaints (and the fact that the
+   * interaction between the configuration parameters is clear to no one),
+   * this will be revised shortly.
+   */
+  double max_deep{0.0};
+
+  /**
+   * interval_randomize_ratio
+   *
+   * We add an extra random duration to the configured times when doing
+   * scheduling. An event configured with an interval of <interval> will
+   * actually be scheduled at a time selected uniformly from
+   * [<interval>, (1+<interval_randomize_ratio>) * <interval>)
+   */
+  double interval_randomize_ratio{0.0};
+
+  /**
+   * a randomization factor aimed at preventing 'thundering herd' problems
+   * upon deep-scrubs common intervals. If polling a random number smaller
+   * than that percentage, the next shallow scrub is upgraded to deep.
+   */
+  double deep_randomize_ratio{0.0};
+
+  /**
+   * must we schedule a scrub with high urgency if we do not have a valid
+   * last scrub stamp?
+   */
+  bool mandatory_on_invalid{true};
+};
+
 }  // namespace Scrub
 
 namespace std {
@@ -178,4 +231,19 @@ struct formatter<Scrub::ScrubJob> {
       sjob.state.load(std::memory_order_relaxed));
   }
 };
+
+template <>
+struct formatter<Scrub::sched_conf_t> {
+  constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
+  template <typename FormatContext>
+  auto format(const Scrub::sched_conf_t& cf, FormatContext& ctx)
+  {
+    return format_to(
+      ctx.out(),
+      "periods: s:{}/{} d:{}/{} iv-ratio:{} deep-rand:{} on-inv:{}",
+      cf.shallow_interval, cf.max_shallow.value_or(-1.0), cf.deep_interval,
+      cf.max_deep, cf.interval_randomize_ratio, cf.deep_randomize_ratio,
+      cf.mandatory_on_invalid);
+  }
+};
 }  // namespace fmt
-- 
2.39.5
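
A note for reviewers of the scheduling logic above: the standalone sketch below is not part of the patch, and names such as effective_interval and randomized_delay are purely illustrative rather than any real Ceph API. It only demonstrates the two rules that sched_conf_t and populate_config_params() encode, namely that a pool-level interval overrides the OSD-wide default only when it is set to a positive value, and that the next event time is drawn uniformly from [interval, (1 + interval_randomize_ratio) * interval).

// Standalone sketch, NOT part of the patch. Names and values are illustrative
// assumptions; the real Ceph option plumbing differs.
#include <iostream>
#include <random>

namespace demo {

// pool value wins only if explicitly set (> 0); otherwise fall back to the
// OSD-level default. Mirrors the value_or()/"<= 0.0" pattern used in
// populate_config_params().
double effective_interval(double pool_value, double osd_default)
{
  return (pool_value > 0.0) ? pool_value : osd_default;
}

// apply the interval_randomize_ratio rule documented in sched_conf_t:
// pick a delay uniformly from [interval, (1 + ratio) * interval)
double randomized_delay(double interval, double ratio, std::mt19937& rng)
{
  std::uniform_real_distribution<double> dist(interval,
                                              (1.0 + ratio) * interval);
  return dist(rng);
}

}  // namespace demo

int main()
{
  std::mt19937 rng{std::random_device{}()};

  // hypothetical inputs: no pool override for shallow scrubs, a 3-day pool
  // override for deep scrubs, OSD defaults of 1 day and 7 days
  const double shallow = demo::effective_interval(0.0, 24 * 3600.0);
  const double deep =
      demo::effective_interval(3 * 24 * 3600.0, 7 * 24 * 3600.0);

  std::cout << "next shallow scrub in ~"
            << demo::randomized_delay(shallow, 0.5, rng) << " s\n"
            << "next deep scrub in ~"
            << demo::randomized_delay(deep, 0.2, rng) << " s\n";
  return 0;
}

Keeping the lower bound of the distribution at the configured interval means the randomization can only delay, never advance, the next scrub, which matches the half-open range described in the interval_randomize_ratio comment block.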