type: float
level: advanced
desc: Deep scrub each PG (i.e., verify data checksums) at least this often
- fmt_desc: The interval for "deep" scrubbing (fully reading all data). The
- ``osd_scrub_load_threshold`` does not affect this setting.
+ fmt_desc: The interval for "deep" scrubbing (fully reading all data).
default: 7_day
with_legacy: true
+- name: osd_deep_scrub_interval_cv
+ type: float
+ level: advanced
+ desc: Determines the amount of variation in the deep scrub interval
+ long_desc: Deep scrub intervals are varied by a random amount to prevent
+ stampedes. This parameter determines the amount of variation.
+ Technically, osd_deep_scrub_interval_cv is the coefficient of variation of
+ the deep scrub interval.
+ fmt_desc: The coefficient of variation for the deep scrub interval, specified as a
+ ratio. On average, the next deep scrub for a PG is scheduled osd_deep_scrub_interval
+ after the last deep scrub. The actual time is randomized following a normal distribution
+ with a standard deviation of osd_deep_scrub_interval * osd_deep_scrub_interval_cv
+ (clamped to within 2 standard deviations).
+ With the default value, about 68% of the deep scrubs will be scheduled in the range
+ [0.8 * osd_deep_scrub_interval, 1.2 * osd_deep_scrub_interval], and all of them in the range
+ [0.6 * osd_deep_scrub_interval, 1.4 * osd_deep_scrub_interval].
+ min: 0
+ max: 0.4
+ default: 0.2
+ with_legacy: false
- name: osd_deep_scrub_randomize_ratio
type: float
level: advanced
- desc: Scrubs will randomly become deep scrubs at this rate (0.15 -> 15% of scrubs
- are deep)
- long_desc: This prevents a deep scrub 'stampede' by spreading deep scrubs so they
- are uniformly distributed over the week
+ desc: deprecated. Has no effect.
default: 0.15
with_legacy: true
- name: osd_deep_scrub_stride
Scrub::sched_conf_t PgScrubber::populate_config_params() const
{
const pool_opts_t& pool_conf = m_pg->get_pgpool().info.opts;
- auto& conf = get_pg_cct()->_conf; // for brevity
+ const auto& conf = get_pg_cct()->_conf; // for brevity
Scrub::sched_conf_t configs;
// deep-scrub optimal interval
std::max(configs.max_shallow.value_or(0.0), configs.deep_interval);
configs.interval_randomize_ratio = conf->osd_scrub_interval_randomize_ratio;
- configs.deep_randomize_ratio = conf->osd_deep_scrub_randomize_ratio;
+ configs.deep_randomize_ratio = conf.get_val<double>("osd_deep_scrub_interval_cv");
configs.mandatory_on_invalid = conf->osd_scrub_invalid_stats;
dout(15) << fmt::format("{}: updated config:{}", __func__, configs) << dendl;
{
m_fsm = std::make_unique<ScrubMachine>(m_pg, this);
m_fsm->initiate();
-
- m_scrub_job = std::make_optional<Scrub::ScrubJob>(
- m_osds->cct, m_pg->pg_id, m_osds->get_nodeid());
+ m_scrub_job.emplace(m_osds->cct, m_pg->pg_id, m_osds->get_nodeid());
}
void PgScrubber::set_scrub_duration(std::chrono::milliseconds duration)
, shallow_target{pg, scrub_level_t::shallow}
, deep_target{pg, scrub_level_t::deep}
, cct{cct}
+ , random_gen{random_dev()}
, log_msg_prefix{fmt::format("osd.{} scrub-job:pg[{}]:", node_id, pgid)}
{}
return earliest_target().sched_info.schedule.not_before;
}
+
void ScrubJob::adjust_deep_schedule(
utime_t last_deep,
const Scrub::sched_conf_t& app_conf,
auto& dp_times = deep_target.sched_info.schedule; // shorthand
- if (!ScrubJob::requires_randomization(deep_target.urgency())) {
- // the target time is already set. Make sure to reset the n.b. and
- // the (irrelevant) deadline
- dp_times.not_before = dp_times.scheduled_at;
- dp_times.deadline = dp_times.scheduled_at;
-
- } else {
+ if (ScrubJob::requires_randomization(deep_target.urgency())) {
utime_t adj_not_before = last_deep;
utime_t adj_target = last_deep;
dp_times.deadline = adj_target;
// scrubs that are not already eligible for scrubbing.
if ((modify_ready_targets == delay_ready_t::delay_ready) ||
adj_not_before > scrub_clock_now) {
- adj_target += app_conf.deep_interval;
- double r = rand() / (double)RAND_MAX;
- adj_target += app_conf.deep_interval * app_conf.interval_randomize_ratio *
- r; // RRR fix
+ double sdv = app_conf.deep_interval * app_conf.deep_randomize_ratio;
+ std::normal_distribution<double> normal_dist{app_conf.deep_interval, sdv};
+ auto next_delay = std::clamp(
+ normal_dist(random_gen), app_conf.deep_interval - 2 * sdv,
+ app_conf.deep_interval + 2 * sdv);
+ adj_target += next_delay;
+ dout(20) << fmt::format(
+ "deep scrubbing: next_delay={:.0f} (interval={:.0f}, "
+ "ratio={:.3f}), adjusted:{:s}",
+ next_delay, app_conf.deep_interval,
+ app_conf.deep_randomize_ratio, adj_target)
+ << dendl;
}
// the deadline can be updated directly into the scrub-job
}
dp_times.scheduled_at = adj_target;
dp_times.not_before = adj_not_before;
+ } else {
+ // the target time is already set. Make sure to reset the n.b. and
+ // the (irrelevant) deadline
+ dp_times.not_before = dp_times.scheduled_at;
+ dp_times.deadline = dp_times.scheduled_at;
}
dout(10) << fmt::format(
#include <compare>
#include <iostream>
#include <memory>
+#include <random>
#include <vector>
#include "common/ceph_atomic.h"
/**
* a randomization factor aimed at preventing 'thundering herd' problems
- * upon deep-scrubs common intervals. If polling a random number smaller
- * than that percentage, the next shallow scrub is upgraded to deep.
+ * when many PGs share a common deep-scrub interval. The actual deep scrub
+ * interval is drawn from a normal distribution around the configured interval,
+ * with a standard deviation of <deep_randomize_ratio> * <interval> (samples
+ * are clamped to within two standard deviations of the configured interval).
*/
double deep_randomize_ratio{0.0};
CephContext* cct;
+ /// random generator for the randomization of the scrub times
+ /// \todo consider using one common generator in the OSD service
+ std::random_device random_dev;
+ std::mt19937 random_gen;
+
ScrubJob(CephContext* cct, const spg_t& pg, int node_id);
/**