From 299548024acbf8123a4e488424c06e16365fba5a Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Sun, 13 Jan 2013 22:04:58 -0800 Subject: [PATCH] osd: change scrub min/max thresholds The previous 'osd scrub min interval' was mostly meaningless and useless. Meanwhile, the 'osd scrub max interval' would only trigger a scrub if the load was sufficiently low; if it was high, the PG might *never* scrub. Instead, make the 'min' what the max used to be. If it has been more than this many seconds, and the load is low, scrub. And add an additional condition that if it has been more than the max threshold, scrub the PG no matter what--regardless of the load. Note that this does not change the default scrub interval for less-loaded clusters, but it *does* change the meaning of existing config options. Fixes: #3786 Signed-off-by: Sage Weil --- PendingReleaseNotes | 11 +++++++++++ src/common/config_opts.h | 4 ++-- src/osd/OSD.cc | 30 +++++++++++++++++++++--------- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index e69de29bb2d1d..0f1f761b9f3aa 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -0,0 +1,11 @@ + + +* The meaning of 'osd scrub max interval' has been changed: it is now + the amount of time after which a PG will be scrubbed regardless of + the system load. The option 'osd scrub min interval' now has the + meaning 'osd scrub max interval' previously had: the amount of time before a PG is scrubbed if the + load is low. The defaults have been adjusted such that the only + change in behavior is that PGs will now be scrubbed after a week + even if the system load remains high. However, if either of these + options has been changed in ceph.conf, those settings should be + reviewed in light of their adjusted meanings. 
\ No newline at end of file diff --git a/src/common/config_opts.h b/src/common/config_opts.h index b587ba77d05b0..1edd4f57f6eae 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -344,8 +344,8 @@ OPTION(osd_recovery_max_chunk, OPT_U64, 8<<20) // max size of push chunk OPTION(osd_recovery_forget_lost_objects, OPT_BOOL, false) // off for now OPTION(osd_max_scrubs, OPT_INT, 1) OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5) -OPTION(osd_scrub_min_interval, OPT_FLOAT, 300) -OPTION(osd_scrub_max_interval, OPT_FLOAT, 60*60*24) // once a day +OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low +OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load OPTION(osd_deep_scrub_interval, OPT_FLOAT, 60*60*24*7) // once a week OPTION(osd_deep_scrub_stride, OPT_INT, 524288) OPTION(osd_auto_weight, OPT_BOOL, false) diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index 501ecea6ecc17..70c89f7c317ba 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -3563,11 +3563,13 @@ void OSD::sched_scrub() { assert(osd_lock.is_locked()); - bool should = scrub_should_schedule(); + bool load_is_low = scrub_should_schedule(); - dout(20) << "sched_scrub should=" << (int)should << dendl; + dout(20) << "sched_scrub load_is_low=" << (int)load_is_low << dendl; utime_t max = ceph_clock_now(g_ceph_context); + utime_t min = max; + min -= g_conf->osd_scrub_min_interval; max -= g_conf->osd_scrub_max_interval; //dout(20) << " " << last_scrub_pg << dendl; @@ -3577,20 +3579,30 @@ void OSD::sched_scrub() utime_t t = pos.first; pg_t pgid = pos.second; - if (t > max) { + if (t > min) { dout(10) << " " << pgid << " at " << t - << " > " << max << " (" << g_conf->osd_scrub_max_interval << " seconds ago)" << dendl; + << " > min " << min << " (" << g_conf->osd_scrub_min_interval << " seconds ago)" << dendl; + break; + } + if (t > max && !load_is_low) { + // save ourselves some effort break; } - dout(10) << " on " << t << " " << pgid << dendl; PG *pg = 
_lookup_lock_pg(pgid); if (pg) { if (pg->is_active() && - (should || pg->scrubber.must_scrub) && - pg->sched_scrub()) { - pg->unlock(); - break; + (load_is_low || + t < max || + pg->scrubber.must_scrub)) { + dout(10) << " " << pgid << " at " << t + << (pg->scrubber.must_scrub ? ", explicitly requested" : "") + << (t < max ? ", last_scrub > max" : "") + << dendl; + if (pg->sched_scrub()) { + pg->unlock(); + break; + } } pg->unlock(); } -- 2.39.5