OPTION(osd_scrub_load_threshold, OPT_FLOAT, 0.5)
OPTION(osd_scrub_min_interval, OPT_FLOAT, 60*60*24) // if load is low
OPTION(osd_scrub_max_interval, OPT_FLOAT, 7*60*60*24) // regardless of load
+OPTION(osd_scrub_interval_limit, OPT_FLOAT, 0.5) // randomize the scheduled scrub in the span of [min,min*(1+interval_limit))
OPTION(osd_scrub_chunk_min, OPT_INT, 5)
OPTION(osd_scrub_chunk_max, OPT_INT, 25)
OPTION(osd_scrub_sleep, OPT_FLOAT, 0) // sleep between [deep]scrub ops
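A quick illustration of the window the new osd_scrub_interval_limit option creates (a minimal sketch mirroring the ScrubJob constructor below; the helper name is hypothetical and not part of the patch):

    #include <cstdlib>

    // With the defaults above (osd_scrub_min_interval = 86400 s, limit = 0.5),
    // a non-forced scrub is pushed out by a uniform delay in [86400, 129600) s.
    double scheduled_delay(double min_interval, double interval_limit) {
      double delay = min_interval;
      if (interval_limit > 0)
        delay += rand() % (int)(min_interval * interval_limit); // random extra spread
      return delay;
    }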
return false;
}
+OSDService::ScrubJob::ScrubJob(const spg_t& pg, const utime_t& timestamp, bool must)
+ : pgid(pg),
+ sched_time(timestamp),
+ deadline(timestamp)
+{
+ // if not explicitly requested, postpone the scrub with a random delay
+ if (!must) {
+ sched_time += g_conf->osd_scrub_min_interval;
+ if (g_conf->osd_scrub_interval_limit > 0) {
+ sched_time += rand() % (int)(g_conf->osd_scrub_min_interval *
+ g_conf->osd_scrub_interval_limit);
+ }
+ deadline += g_conf->osd_scrub_max_interval;
+ }
+}
+
+bool OSDService::ScrubJob::operator<(const OSDService::ScrubJob& rhs) const {
+ if (sched_time < rhs.sched_time)
+ return true;
+ if (sched_time > rhs.sched_time)
+ return false;
+ return pgid < rhs.pgid;
+}
+
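As a usage note, the comparator above keys the set by sched_time and falls back to pgid, so two PGs registered with identical timestamps both stay in the container. A sketch under the assumption that ScrubJob is accessible here; pg_a, pg_b and now are placeholder values:

    std::set<OSDService::ScrubJob> jobs;
    jobs.insert(OSDService::ScrubJob(pg_a, now)); // must defaults to true, so no random delay
    jobs.insert(OSDService::ScrubJob(pg_b, now)); // same stamp, different pgid
    assert(jobs.size() == 2);                     // pgid tie-break keeps both entries
    // *jobs.begin() is always the job with the earliest sched_time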
bool OSD::scrub_time_permit(utime_t now)
{
struct tm bdt;
return time_permit;
}
-bool OSD::scrub_should_schedule()
+bool OSD::scrub_load_below_threshold()
{
- if (!scrub_time_permit(ceph_clock_now(cct))) {
- return false;
- }
double loadavgs[1];
if (getloadavg(loadavgs, 1) != 1) {
dout(10) << __func__ << " couldn't read loadavgs\n" << dendl;
void OSD::sched_scrub()
{
- bool load_is_low = scrub_should_schedule();
-
- dout(20) << "sched_scrub load_is_low=" << (int)load_is_low << dendl;
-
utime_t now = ceph_clock_now(cct);
-
- //dout(20) << " " << last_scrub_pg << dendl;
+ bool time_permit = scrub_time_permit(now);
+ bool load_is_low = scrub_load_below_threshold();
+ dout(20) << "sched_scrub load_is_low=" << (int)load_is_low << dendl;
- pair<utime_t, spg_t> pos;
- if (service.first_scrub_stamp(&pos)) {
+ OSDService::ScrubJob scrub;
+ if (service.first_scrub_stamp(&scrub)) {
do {
- utime_t t = pos.first;
- spg_t pgid = pos.second;
- dout(30) << "sched_scrub examine " << pgid << " at " << t << dendl;
-
- utime_t diff = now - t;
- if ((double)diff < cct->_conf->osd_scrub_min_interval) {
- dout(10) << "sched_scrub " << pgid << " at " << t
- << ": " << (double)diff << " < min (" << cct->_conf->osd_scrub_min_interval << " seconds)" << dendl;
- break;
- }
- if ((double)diff < cct->_conf->osd_scrub_max_interval && !load_is_low) {
+ dout(30) << "sched_scrub examine " << scrub.pgid << " at " << scrub.sched_time << dendl;
+
+ if (scrub.sched_time > now) {
// save ourselves some effort
- dout(10) << "sched_scrub " << pgid << " high load at " << t
- << ": " << (double)diff << " < max (" << cct->_conf->osd_scrub_max_interval << " seconds)" << dendl;
+ dout(10) << "sched_scrub " << scrub.pgid << " schedued at " << scrub.sched_time
+ << " > " << now << dendl;
break;
}
- PG *pg = _lookup_lock_pg(pgid);
- if (pg) {
- if (pg->get_pgbackend()->scrub_supported() && pg->is_active() &&
- (load_is_low ||
- (double)diff >= cct->_conf->osd_scrub_max_interval ||
- pg->scrubber.must_scrub)) {
- dout(10) << "sched_scrub scrubbing " << pgid << " at " << t
- << (pg->scrubber.must_scrub ? ", explicitly requested" :
- ( (double)diff >= cct->_conf->osd_scrub_max_interval ? ", diff >= max" : ""))
- << dendl;
- if (pg->sched_scrub()) {
- pg->unlock();
- break;
- }
+ PG *pg = _lookup_lock_pg(scrub.pgid);
+ if (!pg)
+ continue;
+ if (pg->get_pgbackend()->scrub_supported() && pg->is_active() &&
+ (scrub.deadline < now || (time_permit && load_is_low))) {
+ dout(10) << "sched_scrub scrubbing " << scrub.pgid << " at " << scrub.sched_time
+ << (pg->scrubber.must_scrub ? ", explicitly requested" :
+ (load_is_low ? ", load_is_low" : ", deadline < now"))
+ << dendl;
+ if (pg->sched_scrub()) {
+ pg->unlock();
+ break;
}
- pg->unlock();
}
- } while (service.next_scrub_stamp(pos, &pos));
+ pg->unlock();
+ } while (service.next_scrub_stamp(scrub, &scrub));
}
dout(20) << "sched_scrub done" << dendl;
}
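For reference, the per-job decision sched_scrub() now applies can be restated as a standalone predicate (an illustrative helper, not patch code; the parameters correspond to the locals computed above):

    // A job runs once its hard deadline has passed, or, before that, only when
    // both the configured scrub hours and the load threshold permit.
    bool scrub_job_runnable(const OSDService::ScrubJob& job, utime_t now,
                            bool time_permit, bool load_is_low) {
      return job.deadline < now || (time_permit && load_is_low);
    }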
Mutex sched_scrub_lock;
int scrubs_pending;
int scrubs_active;
- set< pair<utime_t,spg_t> > last_scrub_pg;
+ struct ScrubJob {
+ /// pg to be scrubbed
+ spg_t pgid;
+ /// the time the scrub is scheduled for; the scrub can be delayed if the
+ /// system load is too high or the current time falls outside the scrub hours
+ utime_t sched_time;
+ /// the hard upper bound of scrub time
+ utime_t deadline;
+ ScrubJob() {}
+ explicit ScrubJob(const spg_t& pg, const utime_t& timestamp, bool must = true);
+ /// order the jobs by sched_time
+ bool operator<(const ScrubJob& rhs) const;
+ };
+ set<ScrubJob> sched_scrub_pg;
- void reg_last_pg_scrub(spg_t pgid, utime_t t) {
+ /// @returns the scrub_reg_stamp used to unregister the scrub job
+ utime_t reg_pg_scrub(spg_t pgid, utime_t t, bool must) {
+ ScrubJob scrub(pgid, t, must);
Mutex::Locker l(sched_scrub_lock);
- last_scrub_pg.insert(pair<utime_t,spg_t>(t, pgid));
+ sched_scrub_pg.insert(scrub);
+ return scrub.sched_time;
}
- void unreg_last_pg_scrub(spg_t pgid, utime_t t) {
+ void unreg_pg_scrub(spg_t pgid, utime_t t) {
Mutex::Locker l(sched_scrub_lock);
- pair<utime_t,spg_t> p(t, pgid);
- set<pair<utime_t,spg_t> >::iterator it = last_scrub_pg.find(p);
- assert(it != last_scrub_pg.end());
- last_scrub_pg.erase(it);
+ size_t removed = sched_scrub_pg.erase(ScrubJob(pgid, t));
+ assert(removed);
}
- bool first_scrub_stamp(pair<utime_t, spg_t> *out) {
+ bool first_scrub_stamp(ScrubJob *out) {
Mutex::Locker l(sched_scrub_lock);
- if (last_scrub_pg.empty())
+ if (sched_scrub_pg.empty())
return false;
- set< pair<utime_t, spg_t> >::iterator iter = last_scrub_pg.begin();
+ set<ScrubJob>::iterator iter = sched_scrub_pg.begin();
*out = *iter;
return true;
}
- bool next_scrub_stamp(pair<utime_t, spg_t> next,
- pair<utime_t, spg_t> *out) {
+ bool next_scrub_stamp(const ScrubJob& next,
+ ScrubJob *out) {
Mutex::Locker l(sched_scrub_lock);
- if (last_scrub_pg.empty())
+ if (sched_scrub_pg.empty())
return false;
- set< pair<utime_t, spg_t> >::iterator iter = last_scrub_pg.lower_bound(next);
- if (iter == last_scrub_pg.end())
+ set<ScrubJob>::iterator iter = sched_scrub_pg.lower_bound(next);
+ if (iter == sched_scrub_pg.end())
return false;
++iter;
- if (iter == last_scrub_pg.end())
+ if (iter == sched_scrub_pg.end())
return false;
*out = *iter;
return true;
// -- scrubbing --
void sched_scrub();
bool scrub_random_backoff();
- bool scrub_should_schedule();
+ bool scrub_load_below_threshold();
bool scrub_time_permit(utime_t now);
xlist<PG*> scrub_queue;
void PG::reg_next_scrub()
{
+ if (!is_primary())
+ return;
+
+ utime_t reg_stamp;
if (scrubber.must_scrub ||
(info.stats.stats_invalid && g_conf->osd_scrub_invalid_stats)) {
- scrubber.scrub_reg_stamp = utime_t();
+ reg_stamp = ceph_clock_now(cct);
} else {
- scrubber.scrub_reg_stamp = info.history.last_scrub_stamp;
+ reg_stamp = info.history.last_scrub_stamp;
}
- if (is_primary())
- osd->reg_last_pg_scrub(info.pgid, scrubber.scrub_reg_stamp);
+ // note down the sched_time, so we can locate this scrub and remove it
+ // later on.
+ scrubber.scrub_reg_stamp = osd->reg_pg_scrub(info.pgid,
+ reg_stamp,
+ scrubber.must_scrub);
}
void PG::unreg_next_scrub()
{
if (is_primary())
- osd->unreg_last_pg_scrub(info.pgid, scrubber.scrub_reg_stamp);
+ osd->unreg_pg_scrub(info.pgid, scrubber.scrub_reg_stamp);
}
void PG::sub_op_scrub_map(OpRequestRef op)