active(false),
shallow_errors(0), deep_errors(0), fixed(0),
must_scrub(false), must_deep_scrub(false), must_repair(false),
- need_auto(false),
+ need_auto(false), time_for_deep(false),
auto_repair(false),
check_repair(false),
deep_scrub_on_error(false),
// returns true if a scrub has been newly kicked off
bool PG::sched_scrub()
{
- bool nodeep_scrub = false;
ceph_assert(is_locked());
if (!(is_primary() && is_active() && is_clean() && !is_scrubbing())) {
return false;
}
- double deep_scrub_interval = 0;
- pool.info.opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &deep_scrub_interval);
- if (deep_scrub_interval <= 0) {
- deep_scrub_interval = cct->_conf->osd_deep_scrub_interval;
- }
- bool time_for_deep = ceph_clock_now() >=
- info.history.last_deep_scrub_stamp + deep_scrub_interval;
-
- bool deep_coin_flip = false;
- // Only add random deep scrubs when NOT user initiated scrub
- // If we randomize when noscrub set then it guarantees
- // we will deep scrub because this function is called often.
- if (!time_for_deep && !scrubber.must_scrub
- && !(get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB)
- || pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)))
- deep_coin_flip = (rand() % 100) < cct->_conf->osd_deep_scrub_randomize_ratio * 100;
- dout(20) << __func__ << ": time_for_deep=" << time_for_deep << " deep_coin_flip=" << deep_coin_flip << dendl;
-
- time_for_deep = (time_for_deep || deep_coin_flip);
-
- //NODEEP_SCRUB so ignore time initiated deep-scrub
- if (get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) ||
- pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)) {
- time_for_deep = false;
- nodeep_scrub = true;
- }
-
- if (!scrubber.must_scrub) {
- ceph_assert(!scrubber.must_deep_scrub);
-
- //NOSCRUB so skip regular scrubs
- if ((get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) ||
- pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) && !time_for_deep) {
- if (scrubber.reserved) {
- // cancel scrub if it is still in scheduling,
- // so pgs from other pools where scrub are still legal
- // have a chance to go ahead with scrubbing.
- clear_scrub_reserved();
- scrub_unreserve_replicas();
+ // All processing the first time through commits us to whatever
+ // choices are made.
+ if (!scrubber.reserved) {
+ dout(20) << __func__ << ": Start processing pg " << info.pgid << dendl;
+
+ bool allow_deep_scrub = !(get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) ||
+ pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB));
+ bool allow_scrub = !(get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) ||
+ pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB));
+ bool has_deep_errors = (info.stats.stats.sum.num_deep_scrub_errors > 0);
+ bool try_to_auto_repair = (cct->_conf->osd_scrub_auto_repair
+ && get_pgbackend()->auto_repair_supported());
+
+ scrubber.time_for_deep = false;
+ // Clear these in case user issues the scrub/repair command during
+ // the scheduling of the scrub/repair (e.g. request reservation)
+ scrubber.deep_scrub_on_error = false;
+ scrubber.auto_repair = false;
+
+ // All periodic scrub handling goes here because must_scrub is
+ // always set for must_deep_scrub and must_repair.
+ if (!scrubber.must_scrub) {
+ ceph_assert(!scrubber.must_deep_scrub && !scrubber.must_repair);
+ // Handle deep scrub determination only if allowed
+ if (allow_deep_scrub) {
+ // Initial entry and scheduled scrubs without nodeep_scrub set get here
+ if (scrubber.need_auto) {
+ dout(20) << __func__ << ": need repair after scrub errors" << dendl;
+ scrubber.time_for_deep = true;
+ } else {
+ double deep_scrub_interval = 0;
+ pool.info.opts.get(pool_opts_t::DEEP_SCRUB_INTERVAL, &deep_scrub_interval);
+ if (deep_scrub_interval <= 0) {
+ deep_scrub_interval = cct->_conf->osd_deep_scrub_interval;
+ }
+ scrubber.time_for_deep = ceph_clock_now() >=
+ info.history.last_deep_scrub_stamp + deep_scrub_interval;
+
+ bool deep_coin_flip = false;
+ // If we randomize when !allow_scrub && allow_deep_scrub, then it guarantees
+ // we will deep scrub because this function is called often.
+ if (!scrubber.time_for_deep && allow_scrub)
+ deep_coin_flip = (rand() % 100) < cct->_conf->osd_deep_scrub_randomize_ratio * 100;
+ dout(20) << __func__ << ": time_for_deep=" << scrubber.time_for_deep << " deep_coin_flip=" << deep_coin_flip << dendl;
+
+ scrubber.time_for_deep = (scrubber.time_for_deep || deep_coin_flip);
+ }
+
+ if (!scrubber.time_for_deep && has_deep_errors) {
+ osd->clog->info() << "osd." << osd->whoami
+ << " pg " << info.pgid
+ << " Deep scrub errors, upgrading scrub to deep-scrub";
+ scrubber.time_for_deep = true;
+ }
+
+ if (try_to_auto_repair) {
+ if (scrubber.time_for_deep) {
+ dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl;
+ scrubber.auto_repair = true;
+ } else if (allow_scrub) {
+ dout(20) << __func__ << ": auto repair with scrubbing, rescrub if errors found" << dendl;
+ scrubber.deep_scrub_on_error = true;
+ }
+ }
+ } else { // !allow_deep_scrub
+ dout(20) << __func__ << ": nodeep_scrub set" << dendl;
+ if (has_deep_errors) {
+ osd->clog->error() << "osd." << osd->whoami
+ << " pg " << info.pgid
+ << " Regular scrub skipped due to deep-scrub errors and nodeep-scrub set";
+ return false;
+ }
}
- return false;
- }
- }
- // Clear these in case user issues the scrub/repair command during
- // the scheduling of the scrub/repair (e.g. request reservation)
- scrubber.deep_scrub_on_error = false;
- scrubber.auto_repair = false;
- if (scrubber.need_auto) {
- dout(20) << __func__ << ": need repair after scrub errors" << dendl;
- time_for_deep = true;
- } else if (cct->_conf->osd_scrub_auto_repair
- && get_pgbackend()->auto_repair_supported()
- // respect the command from user, and not do auto-repair
- && !scrubber.must_repair
- && !scrubber.must_scrub
- && !scrubber.must_deep_scrub) {
- if (time_for_deep) {
- dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl;
- scrubber.auto_repair = true;
- } else {
- dout(20) << __func__ << ": auto repair with scrubbing, rescrub if errors found" << dendl;
- scrubber.deep_scrub_on_error = true;
+ //NOSCRUB so skip regular scrubs
+ if (!allow_scrub && !scrubber.time_for_deep) {
+ return false;
+ }
+ // scrubber.must_scrub
+ } else if (!scrubber.must_deep_scrub && has_deep_errors) {
+ osd->clog->error() << "osd." << osd->whoami
+ << " pg " << info.pgid
+ << " Regular scrub request, deep-scrub details will be lost";
}
- }
+ // Unless precluded this was handle above
+ scrubber.need_auto = false;
- bool ret = true;
- if (!scrubber.reserved) {
ceph_assert(scrubber.reserved_peers.empty());
if ((cct->_conf->osd_scrub_during_recovery || !osd->is_recovery_active()) &&
osd->inc_scrubs_pending()) {
scrub_reserve_replicas();
} else {
dout(20) << __func__ << ": failed to reserve locally" << dendl;
- ret = false;
+ return false;
}
}
+
if (scrubber.reserved) {
if (scrubber.reserve_failed) {
- dout(20) << "sched_scrub: failed, a peer declined" << dendl;
+ dout(20) << __func__ << ": failed, a peer declined" << dendl;
clear_scrub_reserved();
scrub_unreserve_replicas();
- ret = false;
+ return false;
} else if (scrubber.reserved_peers.size() ==
recovery_state.get_acting().size()) {
- dout(20) << "sched_scrub: success, reserved self and replicas" << dendl;
- if (time_for_deep) {
- dout(10) << "sched_scrub: scrub will be deep" << dendl;
+ dout(20) << __func__ << ": success, reserved self and replicas" << dendl;
+ if (scrubber.time_for_deep) {
+ dout(10) << __func__ << ": scrub will be deep" << dendl;
state_set(PG_STATE_DEEP_SCRUB);
- } else if (!scrubber.must_deep_scrub && info.stats.stats.sum.num_deep_scrub_errors) {
- if (!nodeep_scrub) {
- osd->clog->info() << "osd." << osd->whoami
- << " pg " << info.pgid
- << " Deep scrub errors, upgrading scrub to deep-scrub";
- state_set(PG_STATE_DEEP_SCRUB);
- } else if (!scrubber.must_scrub) {
- osd->clog->error() << "osd." << osd->whoami
- << " pg " << info.pgid
- << " Regular scrub skipped due to deep-scrub errors and nodeep-scrub set";
- clear_scrub_reserved();
- scrub_unreserve_replicas();
- return false;
- } else {
- osd->clog->error() << "osd." << osd->whoami
- << " pg " << info.pgid
- << " Regular scrub request, deep-scrub details will be lost";
- }
+ scrubber.time_for_deep = false;
}
queue_scrub();
} else {
// none declined, since scrubber.reserved is set
- dout(20) << "sched_scrub: reserved " << scrubber.reserved_peers << ", waiting for replicas" << dendl;
+ dout(20) << __func__ << ": reserved " << scrubber.reserved_peers
+ << ", waiting for replicas" << dendl;
}
}
-
- return ret;
+ return true;
}
bool PG::is_scrub_registered()
out << " MUST_DEEP_SCRUB";
if (pg.scrubber.must_scrub)
out << " MUST_SCRUB";
+ if (pg.scrubber.time_for_deep)
+ out << " TIME_FOR_DEEP";
if (pg.scrubber.need_auto)
out << " NEED_AUTO";