m_planned_scrub.must_deep_scrub = true;
m_planned_scrub.check_repair = true;
m_planned_scrub.must_scrub = true;
+ m_planned_scrub.calculated_to_deep = true;
if (is_scrub_queued_or_active()) {
dout(10) << __func__ << ": scrubbing already ("
// analyse the combination of the requested scrub flags, the osd/pool configuration
// and the PG status to determine whether we should scrub now, and what type of scrub
// should that be.
- auto updated_flags = verify_scrub_mode();
+ auto updated_flags = validate_scrub_mode();
if (!updated_flags) {
// the stars do not align for starting a scrub for this PG at this time
// (due to configuration or priority issues)
}
bool PG::is_time_for_deep(bool allow_deep_scrub,
- bool allow_scrub,
+ bool allow_shallow_scrub,
bool has_deep_errors,
const requested_scrub_t& planned) const
{
- dout(10) << __func__ << ": need_auto?" << planned.need_auto << " allow_deep_scrub? "
- << allow_deep_scrub << dendl;
+ dout(10) << fmt::format(
+ "{}: need-auto? {} allowed? {}/{} deep-errors? {} "
+ "last_deep_scrub_stamp {}",
+ __func__,
+ planned.need_auto,
+ allow_shallow_scrub,
+ allow_deep_scrub,
+ has_deep_errors,
+ info.history.last_deep_scrub_stamp)
+ << dendl;
if (!allow_deep_scrub)
return false;
}
if (ceph_clock_now() >= next_deepscrub_interval()) {
- dout(20) << __func__ << ": now (" << ceph_clock_now() << ") >= time for deep ("
- << next_deepscrub_interval() << ")" << dendl;
+ dout(20) << __func__ << ": now (" << ceph_clock_now()
+ << ") >= time for deep (" << next_deepscrub_interval() << ")"
+ << dendl;
return true;
}
if (has_deep_errors) {
+ // note: the text below is matched by 'standalone' tests
osd->clog->info() << "osd." << osd->whoami << " pg " << info.pgid
- << " Deep scrub errors, upgrading scrub to deep-scrub";
+ << " Deep scrub errors, upgrading scrub to deep-scrub";
return true;
}
- // we only flip coins if 'allow_scrub' is asserted. Otherwise - as this function is
- // called often, we will probably be deep-scrubbing most of the time.
- if (allow_scrub) {
- bool deep_coin_flip =
+ // we only flip coins if 'allow_shallow_scrub' is asserted. Otherwise - as
+ // this function is called often, we will probably be deep-scrubbing most of
+ // the time.
+ if (allow_shallow_scrub) {
+ const bool deep_coin_flip =
(rand() % 100) < cct->_conf->osd_deep_scrub_randomize_ratio * 100;
dout(15) << __func__ << ": time_for_deep=" << planned.time_for_deep
return false;
}
-bool PG::verify_periodic_scrub_mode(bool allow_deep_scrub,
- bool try_to_auto_repair,
- bool allow_regular_scrub,
- bool has_deep_errors,
- requested_scrub_t& planned) const
+/*
+ clang-format off
+
+ Request details | none | no-scrub | no-scrub+no-deep | no-deep
+ ------------------------------------------------------------------------
+ ------------------------------------------------------------------------
+ initiated | shallow | shallow | shallow | shallow
+ ------------------------------------------------------------------------
+ init. + t.f.deep | deep | deep | shallow | shallow
+ ------------------------------------------------------------------------
+ initiated deep | deep | deep | deep | deep
+ ------------------------------------------------------------------------
+
+ clang-format on
+*/
+std::optional<requested_scrub_t> PG::validate_initiated_scrub(  // selects shallow vs. deep for a scrub that has must_scrub set
+ bool allow_deep_scrub,
+ bool try_to_auto_repair,  // not read anywhere in this function
+ bool time_for_deep,
+ bool has_deep_errors,
+ const requested_scrub_t& planned) const
+{
+ requested_scrub_t upd_flags{planned};  // work on a copy; 'planned' is const and stays as is
+
+ upd_flags.time_for_deep = time_for_deep;
+ upd_flags.deep_scrub_on_error = false;
+ upd_flags.auto_repair = false; // will only be considered for periodic scrubs
+
+ if (upd_flags.must_deep_scrub) {  // 'initiated deep': deep regardless of allow_deep_scrub
+ upd_flags.calculated_to_deep = true;
+ } else if (upd_flags.time_for_deep && allow_deep_scrub) {  // deep only when both due and permitted
+ upd_flags.calculated_to_deep = true;
+ } else {  // everything else is performed as a shallow scrub
+ upd_flags.calculated_to_deep = false;
+ if (has_deep_errors) {  // shallow scrub chosen while deep-scrub errors are recorded: report it via osd->clog
+ osd->clog->error() << fmt::format(
+ "osd.{} pg {} Regular scrub request, deep-scrub details will be lost",
+ osd->whoami,
+ info.pgid);
+ }
+ }
+
+ return upd_flags;  // always holds a value: this function has no std::nullopt return
+}
+
+/*
+ clang-format off
+
+ for periodic scrubs:
+
+ Periodic type | none | no-scrub | no-scrub+no-deep | no-deep
+ ------------------------------------------------------------------------
+ ------------------------------------------------------------------------
+ periodic | shallow | x | x | shallow
+ ------------------------------------------------------------------------
+ periodic + t.f.deep| deep | deep | x | shallow
+ ------------------------------------------------------------------------
+
+ clang-format on
+*/
+// Decide whether a periodic scrub (one without must_deep_scrub / must_repair,
+// as asserted below) may start now and, if so, whether it is to be shallow
+// or deep.
+//
+// Returns an updated copy of 'planned' in which time_for_deep,
+// deep_scrub_on_error, auto_repair and calculated_to_deep are re-derived
+// from the arguments, or std::nullopt if no scrub is to be performed.
+std::optional<requested_scrub_t> PG::validate_periodic_mode(
+ bool allow_deep_scrub,
+ bool try_to_auto_repair,
+ bool allow_shallow_scrub,
+ bool time_for_deep,
+ bool has_deep_errors,
+ const requested_scrub_t& planned) const
{
ceph_assert(!planned.must_deep_scrub && !planned.must_repair);
if (!allow_deep_scrub && has_deep_errors) {
- osd->clog->error()
- << "osd." << osd->whoami << " pg " << info.pgid
- << " Regular scrub skipped due to deep-scrub errors and nodeep-scrub set";
- return false;
+ osd->clog->error()
+ << "osd." << osd->whoami << " pg " << info.pgid
+ << " Regular scrub skipped due to deep-scrub errors and nodeep-scrub set";
+ return std::nullopt; // no scrubbing
}
- if (allow_deep_scrub) {
- // Initial entry and scheduled scrubs without nodeep_scrub set get here
+ requested_scrub_t upd_flags{planned};
- planned.time_for_deep =
- is_time_for_deep(allow_deep_scrub, allow_regular_scrub, has_deep_errors, planned);
+ upd_flags.time_for_deep = time_for_deep;
+ upd_flags.deep_scrub_on_error = false;
+ upd_flags.auto_repair = false;
+ upd_flags.calculated_to_deep = false;
+
+ dout(20) << fmt::format("{}: allowed:{}/{} t.f.d:{} req:{}",
+ __func__,
+ allow_shallow_scrub,
+ allow_deep_scrub,
+ upd_flags.time_for_deep,
+ planned)
+ << dendl;
- if (try_to_auto_repair) {
- if (planned.time_for_deep) {
- dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl;
- planned.auto_repair = true;
- } else if (allow_regular_scrub) {
- dout(20) << __func__ << ": auto repair with scrubbing, rescrub if errors found"
+ // should we perform a shallow scrub?
+ if (allow_shallow_scrub) {
+ if (!upd_flags.time_for_deep || !allow_deep_scrub) {
+ if (try_to_auto_repair) {
+ dout(10) << __func__
+ << ": auto repair with scrubbing, rescrub if errors found"
<< dendl;
- planned.deep_scrub_on_error = true;
+ upd_flags.deep_scrub_on_error = true;
}
+ dout(20) << __func__ << " will do shallow scrub (time_for_deep = "
+ << upd_flags.time_for_deep << ")" << dendl;
+ return upd_flags;
}
+ // else - either deep-scrub or nothing
}
- dout(20) << __func__ << " updated flags: " << planned
- << " allow_regular_scrub: " << allow_regular_scrub << dendl;
-
- // NOSCRUB so skip regular scrubs
- if (!allow_regular_scrub && !planned.time_for_deep) {
- return false;
+ if (upd_flags.time_for_deep) {
+ if (allow_deep_scrub) {
+ if (try_to_auto_repair) {
+ dout(20) << __func__ << ": auto repair with deep scrubbing" << dendl;
+ upd_flags.auto_repair = true;
+ }
+ upd_flags.calculated_to_deep = true;
+ dout(20) << fmt::format("{}: final: {}", __func__, upd_flags) << dendl;
+ return upd_flags;
+ }
+ // Deep scrubbing is not allowed. Had a shallow scrub been permitted, the
+ // 'allow_shallow_scrub' block above would already have returned it (its
+ // inner condition holds whenever !allow_deep_scrub), so reaching this
+ // point means that neither scrub type is allowed.
+ return std::nullopt;
}
- return true;
+ return std::nullopt; // no scrubbing
}
-std::optional<requested_scrub_t> PG::verify_scrub_mode() const
+
+/*
+ From docs.ceph.com (osd-internals/scrub):
+
+ clang-format off
+
+ Desired no-scrub flags & scrub type interactions:
+
+ Periodic type | none | no-scrub | no-scrub+no-deep | no-deep
+ ------------------------------------------------------------------------
+ ------------------------------------------------------------------------
+ periodic | shallow | x | x | shallow
+ ------------------------------------------------------------------------
+ periodic + t.f.deep| deep | deep | x | shallow
+ ------------------------------------------------------------------------
+ initiated | shallow | shallow | shallow | shallow
+ ------------------------------------------------------------------------
+ init. + t.f.deep | deep | deep | shallow | shallow
+ ------------------------------------------------------------------------
+ initiated deep | deep | deep | deep | deep
+ ------------------------------------------------------------------------
+
+ "periodic" - if !must_scrub && !must_deep_scrub;
+ "initiated deep" - if must_scrub && must_deep_scrub;
+ "initiated" - if must_scrub && !must_deep_scrub;
+
+ clang-format on
+*/
+/*
+ * The returned flags collection (requested_scrub_t) is based on
+ * m_planned_scrub with the following modifications:
+ *
+ * - calculated_to_deep will be set to true (deep) or false (shallow),
+ * depending on the scrub type (according to the decision table above);
+ * - deep_scrub_on_error will be determined;
+ * - same for auto_repair;
+ * - time_for_deep will be set (for both periodic and initiated scrubs) to
+ * true if the time for a deep scrub has been reached (+ some other
+ * conditions, see is_time_for_deep());
+ * and
+ * - need_auto is cleared
+ */
+std::optional<requested_scrub_t> PG::validate_scrub_mode() const  // yields the flags for the next scrub, or std::nullopt if none should start now
{
- const bool allow_regular_scrub =
+ const bool allow_shallow_scrub =  // false when 'noscrub' is set in the OSD map or on the pool
!(get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) ||
pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB));
const bool allow_deep_scrub =
- allow_regular_scrub &&
!(get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) ||
pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB));
const bool has_deep_errors = (info.stats.stats.sum.num_deep_scrub_errors > 0);
const bool try_to_auto_repair = (cct->_conf->osd_scrub_auto_repair &&
- get_pgbackend()->auto_repair_supported());
+ get_pgbackend()->auto_repair_supported());
dout(10) << __func__ << " pg: " << info.pgid
- << " allow: " << allow_regular_scrub << "/" << allow_deep_scrub
- << " deep errs: " << has_deep_errors
- << " auto-repair: " << try_to_auto_repair << " ("
- << cct->_conf->osd_scrub_auto_repair << ")" << dendl;
-
- auto upd_flags = m_planned_scrub;
+ << " allow: " << allow_shallow_scrub << "/" << allow_deep_scrub
+ << " deep errs: " << has_deep_errors
+ << " auto-repair: " << try_to_auto_repair << " ("
+ << cct->_conf->osd_scrub_auto_repair << ")" << dendl;
- upd_flags.time_for_deep = false;
- // Clear these in case user issues the scrub/repair command during
- // the scheduling of the scrub/repair (e.g. request reservation)
- upd_flags.deep_scrub_on_error = false;
- upd_flags.auto_repair = false;
+ // scrubbing while recovering?
+ const bool prevented_by_recovery =
+ osd->is_recovery_active() && !cct->_conf->osd_scrub_during_recovery &&
+ (!cct->_conf->osd_repair_during_recovery || !m_planned_scrub.must_repair);  // only a must_repair request with osd_repair_during_recovery set is exempt
- if (upd_flags.must_scrub && !upd_flags.must_deep_scrub && has_deep_errors) {
- osd->clog->error()
- << "osd." << osd->whoami << " pg " << info.pgid
- << " Regular scrub request, deep-scrub details will be lost";
+ if (prevented_by_recovery) {
+ dout(20) << __func__ << ": scrubbing prevented during recovery" << dendl;
+ return std::nullopt;
}
- if (!upd_flags.must_scrub) {
- // All periodic scrub handling goes here because must_scrub is
- // always set for must_deep_scrub and must_repair.
+ const bool time_for_deep = is_time_for_deep(allow_deep_scrub,  // evaluated once, then handed to whichever validator runs below
+ allow_shallow_scrub,
+ has_deep_errors,
+ m_planned_scrub);
+ std::optional<requested_scrub_t> upd_flags;
- const bool can_start_periodic = verify_periodic_scrub_mode(
- allow_deep_scrub, try_to_auto_repair, allow_regular_scrub,
- has_deep_errors, upd_flags);
- if (!can_start_periodic) {
- // "I don't want no scrub"
+ if (m_planned_scrub.must_scrub) {  // 'initiated' scrub
+ upd_flags = validate_initiated_scrub(allow_deep_scrub,
+ try_to_auto_repair,
+ time_for_deep,
+ has_deep_errors,
+ m_planned_scrub);
+ } else {  // periodic scrub
+ ceph_assert(!m_planned_scrub.must_deep_scrub);
+ upd_flags = validate_periodic_mode(allow_deep_scrub,
+ try_to_auto_repair,
+ allow_shallow_scrub,
+ time_for_deep,
+ has_deep_errors,
+ m_planned_scrub);
+ if (!upd_flags) {
dout(20) << __func__ << ": no periodic scrubs allowed" << dendl;
return std::nullopt;
}
}
- // scrubbing while recovering?
-
- bool prevented_by_recovery =
- osd->is_recovery_active() && !cct->_conf->osd_scrub_during_recovery &&
- (!cct->_conf->osd_repair_during_recovery || !upd_flags.must_repair);
-
- if (prevented_by_recovery) {
- dout(20) << __func__ << ": scrubbing prevented during recovery" << dendl;
- return std::nullopt;
- }
-
- upd_flags.need_auto = false;
+ dout(10) << fmt::format("{}: next scrub flags: {}", __func__, *upd_flags)  // NOTE(review): this dereference relies on validate_initiated_scrub() always returning a value; only the periodic result is checked above
+ << dendl;
+ upd_flags->need_auto = false;  // cleared in the returned copy
return upd_flags;
}
bool PgScrubber::should_abort() const
{
+ // note that set_op_parameters() guarantees that we would never have
+ // must_scrub set (i.e. possibly have started a scrub even though noscrub
+ // was set), without having 'required' also set.
if (m_flags.required) {
return false; // not stopping 'required' scrubs for configuration changes
}
+ // note: deep scrubs are allowed even if 'no-scrub' is set (but not
+ // 'no-deepscrub')
if (m_is_deep) {
if (get_osdmap()->test_flag(CEPH_OSDMAP_NODEEP_SCRUB) ||
m_pg->pool.info.has_flag(pg_pool_t::FLAG_NODEEP_SCRUB)) {
return true;
}
} else if (get_osdmap()->test_flag(CEPH_OSDMAP_NOSCRUB) ||
- m_pg->pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) {
+ m_pg->pool.info.has_flag(pg_pool_t::FLAG_NOSCRUB)) {
dout(10) << "noscrub set, aborting" << dendl;
return true;
}
m_epoch_start = epoch_queued;
m_needs_sleep = true;
- m_is_deep = state_test(PG_STATE_DEEP_SCRUB);
+ ceph_assert(m_is_deep == state_test(PG_STATE_DEEP_SCRUB));
update_op_mode_text();
}
m_current_token);
}
-void PgScrubber::set_op_parameters(requested_scrub_t& request)
+void PgScrubber::set_op_parameters(const requested_scrub_t& request)
{
- dout(10) << __func__ << " input: " << request << dendl;
+ dout(10) << fmt::format("{}: @ input: {}", __func__, request) << dendl;
set_queued_or_active(); // we are fully committed now.
state_set(PG_STATE_SCRUBBING);
// will we be deep-scrubbing?
- if (request.must_deep_scrub || request.need_auto || request.time_for_deep) {
+ if (request.calculated_to_deep) {
state_set(PG_STATE_DEEP_SCRUB);
+ m_is_deep = true;
+ } else {
+ m_is_deep = false;
+
+ // make sure we got the 'calculated_to_deep' flag right
+ ceph_assert(!request.must_deep_scrub);
+ ceph_assert(!request.need_auto);
}
// m_is_repair is set for either 'must_repair' or 'repair-on-the-go' (i.e.
m_is_repair = request.must_repair || m_flags.auto_repair;
if (request.must_repair) {
state_set(PG_STATE_REPAIR);
- // not calling update_op_mode_text() yet, as m_is_deep not set yet
+ update_op_mode_text();
}
// the publishing here is required for tests synchronization
dout(10) << __func__ << " " << *op->get_req() << dendl;
op->mark_started();
auto request_ep = op->get_req<MOSDScrubReserve>()->get_map_epoch();
+ dout(20) << fmt::format("{}: request_ep:{} recovery:{}",
+ __func__,
+ request_ep,
+ m_osds->is_recovery_active())
+ << dendl;
/*
* if we are currently holding a reservation, then:
if (request_ep < m_pg->get_same_interval_since()) {
// will not ack stale requests
+ dout(10) << fmt::format("{}: stale reservation (request ep{} < {}) denied",
+ __func__,
+ request_ep,
+ m_pg->get_same_interval_since())
+ << dendl;
return;
}
bool granted{false};
if (m_remote_osd_resource.has_value()) {
- dout(10) << __func__ << " already reserved." << dendl;
+ dout(10) << __func__ << " already reserved. Reassigned." << dendl;
/*
* it might well be that we did not yet finish handling the latest scrub-op
m_remote_osd_resource.reset();
dout(20) << __func__ << ": failed to reserve remotely" << dendl;
}
+ } else {
+ dout(10) << __func__ << ": recovery is active; not granting" << dendl;
}
dout(10) << __func__ << " reserved? " << (granted ? "yes" : "no") << dendl;