void OsdScrub::initiate_scrub(bool is_recovery_active)
{
- if (scrub_random_backoff()) {
- // dice-roll says we should not scrub now
- return;
- }
+ const utime_t scrub_time = ceph_clock_now();
+ dout(10) << fmt::format(
+ "time now:{}, recover is active?:{}", scrub_time,
+ is_recovery_active)
+ << dendl;
if (auto blocked_pgs = get_blocked_pgs_count(); blocked_pgs > 0) {
// some PGs managed by this OSD were blocked by a locked object during
<< dendl;
}
- // fail fast if no resources are available
- if (!m_resource_bookkeeper.can_inc_scrubs()) {
- dout(20) << "too many scrubs already running on this OSD" << dendl;
- return;
- }
-
- // if there is a PG that is just now trying to reserve scrub replica resources -
- // we should wait and not initiate a new scrub
- if (m_queue.is_reserving_now()) {
- dout(10) << "scrub resources reservation in progress" << dendl;
- return;
- }
-
- utime_t scrub_time = ceph_clock_now();
- dout(10) << fmt::format(
- "time now:{}, recover is active?:{}", scrub_time,
- is_recovery_active)
- << dendl;
-
// check the OSD-wide environment conditions (scrub resources, time, etc.).
// These may restrict the type of scrubs we are allowed to start, or just
- // prevent us from starting any scrub at all.
+ // prevent us from starting any non-operator-initiated scrub at all.
auto env_restrictions =
restrictions_on_scrubbing(is_recovery_active, scrub_time);
- if (!env_restrictions) {
- return;
- }
- if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
+ if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>() &&
+ !env_restrictions.high_priority_only) {
dout(20) << "scrub scheduling (@tick) starts" << dendl;
auto all_jobs = m_queue.list_registered_jobs();
for (const auto& sj : all_jobs) {
// queue interface used here: we ask for a list of
// eligible targets (based on the known restrictions).
// We try all elements of this list until a (possibly temporary) success.
- auto candidates = m_queue.ready_to_scrub(*env_restrictions, scrub_time);
+ auto candidates = m_queue.ready_to_scrub(env_restrictions, scrub_time);
if (candidates.empty()) {
dout(20) << "no PGs are ready for scrubbing" << dendl;
return;
// scrub. For some failures - we can continue with the next candidate. For
// others - we should stop trying to scrub at this tick.
auto res = initiate_a_scrub(
- candidate, env_restrictions->allow_requested_repair_only);
+ candidate, env_restrictions.allow_requested_repair_only);
if (res == schedule_result_t::target_specific_failure) {
// continue with the next job.
}
-std::optional<Scrub::OSDRestrictions> OsdScrub::restrictions_on_scrubbing(
+Scrub::OSDRestrictions OsdScrub::restrictions_on_scrubbing(
bool is_recovery_active,
utime_t scrub_clock_now) const
{
- // our local OSD may already be running too many scrubs
+ Scrub::OSDRestrictions env_conditions;
+
+ // some environmental conditions prevent all but high priority scrubs
+
if (!m_resource_bookkeeper.can_inc_scrubs()) {
- dout(10) << "OSD cannot inc scrubs" << dendl;
- return std::nullopt;
- }
+ // our local OSD is already running too many scrubs
+ dout(15) << "OSD cannot inc scrubs" << dendl;
+ env_conditions.high_priority_only = true;
- // if there is a PG that is just now trying to reserve scrub replica resources
- // - we should wait and not initiate a new scrub
- if (m_queue.is_reserving_now()) {
+ } else if (scrub_random_backoff()) {
+ // dice-roll says we should not scrub now
+ dout(15) << "Lost in dice. Only high priority scrubs allowed."
+ << dendl;
+ env_conditions.high_priority_only = true;
+
+ } else if (m_queue.is_reserving_now()) {
+ // if there is a PG that is just now trying to reserve scrub replica
+ // resources - we should wait and not initiate a new scrub
dout(10) << "scrub resources reservation in progress" << dendl;
- return std::nullopt;
- }
+ env_conditions.high_priority_only = true;
- Scrub::OSDRestrictions env_conditions;
- env_conditions.time_permit = scrub_time_permit(scrub_clock_now);
- env_conditions.load_is_low = m_load_tracker.scrub_load_below_threshold();
- env_conditions.only_deadlined =
- !env_conditions.time_permit || !env_conditions.load_is_low;
-
- if (is_recovery_active && !conf->osd_scrub_during_recovery) {
- if (!conf->osd_repair_during_recovery) {
- dout(15) << "not scheduling scrubs due to active recovery" << dendl;
- return std::nullopt;
+ } else if (is_recovery_active && !conf->osd_scrub_during_recovery) {
+ if (conf->osd_repair_during_recovery) {
+ dout(15)
+ << "will only schedule explicitly requested repair due to active "
+ "recovery"
+ << dendl;
+ env_conditions.allow_requested_repair_only = true;
+
+ } else {
+ dout(15) << "recovery in progress. Only high priority scrubs allowed."
+ << dendl;
+ env_conditions.high_priority_only = true;
}
+ } else {
- dout(10) << "will only schedule explicitly requested repair due to active "
- "recovery"
- << dendl;
- env_conditions.allow_requested_repair_only = true;
+ // regular, i.e. non-high-priority scrubs are allowed
+ env_conditions.time_permit = scrub_time_permit(scrub_clock_now);
+ env_conditions.load_is_low = m_load_tracker.scrub_load_below_threshold();
+ env_conditions.only_deadlined =
+ !env_conditions.time_permit || !env_conditions.load_is_low;
}
return env_conditions;
/**
* check the OSD-wide environment conditions (scrub resources, time, etc.).
- * These may restrict the type of scrubs we are allowed to start, or just
- * prevent us from starting any scrub at all.
+ * These may restrict the type of scrubs we are allowed to start, possibly
+ * down to allowing only high-priority scrubs.
*
* Specifically:
- * a nullopt is returned if we are not allowed to scrub at all, for either of
+ * the high_priority_only flag is set for any of
* the following reasons: no local resources (too many scrubs on this OSD);
* a dice roll says we will not scrub in this tick;
* a recovery is in progress, and we are not allowed to scrub while recovery;
* a PG is trying to acquire replica resources.
+ *
+ * If a recovery is in progress and scrubbing during recovery is disabled,
+ * but repairing during recovery is enabled, allow_requested_repair_only is
+ * set instead. In all remaining cases the time-of-day and load checks are
+ * performed, and only_deadlined is set if either of them fails.
- *
- * If we are allowed to scrub, the returned value specifies whether the only
- * high priority scrubs or only overdue ones are allowed to go on.
*/
- std::optional<Scrub::OSDRestrictions> restrictions_on_scrubbing(
+ Scrub::OSDRestrictions restrictions_on_scrubbing(
bool is_recovery_active,
utime_t scrub_clock_now) const;
// adjust the suggested scrub time according to OSD-wide status
auto adjusted = adjust_target_time(suggested);
scrub_job->update_schedule(adjusted);
+ scrub_job->high_priority = suggested.is_must == must_scrub_t::mandatory;
}
sched_params_t ScrubQueue::determine_scrub_time(
}
namespace {
-struct cmp_sched_time_t {
- bool operator()(const Scrub::ScrubJobRef& lhs,
- const Scrub::ScrubJobRef& rhs) const
+struct cmp_time_n_priority_t {  // sort predicate: high-priority jobs first, then by scheduled time
+ bool operator()(const Scrub::ScrubJobRef& lhs, const Scrub::ScrubJobRef& rhs)
+ const
{
- return lhs->schedule.scheduled_at < rhs->schedule.scheduled_at;
+ return lhs->is_high_priority() > rhs->is_high_priority() ||  // a high-priority lhs precedes a non-high-priority rhs
+ (lhs->is_high_priority() == rhs->is_high_priority() &&  // same priority class:
+ lhs->schedule.scheduled_at < rhs->schedule.scheduled_at);  // the earlier scheduled_at goes first
}
};
} // namespace
OSDRestrictions restrictions,
utime_t time_now)
{
- auto filtr = [time_now, restrictions](const auto& jobref) -> bool {
+ auto filtr = [time_now, rst = restrictions](const auto& jobref) -> bool {
return jobref->schedule.scheduled_at <= time_now &&
- (!restrictions.only_deadlined ||
- (!jobref->schedule.deadline.is_zero() &&
- jobref->schedule.deadline <= time_now));
+ (!rst.high_priority_only || jobref->high_priority) &&
+ (!rst.only_deadlined || (!jobref->schedule.deadline.is_zero() &&
+ jobref->schedule.deadline <= time_now));
};
rm_unregistered_jobs(group);
ripes.reserve(group.size());
std::copy_if(group.begin(), group.end(), std::back_inserter(ripes), filtr);
- std::sort(ripes.begin(), ripes.end(), cmp_sched_time_t{});
+ std::sort(ripes.begin(), ripes.end(), cmp_time_n_priority_t{});
if (g_conf()->subsys.should_gather<ceph_subsys_osd, 20>()) {
for (const auto& jobref : group) {
using act_token_t = uint32_t;
/// "environment" preconditions affecting which PGs are eligible for scrubbing
+/// (note: struct size should be kept small, as it is copied around)
struct OSDRestrictions {
+ /// only high priority scrubs are allowed (set when: no local scrub resources, lost dice roll, replica reservation in progress, or recovery is active and neither scrub nor repair during recovery is enabled)
+ bool high_priority_only{false};
bool allow_requested_repair_only{false};
- bool load_is_low{true};
- bool time_permit{true};
bool only_deadlined{false};
+ bool load_is_low:1{true};  ///< outcome of the load tracker below-threshold check; stays true if that check is skipped
+ bool time_permit:1{true};  ///< outcome of scrub_time_permit(); stays true if that check is skipped
};
+static_assert(sizeof(Scrub::OSDRestrictions) <= sizeof(uint32_t));  // compile-time guard for the keep-it-small note above
} // namespace Scrub
{
return fmt::format_to(
ctx.out(),
- "overdue-only:{} load:{} time:{} repair-only:{}",
+ "priority-only:{} overdue-only:{} load:{} time:{} repair-only:{}",
+ conds.high_priority_only,
conds.only_deadlined,
conds.load_is_low ? "ok" : "high",
conds.time_permit ? "ok" : "no",