under most load conditions.
default: 10.0
with_legacy: true
+- name: osd_scrub_queued_snaptrims_limit
+ type: uint
+ level: advanced
+ desc: Do not initiate periodic scrubs when the combined size of the snap-trim
+ queues across all PGs exceeds this value. A value of '0' disables this limit.
+ default: 500
+ with_legacy: false
- name: osd_scrub_min_interval
type: float
level: advanced
if (r.cpu_overloaded && ScrubJob::observes_load_limit(e.urgency)) {
return false;
}
+ if (r.overload_of_snap_trimming &&
+ ScrubJob::observes_trims_load(e.urgency)) {
+ return false;
+ }
if (r.recovery_in_progress && ScrubJob::observes_recovery(e.urgency)) {
return false;
}
env_conditions.restricted_time = !scrub_time_permit(scrub_clock_now);
env_conditions.cpu_overloaded = !scrub_load_below_threshold();
+ const auto snaptrims_limit =
+ cct->_conf.get_val<uint64_t>("osd_scrub_queued_snaptrims_limit");
+ env_conditions.overload_of_snap_trimming =
+ (snaptrims_limit > 0) &&
+ (m_osd_svc.get_snap_trim_queue_total() > snaptrims_limit);
return env_conditions;
}
return urgency < urgency_t::after_repair;
}
+bool ScrubJob::observes_trims_load(urgency_t urgency)  // does a scrub of this urgency honor the snap-trim overload restriction?
+{
+ return urgency < urgency_t::repairing;  // true only for urgency levels ordered below 'repairing'
+}
+
bool ScrubJob::requires_reservation(urgency_t urgency)
{
return urgency < urgency_t::after_repair;
* if continued into the forbidden times, by having a longer sleep time;
* (note that this is only applicable to the wq scheduler).
* - load: the scrub must not be initiated if the OSD is under heavy CPU load;
+ * - trims: the scrub must not be initiated if the OSD has too many snap-trim
+ * jobs pending;
* - noscrub: the scrub is aborted if the 'noscrub' flag (or the
* 'nodeep-scrub' flag for deep scrubs) is set;
* - randomization: the scrub's target time is extended by a random
* | limitation | must- | after-repair |repairing| operator | must-repair |
* | | scrub |(aft recovery)|(errors) | request | |
* +------------+---------+--------------+---------+----------+-------------+
- * | reservation| yes! | no | no? | no | no |
- * | dow/time | yes | yes | no | no | no |
+ * | reservation| yes! | no | no | no | no |
+ * | dow/time | yes | yes | no X | no | no |
* | ext-sleep | no | no | no | no | no |
* | load | yes | no | no | no | no |
+ * | trims | yes | yes | no | no | no |
* | noscrub | yes | no | Yes | no | no |
* | max-scrubs | yes | yes | Yes | no | no |
* | backoff | yes | no | no | no | no |
static bool observes_load_limit(urgency_t urgency);
+ static bool observes_trims_load(urgency_t urgency);
+
static bool requires_reservation(urgency_t urgency);
static bool requires_randomization(urgency_t urgency);
/// the CPU load is high. No regular scrubs are allowed.
bool cpu_overloaded:1{false};
+ /// the snap-trim queues are too long (their total exceeds the configured limit).
+ bool overload_of_snap_trimming:1{false};
+
/// outside of allowed scrubbing hours/days
bool restricted_time:1{false};
/// Renders the OSD-wide scrub restrictions as a dot-separated list enclosed
/// in angle brackets. Each flag that is set contributes its label; a flag
/// that is clear contributes an empty string (the separating dots remain).
/// The format string must carry one '{}' placeholder per flag argument.
template <typename FormatContext>
auto format(const Scrub::OSDRestrictions& conds, FormatContext& ctx) const {
return fmt::format_to(
- ctx.out(), "<{}.{}.{}.{}.{}>",
+ ctx.out(), "<{}.{}.{}.{}.{}.{}>",
conds.max_concurrency_reached ? "max-scrubs" : "",
conds.random_backoff_active ? "backoff" : "",
conds.cpu_overloaded ? "high-load" : "",
+ conds.overload_of_snap_trimming ? "trim-overload" : "",
conds.restricted_time ? "time-restrict" : "",
conds.recovery_in_progress ? "recovery" : "");
}