stats (inc. scrub/block duration) every this many seconds.
default: 120
with_legacy: false
+- name: osd_scrub_retry_delay
+ type: int
+ level: advanced
+ desc: Period (in seconds) before retrying a PG that has failed a prior scrub.
+ long_desc: Minimum delay after a failed attempt to scrub a PG. The delay is
+ either applied to one of the scheduled scrubs for the PG (the next shallow
+ scrub or the next deep scrub), or to both.
+ This is the default value, used when the cause of the delay does not have an
+ associated configuration option. The options listed under 'see also' set the
+ delay for those causes that have their own configuration.
+ default: 30
+ min: 1
+ see_also:
+ - osd_scrub_retry_pg_state
+ - osd_scrub_retry_after_noscrub
+ with_legacy: false
+- name: osd_scrub_retry_after_noscrub
+ type: int
+ level: advanced
+ desc: Period (in seconds) before retrying to scrub a PG at a specific level
+ after detecting a no-scrub or no-deep-scrub flag
+ long_desc: Minimum delay after a failed attempt to scrub a PG at a level
+ (shallow or deep) that is disabled by cluster or pool no-scrub or no-deep-scrub
+ flags.
+ default: 60
+ min: 1
+ see_also:
+ - osd_scrub_retry_delay
+ with_legacy: false
+- name: osd_scrub_retry_pg_state
+ type: int
+ level: advanced
+ desc: Period (in seconds) before retrying to scrub a previously inactive/not-clean PG
+ long_desc: Minimum delay after a failed attempt to scrub a PG that is not
+ active and clean.
+ default: 60
+ min: 1
+ see_also:
+ - osd_scrub_retry_delay
+ with_legacy: false
- name: osd_scrub_disable_reservation_queuing
type: bool
level: advanced
// that made any of the targets into a high-priority one. All that's left:
// delay the specific target that was aborted.
- auto& trgt = m_scrub_job->delay_on_failure(aborted_target.level(), 5s, issue,
+ auto& trgt = m_scrub_job->delay_on_failure(aborted_target.level(), issue,
scrub_clock_now);
/// \todo complete the merging of the deadline & target for non-hp targets
return;
}
// note: the retry delay is selected inside delay_on_failure(), based on 'cause'
- auto& trgt =
- m_scrub_job->delay_on_failure(s_or_d, 5s, cause, scrub_clock_now);
+ auto& trgt = m_scrub_job->delay_on_failure(s_or_d, cause, scrub_clock_now);
ceph_assert(!trgt.queued);
m_osds->get_scrub_services().enqueue_target(trgt);
trgt.queued = true;
m_osds->get_scrub_services().dequeue_target(m_pg_id, sister_level);
trgt2.queued = false;
}
- m_scrub_job->delay_on_failure(sister_level, 5s, cause, scrub_clock_now);
+ m_scrub_job->delay_on_failure(sister_level, cause, scrub_clock_now);
m_osds->get_scrub_services().enqueue_target(trgt2);
trgt2.queued = true;
}
using scrub_schedule_t = Scrub::scrub_schedule_t;
using ScrubJob = Scrub::ScrubJob;
using delay_ready_t = Scrub::delay_ready_t;
+using namespace std::chrono;
namespace {
utime_t add_double(utime_t t, double d)
// Postpone the scrub target of the requested level after a failed attempt.
// The length of the postponement is read from the configuration, selected by
// 'delay_cause'. Returns a reference to the target that was modified.
SchedTarget& ScrubJob::delay_on_failure(
scrub_level_t level,
- std::chrono::seconds delay,
Scrub::delay_cause_t delay_cause,
utime_t scrub_clock_now)
{
+ seconds delay = seconds(cct->_conf.get_val<int64_t>("osd_scrub_retry_delay"));
+ switch (delay_cause) {  // override the default for causes with their own option
+ case delay_cause_t::flags:
+ delay =
+ seconds(cct->_conf.get_val<int64_t>("osd_scrub_retry_after_noscrub"));
+ break;
+ case delay_cause_t::pg_state:
+ delay = seconds(cct->_conf.get_val<int64_t>("osd_scrub_retry_pg_state"));
+ break;
+ case delay_cause_t::local_resources:
+ default:
+ // for all other possible delay causes: use the default delay
+ break;
+ }
+
auto& delayed_target =
(level == scrub_level_t::deep) ? deep_target : shallow_target;
delayed_target.sched_info.schedule.not_before =
std::max(scrub_clock_now, delayed_target.sched_info.schedule.not_before) +  // start from the later of 'now' and the existing not-before
utime_t{delay};
delayed_target.sched_info.last_issue = delay_cause;  // record why this target was delayed
+ dout(20) << fmt::format(
+ "delayed {}scrub due to {} for {}s. Updated: {}",
+ (level == scrub_level_t::deep ? "deep " : ""), delay_cause,
+ delay.count(), delayed_target)
+ << dendl;
return delayed_target;
}
/**
* For the level specified, set 'not-before' to 'max(now, not-before) + delay',
- * so that this scrub target
- * would not be retried before 'delay' seconds have passed.
+ * so that this scrub target would not be retried before the required
+ * delay seconds have passed.
+ * The delay is determined based on the 'cause' parameter:
+ * osd_scrub_retry_after_noscrub for 'flags', osd_scrub_retry_pg_state for
+ * 'pg_state', and osd_scrub_retry_delay for any other cause.
* The 'last_issue' is updated to the cause of the delay.
* \returns a reference to the target that was modified.
*/
[[maybe_unused]] SchedTarget& delay_on_failure(
scrub_level_t level,
- std::chrono::seconds delay,
delay_cause_t delay_cause,
utime_t scrub_clock_now);