or after an aborted scrub.
To note: one of the important changes in this commit:
merging the functionality of adjust_target_time() &
update_schedule() into a single function - adjust_schedule().
Regarding the handling of aborts:
Most of the time - all that is required following a scrub abort is to
requeue the scrub job - the one that triggered the aborted scrub -
with just a delay added to its n.b..
But we must take into account scenarios where "something" caused the
parameters prepared for the *next* scrub to show higher urgency or
priority. "Something" - as in an operator command requiring immediate
scrubbing, or a change in the pool/cluster configuration.
In such cases - the current requested flags and the parameters of
the aborted scrub must be merged.
Note that the current implementation is a temporary solution, to be
replaced by a per-level updating of the relevant target.
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
}
-/*
- * Note: referring to m_planned_scrub here is temporary, as this set of
- * scheduling flags will be removed in a followup PR.
- */
void PgScrubber::schedule_scrub_with_osd()
{
ceph_assert(is_primary());
ceph_assert(m_scrub_job);
- auto pre_reg = registration_state();
- m_scrub_job->registered = true;
-
- const auto applicable_conf = populate_config_params();
- const auto scrub_clock_now = ceph_clock_now();
- auto suggested = determine_initial_schedule(applicable_conf, scrub_clock_now);
- m_scrub_job->init_targets(
- suggested, m_pg->info, applicable_conf, scrub_clock_now);
-
- m_osds->get_scrub_services().enqueue_target(*m_scrub_job);
-
- dout(10) << fmt::format(
- "{}: <flags:{}> <{:.5}> --> <{:.5}>", __func__,
- m_planned_scrub, pre_reg, registration_state())
+ dout(20) << fmt::format(
+ "{}: state at entry: {}", __func__, m_scrub_job->state_desc())
<< dendl;
+ m_scrub_job->registered = true;
+ update_scrub_job(delay_ready_t::delay_ready);
}
ceph_assert(m_pg->is_locked());
const auto applicable_conf = populate_config_params();
const auto scrub_clock_now = ceph_clock_now();
- const auto suggested = determine_initial_schedule(applicable_conf, scrub_clock_now);
- m_scrub_job->on_periods_change(suggested, applicable_conf, scrub_clock_now);
+ const auto suggested =
+ determine_initial_schedule(applicable_conf, scrub_clock_now);
+
+ ceph_assert(m_scrub_job->is_registered());
+ m_scrub_job->adjust_schedule(
+ suggested, applicable_conf, scrub_clock_now, delay_ready);
m_osds->get_scrub_services().enqueue_target(*m_scrub_job);
m_scrub_job->target_queued = true;
m_pg->publish_stats_to_osd();
<< dendl;
// move the relevant time-stamp backwards - enough to trigger a scrub
-
- utime_t now_is = ceph_clock_now();
- utime_t stamp = now_is;
-
+ utime_t stamp = ceph_clock_now();
if (offset > 0) {
stamp -= offset;
} else {
}
+/**
+ * The scrub session was aborted. We are left with two sets of parameters
+ * as to when the next scrub of this PG should take place, and what should
+ * it be like. One set of parameters is the one that was used to start the
+ * scrub, and that was 'frozen' by set_op_parameters(). It has its own
+ * scheduling target, priority, not-before, etc'.
+ * The other set is the updated state of the current scrub-job. It may
+ * have had its priority, flags, or schedule modified in the meantime.
+ * And - it does not (at least initially, i.e. immediately after
+ * set_op_parameters()), have high priority.
+ *
+ * Alas, the scrub session that was initiated was aborted. We must now
+ * merge the two sets of parameters, using the highest priority and the
+ * nearest target time for the next scrub.
+ *
+ * Note: only half-functioning in this commit. As the scrub-job copy
+ * (the one that was in the scheduling queue, and was passed to the scrubber)
+ * does not have the 'urgency' parameter, we are missing some information
+ * that is still encoded in the 'planned scrub' flags. This will be fixed in
+ * the next step.
+ */
+void PgScrubber::on_mid_scrub_abort(Scrub::delay_cause_t issue)
+{
+ // assuming we can still depend on the 'scrubbing' flag being set;
+ // Also on Queued&Active.
+
+ // note again: this is not how merging should work in the final version:
+ // e.g. - the 'aborted_schedule' data should be passed thru the scrubber.
+ // In this current patchworik, for example, we are only guessing at
+ // the original value of 'must_deep_scrub'.
+ m_planned_scrub.must_deep_scrub =
+ m_planned_scrub.must_deep_scrub || (m_flags.required && m_is_deep);
+ m_planned_scrub.must_scrub = m_planned_scrub.must_deep_scrub ||
+ m_planned_scrub.must_scrub || m_flags.required;
+ m_planned_scrub.must_repair = m_planned_scrub.must_repair || m_is_repair;
+ m_planned_scrub.need_auto = m_planned_scrub.need_auto || m_flags.auto_repair;
+ m_planned_scrub.deep_scrub_on_error =
+ m_planned_scrub.deep_scrub_on_error || m_flags.deep_scrub_on_error;
+ m_planned_scrub.check_repair =
+ m_planned_scrub.check_repair || m_flags.check_repair;
+
+ m_scrub_job->merge_and_delay(
+ m_active_target->schedule, issue, m_planned_scrub, ceph_clock_now());
+ ceph_assert(m_scrub_job->is_registered());
+ ceph_assert(!m_scrub_job->target_queued);
+ m_osds->get_scrub_services().enqueue_target(*m_scrub_job);
+ m_scrub_job->target_queued = true;
+}
+
+
void PgScrubber::requeue_penalized(Scrub::delay_cause_t cause)
{
/// \todo fix the 5s' to use a cause-specific delay parameter
void scrub_finish() final;
- void penalize_next_scrub(Scrub::delay_cause_t cause) final;
+ void on_mid_scrub_abort(Scrub::delay_cause_t issue) final;
ScrubMachineListener::MsgAndEpoch prep_replica_map_msg(
Scrub::PreemptionNoted was_preempted) final;
using sched_conf_t = Scrub::sched_conf_t;
using scrub_schedule_t = Scrub::scrub_schedule_t;
using ScrubJob = Scrub::ScrubJob;
+using delay_ready_t = Scrub::delay_ready_t;
// ////////////////////////////////////////////////////////////////////////// //
: pgid{pg}
, whoami{node_id}
, cct{cct}
- , log_msg_prefix{fmt::format("osd.{}: scrub-job:pg[{}]:", node_id, pgid)}
+ , log_msg_prefix{fmt::format("osd.{} scrub-job:pg[{}]:", node_id, pgid)}
{}
// debug usage only
} // namespace std
-Scrub::scrub_schedule_t ScrubJob::adjust_target_time(
- const sched_conf_t& app_conf,
- const sched_params_t& suggested) const
+void ScrubJob::adjust_schedule(
+ const Scrub::sched_params_t& suggested,
+ const Scrub::sched_conf_t& app_conf,
+ utime_t scrub_clock_now,
+ delay_ready_t modify_ready_targets)
{
- Scrub::scrub_schedule_t adjusted{
- suggested.proposed_time, suggested.proposed_time, suggested.proposed_time};
+ dout(10) << fmt::format(
+ "{} current h.p.:{:c} conf:{} also-ready?{:c} "
+ "sjob@entry:{}",
+ suggested, high_priority ? 'y' : 'n', app_conf,
+ (modify_ready_targets == delay_ready_t::delay_ready) ? 'y'
+ : 'n',
+ *this)
+ << dendl;
- if (suggested.is_must == Scrub::must_scrub_t::not_mandatory) {
- // unless explicitly requested, postpone the scrub with a random delay
- adjusted.scheduled_at += app_conf.shallow_interval;
- double r = rand() / (double)RAND_MAX;
- adjusted.scheduled_at +=
- app_conf.shallow_interval * app_conf.interval_randomize_ratio * r;
+ high_priority = (suggested.is_must == must_scrub_t::mandatory);
+ utime_t adj_not_before = suggested.proposed_time;
+ utime_t adj_target = suggested.proposed_time;
+ schedule.deadline = adj_target;
+
+ if (!high_priority) {
+ // add a random delay to the proposed scheduled time - but only for periodic
+ // scrubs that are not already eligible for scrubbing.
+ if ((modify_ready_targets == delay_ready_t::delay_ready) ||
+ adj_not_before > scrub_clock_now) {
+ adj_target += app_conf.shallow_interval;
+ double r = rand() / (double)RAND_MAX;
+ adj_target +=
+ app_conf.shallow_interval * app_conf.interval_randomize_ratio * r;
+ }
+ // the deadline can be updated directly into the scrub-job
if (app_conf.max_shallow) {
- adjusted.deadline += *app_conf.max_shallow;
+ schedule.deadline += *app_conf.max_shallow;
} else {
- adjusted.deadline = utime_t{};
+ schedule.deadline = utime_t{};
}
- if (adjusted.not_before < adjusted.scheduled_at) {
- adjusted.not_before = adjusted.scheduled_at;
+ if (adj_not_before < adj_target) {
+ adj_not_before = adj_target;
}
-
- dout(20) << fmt::format(
- "not-must. Was:{:s} config:{} adjusted:{}",
- suggested.proposed_time, app_conf, adjusted) << dendl;
}
- // else - no log is needed. All relevant data will be logged by the caller
- return adjusted;
+ schedule.scheduled_at = adj_target;
+ schedule.not_before = adj_not_before;
+ dout(10) << fmt::format(
+ "adjusted: nb:{:s} target:{:s} deadline:{:s} ({})",
+ schedule.not_before, schedule.scheduled_at, schedule.deadline,
+ state_desc())
+ << dendl;
}
-void ScrubJob::init_targets(
- const sched_params_t& suggested,
- const pg_info_t& info,
- const Scrub::sched_conf_t& aconf,
+void ScrubJob::merge_and_delay(
+ const scrub_schedule_t& aborted_schedule,
+ delay_cause_t issue,
+ requested_scrub_t updated_flags,
utime_t scrub_clock_now)
{
- auto adjusted = adjust_target_time(aconf, suggested);
- high_priority = suggested.is_must == must_scrub_t::mandatory;
- update_schedule(adjusted, true);
+ // merge the schedule targets:
+ schedule.scheduled_at =
+ std::min(aborted_schedule.scheduled_at, schedule.scheduled_at);
+ high_priority = high_priority || updated_flags.must_scrub;
+ delay_on_failure(5s, issue, scrub_clock_now);
+
+ // the new deadline is the minimum of the two
+ schedule.deadline = std::min(aborted_schedule.deadline, schedule.deadline);
}
-void ScrubJob::update_schedule(
- const Scrub::scrub_schedule_t& adjusted,
- bool reset_failure_penalty)
-{
- dout(15) << fmt::format(
- "was: nb:{:s}({:s}). Called with: rest?{} {:s} ({})",
- schedule.not_before, schedule.scheduled_at,
- reset_failure_penalty, adjusted.scheduled_at,
- state_desc())
- << dendl;
- schedule.scheduled_at = adjusted.scheduled_at;
- schedule.deadline = adjusted.deadline;
-
- if (reset_failure_penalty || (schedule.not_before < schedule.scheduled_at)) {
- schedule.not_before = schedule.scheduled_at;
- }
- dout(10) << fmt::format(
- "adjusted: nb:{:s} ({:s}) ({})", schedule.not_before,
- schedule.scheduled_at, state_desc())
- << dendl;
-}
-
void ScrubJob::delay_on_failure(
std::chrono::seconds delay,
Scrub::delay_cause_t delay_cause,
}
/**
- * 'reset_failure_penalty' is used to reset the 'not_before' jo attribute to
- * the updated 'scheduled_at' time. This is used whenever the scrub-job
- * schedule is updated, and the update is not a result of a scrub attempt
- * failure.
- */
- void update_schedule(
- const scrub_schedule_t& adjusted,
- bool reset_failure_penalty);
-
- /**
- * If the scrub job was not explicitly requested, we postpone it by some
- * random length of time.
- * And if delaying the scrub - we calculate, based on pool parameters, a
- * deadline we should scrub before.
+ * Given a proposed time for the next scrub, and the relevant
+ * configuration, adjust_schedule() determines the actual target time,
+ * the deadline, and the 'not_before' time for the scrub.
+ * The new values are updated into the scrub-job.
*
- * @return updated (i.e. - possibly delayed) scrub schedule (schedule,
- * deadline, not_before)
+ * Specifically:
+ * - for high-priority scrubs: n.b. & deadline are set equal to the
+ * (untouched) proposed target time.
+ * - for regular scrubs: the proposed time is adjusted (delayed) based
+ * on the configuration; the deadline is set further out (if configured)
+ * and the n.b. is reset to the target.
*/
- Scrub::scrub_schedule_t adjust_target_time(
- const Scrub::sched_conf_t& app_conf,
- const Scrub::sched_params_t& proposed_schedule) const;
+ void adjust_schedule(
+ const Scrub::sched_params_t& suggested,
+ const Scrub::sched_conf_t& aconf,
+ utime_t scrub_clock_now,
+ Scrub::delay_ready_t modify_ready_targets);
/**
* push the 'not_before' time out by 'delay' seconds, so that this scrub target
utime_t scrub_clock_now);
/**
- * initial setting of the scheduling parameters of a newly registered
- * PG. The scrub targets (in this stage of the refactoring - the whole
- * scrub job) is initialized as for a regular periodic scrub.
+ * Recalculating any possible updates to the scrub schedule, following an
+ * aborted scrub attempt.
+ * Usually - we can use the same schedule that triggered the aborted scrub.
+ * But we must take into account scenarios where "something" caused the
+ * parameters prepared for the *next* scrub to show higher urgency or
+ * priority. "Something" - as in an operator command requiring immediate
+ * scrubbing, or a change in the pool/cluster configuration.
*/
- void init_targets(
- const sched_params_t& suggested,
- const pg_info_t& info,
- const Scrub::sched_conf_t& aconf,
+ void merge_and_delay(
+ const scrub_schedule_t& aborted_schedule,
+ Scrub::delay_cause_t issue,
+ requested_scrub_t updated_flags,
utime_t scrub_clock_now);
/**
void dump(ceph::Formatter* f) const;
-
bool is_registered() const { return registered; }
/**
}
};
-
template <>
struct formatter<Scrub::ScrubJob> {
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
auto format(const Scrub::ScrubJob& sjob, FormatContext& ctx) const
{
return fmt::format_to(
- ctx.out(), "pg[{}] @ nb:{:s} ({:s}) (dl:{:s}) - <{}>",
+ ctx.out(), "pg[{}]:nb:{:s} / trg:{:s} / dl:{:s} <{}>",
sjob.pgid, sjob.schedule.not_before, sjob.schedule.scheduled_at,
sjob.schedule.deadline, sjob.state_desc());
}
// completed successfully), we use it now to set the 'failed scrub' duration.
if (session.m_session_started_at != ScrubTimePoint{}) {
// delay the next invocation of the scrubber on this target
- scrbr->penalize_next_scrub(Scrub::delay_cause_t::aborted);
+ scrbr->on_mid_scrub_abort(Scrub::delay_cause_t::aborted);
auto logged_duration = ScrubClock::now() - session.m_session_started_at;
session.m_perf_set->tinc(scrbcnt_failed_elapsed, logged_duration);
virtual void scrub_finish() = 0;
/// notify the scrubber about a scrub failure
- /// (note: temporary implementation)
- virtual void penalize_next_scrub(Scrub::delay_cause_t cause) = 0;
+ virtual void on_mid_scrub_abort(Scrub::delay_cause_t cause) = 0;
/**
* Prepare a MOSDRepScrubMap message carrying the requested scrub map