using namespace ::std::chrono_literals;
using namespace ::std::literals;
-#define dout_subsys ceph_subsys_osd
-#undef dout_context
-#define dout_context (cct)
-#undef dout_prefix
-#define dout_prefix \
- *_dout << "osd." << osd_service.get_nodeid() << " scrub-queue::" << __func__ \
- << " "
-
-
-
-void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
-{
- ceph_assert(f != nullptr);
- std::lock_guard lck(jobs_lock);
-
- f->open_array_section("scrubs");
-
- std::for_each(to_scrub.cbegin(), to_scrub.cend(), [&f](const ScrubJobRef& j) {
- j->dump(f);
- });
-
- std::for_each(penalized.cbegin(),
- penalized.cend(),
- [&f](const ScrubJobRef& j) { j->dump(f); });
-
- f->close_section();
-}
// ////////////////////////////////////////////////////////////////////////// //
// scrub initiation - OSD code temporarily moved here from OSD.cc
+#define dout_subsys ceph_subsys_osd
#undef dout_context
-#define dout_context cct
+#define dout_context (cct)
#define dout_subsys ceph_subsys_osd
#undef dout_prefix
#define dout_prefix _prefix(_dout, whoami, get_osdmap_epoch())
<< " "
+void ScrubQueue::dump_scrubs(ceph::Formatter* f) const
+{
+ ceph_assert(f != nullptr);
+ std::lock_guard lck(jobs_lock);
+
+ f->open_array_section("scrubs");
+
+ std::for_each(to_scrub.cbegin(), to_scrub.cend(), [&f](const ScrubJobRef& j) {
+ j->dump(f);
+ });
+
+ std::for_each(penalized.cbegin(),
+ penalized.cend(),
+ [&f](const ScrubJobRef& j) { j->dump(f); });
+
+ f->close_section();
+}
+
+/**
+ * a note regarding 'to_scrub_copy':
+ * 'to_scrub_copy' is a sorted set of all the ripe jobs from to_copy.
+ * As we usually expect to refer to only the first job in this set, we could
+ * consider an alternative implementation:
+ * - have collect_ripe_jobs() return the copied set without sorting it;
+ * - loop, performing:
+ * - use std::min_element() to find a candidate;
+ * - try that one. If not suitable, discard from 'to_scrub_copy'
+ */
+Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
+ Scrub::OSDRestrictions& preconds)
+{
+ dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / "
+ << penalized.size() << dendl;
+
+ utime_t now_is = time_now();
+
+ preconds.time_permit = scrub_time_permit(now_is);
+ preconds.load_is_low = scrub_load_below_threshold();
+ preconds.only_deadlined = !preconds.time_permit || !preconds.load_is_low;
+
+ // create a list of candidates (copying, as otherwise creating a deadlock):
+ // - possibly restore penalized
+ // - (if we didn't handle directly) remove invalid jobs
+ // - create a copy of the to_scrub (possibly up to first not-ripe)
+ // - same for the penalized (although that usually be a waste)
+ // unlock, then try the lists
+
+ std::unique_lock lck{jobs_lock};
+
+ // pardon all penalized jobs that have deadlined (or were updated)
+ scan_penalized(restore_penalized, now_is);
+ restore_penalized = false;
+
+ // remove the 'updated' flag from all entries
+ std::for_each(to_scrub.begin(),
+ to_scrub.end(),
+ [](const auto& jobref) -> void { jobref->updated = false; });
+
+ // add failed scrub attempts to the penalized list
+ move_failed_pgs(now_is);
+
+ // collect all valid & ripe jobs from the two lists. Note that we must copy,
+ // as when we use the lists we will not be holding jobs_lock (see
+ // explanation above)
+
+ auto to_scrub_copy = collect_ripe_jobs(to_scrub, now_is);
+ auto penalized_copy = collect_ripe_jobs(penalized, now_is);
+ lck.unlock();
+
+ // try the regular queue first
+ auto res = select_from_group(to_scrub_copy, preconds, now_is);
+
+ // in the sole scenario in which we've gone over all ripe jobs without success
+ // - we will try the penalized
+ if (res == Scrub::schedule_result_t::none_ready && !penalized_copy.empty()) {
+ res = select_from_group(penalized_copy, preconds, now_is);
+ dout(10) << "tried the penalized. Res: "
+ << ScrubQueue::attempt_res_text(res) << dendl;
+ restore_penalized = true;
+ }
+
+ dout(15) << dendl;
+ return res;
+}
+
+
+// not holding jobs_lock. 'group' is a copy of the actual list.
+Scrub::schedule_result_t ScrubQueue::select_from_group(
+ ScrubQContainer& group,
+ const Scrub::OSDRestrictions& preconds,
+ utime_t now_is)
+{
+ dout(15) << "jobs #: " << group.size() << dendl;
+
+ for (auto& candidate : group) {
+
+ // we expect the first job in the list to be a good candidate (if any)
+
+ dout(20) << "try initiating scrub for " << candidate->pgid << dendl;
+
+ if (preconds.only_deadlined && (candidate->schedule.deadline.is_zero() ||
+ candidate->schedule.deadline >= now_is)) {
+ dout(15) << " not scheduling scrub for " << candidate->pgid << " due to "
+ << (preconds.time_permit ? "high load" : "time not permitting")
+ << dendl;
+ continue;
+ }
+
+ // we have a candidate to scrub. We turn to the OSD to verify that the PG
+ // configuration allows the specified type of scrub, and to initiate the
+ // scrub.
+ switch (osd_service.initiate_a_scrub(
+ candidate->pgid, preconds.allow_requested_repair_only)) {
+
+ case Scrub::schedule_result_t::scrub_initiated:
+ // the happy path. We are done
+ dout(20) << " initiated for " << candidate->pgid << dendl;
+ return Scrub::schedule_result_t::scrub_initiated;
+
+ case Scrub::schedule_result_t::already_started:
+ case Scrub::schedule_result_t::preconditions:
+ case Scrub::schedule_result_t::bad_pg_state:
+ // continue with the next job
+ dout(20) << "failed (state/cond/started) " << candidate->pgid << dendl;
+ break;
+
+ case Scrub::schedule_result_t::no_such_pg:
+ // The pg is no longer there
+ dout(20) << "failed (no pg) " << candidate->pgid << dendl;
+ break;
+
+ case Scrub::schedule_result_t::no_local_resources:
+ // failure to secure local resources. No point in trying the other
+ // PGs at this time. Note that this is not the same as replica resources
+ // failure!
+ dout(20) << "failed (local) " << candidate->pgid << dendl;
+ return Scrub::schedule_result_t::no_local_resources;
+
+ case Scrub::schedule_result_t::none_ready:
+ // can't happen. Just for the compiler.
+ dout(5) << "failed !!! " << candidate->pgid << dendl;
+ return Scrub::schedule_result_t::none_ready;
+ }
+ }
+
+ dout(20) << " returning 'none ready'" << dendl;
+ return Scrub::schedule_result_t::none_ready;
+}
+
// ////////////////////////////////////////////////////////////////////////// //
// CPU load tracking and related
}
// clang-format on
-/**
- * a note regarding 'to_scrub_copy':
- * 'to_scrub_copy' is a sorted set of all the ripe jobs from to_copy.
- * As we usually expect to refer to only the first job in this set, we could
- * consider an alternative implementation:
- * - have collect_ripe_jobs() return the copied set without sorting it;
- * - loop, performing:
- * - use std::min_element() to find a candidate;
- * - try that one. If not suitable, discard from 'to_scrub_copy'
- */
-Scrub::schedule_result_t ScrubQueue::select_pg_and_scrub(
- Scrub::OSDRestrictions& preconds)
-{
- dout(10) << " reg./pen. sizes: " << to_scrub.size() << " / "
- << penalized.size() << dendl;
-
- utime_t now_is = time_now();
-
- preconds.time_permit = scrub_time_permit(now_is);
- preconds.load_is_low = scrub_load_below_threshold();
- preconds.only_deadlined = !preconds.time_permit || !preconds.load_is_low;
-
- // create a list of candidates (copying, as otherwise creating a deadlock):
- // - possibly restore penalized
- // - (if we didn't handle directly) remove invalid jobs
- // - create a copy of the to_scrub (possibly up to first not-ripe)
- // - same for the penalized (although that usually be a waste)
- // unlock, then try the lists
-
- std::unique_lock lck{jobs_lock};
-
- // pardon all penalized jobs that have deadlined (or were updated)
- scan_penalized(restore_penalized, now_is);
- restore_penalized = false;
-
- // remove the 'updated' flag from all entries
- std::for_each(to_scrub.begin(),
- to_scrub.end(),
- [](const auto& jobref) -> void { jobref->updated = false; });
-
- // add failed scrub attempts to the penalized list
- move_failed_pgs(now_is);
-
- // collect all valid & ripe jobs from the two lists. Note that we must copy,
- // as when we use the lists we will not be holding jobs_lock (see
- // explanation above)
-
- auto to_scrub_copy = collect_ripe_jobs(to_scrub, now_is);
- auto penalized_copy = collect_ripe_jobs(penalized, now_is);
- lck.unlock();
-
- // try the regular queue first
- auto res = select_from_group(to_scrub_copy, preconds, now_is);
-
- // in the sole scenario in which we've gone over all ripe jobs without success
- // - we will try the penalized
- if (res == Scrub::schedule_result_t::none_ready && !penalized_copy.empty()) {
- res = select_from_group(penalized_copy, preconds, now_is);
- dout(10) << "tried the penalized. Res: "
- << ScrubQueue::attempt_res_text(res) << dendl;
- restore_penalized = true;
- }
-
- dout(15) << dendl;
- return res;
-}
// must be called under lock
void ScrubQueue::rm_unregistered_jobs(ScrubQContainer& group)
return ripes;
}
-// not holding jobs_lock. 'group' is a copy of the actual list.
-Scrub::schedule_result_t ScrubQueue::select_from_group(
- ScrubQContainer& group,
- const Scrub::OSDRestrictions& preconds,
- utime_t now_is)
-{
- dout(15) << "jobs #: " << group.size() << dendl;
-
- for (auto& candidate : group) {
-
- // we expect the first job in the list to be a good candidate (if any)
-
- dout(20) << "try initiating scrub for " << candidate->pgid << dendl;
-
- if (preconds.only_deadlined && (candidate->schedule.deadline.is_zero() ||
- candidate->schedule.deadline >= now_is)) {
- dout(15) << " not scheduling scrub for " << candidate->pgid << " due to "
- << (preconds.time_permit ? "high load" : "time not permitting")
- << dendl;
- continue;
- }
-
- // we have a candidate to scrub. We turn to the OSD to verify that the PG
- // configuration allows the specified type of scrub, and to initiate the
- // scrub.
- switch (
- osd_service.initiate_a_scrub(candidate->pgid,
- preconds.allow_requested_repair_only)) {
-
- case Scrub::schedule_result_t::scrub_initiated:
- // the happy path. We are done
- dout(20) << " initiated for " << candidate->pgid << dendl;
- return Scrub::schedule_result_t::scrub_initiated;
-
- case Scrub::schedule_result_t::already_started:
- case Scrub::schedule_result_t::preconditions:
- case Scrub::schedule_result_t::bad_pg_state:
- // continue with the next job
- dout(20) << "failed (state/cond/started) " << candidate->pgid << dendl;
- break;
-
- case Scrub::schedule_result_t::no_such_pg:
- // The pg is no longer there
- dout(20) << "failed (no pg) " << candidate->pgid << dendl;
- break;
-
- case Scrub::schedule_result_t::no_local_resources:
- // failure to secure local resources. No point in trying the other
- // PGs at this time. Note that this is not the same as replica resources
- // failure!
- dout(20) << "failed (local) " << candidate->pgid << dendl;
- return Scrub::schedule_result_t::no_local_resources;
-
- case Scrub::schedule_result_t::none_ready:
- // can't happen. Just for the compiler.
- dout(5) << "failed !!! " << candidate->pgid << dendl;
- return Scrub::schedule_result_t::none_ready;
- }
- }
-
- dout(20) << " returning 'none ready'" << dendl;
- return Scrub::schedule_result_t::none_ready;
-}
ScrubQueue::scrub_schedule_t ScrubQueue::adjust_target_time(
const sched_params_t& times) const