From 95dd59ba873cd0e04e3cbf97e74e7d3919c41c4c Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Tue, 16 Aug 2022 15:48:04 +0300 Subject: [PATCH] osd/scrub: reduce the amount of log lines Following call chains, and removing repeated data. Signed-off-by: Ronen Friedman --- src/osd/PG.cc | 11 ++--- src/osd/scrubber/PrimaryLogScrub.cc | 1 - src/osd/scrubber/osd_scrub_sched.cc | 76 ++++++++++++++--------------- src/osd/scrubber/osd_scrub_sched.h | 34 ++++++++----- src/osd/scrubber/pg_scrubber.cc | 58 ++++++++++------------ src/osd/scrubber/pg_scrubber.h | 5 +- src/osd/scrubber_common.h | 7 ++- 7 files changed, 93 insertions(+), 99 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index bc644bfd2d8..1887c33cde5 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1685,10 +1685,8 @@ std::optional PG::validate_scrub_mode() const */ void PG::on_info_history_change() { - dout(20) << __func__ << " for a " << (is_primary() ? "Primary" : "non-primary") <on_maybe_registration_change(m_planned_scrub); + m_scrubber->on_primary_change(__func__, m_planned_scrub); } void PG::reschedule_scrub() @@ -1705,10 +1703,9 @@ void PG::reschedule_scrub() void PG::on_primary_status_change(bool was_primary, bool now_primary) { // make sure we have a working scrubber when becoming a primary - if (was_primary != now_primary) { ceph_assert(m_scrubber); - m_scrubber->on_primary_change(m_planned_scrub); + m_scrubber->on_primary_change(__func__, m_planned_scrub); } } @@ -1742,10 +1739,10 @@ void PG::on_new_interval() assert(m_scrubber); // log some scrub data before we react to the interval - dout(20) << __func__ << (is_scrub_queued_or_active() ? " scrubbing " : " ") + dout(30) << __func__ << (is_scrub_queued_or_active() ? " scrubbing " : " ") << "flags: " << m_planned_scrub << dendl; - m_scrubber->on_maybe_registration_change(m_planned_scrub); + m_scrubber->on_primary_change(__func__, m_planned_scrub); } epoch_t PG::oldest_stored_osdmap() { diff --git a/src/osd/scrubber/PrimaryLogScrub.cc b/src/osd/scrubber/PrimaryLogScrub.cc index a868dd55e34..8a969e52138 100644 --- a/src/osd/scrubber/PrimaryLogScrub.cc +++ b/src/osd/scrubber/PrimaryLogScrub.cc @@ -227,7 +227,6 @@ PrimaryLogScrub::PrimaryLogScrub(PrimaryLogPG* pg) : PgScrubber{pg}, m_pl_pg{pg} void PrimaryLogScrub::_scrub_clear_state() { - dout(15) << __func__ << dendl; m_scrub_cstat = object_stat_collection_t(); } diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc index 7205e1077bb..d451e44ecc3 100644 --- a/src/osd/scrubber/osd_scrub_sched.cc +++ b/src/osd/scrubber/osd_scrub_sched.cc @@ -14,7 +14,7 @@ using namespace ::std::literals; #define dout_context (cct) #define dout_subsys ceph_subsys_osd #undef dout_prefix -#define dout_prefix *_dout << "osd." << whoami << " " +#define dout_prefix *_dout << "osd." << whoami << " " ScrubQueue::ScrubJob::ScrubJob(CephContext* cct, const spg_t& pg, int node_id) : RefCountedObject{cct} @@ -45,9 +45,8 @@ void ScrubQueue::ScrubJob::update_schedule( // the (atomic) flag will only be cleared by select_pg_and_scrub() after // scan_penalized() is called and the job was moved to the to_scrub queue. updated = true; - - dout(10) << " pg[" << pgid << "] adjusted: " << schedule.scheduled_at << " " - << registration_state() << dendl; + dout(10) << fmt::format("{}: pg[{}] adjusted: {:s} ({})", __func__, pgid, + schedule.scheduled_at, registration_state()) << dendl; } std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is, @@ -65,7 +64,7 @@ std::string ScrubQueue::ScrubJob::scheduling_state(utime_t now_is, return fmt::format("queued for {}scrub", (is_deep_expected ? "deep " : "")); } - return fmt::format("{}scrub scheduled @ {}", + return fmt::format("{}scrub scheduled @ {:s}", (is_deep_expected ? "deep " : ""), schedule.scheduled_at); } @@ -151,13 +150,15 @@ void ScrubQueue::remove_from_osd_queue(ScrubJobRef scrub_job) } } -void ScrubQueue::register_with_osd(ScrubJobRef scrub_job, - const ScrubQueue::sched_params_t& suggested) +void ScrubQueue::register_with_osd( + ScrubJobRef scrub_job, + const ScrubQueue::sched_params_t& suggested) { qu_state_t state_at_entry = scrub_job->state.load(); - - dout(15) << "pg[" << scrub_job->pgid << "] was " - << qu_state_text(state_at_entry) << dendl; + dout(20) << fmt::format( + "pg[{}] state at entry: <{:.14}>", scrub_job->pgid, + state_at_entry) + << dendl; switch (state_at_entry) { case qu_state_t::registered: @@ -172,7 +173,7 @@ void ScrubQueue::register_with_osd(ScrubJobRef scrub_job, if (state_at_entry != scrub_job->state) { lck.unlock(); - dout(5) << " scrub job state changed" << dendl; + dout(5) << " scrub job state changed. Retrying." << dendl; // retry register_with_osd(scrub_job, suggested); break; @@ -183,7 +184,6 @@ void ScrubQueue::register_with_osd(ScrubJobRef scrub_job, scrub_job->in_queues = true; scrub_job->state = qu_state_t::registered; } - break; case qu_state_t::unregistering: @@ -204,10 +204,11 @@ void ScrubQueue::register_with_osd(ScrubJobRef scrub_job, break; } - dout(10) << "pg(" << scrub_job->pgid << ") sched-state changed from " - << qu_state_text(state_at_entry) << " to " - << qu_state_text(scrub_job->state) - << " at: " << scrub_job->schedule.scheduled_at << dendl; + dout(10) << fmt::format( + "pg[{}] sched-state changed from <{:.14}> to <{:.14}> (@{:s})", + scrub_job->pgid, state_at_entry, scrub_job->state.load(), + scrub_job->schedule.scheduled_at) + << dendl; } // look mommy - no locks! @@ -222,10 +223,9 @@ void ScrubQueue::update_job(ScrubJobRef scrub_job, ScrubQueue::sched_params_t ScrubQueue::determine_scrub_time( const requested_scrub_t& request_flags, const pg_info_t& pg_info, - const pool_opts_t pool_conf) const + const pool_opts_t& pool_conf) const { ScrubQueue::sched_params_t res; - dout(15) << ": requested_scrub_t: {}" << request_flags << dendl; if (request_flags.must_scrub || request_flags.need_auto) { @@ -234,8 +234,7 @@ ScrubQueue::sched_params_t ScrubQueue::determine_scrub_time( res.is_must = ScrubQueue::must_scrub_t::mandatory; // we do not need the interval data in this case - } else if (pg_info.stats.stats_invalid && - conf()->osd_scrub_invalid_stats) { + } else if (pg_info.stats.stats_invalid && conf()->osd_scrub_invalid_stats) { res.proposed_time = time_now(); res.is_must = ScrubQueue::must_scrub_t::mandatory; @@ -246,13 +245,12 @@ ScrubQueue::sched_params_t ScrubQueue::determine_scrub_time( } dout(15) << fmt::format( - ": suggested: {} hist: {} v: {}/{} must: {} pool-min: {}", - res.proposed_time, - pg_info.history.last_scrub_stamp, + "suggested: {:s} hist: {:s} v:{}/{} must:{} pool-min:{} {}", + res.proposed_time, pg_info.history.last_scrub_stamp, (bool)pg_info.stats.stats_invalid, conf()->osd_scrub_invalid_stats, (res.is_must == must_scrub_t::mandatory ? "y" : "n"), - res.min_interval) + res.min_interval, request_flags) << dendl; return res; } @@ -522,21 +520,10 @@ Scrub::schedule_result_t ScrubQueue::select_from_group( ScrubQueue::scrub_schedule_t ScrubQueue::adjust_target_time( const sched_params_t& times) const { - ScrubQueue::scrub_schedule_t sched_n_dead{times.proposed_time, - times.proposed_time}; - - if (g_conf()->subsys.should_gather()) { - dout(20) << "min t: " << times.min_interval - << " osd: " << conf()->osd_scrub_min_interval - << " max t: " << times.max_interval - << " osd: " << conf()->osd_scrub_max_interval << dendl; - - dout(20) << "at " << sched_n_dead.scheduled_at << " ratio " - << conf()->osd_scrub_interval_randomize_ratio << dendl; - } + ScrubQueue::scrub_schedule_t sched_n_dead{ + times.proposed_time, times.proposed_time}; if (times.is_must == ScrubQueue::must_scrub_t::not_mandatory) { - // unless explicitly requested, postpone the scrub with a random delay double scrub_min_interval = times.min_interval > 0 ? times.min_interval @@ -555,10 +542,19 @@ ScrubQueue::scrub_schedule_t ScrubQueue::adjust_target_time( } else { sched_n_dead.deadline += scrub_max_interval; } + // note: no specific job can be named in the log message + dout(20) << fmt::format( + "not-must. Was:{:s} {{min:{}/{} max:{}/{} ratio:{}}} " + "Adjusted:{:s} ({:s})", + times.proposed_time, fmt::group_digits(times.min_interval), + fmt::group_digits(conf()->osd_scrub_min_interval), + fmt::group_digits(times.max_interval), + fmt::group_digits(conf()->osd_scrub_max_interval), + conf()->osd_scrub_interval_randomize_ratio, + sched_n_dead.scheduled_at, sched_n_dead.deadline) + << dendl; } - - dout(17) << "at (final) " << sched_n_dead.scheduled_at << " - " - << sched_n_dead.deadline << dendl; + // else - no log needed. All relevant data will be logged by the caller return sched_n_dead; } diff --git a/src/osd/scrubber/osd_scrub_sched.h b/src/osd/scrubber/osd_scrub_sched.h index 1034d478933..70e5289f8ed 100644 --- a/src/osd/scrubber/osd_scrub_sched.h +++ b/src/osd/scrubber/osd_scrub_sched.h @@ -271,8 +271,8 @@ class ScrubQueue { */ std::string_view registration_state() const { - return in_queues.load(std::memory_order_relaxed) ? " in-queue" - : " not-queued"; + return in_queues.load(std::memory_order_relaxed) ? "in-queue" + : "not-queued"; } /** @@ -363,7 +363,7 @@ class ScrubQueue { sched_params_t determine_scrub_time(const requested_scrub_t& request_flags, const pg_info_t& pg_info, - const pool_opts_t pool_conf) const; + const pool_opts_t& pool_conf) const; public: void dump_scrubs(ceph::Formatter* f) const; @@ -421,7 +421,7 @@ class ScrubQueue { * variables. Specifically, the following are guaranteed: * - 'in_queues' is asserted only if the job is in one of the queues; * - a job will only be in state 'registered' if in one of the queues; - * - no job will be in the two queues simulatenously + * - no job will be in the two queues simultaneously; * * Note that PG locks should not be acquired while holding jobs_lock. */ @@ -522,6 +522,19 @@ protected: // used by the unit-tests virtual utime_t time_now() const { return ceph_clock_now(); } }; +template <> +struct fmt::formatter + : fmt::formatter { + template + auto format(const ScrubQueue::qu_state_t& s, FormatContext& ctx) + { + auto out = ctx.out(); + out = fmt::formatter::format( + std::string{ScrubQueue::qu_state_text(s)}, ctx); + return out; + } +}; + template <> struct fmt::formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } @@ -531,13 +544,10 @@ struct fmt::formatter { { return fmt::format_to( ctx.out(), - "{}, {} dead: {} - {} / failure: {} / pen. t.o.: {} / queue state: {}", - sjob.pgid, - sjob.schedule.scheduled_at, - sjob.schedule.deadline, - sjob.registration_state(), - sjob.resources_failure, - sjob.penalty_timeout, - ScrubQueue::qu_state_text(sjob.state)); + "pg[{}] @ {:s} (dl:{:s}) - <{}> / failure: {} / pen. t.o.: {:s} / queue " + "state: {:.7}", + sjob.pgid, sjob.schedule.scheduled_at, sjob.schedule.deadline, + sjob.registration_state(), sjob.resources_failure, sjob.penalty_timeout, + sjob.state.load(std::memory_order_relaxed)); } }; diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 587fa5e96b0..580c1e4c848 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -492,63 +492,57 @@ void PgScrubber::rm_from_osd_scrubbing() unregister_from_osd(); } -void PgScrubber::on_primary_change(const requested_scrub_t& request_flags) +void PgScrubber::on_primary_change( + std::string_view caller, + const requested_scrub_t& request_flags) { - dout(10) << __func__ << (is_primary() ? " Primary " : " Replica ") - << " flags: " << request_flags << dendl; - if (!m_scrub_job) { + // we won't have a chance to see more logs from this function, thus: + dout(10) << fmt::format( + "{}: (from {}& w/{}) {}.Reg-state:{:.7}. No scrub-job", + __func__, caller, request_flags, + (is_primary() ? "Primary" : "Replica/other"), + registration_state()) + << dendl; return; } - dout(15) << __func__ << " scrub-job state: " << m_scrub_job->state_desc() - << dendl; - + auto pre_state = m_scrub_job->state_desc(); + auto pre_reg = registration_state(); if (is_primary()) { auto suggested = m_osds->get_scrub_services().determine_scrub_time( - request_flags, - m_pg->info, - m_pg->get_pgpool().info.opts); + request_flags, m_pg->info, m_pg->get_pgpool().info.opts); m_osds->get_scrub_services().register_with_osd(m_scrub_job, suggested); } else { m_osds->get_scrub_services().remove_from_osd_queue(m_scrub_job); } - dout(15) << __func__ << " done " << registration_state() << dendl; -} - -void PgScrubber::on_maybe_registration_change( - const requested_scrub_t& request_flags) -{ - dout(10) << __func__ << (is_primary() ? " Primary " : " Replica/other ") - << registration_state() << " flags: " << request_flags << dendl; - - on_primary_change(request_flags); - dout(15) << __func__ << " done " << registration_state() << dendl; + dout(10) + << fmt::format( + "{} (from {} {}): {}. <{:.5}>&<{:.10}> --> <{:.5}>&<{:.14}>", + __func__, caller, request_flags, + (is_primary() ? "Primary" : "Replica/other"), pre_reg, pre_state, + registration_state(), m_scrub_job->state_desc()) + << dendl; } void PgScrubber::update_scrub_job(const requested_scrub_t& request_flags) { - dout(10) << __func__ << " flags: " << request_flags << dendl; - - { - // verify that the 'in_q' status matches our "Primariority" - if (m_scrub_job && is_primary() && !m_scrub_job->in_queues) { - dout(1) << __func__ << " !!! primary but not scheduled! " << dendl; - } + dout(10) << fmt::format("{}: flags:<{}>", __func__, request_flags) << dendl; + // verify that the 'in_q' status matches our "Primariority" + if (m_scrub_job && is_primary() && !m_scrub_job->in_queues) { + dout(1) << __func__ << " !!! primary but not scheduled! " << dendl; } if (is_primary() && m_scrub_job) { ceph_assert(m_pg->is_locked()); auto suggested = m_osds->get_scrub_services().determine_scrub_time( - request_flags, - m_pg->info, - m_pg->get_pgpool().info.opts); + request_flags, m_pg->info, m_pg->get_pgpool().info.opts); m_osds->get_scrub_services().update_job(m_scrub_job, suggested); m_pg->publish_stats_to_osd(); } - dout(15) << __func__ << " done " << registration_state() << dendl; + dout(15) << __func__ << ": done " << registration_state() << dendl; } void PgScrubber::scrub_requested(scrub_level_t scrub_level, diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index ad6c1db3e18..8d3359c8746 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -359,9 +359,8 @@ class PgScrubber : public ScrubPgIF, void rm_from_osd_scrubbing() final; - void on_primary_change(const requested_scrub_t& request_flags) final; - - void on_maybe_registration_change( + void on_primary_change( + std::string_view caller, const requested_scrub_t& request_flags) final; void scrub_requested(scrub_level_t scrub_level, diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index e3264d0a658..b50b280b08e 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -386,7 +386,9 @@ struct ScrubPgIF { * * Following our status as Primary or replica. */ - virtual void on_primary_change(const requested_scrub_t& request_flags) = 0; + virtual void on_primary_change( + std::string_view caller, + const requested_scrub_t& request_flags) = 0; /** * Recalculate the required scrub time. @@ -396,9 +398,6 @@ struct ScrubPgIF { */ virtual void update_scrub_job(const requested_scrub_t& request_flags) = 0; - virtual void on_maybe_registration_change( - const requested_scrub_t& request_flags) = 0; - // on the replica: virtual void handle_scrub_reserve_request(OpRequestRef op) = 0; virtual void handle_scrub_reserve_release(OpRequestRef op) = 0; -- 2.39.5