From c50c20fd19e76a05535bc2623b31ce69c94abcc3 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Tue, 14 Jun 2022 14:36:13 +0000 Subject: [PATCH] osd/scrub: PGs that are scrubbing now update stats periodically Added periodic calls to all PGs for which the OSD is the primary, asking for a scrub statistics update. This allows operator queries (e.g. 'pg dump pgs') to present up-to-date scrub duration, "scrub is blocked" duration, etc. Signed-off-by: Ronen Friedman --- src/common/options/osd.yaml.in | 17 ++++++++ src/osd/OSD.cc | 4 +- src/osd/PG.cc | 12 +++++- src/osd/PG.h | 3 +- src/osd/scrubber/osd_scrub_sched.cc | 2 +- src/osd/scrubber/pg_scrubber.cc | 66 ++++++++++++++++++++++++++--- src/osd/scrubber/pg_scrubber.h | 6 +++ src/osd/scrubber_common.h | 7 +++ 8 files changed, 108 insertions(+), 9 deletions(-) diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index 8fbbd79f52b32..a9fe7f2af4bd5 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -457,6 +457,23 @@ options: long_desc: Waiting too long for an object in the scrubbed chunk to be unlocked. default: 120 with_legacy: true +# timely updates to the 'pg dump' output, esp. re scrub scheduling +- name: osd_stats_update_period_scrubbing + type: int + level: advanced + desc: Stats update period (seconds) when scrubbing + long_desc: A PG actively scrubbing (or blocked while scrubbing) publishes its + stats (inc. scrub/block duration) every this many seconds. + default: 15 + with_legacy: false +- name: osd_stats_update_period_not_scrubbing + type: int + level: advanced + desc: Stats update period (seconds) when not scrubbing + long_desc: A PG we are a primary of, publishes its + stats (inc. scrub/block duration) every this many seconds. 
+ default: 120 + with_legacy: false # where rados plugins are stored - name: osd_class_dir type: str diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index c42b5e793ade3..5533d84db61a4 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7536,6 +7536,8 @@ MPGStats* OSD::collect_pg_stats() min_last_epoch_clean = get_osdmap_epoch(); min_last_epoch_clean_pgs.clear(); + auto now_is = ceph::coarse_real_clock::now(); + std::set pool_set; vector pgs; _get_pgs(&pgs); @@ -7545,7 +7547,7 @@ MPGStats* OSD::collect_pg_stats() if (!pg->is_primary()) { continue; } - pg->with_pg_stats([&](const pg_stat_t& s, epoch_t lec) { + pg->with_pg_stats(now_is, [&](const pg_stat_t& s, epoch_t lec) { m->pg_stat[pg->pg_id.pgid] = s; min_last_epoch_clean = std::min(min_last_epoch_clean, lec); min_last_epoch_clean_pgs.push_back(pg->pg_id.pgid); diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 9723a4aa63ffe..6d98f14bfb8c7 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -2677,8 +2677,18 @@ void PG::dump_missing(Formatter *f) } } -void PG::with_pg_stats(std::function&& f) +void PG::with_pg_stats(ceph::coarse_real_clock::time_point now_is, + std::function&& f) { + dout(30) << __func__ << dendl; + // possibly update the scrub state & timers + lock(); + if (m_scrubber) { + m_scrubber->update_scrub_stats(now_is); + } + unlock(); + + // now - the actual publishing std::lock_guard l{pg_stats_publish_lock}; if (pg_stats_publish) { f(*pg_stats_publish, pg_stats_publish->get_effective_last_epoch_clean()); diff --git a/src/osd/PG.h b/src/osd/PG.h index 11850ada6bc97..fd65ab92f4e0e 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -699,7 +699,8 @@ public: void dump_pgstate_history(ceph::Formatter *f); void dump_missing(ceph::Formatter *f); - void with_pg_stats(std::function&& f); + void with_pg_stats(ceph::coarse_real_clock::time_point now_is, + std::function&& f); void with_heartbeat_peers(std::function&& f); void shutdown(); diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc 
index b2bd6e5f0f9bc..7205e1077bb73 100644 --- a/src/osd/scrubber/osd_scrub_sched.cc +++ b/src/osd/scrubber/osd_scrub_sched.cc @@ -724,7 +724,7 @@ ScrubQueue::ScrubQContainer ScrubQueue::list_registered_jobs() const } // ////////////////////////////////////////////////////////////////////////// // -// ScrubJob - scrub resource management +// ScrubQueue - scrub resource management bool ScrubQueue::can_inc_scrubs() const { diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index a7572019a2eae..5341f68dc7024 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -11,6 +11,7 @@ #include "debug.h" +#include "common/ceph_time.h" #include "common/errno.h" #include "messages/MOSDOp.h" #include "messages/MOSDRepScrub.h" @@ -18,6 +19,7 @@ #include "messages/MOSDScrubReserve.h" #include "osd/OSD.h" #include "osd/PG.h" +#include "include/utime_fmt.h" #include "osd/osd_types_fmt.h" #include "ScrubStore.h" @@ -532,6 +534,7 @@ void PgScrubber::update_scrub_job(const requested_scrub_t& request_flags) } if (is_primary() && m_scrub_job) { + ceph_assert(m_pg->is_locked()); auto suggested = m_osds->get_scrub_services().determine_scrub_time( request_flags, m_pg->info, @@ -777,14 +780,16 @@ Scrub::BlockedRangeWarning PgScrubber::acquire_blocked_alarm() int grace = get_pg_cct()->_conf->osd_blocked_scrub_grace_period; if (grace == 0) { // we will not be sending any alarms re the blocked object - dout(20) + dout(10) << __func__ << ": blocked-alarm disabled ('osd_blocked_scrub_grace_period' set to 0)" << dendl; return nullptr; } ceph::timespan grace_period{m_debug_blockrange ? 
4s : seconds{grace}}; - dout(30) << __func__ << ": timeout:" << grace_period.count() << dendl; + dout(20) << fmt::format(": timeout:{}", + std::chrono::duration_cast(grace_period)) + << dendl; return std::make_unique(m_osds, grace_period, *this, @@ -1747,6 +1752,7 @@ void PgScrubber::set_scrub_blocked(utime_t since) // we are called from a time-triggered lambda, // thus - not under PG-lock PGRef pg = m_osds->osd->lookup_lock_pg(m_pg_id); + ceph_assert(pg); // 'this' here should not exist if the PG was removed m_osds->get_scrub_services().mark_pg_scrub_blocked(m_pg_id); m_scrub_job->blocked_since = since; m_scrub_job->blocked = true; @@ -2386,9 +2392,9 @@ int PgScrubber::asok_debug(std::string_view cmd, dout(10) << __func__ << " cmd: " << cmd << " param: " << param << dendl; if (cmd == "block") { - // set a flag that will cause the next 'select_range' to report a blocked + // 'm_debug_blockrange' causes the next 'select_range' to report a blocked // object - m_debug_blockrange = 1; + m_debug_blockrange = 10; // >1, so that will trigger fast state reports } else if (cmd == "unblock") { // send an 'unblock' event, as if a blocked range was freed @@ -2405,7 +2411,7 @@ int PgScrubber::asok_debug(std::string_view cmd, if (cmd == "set") { // set a flag that will cause the next 'select_range' to report a // blocked object - m_debug_blockrange = 1; + m_debug_blockrange = 10; // >1, so that will trigger fast state reports } else { // send an 'unblock' event, as if a blocked range was freed m_debug_blockrange = 0; @@ -2416,6 +2422,56 @@ int PgScrubber::asok_debug(std::string_view cmd, return 0; } + +/* + * Note: under PG lock + */ +void PgScrubber::update_scrub_stats(ceph::coarse_real_clock::time_point now_is) +{ + using clock = ceph::coarse_real_clock; + using namespace std::chrono; + + const seconds period_active = seconds(m_pg->get_cct()->_conf.get_val( + "osd_stats_update_period_scrubbing")); + if (!period_active.count()) { + // a way for the operator to disable these 
stats updates + return; + } + const seconds period_inactive = + seconds(m_pg->get_cct()->_conf.get_val( + "osd_stats_update_period_not_scrubbing") + + m_pg_id.pgid.m_seed % 30); + + // determine the required update period, based on our current state + auto period{period_inactive}; + if (m_active) { + period = m_debug_blockrange ? 2s : period_active; + } + + /// \todo use the date library (either the one included in Arrow or directly) + /// to get the formatting of the time_points. + + if (g_conf()->subsys.should_gather()) { + // will only create the debug strings if required + char buf[50]; + auto printable_last = fmt::localtime(clock::to_time_t(m_last_stat_upd)); + strftime(buf, sizeof(buf), "%Y-%m-%dT%T", &printable_last); + dout(20) << fmt::format("{}: period: {}/{}-> {} last:{}", + __func__, + period_active, + period_inactive, + period, + buf) + << dendl; + } + + if (now_is - m_last_stat_upd > period) { + m_pg->publish_stats_to_osd(); + m_last_stat_upd = now_is; + } +} + + // ///////////////////// preemption_data_t ////////////////////////////////// PgScrubber::preemption_data_t::preemption_data_t(PG* pg) : m_pg{pg} diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 2f28b3281fbe0..71e0bee7bbced 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -441,6 +441,8 @@ class PgScrubber : public ScrubPgIF, return false; } + void update_scrub_stats(ceph::coarse_real_clock::time_point now_is) final; + int asok_debug(std::string_view cmd, std::string param, Formatter* f, @@ -882,6 +884,10 @@ class PgScrubber : public ScrubPgIF, void persist_scrub_results(inconsistent_objs_t&& all_errors); void apply_snap_mapper_fixes(const std::vector& fix_list); + // our latest periodic 'publish_stats_to_osd()'. Required frequency depends on + // scrub state. 
+ ceph::coarse_real_clock::time_point m_last_stat_upd{}; + // ------------ members used if we are a replica epoch_t m_replica_min_epoch; ///< the min epoch needed to handle this message diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 4a63cdf18161f..31109528a5120 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -315,6 +315,13 @@ struct ScrubPgIF { virtual bool get_store_errors(const scrub_ls_arg_t& arg, scrub_ls_result_t& res_inout) const = 0; + /** + * force a periodic 'publish_stats_to_osd()' call, to update scrub-related + * counters and statistics. + */ + virtual void update_scrub_stats( + ceph::coarse_real_clock::time_point now_is) = 0; + // --------------- reservations ----------------------------------- /** -- 2.39.5