From: Samuel Just Date: Tue, 14 Feb 2023 05:32:18 +0000 (-0800) Subject: osd/scrubber: use schedule_timer_event_after for range block alarm X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=7c19046c68c5bf20d9536d605c43c158b966a1a7;p=ceph.git osd/scrubber: use schedule_timer_event_after for range block alarm Signed-off-by: Samuel Just (cherry picked from commit ebd5c6c44893457c9fd45ad2b37d45d5a4467727) --- diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 1c27cc15e1cf..868003ebdf97 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -878,27 +878,6 @@ bool PgScrubber::range_intersects_scrub(const hobject_t& start, return (start < m_max_end && end >= m_start); } -Scrub::BlockedRangeWarning PgScrubber::acquire_blocked_alarm() -{ - int grace = get_pg_cct()->_conf->osd_blocked_scrub_grace_period; - if (grace == 0) { - // we will not be sending any alarms re the blocked object - dout(10) - << __func__ - << ": blocked-alarm disabled ('osd_blocked_scrub_grace_period' set to 0)" - << dendl; - return nullptr; - } - ceph::timespan grace_period{m_debug_blockrange ? 4s : seconds{grace}}; - dout(20) << fmt::format(": timeout:{}", - std::chrono::duration_cast<seconds>(grace_period)) - << dendl; - return std::make_unique<blocked_range_t>(m_osds, - grace_period, - *this, - m_pg_id); -} - /** * if we are required to sleep: * arrange a callback sometimes later.
@@ -2999,45 +2978,4 @@ ostream& operator<<(ostream& out, const MapsCollectionStatus& sf) return out << " ] "; } -// ///////////////////// blocked_range_t /////////////////////////////// - -blocked_range_t::blocked_range_t(OSDService* osds, - ceph::timespan waittime, - ScrubMachineListener& scrubber, - spg_t pg_id) - : m_osds{osds} - , m_scrubber{scrubber} - , m_pgid{pg_id} -{ - auto now_is = std::chrono::system_clock::now(); - m_callbk = new LambdaContext([this, now_is]([[maybe_unused]] int r) { - std::time_t now_c = std::chrono::system_clock::to_time_t(now_is); - char buf[50]; - strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S", std::localtime(&now_c)); - lgeneric_subdout(g_ceph_context, osd, 10) - << "PgScrubber: " << m_pgid - << " blocked on an object for too long (since " << buf << ")" << dendl; - m_osds->clog->warn() << "osd." << m_osds->whoami - << " PgScrubber: " << m_pgid - << " blocked on an object for too long (since " << buf - << ")"; - - m_warning_issued = true; - m_scrubber.set_scrub_blocked(utime_t{now_c,0}); - return; - }); - - std::lock_guard l(m_osds->sleep_lock); - m_osds->sleep_timer.add_event_after(waittime, m_callbk); -} - -blocked_range_t::~blocked_range_t() -{ - if (m_warning_issued) { - m_scrubber.clear_scrub_blocked(); - } - std::lock_guard l(m_osds->sleep_lock); - m_osds->sleep_timer.cancel_event(m_callbk); -} - } // namespace Scrub diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index 47295969d581..0818b57e0cee 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -508,6 +508,18 @@ class PgScrubber : public ScrubPgIF, void cancel_callback(scrubber_callback_cancel_token_t); + ceph::timespan get_range_blocked_grace() { + int grace = get_pg_cct()->_conf->osd_blocked_scrub_grace_period; + if (grace == 0) { + return ceph::timespan{}; + } + ceph::timespan grace_period{ + m_debug_blockrange ? 
+ std::chrono::seconds(4) : + std::chrono::seconds{grace}}; + return grace_period; + } + [[nodiscard]] bool is_primary() const final { return m_pg->recovery_state.is_primary(); @@ -520,7 +532,6 @@ class PgScrubber : public ScrubPgIF, void select_range_n_notify() final; - Scrub::BlockedRangeWarning acquire_blocked_alarm() final; void set_scrub_blocked(utime_t since) final; void clear_scrub_blocked() final; diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index c372c7ede9e5..fff9b0fca91c 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -21,9 +21,11 @@ using namespace std::chrono; using namespace std::chrono_literals; #define DECLARE_LOCALS \ - ScrubMachineListener* scrbr = context().m_scrbr; \ + auto& machine = context(); \ + std::ignore = machine; \ + ScrubMachineListener* scrbr = machine.m_scrbr; \ std::ignore = scrbr; \ - auto pg_id = context().m_pg_id; \ + auto pg_id = machine.m_pg_id; \ std::ignore = pg_id; NamedSimply::NamedSimply(ScrubMachineListener* scrubber, const char* name) @@ -208,9 +210,42 @@ RangeBlocked::RangeBlocked(my_context ctx) dout(10) << "-- state -->> Act/RangeBlocked" << dendl; DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases - // arrange to have a warning message issued if we are stuck in this - // state for longer than some reasonable number of minutes. 
- m_timeout = scrbr->acquire_blocked_alarm(); + auto grace = scrbr->get_range_blocked_grace(); + if (grace == ceph::timespan{}) { + // we will not be sending any alarms re the blocked object + dout(10) + << __func__ + << ": blocked-alarm disabled ('osd_blocked_scrub_grace_period' set to 0)" + << dendl; + } else { + // Schedule an event to warn that the pg has been blocked for longer than + // the timeout, see RangeBlockedAlarm handler below + dout(20) << fmt::format(": timeout:{}", + std::chrono::duration_cast<seconds>(grace)) + << dendl; + + m_timeout_token = machine.schedule_timer_event_after<RangeBlockedAlarm>( + grace); + } +} + +sc::result RangeBlocked::react(const RangeBlockedAlarm&) +{ + DECLARE_LOCALS; + char buf[50]; + std::time_t now_c = ceph::coarse_real_clock::to_time_t(entered_at); + strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S", std::localtime(&now_c)); + dout(10) + << "PgScrubber: " << scrbr->get_spgid() + << " blocked on an object for too long (since " << buf << ")" << dendl; + scrbr->get_clog()->warn() + << "osd."
<< scrbr->get_whoami() + << " PgScrubber: " << scrbr->get_spgid() + << " blocked on an object for too long (since " << buf + << ")"; + + scrbr->set_scrub_blocked(utime_t{now_c, 0}); + return discard_event(); } // ----------------------- PendingTimer ----------------------------------- diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index ecbdc2d63a15..01710836a598 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -77,6 +77,8 @@ MEV(Unblocked) MEV(InternalSchedScrub) +MEV(RangeBlockedAlarm) + MEV(SelectedChunkFree) MEV(ChunkIsBusy) @@ -361,9 +363,14 @@ struct ActiveScrubbing struct RangeBlocked : sc::state<RangeBlocked, ActiveScrubbing>, NamedSimply { explicit RangeBlocked(my_context ctx); - using reactions = mpl::list<sc::transition<Unblocked, PendingTimer>>; + using reactions = mpl::list< + sc::custom_reaction<RangeBlockedAlarm>, + sc::transition<Unblocked, PendingTimer>>; - Scrub::BlockedRangeWarning m_timeout; + ceph::coarse_real_clock::time_point entered_at = + ceph::coarse_real_clock::now(); + ScrubMachine::timer_event_token_t m_timeout_token; + sc::result react(const RangeBlockedAlarm &); }; struct PendingTimer : sc::state<PendingTimer, ActiveScrubbing>, NamedSimply { diff --git a/src/osd/scrubber/scrub_machine_lstnr.h b/src/osd/scrubber/scrub_machine_lstnr.h index ff0f2fd36ba7..fb31ba2e2f5e 100644 --- a/src/osd/scrubber/scrub_machine_lstnr.h +++ b/src/osd/scrubber/scrub_machine_lstnr.h @@ -45,30 +45,6 @@ struct preemption_t { virtual bool disable_and_test() = 0; }; -/// an aux used when blocking on a busy object. -/// Issues a log warning if still blocked after 'waittime'. -struct blocked_range_t { - blocked_range_t(OSDService* osds, - ceph::timespan waittime, - ScrubMachineListener& scrubber, - spg_t pg_id); - ~blocked_range_t(); - - OSDService* m_osds; - ScrubMachineListener& m_scrubber; - - /// used to identify ourselves to the PG, when no longer blocked - spg_t m_pgid; - Context* m_callbk; - - // once timed-out, we flag the OSD's scrub-queue as having - // a problem.
'm_warning_issued' signals the need to clear - // that OSD-wide flag. - bool m_warning_issued{false}; -}; - -using BlockedRangeWarning = std::unique_ptr<blocked_range_t>; - } // namespace Scrub struct ScrubMachineListener { @@ -104,6 +80,8 @@ struct ScrubMachineListener { */ virtual void cancel_callback(scrubber_callback_cancel_token_t) = 0; + virtual ceph::timespan get_range_blocked_grace() = 0; + struct MsgAndEpoch { MessageRef m_msg; epoch_t m_epoch; @@ -119,8 +97,6 @@ struct ScrubMachineListener { virtual void select_range_n_notify() = 0; - virtual Scrub::BlockedRangeWarning acquire_blocked_alarm() = 0; - /// walk the log to find the latest update that affects our chunk virtual eversion_t search_log_for_updates() const = 0;