git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrubber: use schedule_timer_event_after for range block alarm
author Samuel Just <sjust@redhat.com>
Tue, 14 Feb 2023 05:32:18 +0000 (21:32 -0800)
committer Ronen Friedman <rfriedma@redhat.com>
Wed, 26 Mar 2025 14:38:52 +0000 (14:38 +0000)
Signed-off-by: Samuel Just <sjust@redhat.com>
(cherry picked from commit ebd5c6c44893457c9fd45ad2b37d45d5a4467727)

src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/pg_scrubber.h
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h
src/osd/scrubber/scrub_machine_lstnr.h

index 1c27cc15e1cf8a4058cbec755beee095029cff7e..868003ebdf971122ff25f677c43c8294c846a31b 100644 (file)
@@ -878,27 +878,6 @@ bool PgScrubber::range_intersects_scrub(const hobject_t& start,
   return (start < m_max_end && end >= m_start);
 }
 
-Scrub::BlockedRangeWarning PgScrubber::acquire_blocked_alarm()
-{
-  int grace = get_pg_cct()->_conf->osd_blocked_scrub_grace_period;
-  if (grace == 0) {
-    // we will not be sending any alarms re the blocked object
-    dout(10)
-      << __func__
-      << ": blocked-alarm disabled ('osd_blocked_scrub_grace_period' set to 0)"
-      << dendl;
-    return nullptr;
-  }
-  ceph::timespan grace_period{m_debug_blockrange ? 4s : seconds{grace}};
-  dout(20) << fmt::format(": timeout:{}",
-                         std::chrono::duration_cast<seconds>(grace_period))
-          << dendl;
-  return std::make_unique<blocked_range_t>(m_osds,
-                                          grace_period,
-                                          *this,
-                                          m_pg_id);
-}
-
 /**
  *  if we are required to sleep:
  *     arrange a callback sometimes later.
@@ -2999,45 +2978,4 @@ ostream& operator<<(ostream& out, const MapsCollectionStatus& sf)
   return out << " ] ";
 }
 
-// ///////////////////// blocked_range_t ///////////////////////////////
-
-blocked_range_t::blocked_range_t(OSDService* osds,
-                                ceph::timespan waittime,
-                                ScrubMachineListener& scrubber,
-                                spg_t pg_id)
-    : m_osds{osds}
-    , m_scrubber{scrubber}
-    , m_pgid{pg_id}
-{
-  auto now_is = std::chrono::system_clock::now();
-  m_callbk = new LambdaContext([this, now_is]([[maybe_unused]] int r) {
-    std::time_t now_c = std::chrono::system_clock::to_time_t(now_is);
-    char buf[50];
-    strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S", std::localtime(&now_c));
-    lgeneric_subdout(g_ceph_context, osd, 10)
-      << "PgScrubber: " << m_pgid
-      << " blocked on an object for too long (since " << buf << ")" << dendl;
-    m_osds->clog->warn() << "osd." << m_osds->whoami
-                        << " PgScrubber: " << m_pgid
-                        << " blocked on an object for too long (since " << buf
-                        << ")";
-
-    m_warning_issued = true;
-    m_scrubber.set_scrub_blocked(utime_t{now_c,0});
-    return;
-  });
-
-  std::lock_guard l(m_osds->sleep_lock);
-  m_osds->sleep_timer.add_event_after(waittime, m_callbk);
-}
-
-blocked_range_t::~blocked_range_t()
-{
-  if (m_warning_issued) {
-    m_scrubber.clear_scrub_blocked();
-  }
-  std::lock_guard l(m_osds->sleep_lock);
-  m_osds->sleep_timer.cancel_event(m_callbk);
-}
-
 }  // namespace Scrub
index 47295969d581b2b3f457d3827fbd818859543128..0818b57e0ceef4def598a8b6c6c329e16f51cf44 100644 (file)
@@ -508,6 +508,18 @@ class PgScrubber : public ScrubPgIF,
 
   void cancel_callback(scrubber_callback_cancel_token_t);
 
+  ceph::timespan get_range_blocked_grace() {
+    int grace = get_pg_cct()->_conf->osd_blocked_scrub_grace_period;
+    if (grace == 0) {
+      return ceph::timespan{};
+    }
+    ceph::timespan grace_period{
+      m_debug_blockrange ?
+      std::chrono::seconds(4) :
+      std::chrono::seconds{grace}};
+    return grace_period;
+  }
+
   [[nodiscard]] bool is_primary() const final
   {
     return m_pg->recovery_state.is_primary();
@@ -520,7 +532,6 @@ class PgScrubber : public ScrubPgIF,
 
   void select_range_n_notify() final;
 
-  Scrub::BlockedRangeWarning acquire_blocked_alarm() final;
   void set_scrub_blocked(utime_t since) final;
   void clear_scrub_blocked() final;
 
index c372c7ede9e587decbdf03b16c59d6472e765e5c..fff9b0fca91c2741a31ef51b63e795c0da55713c 100644 (file)
@@ -21,9 +21,11 @@ using namespace std::chrono;
 using namespace std::chrono_literals;
 
 #define DECLARE_LOCALS                                           \
-  ScrubMachineListener* scrbr = context<ScrubMachine>().m_scrbr; \
+  auto& machine = context<ScrubMachine>();                      \
+  std::ignore = machine;                                        \
+  ScrubMachineListener* scrbr = machine.m_scrbr;                \
   std::ignore = scrbr;                                           \
-  auto pg_id = context<ScrubMachine>().m_pg_id;                  \
+  auto pg_id = machine.m_pg_id;                                         \
   std::ignore = pg_id;
 
 NamedSimply::NamedSimply(ScrubMachineListener* scrubber, const char* name)
@@ -208,9 +210,42 @@ RangeBlocked::RangeBlocked(my_context ctx)
   dout(10) << "-- state -->> Act/RangeBlocked" << dendl;
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 
-  // arrange to have a warning message issued if we are stuck in this
-  // state for longer than some reasonable number of minutes.
-  m_timeout = scrbr->acquire_blocked_alarm();
+  auto grace = scrbr->get_range_blocked_grace();
+  if (grace == ceph::timespan{}) {
+    // we will not be sending any alarms re the blocked object
+    dout(10)
+      << __func__
+      << ": blocked-alarm disabled ('osd_blocked_scrub_grace_period' set to 0)"
+      << dendl;
+  } else {
+    // Schedule an event to warn that the pg has been blocked for longer than
+    // the timeout, see RangeBlockedAlarm handler below
+    dout(20) << fmt::format(": timeout:{}",
+                           std::chrono::duration_cast<seconds>(grace))
+            << dendl;
+
+    m_timeout_token = machine.schedule_timer_event_after<RangeBlockedAlarm>(
+      grace);
+  }
+}
+
+sc::result RangeBlocked::react(const RangeBlockedAlarm&)
+{
+  DECLARE_LOCALS;
+  char buf[50];
+  std::time_t now_c = ceph::coarse_real_clock::to_time_t(entered_at);
+  strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S", std::localtime(&now_c));
+  dout(10)
+    << "PgScrubber: " << scrbr->get_spgid()
+    << " blocked on an object for too long (since " << buf << ")" << dendl;
+  scrbr->get_clog()->warn()
+    << "osd." << scrbr->get_whoami()
+    << " PgScrubber: " << scrbr->get_spgid()
+    << " blocked on an object for too long (since " << buf
+    << ")";
+
+  scrbr->set_scrub_blocked(utime_t{now_c, 0});
+  return discard_event();
 }
 
 // ----------------------- PendingTimer -----------------------------------
index ecbdc2d63a1589080582bab983fc25e8e6193fee..01710836a598baa5115944f0e4c2da4cda516513 100644 (file)
@@ -77,6 +77,8 @@ MEV(Unblocked)
 
 MEV(InternalSchedScrub)
 
+MEV(RangeBlockedAlarm)
+
 MEV(SelectedChunkFree)
 
 MEV(ChunkIsBusy)
@@ -361,9 +363,14 @@ struct ActiveScrubbing
 
 struct RangeBlocked : sc::state<RangeBlocked, ActiveScrubbing>, NamedSimply {
   explicit RangeBlocked(my_context ctx);
-  using reactions = mpl::list<sc::transition<Unblocked, PendingTimer>>;
+  using reactions = mpl::list<
+    sc::custom_reaction<RangeBlockedAlarm>,
+    sc::transition<Unblocked, PendingTimer>>;
 
-  Scrub::BlockedRangeWarning m_timeout;
+  ceph::coarse_real_clock::time_point entered_at =
+    ceph::coarse_real_clock::now();
+  ScrubMachine::timer_event_token_t m_timeout_token;
+  sc::result react(const RangeBlockedAlarm &);
 };
 
 struct PendingTimer : sc::state<PendingTimer, ActiveScrubbing>, NamedSimply {
index ff0f2fd36ba7dd2d36959088dd33135e6cae0439..fb31ba2e2f5e214e0b98b19e32a1c09e2a07b678 100644 (file)
@@ -45,30 +45,6 @@ struct preemption_t {
   virtual bool disable_and_test() = 0;
 };
 
-/// an aux used when blocking on a busy object.
-/// Issues a log warning if still blocked after 'waittime'.
-struct blocked_range_t {
-  blocked_range_t(OSDService* osds,
-                 ceph::timespan waittime,
-                 ScrubMachineListener& scrubber,
-                 spg_t pg_id);
-  ~blocked_range_t();
-
-  OSDService* m_osds;
-  ScrubMachineListener& m_scrubber;
-
-  /// used to identify ourselves to the PG, when no longer blocked
-  spg_t m_pgid;
-  Context* m_callbk;
-
-  // once timed-out, we flag the OSD's scrub-queue as having
-  // a problem. 'm_warning_issued' signals the need to clear
-  // that OSD-wide flag.
-  bool m_warning_issued{false};
-};
-
-using BlockedRangeWarning = std::unique_ptr<blocked_range_t>;
-
 }  // namespace Scrub
 
 struct ScrubMachineListener {
@@ -104,6 +80,8 @@ struct ScrubMachineListener {
    */
   virtual void cancel_callback(scrubber_callback_cancel_token_t) = 0;
 
+  virtual ceph::timespan get_range_blocked_grace() = 0;
+
   struct MsgAndEpoch {
     MessageRef m_msg;
     epoch_t m_epoch;
@@ -119,8 +97,6 @@ struct ScrubMachineListener {
 
   virtual void select_range_n_notify() = 0;
 
-  virtual Scrub::BlockedRangeWarning acquire_blocked_alarm() = 0;
-
   /// walk the log to find the latest update that affects our chunk
   virtual eversion_t search_log_for_updates() const = 0;