git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/scrub: show reservation status in 'pg dump' output 60808/head
author Ronen Friedman <rfriedma@redhat.com>
Fri, 22 Nov 2024 18:00:50 +0000 (12:00 -0600)
committer Ronen Friedman <rfriedma@redhat.com>
Tue, 24 Dec 2024 14:29:57 +0000 (08:29 -0600)
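Whenever a PG is selected for scrubbing and is waiting for
remote reservations, the 'pg dump' output will include the
following text (under the 'SCRUB_SCHEDULING' column):
    Reserving. Waiting Ns for OSD.k (n/m)
where N is the number of seconds waited so far, k is the OSD whose
response is awaited, n is the ordinal of the replica currently being
requested, and m is the number of replicas to reserve.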

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/osd_types.cc
src/osd/osd_types.h
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h
src/osd/scrubber/scrub_reservations.h

index 5c2cf8b16b059509a733a91b7d82b62ad9511166..048f5aa0009cbe5a41200f32e9a85d91a07f38a4 100644 (file)
@@ -2942,6 +2942,14 @@ std::string pg_stat_t::dump_scrub_schedule() const
       return fmt::format(
        "Blocked! locked objects (for {}s)",
        scrub_sched_status.m_duration_seconds);
+    } else if (scrub_sched_status.m_num_to_reserve != 0) {
+      // we are waiting for some replicas to respond
+      return fmt::format(
+        "Reserving. Waiting {}s for OSD.{} ({}/{})",
+        scrub_sched_status.m_duration_seconds,
+        scrub_sched_status.m_osd_to_respond,
+        scrub_sched_status.m_ordinal_of_requested_replica,
+        scrub_sched_status.m_num_to_reserve);
     } else {
       return fmt::format(
        "{}scrubbing for {}s",
@@ -2964,7 +2972,7 @@ std::string pg_stat_t::dump_scrub_schedule() const
     case pg_scrub_sched_status_t::queued:
       return fmt::format(
         "queued for {}scrub",
-        ((scrub_sched_status.m_is_deep == scrub_level_t::deep) ? "deep " : ""));
+        (scrub_sched_status.m_is_deep == scrub_level_t::deep) ? "deep " : "");
     default:
       // a bug!
       return "SCRUB STATE MISMATCH!"s;
@@ -2979,12 +2987,15 @@ bool operator==(const pg_scrubbing_status_t& l, const pg_scrubbing_status_t& r)
     l.m_duration_seconds == r.m_duration_seconds &&
     l.m_is_active == r.m_is_active &&
     l.m_is_deep == r.m_is_deep &&
-    l.m_is_periodic == r.m_is_periodic;
+    l.m_is_periodic == r.m_is_periodic &&
+    l.m_osd_to_respond == r.m_osd_to_respond &&
+    l.m_ordinal_of_requested_replica == r.m_ordinal_of_requested_replica &&
+    l.m_num_to_reserve == r.m_num_to_reserve;
 }
 
 void pg_stat_t::encode(ceph::buffer::list &bl) const
 {
-  ENCODE_START(29, 22, bl);
+  ENCODE_START(30, 22, bl);
   encode(version, bl);
   encode(reported_seq, bl);
   encode(reported_epoch, bl);
@@ -3044,6 +3055,9 @@ void pg_stat_t::encode(ceph::buffer::list &bl) const
   encode(objects_trimmed, bl);
   encode(snaptrim_duration, bl);
   encode(log_dups_size, bl);
+  encode(scrub_sched_status.m_osd_to_respond, bl);
+  encode(scrub_sched_status.m_ordinal_of_requested_replica, bl);
+  encode(scrub_sched_status.m_num_to_reserve, bl);
 
   ENCODE_FINISH(bl);
 }
@@ -3052,7 +3066,7 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl)
 {
   bool tmp;
   uint32_t old_state;
-  DECODE_START(29, bl);
+  DECODE_START(30, bl);
   decode(version, bl);
   decode(reported_seq, bl);
   decode(reported_epoch, bl);
@@ -3142,6 +3156,18 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl)
     if (struct_v >= 29) {
       decode(log_dups_size, bl);
     }
+    if (struct_v >= 30) {
+      uint16_t osd_to_respond;
+      decode(osd_to_respond, bl);
+      scrub_sched_status.m_osd_to_respond = osd_to_respond;
+      uint8_t tmp8;
+      decode(tmp8, bl);
+      scrub_sched_status.m_ordinal_of_requested_replica = tmp8;
+      decode(tmp8, bl);
+      scrub_sched_status.m_num_to_reserve = tmp8;
+    } else {
+      scrub_sched_status.m_num_to_reserve = 0;
+    }
   }
   DECODE_FINISH(bl);
 }
index b6f5335a0f5138a711380e0722dc86b3093d74b6..1e92d5cd3d6e966d0e09bf06dc63fb141cc170e4 100644 (file)
@@ -2223,6 +2223,13 @@ struct pg_scrubbing_status_t {
   bool m_is_active{false};
   scrub_level_t m_is_deep{scrub_level_t::shallow};
   bool m_is_periodic{true};
+  // the following are only relevant when we are reserving replicas:
+  uint16_t m_osd_to_respond{0};
+  /// this is the n'th replica we are reserving (out of m_num_to_reserve)
+  uint8_t m_ordinal_of_requested_replica{0};
+  /// the number of replicas we are reserving for scrubbing. 0 means we are not
+  /// in the process of reserving replicas.
+  uint8_t m_num_to_reserve{0};
 };
 
 bool operator==(const pg_scrubbing_status_t& l, const pg_scrubbing_status_t& r);
index fea8c7570401b63c16315c7d0ac5c13dccec4317..8dd666fa2d58c650ca5e3795e19ff1ec52e523f3 100644 (file)
@@ -2441,7 +2441,7 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const
          pg_scrub_sched_status_t::blocked,
          true,  // active
          (m_is_deep ? scrub_level_t::deep : scrub_level_t::shallow),
-         false};
+         (m_active_target->urgency() == urgency_t::periodic_regular)};
 
     } else {
       int32_t dur_seconds =
@@ -2452,9 +2452,11 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const
          pg_scrub_sched_status_t::active,
          true,  // active
          (m_is_deep ? scrub_level_t::deep : scrub_level_t::shallow),
-         false /* is periodic? unknown, actually */};
+         (m_active_target->urgency() == urgency_t::periodic_regular)};
     }
   }
+
+  // not registered to be scrubbed?
   if (!m_scrub_job->is_registered()) {
     return pg_scrubbing_status_t{
        utime_t{},
@@ -2465,8 +2467,34 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const
        false};
   }
 
-  // not taking 'no-*scrub' flags into account here.
+  // in session, but still reserving replicas?
+  const auto maybe_register = m_fsm->get_reservation_status();
+  if (maybe_register) {
+    // note that if we are here, we are scrubbing (even though
+    // m_active is false). The 'maybe_register' attests to being in
+    // ReservingReplicas state, and m_active wasn't set yet.
+    dout(20) << fmt::format(
+                   "{}:maybe_register: osd:{} {}s ({} of {})", __func__,
+                   maybe_register->m_osd_to_respond,
+                   maybe_register->m_duration_seconds,
+                   maybe_register->m_ordinal_of_requested_replica,
+                   maybe_register->m_num_to_reserve)
+            << dendl;
+    return pg_scrubbing_status_t{
+       utime_t{},
+       maybe_register->m_duration_seconds,
+       pg_scrub_sched_status_t::active,
+       true,  // active
+       (m_is_deep ? scrub_level_t::deep : scrub_level_t::shallow),
+       (m_active_target->urgency() == urgency_t::periodic_regular),
+       maybe_register->m_osd_to_respond,
+       maybe_register->m_ordinal_of_requested_replica,
+       maybe_register->m_num_to_reserve};
+  }
+
   const auto first_ready = m_scrub_job->earliest_eligible(now_is);
+  // eligible for scrubbing, but not yet selected to be scrubbed?
+  // (not taking 'no-*scrub' flags into account here.)
   if (first_ready) {
     const auto& targ = first_ready->get();
     return pg_scrubbing_status_t{
index da9466758f468fa0e7174d86575753e6ac51bf34..10866ce580a18bf5c0cc911ff7946f8bce2c2092 100644 (file)
@@ -106,6 +106,25 @@ ceph::timespan ScrubMachine::get_time_scrubbing() const
   return ceph::timespan{};
 }
 
+std::optional<pg_scrubbing_status_t> ScrubMachine::get_reservation_status()
+    const
+{
+  const auto resv_state = state_cast<const ReservingReplicas*>();
+  if (!resv_state) {
+    return std::nullopt;
+  }
+  const auto session = state_cast<const Session*>();
+  dout(30) << fmt::format(
+                 "{}: we are reserving {:p}-{:p}", __func__, (void*)session,
+                 (void*)resv_state)
+          << dendl;
+  if (!session || !session->m_reservations) {
+    dout(20) << fmt::format("{}: no reservations data", __func__) << dendl;
+    return std::nullopt;
+  }
+  return session->get_reservation_status();
+}
+
 // ////////////// the actual actions
 
 // ----------------------- NotActive -----------------------------------------
@@ -203,6 +222,23 @@ sc::result Session::react(const IntervalChanged&)
   return transit<NotActive>();
 }
 
+std::optional<pg_scrubbing_status_t> Session::get_reservation_status() const
+{
+  if (!m_reservations) {
+    return std::nullopt;
+  }
+  DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  const auto req = m_reservations->get_last_sent();
+  pg_scrubbing_status_t s;
+  s.m_osd_to_respond = req ? req->osd : 0;
+  s.m_ordinal_of_requested_replica = m_reservations->active_requests_cnt();
+  s.m_num_to_reserve = scrbr->get_pg()->get_actingset().size() - 1;
+  s.m_duration_seconds =
+      duration_cast<seconds>(context<ScrubMachine>().get_time_scrubbing())
+         .count();
+  return s;
+}
+
 
 // ----------------------- ReservingReplicas ---------------------------------
 
index ad0d3bfba3807b9247937c208f5e14dbb07392d1..7daeb15a4376490748c7697048854ca8994d7d1f 100644 (file)
@@ -2,6 +2,7 @@
 // vim: ts=8 sw=2 smarttab
 #pragma once
 
+#include <optional>
 #include <string>
 
 #include <boost/statechart/custom_reaction.hpp>
@@ -289,9 +290,12 @@ class ScrubMachine : public sc::state_machine<ScrubMachine, NotActive> {
   [[nodiscard]] bool is_accepting_updates() const;
   [[nodiscard]] bool is_primary_idle() const;
 
-  // elapsed time for the currently active scrub.session
+  /// elapsed time for the currently active scrub.session
   ceph::timespan get_time_scrubbing() const;
 
+  /// replica reservation process status
+  std::optional<pg_scrubbing_status_t> get_reservation_status() const;
+
 // ///////////////// aux declarations & functions //////////////////////// //
 
 
@@ -555,6 +559,9 @@ struct Session : sc::state<Session, PrimaryActive, ReservingReplicas>,
   /// abort reason - if known. Determines the delay time imposed on the
   /// failed scrub target.
   std::optional<Scrub::delay_cause_t> m_abort_reason{std::nullopt};
+
+  /// when reserving replicas: fetch the reservation status
+  std::optional<pg_scrubbing_status_t> get_reservation_status() const;
 };
 
 struct ReservingReplicas : sc::state<ReservingReplicas, Session>, NamedSimply {
index 173b23d7db52c5ecfa9b2a45a80ecb250b51020d..f5eca48b8887fcf6c70cbb51d4bc64454ef4c575 100644 (file)
@@ -157,13 +157,13 @@ class ReplicaReservations {
   // note: 'public', as accessed via the 'standard' dout_prefix() macro
   std::ostream& gen_prefix(std::ostream& out, std::string fn) const;
 
+  /// The number of requests that have been sent (and not rejected) so far.
+  size_t active_requests_cnt() const;
+
  private:
   /// send 'release' messages to all replicas we have managed to reserve
   void release_all();
 
-  /// The number of requests that have been sent (and not rejected) so far.
-  size_t active_requests_cnt() const;
-
   /**
    * Send a reservation request to the next replica.
    * - if there are no more replicas to send requests to, return true