]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd/scrub: a single counters selection mechanism - step 1
authorRonen Friedman <rfriedma@redhat.com>
Sun, 13 Apr 2025 17:53:12 +0000 (12:53 -0500)
committerRonen Friedman <rfriedma@redhat.com>
Tue, 15 Apr 2025 11:01:30 +0000 (06:01 -0500)
Following the preceding PR, the Scrubber now employs
two methods for selecting the specific subset of performance
counters to update (the replicated pool set or the EC one).
The first method is using labeled counters, with 4 optional labels
(Primary/Replica X Replicated/EC Pool). The second method
is by naming the specific OSD counters to use in ScrubIoCounterSet
objects, then selecting the appropriate set based on the pool type.

This commit is the first step on the path to unifying the two
methods - discarding the use of labeled counters, and only
naming OSD counters.

Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/osd_perf_counters.cc
src/osd/osd_perf_counters.h
src/osd/scrubber/pg_scrubber.cc
src/osd/scrubber/pg_scrubber.h
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_machine.h
src/osd/scrubber/scrub_machine_lstnr.h
src/osd/scrubber_common.h

index 04baa8fa68561a662670a899b6532dd14a29e516..bbab02a0bbcb5de7276a5d82d5b740c39e9731a2 100644 (file)
@@ -374,7 +374,11 @@ PerfCounters *build_osd_logger(CephContext *cct) {
   osd_plb.add_u64_counter(l_osd_scrub_ec_read_cnt, "scrub_ec_read_cnt", "scrub ec read calls count");
   osd_plb.add_u64_counter(l_osd_scrub_ec_read_bytes, "scrub_ec_read_bytes", "scrub ec read bytes read");
 
-  // scrub I/O performed for replicated pools
+  // scrub counters differentiated by pool type (replicated vs. EC)
+  // scrub - replicated pools
+  osd_plb.add_u64_counter(l_osd_scrub_rppool_active_started, "num_scrubs_past_reservation_replicated", "scrubs count replicated");
+  // scrub - EC
+  osd_plb.add_u64_counter(l_osd_scrub_ec_active_started, "num_scrubs_past_reservation_ec", "scrubs count ec");
 
   return osd_plb.create_perf_counters();
 }
@@ -428,7 +432,6 @@ PerfCounters *build_scrub_labeled_perf(CephContext *cct, std::string label)
   scrub_perf.set_prio_default(PerfCountersBuilder::PRIO_INTERESTING);
 
   scrub_perf.add_u64_counter(scrbcnt_started, "num_scrubs_started", "scrubs attempted count");
-  scrub_perf.add_u64_counter(scrbcnt_active_started, "num_scrubs_past_reservation", "scrubs count");
   scrub_perf.add_u64_counter(scrbcnt_failed, "failed_scrubs", "failed scrubs count");
   scrub_perf.add_u64_counter(scrbcnt_successful, "successful_scrubs", "successful scrubs count");
   scrub_perf.add_time_avg(scrbcnt_failed_elapsed, "failed_scrubs_elapsed", "time to scrub failure");
index c43906591e320e0573352bb6ac12f537e7dc05f6..93d0ac311a126b7d590e0528840bb2b440256db9 100644 (file)
@@ -159,6 +159,12 @@ enum osd_counter_idx_t {
   l_osd_scrub_ec_read_cnt, ///< read calls count
   l_osd_scrub_ec_read_bytes, ///< total bytes read
 
+  // scrub counters differentiated by pool type (replicated vs. EC)
+  // scrub - replicated pools
+  l_osd_scrub_rppool_active_started, ///< scrubs that got past replicas reservation
+  // scrub - EC
+  l_osd_scrub_ec_active_started, ///< scrubs that got past secondaries reservation
+
   l_osd_last,
 };
 
@@ -211,8 +217,6 @@ enum {
   // -- basic statistics --
   /// The number of times we started a scrub
   scrbcnt_started,
-  /// # scrubs that got past replicas reservation
-  scrbcnt_active_started,
   /// # successful scrubs
   scrbcnt_successful,
   /// time to complete a successful scrub
index d60b85b93f34720df91bc93664782721bad6ee16..dbb871c38d499e5960c5b5012e23d79e770d562b 100644 (file)
@@ -999,7 +999,7 @@ std::optional<uint64_t> PgScrubber::select_range()
 
 void PgScrubber::select_range_n_notify()
 {
-  get_counters_set().inc(scrbcnt_chunks_selected);
+  get_labeled_counters()->inc(scrbcnt_chunks_selected);
   auto num_chunk_objects = select_range();
   if (num_chunk_objects.has_value()) {
     // the next chunk to handle is not blocked
@@ -1010,7 +1010,7 @@ void PgScrubber::select_range_n_notify()
     // we will wait for the objects range to become available for scrubbing
     dout(10) << __func__ << ": selected chunk is busy" << dendl;
     m_osds->queue_scrub_chunk_busy(m_pg, Scrub::scrub_prio_t::low_priority);
-    get_counters_set().inc(scrbcnt_chunks_busy);
+    get_labeled_counters()->inc(scrbcnt_chunks_busy);
   }
 }
 
@@ -1042,7 +1042,7 @@ bool PgScrubber::write_blocked_by_scrub(const hobject_t& soid)
     return false;
   }
 
-  get_counters_set().inc(scrbcnt_write_blocked);
+  get_labeled_counters()->inc(scrbcnt_write_blocked);
   dout(20) << __func__ << " " << soid << " can preempt? "
           << preemption_data.is_preemptable() << " already preempted? "
           << preemption_data.was_preempted() << dendl;
@@ -2525,11 +2525,22 @@ void PgScrubber::set_scrub_duration(std::chrono::milliseconds duration)
   });
 }
 
-PerfCounters& PgScrubber::get_counters_set() const
+PerfCounters* PgScrubber::get_osd_perf_counters() const
 {
-  return *m_osds->get_scrub_services().get_perf_counters(
+  return m_osds->logger;
+}
+
+const Scrub::ScrubCounterSet& PgScrubber::get_unlabeled_counters() const
+{
+  return m_pg->pool.info.is_replicated() ? io_counters_replicated
+                                        : io_counters_ec;
+}
+
+PerfCounters* PgScrubber::get_labeled_counters() const
+{
+  return m_osds->get_scrub_services().get_perf_counters(
       (m_pg->pool.info.is_replicated() ? pg_pool_t::TYPE_REPLICATED
-                                      : pg_pool_t::TYPE_ERASURE),
+                                       : pg_pool_t::TYPE_ERASURE),
       (m_is_deep ? scrub_level_t::deep : scrub_level_t::shallow));
 }
 
index 273be33c028333eba7fbfa34a298bb7c7fcb30dc..507089263197112e58261099f62b725bdef5bbd5 100644 (file)
@@ -143,7 +143,8 @@ static inline constexpr ScrubCounterSet io_counters_replicated{
   .omapgetheader_cnt = l_osd_scrub_omapgetheader_cnt,
   .omapgetheader_bytes = l_osd_scrub_omapgetheader_bytes,
   .omapget_cnt = l_osd_scrub_omapget_cnt,
-  .omapget_bytes = l_osd_scrub_omapget_bytes
+  .omapget_bytes = l_osd_scrub_omapget_bytes,
+  .active_started_cnt = l_osd_scrub_rppool_active_started
 };
 
 static inline constexpr ScrubCounterSet io_counters_ec{
@@ -154,7 +155,8 @@ static inline constexpr ScrubCounterSet io_counters_ec{
   .omapgetheader_cnt = l_osd_scrub_omapgetheader_cnt,
   .omapgetheader_bytes = l_osd_scrub_omapgetheader_bytes,
   .omapget_cnt = l_osd_scrub_omapget_cnt,
-  .omapget_bytes = l_osd_scrub_omapget_bytes
+  .omapget_bytes = l_osd_scrub_omapget_bytes,
+  .active_started_cnt = l_osd_scrub_ec_active_started
 };
 }  // namespace Scrub
 
@@ -414,7 +416,9 @@ class PgScrubber : public ScrubPgIF,
   int get_whoami() const final;
   spg_t get_spgid() const final { return m_pg->get_pgid(); }
   PG* get_pg() const final { return m_pg; }
-  PerfCounters& get_counters_set() const final;
+  PerfCounters* get_osd_perf_counters() const final;
+  const Scrub::ScrubCounterSet& get_unlabeled_counters() const final;
+  PerfCounters* get_labeled_counters() const final;
 
   /// delay next retry of this PG after a replica reservation failure
   void flag_reservations_failure();
index b72cf03eec31455ede8cf19f32977d920896c493..439a9356651420b2a2dc765b39f6db90f5ef262b 100644 (file)
@@ -196,7 +196,9 @@ Session::Session(my_context ctx)
   dout(10) << "-- state -->> PrimaryActive/Session" << dendl;
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 
-  m_perf_set = &scrbr->get_counters_set();
+  m_perf_set = scrbr->get_labeled_counters();
+  m_osd_counters = scrbr->get_osd_perf_counters();
+  m_counters_idx = &scrbr->get_unlabeled_counters();
   m_perf_set->inc(scrbcnt_started);
 }
 
@@ -319,7 +321,7 @@ ActiveScrubbing::ActiveScrubbing(my_context ctx)
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
   auto& session = context<Session>();
 
-  session.m_perf_set->inc(scrbcnt_active_started);
+  session.m_osd_counters->inc(session.m_counters_idx->active_started_cnt);
   scrbr->get_clog()->debug()
       << fmt::format("{} {} starts", pg_id, scrbr->get_op_mode_text());
 
@@ -343,8 +345,9 @@ ActiveScrubbing::~ActiveScrubbing()
        session.m_abort_reason.value_or(Scrub::delay_cause_t::aborted));
 
     auto logged_duration = ScrubClock::now() - session.m_session_started_at;
-    session.m_perf_set->tinc(scrbcnt_failed_elapsed, logged_duration);
-    session.m_perf_set->inc(scrbcnt_failed);
+    session.m_osd_counters->tinc(session.m_counters_idx->failed_elapsed,
+                                 logged_duration);
+    session.m_osd_counters->inc(session.m_counters_idx->failed_cnt);
   }
 }
 
@@ -718,13 +721,14 @@ sc::result WaitDigestUpdate::react(const ScrubFinished&)
   dout(10) << "WaitDigestUpdate::react(const ScrubFinished&)" << dendl;
   auto& session = context<Session>();
 
-  session.m_perf_set->inc(scrbcnt_successful);
+  session.m_osd_counters->inc(session.m_counters_idx->successful_cnt);
 
   // set the 'scrub duration'
   auto duration = machine.get_time_scrubbing();
-  session.m_perf_set->tinc(scrbcnt_successful_elapsed, duration);
   scrbr->set_scrub_duration(duration_cast<milliseconds>(duration));
   session.m_session_started_at = ScrubTimePoint{};
+  session.m_osd_counters->tinc(
+      session.m_counters_idx->successful_elapsed, duration);
 
   scrbr->scrub_finish();
   return transit<PrimaryIdle>();
index 60a3eea5ab50aebe708f5adaf2c630837f9860a0..48b78a7b38d11216bba65131ea7701eb85aad488 100644 (file)
@@ -562,10 +562,17 @@ struct Session : sc::state<Session, PrimaryActive, ReservingReplicas>,
   /// it's an RAII wrapper around the state of 'holding reservations')
   std::optional<ReplicaReservations> m_reservations{std::nullopt};
 
-  /// the relevant set of performance counters for this session
+  /// the relevant set of labeled performance counters for this session
   /// (relevant, i.e. for this pool type X scrub level)
   PerfCounters* m_perf_set{nullptr};
 
+  /// the OSD's unlabeled performance counters access point
+  PerfCounters* m_osd_counters{nullptr};
+
+  /// the set of performance counters for this session (relevant, i.e. for
+  /// this pool type)
+  const ScrubCounterSet* m_counters_idx{nullptr};
+
   /// the time when the session was initiated
   ScrubTimePoint m_session_started_at{ScrubClock::now()};
 
index 050e29cb8e2c24e4d93a7db1f1481adafc594f02..ea8b56856c493aa759b400421b0aa61c1eb6edcc 100644 (file)
@@ -57,10 +57,17 @@ struct ScrubMachineListener {
   virtual PG* get_pg() const = 0;
 
   /**
-   * access the set of performance counters relevant to this scrub
+   * the OSD's performance counters interface ("logger")
+   */
+  virtual PerfCounters* get_osd_perf_counters() const = 0;
+
+  virtual const Scrub::ScrubCounterSet& get_unlabeled_counters() const = 0;
+
+  /**
+   * the set of labeled performance counters relevant to this scrub
    * (one of the four sets of counters maintained by the OSD)
    */
-  virtual PerfCounters& get_counters_set() const = 0;
+  virtual PerfCounters* get_labeled_counters() const = 0;
 
   using scrubber_callback_t = std::function<void(void)>;
   using scrubber_callback_cancel_token_t = Context*;
index 19a3f141f92984f6a5b2b4161a29daf20cbd025f..ceca766f205734129b1bc66bb8a714caad595af7 100644 (file)
@@ -292,7 +292,7 @@ struct PgScrubBeListener {
 // defining a specific subset of performance counters. Each of the members
 // is set to (the index of) the corresponding performance counter.
 // Separate sets are used for replicated and erasure-coded pools.
-struct ScrubIoCounterSet {
+struct ScrubCounterSet {
   osd_counter_idx_t getattr_cnt; ///< get_attr calls count
   osd_counter_idx_t stats_cnt;  ///< stats calls count
   osd_counter_idx_t read_cnt;   ///< read calls count
@@ -301,7 +301,9 @@ struct ScrubIoCounterSet {
   osd_counter_idx_t omapgetheader_bytes;  ///< bytes read by omap get header
   osd_counter_idx_t omapget_cnt;  ///< omap get calls count
   osd_counter_idx_t omapget_bytes;  ///< total bytes read by omap get
+  osd_counter_idx_t active_started_cnt; ///< scrubs that got past reservation
 };
+
 }  // namespace Scrub