]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd/scrub: collect replica reservations performance counters
authorRonen Friedman <rfriedma@redhat.com>
Sun, 17 Dec 2023 18:14:53 +0000 (12:14 -0600)
committerRonen Friedman <rfriedma@redhat.com>
Wed, 20 Dec 2023 16:56:06 +0000 (18:56 +0200)
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
src/osd/osd_perf_counters.cc
src/osd/osd_perf_counters.h
src/osd/scrubber/scrub_machine.cc
src/osd/scrubber/scrub_reservations.cc
src/osd/scrubber/scrub_reservations.h

index af8c62fb30427dc372d38487445d93f47b45abb7..30f0ba531665479a124f704e27d2c2f22f1b8d77 100644 (file)
@@ -340,7 +340,7 @@ PerfCounters *build_osd_logger(CephContext *cct) {
 
   return osd_plb.create_perf_counters();
 }
+
 
 PerfCounters *build_recoverystate_perf(CephContext *cct) {
   PerfCountersBuilder rs_perf(cct, "recoverystate_perf", rs_first, rs_last);
@@ -402,5 +402,15 @@ PerfCounters *build_scrub_labeled_perf(CephContext *cct, std::string label)
   scrub_perf.add_u64_counter(scrbcnt_blocked, "locked_object", "waiting on locked object events");
   scrub_perf.add_u64_counter(scrbcnt_write_blocked, "write_blocked_by_scrub", "write blocked by scrub");
 
+  // the replica reservation process
+  scrub_perf.add_u64_counter(scrbcnt_resrv_success, "scrub_reservations_completed", "successfully completed reservation processes");
+  scrub_perf.add_time_avg(scrbcnt_resrv_successful_elapsed, "successful_reservations_elapsed", "time to scrub reservation completion");
+  scrub_perf.add_u64_counter(scrbcnt_resrv_aborted, "reservation_process_aborted", "scrub reservation was aborted");
+  scrub_perf.add_u64_counter(scrbcnt_resrv_timed_out, "reservation_process_timed_out", "scrub reservation timed out");
+  scrub_perf.add_u64_counter(scrbcnt_resrv_rejected, "reservation_process_failure", "scrub reservation failed due to replica denial");
+  scrub_perf.add_u64_counter(scrbcnt_resrv_skipped, "reservation_process_skipped", "scrub reservation skipped for high priority scrub");
+  scrub_perf.add_time_avg(scrbcnt_resrv_failed_elapsed, "failed_reservations_elapsed", "time for scrub reservation to fail");
+  scrub_perf.add_u64(scrbcnt_resrv_replicas_num, "replicas_in_reservation", "number of replicas in reservation");
+
   return scrub_perf.create_perf_counters();
 }
index 1f65e953d337577cea3fa6efc07ae0a129c80a7e..00127dd7ff5d29f24a68a97c2811b87a18f3384d 100644 (file)
@@ -211,6 +211,24 @@ enum {
   /// # write blocked by the scrub
   scrbcnt_write_blocked,
 
+  // -- replicas reservation
+  /// # successfully completed reservation steps
+  scrbcnt_resrv_success,
+  /// time to complete a successful replicas reservation
+  scrbcnt_resrv_successful_elapsed,
+  /// # failed attempt to reserve replicas due to an abort
+  scrbcnt_resrv_aborted,
+  /// # reservation process timed out
+  scrbcnt_resrv_timed_out,
+  /// # reservation failed due to a 'rejected' response
+  scrbcnt_resrv_rejected,
+  /// # reservation skipped for high-priority scrubs
+  scrbcnt_resrv_skipped,
+  /// time for a replicas reservation process to fail
+  scrbcnt_resrv_failed_elapsed,
+  /// # number of replicas
+  scrbcnt_resrv_replicas_num,
+
   scrbcnt_last,
 };
 
index 3a0cd9866d70e817fe6397eb36bda0a1efac7b62..c68a4a1119c1f586bc61bd9db85dec6e781ecdc0 100644 (file)
@@ -180,12 +180,13 @@ ReservingReplicas::ReservingReplicas(my_context ctx)
     , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/ReservingReplicas")
 {
   dout(10) << "-- state -->> ReservingReplicas" << dendl;
+  auto& session = context<Session>();
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
 
   // initiate the reservation process
-  context<Session>().m_reservations.emplace(*scrbr);
+  session.m_reservations.emplace(*scrbr, *session.m_perf_set);
 
-  if (context<Session>().m_reservations->get_last_sent()) {
+  if (session.m_reservations->get_last_sent()) {
     // the 1'st reservation request was sent
 
     auto timeout = scrbr->get_pg_cct()->_conf.get_val<milliseconds>(
@@ -228,12 +229,13 @@ sc::result ReservingReplicas::react(const ReplicaGrant& ev)
 sc::result ReservingReplicas::react(const ReplicaReject& ev)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  auto& session = context<Session>();
   dout(10) << "ReservingReplicas::react(const ReplicaReject&)" << dendl;
+  session.m_reservations->log_failure_and_duration(scrbcnt_resrv_rejected);
 
   // manipulate the 'next to reserve' iterator to exclude
   // the rejecting replica from the set of replicas requiring release
-  context<Session>().m_reservations->verify_rejections_source(
-      ev.m_op, ev.m_from);
+  session.m_reservations->verify_rejections_source(ev.m_op, ev.m_from);
 
   // set 'reservation failure' as the scrub termination cause (affecting
   // the rescheduling of this PG)
@@ -246,7 +248,9 @@ sc::result ReservingReplicas::react(const ReplicaReject& ev)
 sc::result ReservingReplicas::react(const ReservationTimeout&)
 {
   DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  auto& session = context<Session>();
   dout(10) << "ReservingReplicas::react(const ReservationTimeout&)" << dendl;
+  session.m_reservations->log_failure_and_duration(scrbcnt_resrv_timed_out);
 
   const auto msg = fmt::format(
       "osd.{} PgScrubber: {} timeout on reserving replicas (since {})",
index 449856e7b45989ff8471570473f46f0ab9ad96d6..3faafe9cb0a1dfcba5085c8460349d2a26302e6b 100644 (file)
@@ -29,11 +29,14 @@ static std::ostream& _prefix_fn(std::ostream* _dout, T* t, std::string fn = "")
 
 namespace Scrub {
 
-ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr)
+ReplicaReservations::ReplicaReservations(
+    ScrubMachineListener& scrbr,
+    PerfCounters& pc)
     : m_scrubber{scrbr}
     , m_pg{m_scrubber.get_pg()}
     , m_pgid{m_scrubber.get_spgid().pgid}
     , m_osds{m_pg->get_pg_osd(ScrubberPasskey())}
+    , m_perf_set{pc}
 {
   // the acting set is sorted by pg_shard_t. The reservations are to be issued
   // in this order, so that the OSDs will receive the requests in a consistent
@@ -47,13 +50,17 @@ ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr)
       [whoami = m_pg->pg_whoami](const pg_shard_t& shard) {
        return shard != whoami;
       });
+  m_perf_set.set(scrbcnt_resrv_replicas_num, m_sorted_secondaries.size());
 
   m_next_to_request = m_sorted_secondaries.cbegin();
   if (m_scrubber.is_high_priority()) {
     // for high-priority scrubs (i.e. - user-initiated), no reservations are
-    // needed.
+    // needed. Note: not perf-counted as either success or failure.
     dout(10) << "high-priority scrub - no reservations needed" << dendl;
+    m_perf_set.inc(scrbcnt_resrv_skipped);
   } else {
+    m_process_started_at = ScrubClock::now();
+
     // send out the 1'st request (unless we have no replicas)
     send_next_reservation_or_complete();
     m_slow_response_warn_timeout =
@@ -88,9 +95,34 @@ void ReplicaReservations::discard_remote_reservations()
   m_next_to_request = m_sorted_secondaries.cbegin();
 }
 
+void ReplicaReservations::log_success_and_duration()
+{
+  auto logged_duration = ScrubClock::now() - m_process_started_at.value();
+  m_perf_set.tinc(scrbcnt_resrv_successful_elapsed, logged_duration);
+  m_perf_set.inc(scrbcnt_resrv_success);
+  m_osds->logger->hinc(
+      l_osd_scrub_reservation_dur_hist, m_sorted_secondaries.size(),
+      logged_duration.count());
+  m_process_started_at.reset();
+}
+
+void ReplicaReservations::log_failure_and_duration(int failure_cause_counter)
+{
+  if (!m_process_started_at.has_value()) {
+    // outcome (success/failure) already logged
+    return;
+  }
+  auto logged_duration = ScrubClock::now() - m_process_started_at.value();
+  m_perf_set.tinc(scrbcnt_resrv_failed_elapsed, logged_duration);
+  m_process_started_at.reset();
+  // note: not counted into l_osd_scrub_reservation_dur_hist
+  m_perf_set.inc(failure_cause_counter);
+}
+
 ReplicaReservations::~ReplicaReservations()
 {
   release_all();
+  log_failure_and_duration(scrbcnt_resrv_aborted);
 }
 
 bool ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
@@ -106,7 +138,7 @@ bool ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
     return false;
   }
 
-  auto elapsed = clock::now() - m_last_request_sent_at;
+  auto elapsed = ScrubClock::now() - m_last_request_sent_at;
 
   // log a warning if the response was slow to arrive
   if ((m_slow_response_warn_timeout > 0ms) &&
@@ -131,6 +163,7 @@ bool ReplicaReservations::send_next_reservation_or_complete()
   if (m_next_to_request == m_sorted_secondaries.cend()) {
     // granted by all replicas
     dout(10) << "remote reservation complete" << dendl;
+    log_success_and_duration();
     return true;  // done
   }
 
@@ -141,7 +174,7 @@ bool ReplicaReservations::send_next_reservation_or_complete()
       spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::REQUEST,
       m_pg->pg_whoami);
   m_pg->send_cluster_message(peer.osd, m, epoch, false);
-  m_last_request_sent_at = clock::now();
+  m_last_request_sent_at = ScrubClock::now();
   dout(10) << fmt::format(
                  "reserving {} (the {} of {} replicas)", *m_next_to_request,
                  active_requests_cnt() + 1, m_sorted_secondaries.size())
index 1f2628579dcc6b7d05668f673512f1f95fec1ff1..9d59033bac874610924ac8865e6d32ebc95a9d6f 100644 (file)
@@ -46,8 +46,6 @@ namespace Scrub {
  *  lost - either due to a bug or due to a network issue.)
  */
 class ReplicaReservations {
-  using clock = ceph::coarse_real_clock;
-
   ScrubMachineListener& m_scrubber;
   PG* m_pg;
 
@@ -64,14 +62,22 @@ class ReplicaReservations {
   std::vector<pg_shard_t>::const_iterator m_next_to_request;
 
   /// for logs, and for detecting slow peers
-  clock::time_point m_last_request_sent_at;
+  ScrubTimePoint m_last_request_sent_at;
 
   /// the 'slow response' timeout (in milliseconds) - as configured.
   /// Doubles as a 'do once' flag for the warning.
   std::chrono::milliseconds m_slow_response_warn_timeout;
 
+  /// access to the performance counters container relevant to this scrub
+  /// parameters
+  PerfCounters& m_perf_set;
+
+  /// used only for the 'duration of the reservation process' perf counter.
+  /// discarded once the success or failure are recorded
+  std::optional<ScrubTimePoint> m_process_started_at;
+
  public:
-  ReplicaReservations(ScrubMachineListener& scrubber);
+  ReplicaReservations(ScrubMachineListener& scrubber, PerfCounters& pc);
 
   ~ReplicaReservations();
 
@@ -112,6 +118,12 @@ class ReplicaReservations {
   /// the only replica we are expecting a reply from
   std::optional<pg_shard_t> get_last_sent() const;
 
+  /**
+   * if the start time is still set, i.e. we have not yet marked
+   * this as a success or a failure - log its duration as that of a failure.
+   */
+  void log_failure_and_duration(int failure_cause_counter);
+
   // note: 'public', as accessed via the 'standard' dout_prefix() macro
   std::ostream& gen_prefix(std::ostream& out, std::string fn) const;
 
@@ -127,6 +139,13 @@ class ReplicaReservations {
    * - if there are no more replicas to send requests to, return true
    */
   bool send_next_reservation_or_complete();
+
+  // ---   perf counters helpers
+
+  /**
+   * log the duration of the reservation process as that of a success.
+   */
+  void log_success_and_duration();
 };
 
 } // namespace Scrub