osd/scrub: collect replica reservations performance counters

author Ronen Friedman <rfriedma@redhat.com>

Sun, 17 Dec 2023 18:14:53 +0000 (12:14 -0600)

committer Ronen Friedman <rfriedma@redhat.com>

Wed, 20 Dec 2023 16:56:06 +0000 (18:56 +0200)
author Ronen Friedman <rfriedma@redhat.com>
Sun, 17 Dec 2023 18:14:53 +0000 (12:14 -0600)
committer Ronen Friedman <rfriedma@redhat.com>
Wed, 20 Dec 2023 16:56:06 +0000 (18:56 +0200)
diff --git a/src/osd/osd_perf_counters.cc b/src/osd/osd_perf_counters.cc

index af8c62fb30427dc372d38487445d93f47b45abb7..30f0ba531665479a124f704e27d2c2f22f1b8d77 100644 (file)
--- a/src/osd/osd_perf_counters.cc
+++ b/src/osd/osd_perf_counters.cc
@@ -340,7 +340,7 @@ PerfCounters *build_osd_logger(CephContext *cct) {
  
    return osd_plb.create_perf_counters();
  }
- 
+
  
  PerfCounters *build_recoverystate_perf(CephContext *cct) {
    PerfCountersBuilder rs_perf(cct, "recoverystate_perf", rs_first, rs_last);
@@ -402,5 +402,15 @@ PerfCounters *build_scrub_labeled_perf(CephContext *cct, std::string label)
    scrub_perf.add_u64_counter(scrbcnt_blocked, "locked_object", "waiting on locked object events");
    scrub_perf.add_u64_counter(scrbcnt_write_blocked, "write_blocked_by_scrub", "write blocked by scrub");
  
+  // the replica reservation process
+  scrub_perf.add_u64_counter(scrbcnt_resrv_success, "scrub_reservations_completed", "successfully completed reservation processes");
+  scrub_perf.add_time_avg(scrbcnt_resrv_successful_elapsed, "successful_reservations_elapsed", "time to scrub reservation completion");
+  scrub_perf.add_u64_counter(scrbcnt_resrv_aborted, "reservation_process_aborted", "scrub reservation was aborted");
+  scrub_perf.add_u64_counter(scrbcnt_resrv_timed_out, "reservation_process_timed_out", "scrub reservation timed out");
+  scrub_perf.add_u64_counter(scrbcnt_resrv_rejected, "reservation_process_failure", "scrub reservation failed due to replica denial");
+  scrub_perf.add_u64_counter(scrbcnt_resrv_skipped, "reservation_process_skipped", "scrub reservation skipped for high priority scrub");
+  scrub_perf.add_time_avg(scrbcnt_resrv_failed_elapsed, "failed_reservations_elapsed", "time for scrub reservation to fail");
+  scrub_perf.add_u64(scrbcnt_resrv_replicas_num, "replicas_in_reservation", "number of replicas in reservation");
+
    return scrub_perf.create_perf_counters();
  }
diff --git a/src/osd/osd_perf_counters.h b/src/osd/osd_perf_counters.h

index 1f65e953d337577cea3fa6efc07ae0a129c80a7e..00127dd7ff5d29f24a68a97c2811b87a18f3384d 100644 (file)
--- a/src/osd/osd_perf_counters.h
+++ b/src/osd/osd_perf_counters.h
@@ -211,6 +211,24 @@ enum {
    /// # write blocked by the scrub
    scrbcnt_write_blocked,
  
+  // -- replicas reservation
+  /// # successfully completed reservation steps
+  scrbcnt_resrv_success,
+  /// time to complete a successful replicas reservation
+  scrbcnt_resrv_successful_elapsed,
+  /// # failed attempt to reserve replicas due to an abort
+  scrbcnt_resrv_aborted,
+  /// # reservation process timed out
+  scrbcnt_resrv_timed_out,
+  /// # reservation failed due to a 'rejected' response
+  scrbcnt_resrv_rejected,
+  /// # reservation skipped for high-priority scrubs
+  scrbcnt_resrv_skipped,
+  /// time for a replicas reservation process to fail
+  scrbcnt_resrv_failed_elapsed,
+  /// # number of replicas
+  scrbcnt_resrv_replicas_num,
+
    scrbcnt_last,
  };
  
diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc

index 3a0cd9866d70e817fe6397eb36bda0a1efac7b62..c68a4a1119c1f586bc61bd9db85dec6e781ecdc0 100644 (file)
--- a/src/osd/scrubber/scrub_machine.cc
+++ b/src/osd/scrubber/scrub_machine.cc
@@ -180,12 +180,13 @@ ReservingReplicas::ReservingReplicas(my_context ctx)
      , NamedSimply(context<ScrubMachine>().m_scrbr, "Session/ReservingReplicas")
  {
    dout(10) << "-- state -->> ReservingReplicas" << dendl;
+  auto& session = context<Session>();
    DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
  
    // initiate the reservation process
-  context<Session>().m_reservations.emplace(*scrbr);
+  session.m_reservations.emplace(*scrbr, *session.m_perf_set);
  
-  if (context<Session>().m_reservations->get_last_sent()) {
+  if (session.m_reservations->get_last_sent()) {
      // the 1'st reservation request was sent
  
      auto timeout = scrbr->get_pg_cct()->_conf.get_val<milliseconds>(
@@ -228,12 +229,13 @@ sc::result ReservingReplicas::react(const ReplicaGrant& ev)
  sc::result ReservingReplicas::react(const ReplicaReject& ev)
  {
    DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  auto& session = context<Session>();
    dout(10) << "ReservingReplicas::react(const ReplicaReject&)" << dendl;
+  session.m_reservations->log_failure_and_duration(scrbcnt_resrv_rejected);
  
    // manipulate the 'next to reserve' iterator to exclude
    // the rejecting replica from the set of replicas requiring release
-  context<Session>().m_reservations->verify_rejections_source(
-      ev.m_op, ev.m_from);
+  session.m_reservations->verify_rejections_source(ev.m_op, ev.m_from);
  
    // set 'reservation failure' as the scrub termination cause (affecting
    // the rescheduling of this PG)
@@ -246,7 +248,9 @@ sc::result ReservingReplicas::react(const ReplicaReject& ev)
  sc::result ReservingReplicas::react(const ReservationTimeout&)
  {
    DECLARE_LOCALS;  // 'scrbr' & 'pg_id' aliases
+  auto& session = context<Session>();
    dout(10) << "ReservingReplicas::react(const ReservationTimeout&)" << dendl;
+  session.m_reservations->log_failure_and_duration(scrbcnt_resrv_timed_out);
  
    const auto msg = fmt::format(
        "osd.{} PgScrubber: {} timeout on reserving replicas (since {})",
diff --git a/src/osd/scrubber/scrub_reservations.cc b/src/osd/scrubber/scrub_reservations.cc

index 449856e7b45989ff8471570473f46f0ab9ad96d6..3faafe9cb0a1dfcba5085c8460349d2a26302e6b 100644 (file)
--- a/src/osd/scrubber/scrub_reservations.cc
+++ b/src/osd/scrubber/scrub_reservations.cc
@@ -29,11 +29,14 @@ static std::ostream& _prefix_fn(std::ostream* _dout, T* t, std::string fn = "")
  
  namespace Scrub {
  
-ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr)
+ReplicaReservations::ReplicaReservations(
+    ScrubMachineListener& scrbr,
+    PerfCounters& pc)
      : m_scrubber{scrbr}
      , m_pg{m_scrubber.get_pg()}
      , m_pgid{m_scrubber.get_spgid().pgid}
      , m_osds{m_pg->get_pg_osd(ScrubberPasskey())}
+    , m_perf_set{pc}
  {
    // the acting set is sorted by pg_shard_t. The reservations are to be issued
    // in this order, so that the OSDs will receive the requests in a consistent
@@ -47,13 +50,17 @@ ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr)
        [whoami = m_pg->pg_whoami](const pg_shard_t& shard) {
         return shard != whoami;
        });
+  m_perf_set.set(scrbcnt_resrv_replicas_num, m_sorted_secondaries.size());
  
    m_next_to_request = m_sorted_secondaries.cbegin();
    if (m_scrubber.is_high_priority()) {
      // for high-priority scrubs (i.e. - user-initiated), no reservations are
-    // needed.
+    // needed. Note: not perf-counted as either success or failure.
      dout(10) << "high-priority scrub - no reservations needed" << dendl;
+    m_perf_set.inc(scrbcnt_resrv_skipped);
    } else {
+    m_process_started_at = ScrubClock::now();
+
      // send out the 1'st request (unless we have no replicas)
      send_next_reservation_or_complete();
      m_slow_response_warn_timeout =
@@ -88,9 +95,34 @@ void ReplicaReservations::discard_remote_reservations()
    m_next_to_request = m_sorted_secondaries.cbegin();
  }
  
+void ReplicaReservations::log_success_and_duration()
+{
+  auto logged_duration = ScrubClock::now() - m_process_started_at.value();
+  m_perf_set.tinc(scrbcnt_resrv_successful_elapsed, logged_duration);
+  m_perf_set.inc(scrbcnt_resrv_success);
+  m_osds->logger->hinc(
+      l_osd_scrub_reservation_dur_hist, m_sorted_secondaries.size(),
+      logged_duration.count());
+  m_process_started_at.reset();
+}
+
+void ReplicaReservations::log_failure_and_duration(int failure_cause_counter)
+{
+  if (!m_process_started_at.has_value()) {
+    // outcome (success/failure) already logged
+    return;
+  }
+  auto logged_duration = ScrubClock::now() - m_process_started_at.value();
+  m_perf_set.tinc(scrbcnt_resrv_failed_elapsed, logged_duration);
+  m_process_started_at.reset();
+  // note: not counted into l_osd_scrub_reservation_dur_hist
+  m_perf_set.inc(failure_cause_counter);
+}
+
  ReplicaReservations::~ReplicaReservations()
  {
    release_all();
+  log_failure_and_duration(scrbcnt_resrv_aborted);
  }
  
  bool ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
@@ -106,7 +138,7 @@ bool ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from)
      return false;
    }
  
-  auto elapsed = clock::now() - m_last_request_sent_at;
+  auto elapsed = ScrubClock::now() - m_last_request_sent_at;
  
    // log a warning if the response was slow to arrive
    if ((m_slow_response_warn_timeout > 0ms) &&
@@ -131,6 +163,7 @@ bool ReplicaReservations::send_next_reservation_or_complete()
    if (m_next_to_request == m_sorted_secondaries.cend()) {
      // granted by all replicas
      dout(10) << "remote reservation complete" << dendl;
+    log_success_and_duration();
      return true;  // done
    }
  
@@ -141,7 +174,7 @@ bool ReplicaReservations::send_next_reservation_or_complete()
        spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::REQUEST,
        m_pg->pg_whoami);
    m_pg->send_cluster_message(peer.osd, m, epoch, false);
-  m_last_request_sent_at = clock::now();
+  m_last_request_sent_at = ScrubClock::now();
    dout(10) << fmt::format(
                   "reserving {} (the {} of {} replicas)", *m_next_to_request,
                   active_requests_cnt() + 1, m_sorted_secondaries.size())
diff --git a/src/osd/scrubber/scrub_reservations.h b/src/osd/scrubber/scrub_reservations.h

index 1f2628579dcc6b7d05668f673512f1f95fec1ff1..9d59033bac874610924ac8865e6d32ebc95a9d6f 100644 (file)
--- a/src/osd/scrubber/scrub_reservations.h
+++ b/src/osd/scrubber/scrub_reservations.h
@@ -46,8 +46,6 @@ namespace Scrub {
   *  lost - either due to a bug or due to a network issue.)
   */
  class ReplicaReservations {
-  using clock = ceph::coarse_real_clock;
-
    ScrubMachineListener& m_scrubber;
    PG* m_pg;
  
@@ -64,14 +62,22 @@ class ReplicaReservations {
    std::vector<pg_shard_t>::const_iterator m_next_to_request;
  
    /// for logs, and for detecting slow peers
-  clock::time_point m_last_request_sent_at;
+  ScrubTimePoint m_last_request_sent_at;
  
    /// the 'slow response' timeout (in milliseconds) - as configured.
    /// Doubles as a 'do once' flag for the warning.
    std::chrono::milliseconds m_slow_response_warn_timeout;
  
+  /// access to the performance counters container relevant to this scrub
+  /// parameters
+  PerfCounters& m_perf_set;
+
+  /// used only for the 'duration of the reservation process' perf counter.
+  /// discarded once the success or failure are recorded
+  std::optional<ScrubTimePoint> m_process_started_at;
+
   public:
-  ReplicaReservations(ScrubMachineListener& scrubber);
+  ReplicaReservations(ScrubMachineListener& scrubber, PerfCounters& pc);
  
    ~ReplicaReservations();
  
@@ -112,6 +118,12 @@ class ReplicaReservations {
    /// the only replica we are expecting a reply from
    std::optional<pg_shard_t> get_last_sent() const;
  
+  /**
+   * if the start time is still set, i.e. we have not yet marked
+   * this as a success or a failure - log its duration as that of a failure.
+   */
+  void log_failure_and_duration(int failure_cause_counter);
+
    // note: 'public', as accessed via the 'standard' dout_prefix() macro
    std::ostream& gen_prefix(std::ostream& out, std::string fn) const;
  
@@ -127,6 +139,13 @@ class ReplicaReservations {
     * - if there are no more replicas to send requests to, return true
     */
    bool send_next_reservation_or_complete();
+
+  // ---   perf counters helpers
+
+  /**
+   * log the duration of the reservation process as that of a success.
+   */
+  void log_success_and_duration();
  };
  
  } // namespace Scrub
author	Ronen Friedman <rfriedma@redhat.com>
	Sun, 17 Dec 2023 18:14:53 +0000 (12:14 -0600)
committer	Ronen Friedman <rfriedma@redhat.com>
	Wed, 20 Dec 2023 16:56:06 +0000 (18:56 +0200)
src/osd/osd_perf_counters.cc		patch \| blob \| history
src/osd/osd_perf_counters.h		patch \| blob \| history
src/osd/scrubber/scrub_machine.cc		patch \| blob \| history
src/osd/scrubber/scrub_reservations.cc		patch \| blob \| history
src/osd/scrubber/scrub_reservations.h		patch \| blob \| history