From: Ronen Friedman Date: Sun, 17 Dec 2023 18:14:53 +0000 (-0600) Subject: osd/scrub: collect replica reservations performance counters X-Git-Tag: testing/wip-batrick-testing-20240411.154038~748^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=35603069833065b531f4486994c2614d1cfd8e38;p=ceph-ci.git osd/scrub: collect replica reservations performance counters Signed-off-by: Ronen Friedman --- diff --git a/src/osd/osd_perf_counters.cc b/src/osd/osd_perf_counters.cc index af8c62fb304..30f0ba53166 100644 --- a/src/osd/osd_perf_counters.cc +++ b/src/osd/osd_perf_counters.cc @@ -340,7 +340,7 @@ PerfCounters *build_osd_logger(CephContext *cct) { return osd_plb.create_perf_counters(); } - + PerfCounters *build_recoverystate_perf(CephContext *cct) { PerfCountersBuilder rs_perf(cct, "recoverystate_perf", rs_first, rs_last); @@ -402,5 +402,15 @@ PerfCounters *build_scrub_labeled_perf(CephContext *cct, std::string label) scrub_perf.add_u64_counter(scrbcnt_blocked, "locked_object", "waiting on locked object events"); scrub_perf.add_u64_counter(scrbcnt_write_blocked, "write_blocked_by_scrub", "write blocked by scrub"); + // the replica reservation process + scrub_perf.add_u64_counter(scrbcnt_resrv_success, "scrub_reservations_completed", "successfully completed reservation processes"); + scrub_perf.add_time_avg(scrbcnt_resrv_successful_elapsed, "successful_reservations_elapsed", "time to scrub reservation completion"); + scrub_perf.add_u64_counter(scrbcnt_resrv_aborted, "reservation_process_aborted", "scrub reservation was aborted"); + scrub_perf.add_u64_counter(scrbcnt_resrv_timed_out, "reservation_process_timed_out", "scrub reservation timed out"); + scrub_perf.add_u64_counter(scrbcnt_resrv_rejected, "reservation_process_failure", "scrub reservation failed due to replica denial"); + scrub_perf.add_u64_counter(scrbcnt_resrv_skipped, "reservation_process_skipped", "scrub reservation skipped for high priority scrub"); + scrub_perf.add_time_avg(scrbcnt_resrv_failed_elapsed, "failed_reservations_elapsed", "time for scrub reservation to fail"); + scrub_perf.add_u64(scrbcnt_resrv_replicas_num, "replicas_in_reservation", "number of replicas in reservation"); + return scrub_perf.create_perf_counters(); } diff --git a/src/osd/osd_perf_counters.h b/src/osd/osd_perf_counters.h index 1f65e953d33..00127dd7ff5 100644 --- a/src/osd/osd_perf_counters.h +++ b/src/osd/osd_perf_counters.h @@ -211,6 +211,24 @@ enum { /// # write blocked by the scrub scrbcnt_write_blocked, + // -- replicas reservation + /// # successfully completed reservation steps + scrbcnt_resrv_success, + /// time to complete a successful replicas reservation + scrbcnt_resrv_successful_elapsed, + /// # failed attempt to reserve replicas due to an abort + scrbcnt_resrv_aborted, + /// # reservation process timed out + scrbcnt_resrv_timed_out, + /// # reservation failed due to a 'rejected' response + scrbcnt_resrv_rejected, + /// # reservation skipped for high-priority scrubs + scrbcnt_resrv_skipped, + /// time for a replicas reservation process to fail + scrbcnt_resrv_failed_elapsed, + /// # number of replicas + scrbcnt_resrv_replicas_num, + scrbcnt_last, }; diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index 3a0cd9866d7..c68a4a1119c 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -180,12 +180,13 @@ ReservingReplicas::ReservingReplicas(my_context ctx) , NamedSimply(context().m_scrbr, "Session/ReservingReplicas") { dout(10) << "-- state -->> ReservingReplicas" << dendl; + auto& session = context(); DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases // initiate the reservation process - context().m_reservations.emplace(*scrbr); + session.m_reservations.emplace(*scrbr, *session.m_perf_set); - if (context().m_reservations->get_last_sent()) { + if (session.m_reservations->get_last_sent()) { // the 1'st reservation request was sent auto timeout = scrbr->get_pg_cct()->_conf.get_val( @@ -228,12 +229,13 @@ sc::result ReservingReplicas::react(const ReplicaGrant& ev) sc::result ReservingReplicas::react(const ReplicaReject& ev) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + auto& session = context(); dout(10) << "ReservingReplicas::react(const ReplicaReject&)" << dendl; + session.m_reservations->log_failure_and_duration(scrbcnt_resrv_rejected); // manipulate the 'next to reserve' iterator to exclude // the rejecting replica from the set of replicas requiring release - context().m_reservations->verify_rejections_source( - ev.m_op, ev.m_from); + session.m_reservations->verify_rejections_source(ev.m_op, ev.m_from); // set 'reservation failure' as the scrub termination cause (affecting // the rescheduling of this PG) @@ -246,7 +248,9 @@ sc::result ReservingReplicas::react(const ReplicaReject& ev) sc::result ReservingReplicas::react(const ReservationTimeout&) { DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + auto& session = context(); dout(10) << "ReservingReplicas::react(const ReservationTimeout&)" << dendl; + session.m_reservations->log_failure_and_duration(scrbcnt_resrv_timed_out); const auto msg = fmt::format( "osd.{} PgScrubber: {} timeout on reserving replicas (since {})", diff --git a/src/osd/scrubber/scrub_reservations.cc b/src/osd/scrubber/scrub_reservations.cc index 449856e7b45..3faafe9cb0a 100644 --- a/src/osd/scrubber/scrub_reservations.cc +++ b/src/osd/scrubber/scrub_reservations.cc @@ -29,11 +29,14 @@ static std::ostream& _prefix_fn(std::ostream* _dout, T* t, std::string fn = "") namespace Scrub { -ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr) +ReplicaReservations::ReplicaReservations( + ScrubMachineListener& scrbr, + PerfCounters& pc) : m_scrubber{scrbr} , m_pg{m_scrubber.get_pg()} , m_pgid{m_scrubber.get_spgid().pgid} , m_osds{m_pg->get_pg_osd(ScrubberPasskey())} + , m_perf_set{pc} { // the acting set is sorted by pg_shard_t. The reservations are to be issued // in this order, so that the OSDs will receive the requests in a consistent @@ -47,13 +50,17 @@ ReplicaReservations::ReplicaReservations(ScrubMachineListener& scrbr) [whoami = m_pg->pg_whoami](const pg_shard_t& shard) { return shard != whoami; }); + m_perf_set.set(scrbcnt_resrv_replicas_num, m_sorted_secondaries.size()); m_next_to_request = m_sorted_secondaries.cbegin(); if (m_scrubber.is_high_priority()) { // for high-priority scrubs (i.e. - user-initiated), no reservations are - // needed. + // needed. Note: not perf-counted as either success or failure. dout(10) << "high-priority scrub - no reservations needed" << dendl; + m_perf_set.inc(scrbcnt_resrv_skipped); } else { + m_process_started_at = ScrubClock::now(); + // send out the 1'st request (unless we have no replicas) send_next_reservation_or_complete(); m_slow_response_warn_timeout = @@ -88,9 +95,34 @@ void ReplicaReservations::discard_remote_reservations() m_next_to_request = m_sorted_secondaries.cbegin(); } +void ReplicaReservations::log_success_and_duration() +{ + auto logged_duration = ScrubClock::now() - m_process_started_at.value(); + m_perf_set.tinc(scrbcnt_resrv_successful_elapsed, logged_duration); + m_perf_set.inc(scrbcnt_resrv_success); + m_osds->logger->hinc( + l_osd_scrub_reservation_dur_hist, m_sorted_secondaries.size(), + logged_duration.count()); + m_process_started_at.reset(); +} + +void ReplicaReservations::log_failure_and_duration(int failure_cause_counter) +{ + if (!m_process_started_at.has_value()) { + // outcome (success/failure) already logged + return; + } + auto logged_duration = ScrubClock::now() - m_process_started_at.value(); + m_perf_set.tinc(scrbcnt_resrv_failed_elapsed, logged_duration); + m_process_started_at.reset(); + // note: not counted into l_osd_scrub_reservation_dur_hist + m_perf_set.inc(failure_cause_counter); +} + ReplicaReservations::~ReplicaReservations() { release_all(); + log_failure_and_duration(scrbcnt_resrv_aborted); } bool ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) @@ -106,7 +138,7 @@ bool ReplicaReservations::handle_reserve_grant(OpRequestRef op, pg_shard_t from) return false; } - auto elapsed = clock::now() - m_last_request_sent_at; + auto elapsed = ScrubClock::now() - m_last_request_sent_at; // log a warning if the response was slow to arrive if ((m_slow_response_warn_timeout > 0ms) && @@ -131,6 +163,7 @@ bool ReplicaReservations::send_next_reservation_or_complete() if (m_next_to_request == m_sorted_secondaries.cend()) { // granted by all replicas dout(10) << "remote reservation complete" << dendl; + log_success_and_duration(); return true; // done } @@ -141,7 +174,7 @@ bool ReplicaReservations::send_next_reservation_or_complete() spg_t{m_pgid, peer.shard}, epoch, MOSDScrubReserve::REQUEST, m_pg->pg_whoami); m_pg->send_cluster_message(peer.osd, m, epoch, false); - m_last_request_sent_at = clock::now(); + m_last_request_sent_at = ScrubClock::now(); dout(10) << fmt::format( "reserving {} (the {} of {} replicas)", *m_next_to_request, active_requests_cnt() + 1, m_sorted_secondaries.size()) diff --git a/src/osd/scrubber/scrub_reservations.h b/src/osd/scrubber/scrub_reservations.h index 1f2628579dc..9d59033bac8 100644 --- a/src/osd/scrubber/scrub_reservations.h +++ b/src/osd/scrubber/scrub_reservations.h @@ -46,8 +46,6 @@ namespace Scrub { * lost - either due to a bug or due to a network issue.) */ class ReplicaReservations { - using clock = ceph::coarse_real_clock; - ScrubMachineListener& m_scrubber; PG* m_pg; @@ -64,14 +62,22 @@ class ReplicaReservations { std::vector::const_iterator m_next_to_request; /// for logs, and for detecting slow peers - clock::time_point m_last_request_sent_at; + ScrubTimePoint m_last_request_sent_at; /// the 'slow response' timeout (in milliseconds) - as configured. /// Doubles as a 'do once' flag for the warning. std::chrono::milliseconds m_slow_response_warn_timeout; + /// access to the performance counters container relevant to this scrub + /// parameters + PerfCounters& m_perf_set; + + /// used only for the 'duration of the reservation process' perf counter. + /// discarded once the success or failure are recorded + std::optional m_process_started_at; + public: - ReplicaReservations(ScrubMachineListener& scrubber); + ReplicaReservations(ScrubMachineListener& scrubber, PerfCounters& pc); ~ReplicaReservations(); @@ -112,6 +118,12 @@ class ReplicaReservations { /// the only replica we are expecting a reply from std::optional get_last_sent() const; + /** + * if the start time is still set, i.e. we have not yet marked + * this as a success or a failure - log its duration as that of a failure. + */ + void log_failure_and_duration(int failure_cause_counter); + // note: 'public', as accessed via the 'standard' dout_prefix() macro std::ostream& gen_prefix(std::ostream& out, std::string fn) const; @@ -127,6 +139,13 @@ class ReplicaReservations { * - if there are no more replicas to send requests to, return true */ bool send_next_reservation_or_complete(); + + // --- perf counters helpers + + /** + * log the duration of the reservation process as that of a success. + */ + void log_success_and_duration(); }; } // namespace Scrub