From ef7127e8bfc01d55369fc7fd4da3db043686f124 Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Fri, 22 Nov 2024 12:00:50 -0600 Subject: [PATCH] osd/scrub: show reservation status in 'pg dump' output Whenever a PG is selected for scrubbing, and is waiting for remote reservations, the 'pg dump' output will include the following text (under the 'SCRUB_SCHEDULING' column): Reserving. Waiting Ns for OSD.k (n/m) Signed-off-by: Ronen Friedman --- src/osd/osd_types.cc | 34 ++++++++++++++++++++++--- src/osd/osd_types.h | 7 ++++++ src/osd/scrubber/pg_scrubber.cc | 34 ++++++++++++++++++++++--- src/osd/scrubber/scrub_machine.cc | 36 +++++++++++++++++++++++++++ src/osd/scrubber/scrub_machine.h | 9 ++++++- src/osd/scrubber/scrub_reservations.h | 6 ++--- 6 files changed, 115 insertions(+), 11 deletions(-) diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 5c2cf8b16b0..048f5aa0009 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -2942,6 +2942,14 @@ std::string pg_stat_t::dump_scrub_schedule() const return fmt::format( "Blocked! locked objects (for {}s)", scrub_sched_status.m_duration_seconds); + } else if (scrub_sched_status.m_num_to_reserve != 0) { + // we are waiting for some replicas to respond + return fmt::format( + "Reserving. Waiting {}s for OSD.{} ({}/{})", + scrub_sched_status.m_duration_seconds, + scrub_sched_status.m_osd_to_respond, + scrub_sched_status.m_ordinal_of_requested_replica, + scrub_sched_status.m_num_to_reserve); } else { return fmt::format( "{}scrubbing for {}s", @@ -2964,7 +2972,7 @@ std::string pg_stat_t::dump_scrub_schedule() const case pg_scrub_sched_status_t::queued: return fmt::format( "queued for {}scrub", - ((scrub_sched_status.m_is_deep == scrub_level_t::deep) ? "deep " : "")); + (scrub_sched_status.m_is_deep == scrub_level_t::deep) ? "deep " : ""); default: // a bug! 
return "SCRUB STATE MISMATCH!"s; @@ -2979,12 +2987,15 @@ bool operator==(const pg_scrubbing_status_t& l, const pg_scrubbing_status_t& r) l.m_duration_seconds == r.m_duration_seconds && l.m_is_active == r.m_is_active && l.m_is_deep == r.m_is_deep && - l.m_is_periodic == r.m_is_periodic; + l.m_is_periodic == r.m_is_periodic && + l.m_osd_to_respond == r.m_osd_to_respond && + l.m_ordinal_of_requested_replica == r.m_ordinal_of_requested_replica && + l.m_num_to_reserve == r.m_num_to_reserve; } void pg_stat_t::encode(ceph::buffer::list &bl) const { - ENCODE_START(29, 22, bl); + ENCODE_START(30, 22, bl); encode(version, bl); encode(reported_seq, bl); encode(reported_epoch, bl); @@ -3044,6 +3055,9 @@ void pg_stat_t::encode(ceph::buffer::list &bl) const encode(objects_trimmed, bl); encode(snaptrim_duration, bl); encode(log_dups_size, bl); + encode(scrub_sched_status.m_osd_to_respond, bl); + encode(scrub_sched_status.m_ordinal_of_requested_replica, bl); + encode(scrub_sched_status.m_num_to_reserve, bl); ENCODE_FINISH(bl); } @@ -3052,7 +3066,7 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl) { bool tmp; uint32_t old_state; - DECODE_START(29, bl); + DECODE_START(30, bl); decode(version, bl); decode(reported_seq, bl); decode(reported_epoch, bl); @@ -3142,6 +3156,18 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl) if (struct_v >= 29) { decode(log_dups_size, bl); } + if (struct_v >= 30) { + uint16_t osd_to_respond; + decode(osd_to_respond, bl); + scrub_sched_status.m_osd_to_respond = osd_to_respond; + uint8_t tmp8; + decode(tmp8, bl); + scrub_sched_status.m_ordinal_of_requested_replica = tmp8; + decode(tmp8, bl); + scrub_sched_status.m_num_to_reserve = tmp8; + } else { + scrub_sched_status.m_num_to_reserve = 0; + } } DECODE_FINISH(bl); } diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index b6f5335a0f5..1e92d5cd3d6 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -2223,6 +2223,13 @@ struct pg_scrubbing_status_t { bool 
m_is_active{false}; scrub_level_t m_is_deep{scrub_level_t::shallow}; bool m_is_periodic{true}; + // the following are only relevant when we are reserving replicas: + uint16_t m_osd_to_respond{0}; + /// this is the n'th replica we are reserving (out of m_num_to_reserve) + uint8_t m_ordinal_of_requested_replica{0}; + /// the number of replicas we are reserving for scrubbing. 0 means we are not + /// in the process of reserving replicas. + uint8_t m_num_to_reserve{0}; }; bool operator==(const pg_scrubbing_status_t& l, const pg_scrubbing_status_t& r); diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index fea8c757040..8dd666fa2d5 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -2441,7 +2441,7 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const pg_scrub_sched_status_t::blocked, true, // active (m_is_deep ? scrub_level_t::deep : scrub_level_t::shallow), - false}; + (m_active_target->urgency() == urgency_t::periodic_regular)}; } else { int32_t dur_seconds = @@ -2452,9 +2452,11 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const pg_scrub_sched_status_t::active, true, // active (m_is_deep ? scrub_level_t::deep : scrub_level_t::shallow), - false /* is periodic? unknown, actually */}; + (m_active_target->urgency() == urgency_t::periodic_regular)}; } } + + // not registered to be scrubbed? if (!m_scrub_job->is_registered()) { return pg_scrubbing_status_t{ utime_t{}, @@ -2465,8 +2467,34 @@ pg_scrubbing_status_t PgScrubber::get_schedule() const false}; } - // not taking 'no-*scrub' flags into account here. + // in session, but still reserving replicas? + const auto maybe_register = m_fsm->get_reservation_status(); + if (maybe_register) { + // note that if we are here, we are scrubbing (even though + // m_active is false). The 'maybe_register' attests to being in + // ReservingReplicas state, and m_active wasn't set yet. 
+ dout(20) << fmt::format( + "{}:maybe_register: osd:{} {}s ({} of {})", __func__, + maybe_register->m_osd_to_respond, + maybe_register->m_duration_seconds, + maybe_register->m_ordinal_of_requested_replica, + maybe_register->m_num_to_reserve) + << dendl; + return pg_scrubbing_status_t{ + utime_t{}, + maybe_register->m_duration_seconds, + pg_scrub_sched_status_t::active, + true, // active + (m_is_deep ? scrub_level_t::deep : scrub_level_t::shallow), + (m_active_target->urgency() == urgency_t::periodic_regular), + maybe_register->m_osd_to_respond, + maybe_register->m_ordinal_of_requested_replica, + maybe_register->m_num_to_reserve}; + } + const auto first_ready = m_scrub_job->earliest_eligible(now_is); + // eligible for scrubbing, but not yet selected to be scrubbed? + // (not taking 'no-*scrub' flags into account here.) if (first_ready) { const auto& targ = first_ready->get(); return pg_scrubbing_status_t{ diff --git a/src/osd/scrubber/scrub_machine.cc b/src/osd/scrubber/scrub_machine.cc index da9466758f4..10866ce580a 100644 --- a/src/osd/scrubber/scrub_machine.cc +++ b/src/osd/scrubber/scrub_machine.cc @@ -106,6 +106,25 @@ ceph::timespan ScrubMachine::get_time_scrubbing() const return ceph::timespan{}; } +std::optional ScrubMachine::get_reservation_status() + const +{ + const auto resv_state = state_cast(); + if (!resv_state) { + return std::nullopt; + } + const auto session = state_cast(); + dout(30) << fmt::format( + "{}: we are reserving {:p}-{:p}", __func__, (void*)session, + (void*)resv_state) + << dendl; + if (!session || !session->m_reservations) { + dout(20) << fmt::format("{}: no reservations data", __func__) << dendl; + return std::nullopt; + } + return session->get_reservation_status(); +} + // ////////////// the actual actions // ----------------------- NotActive ----------------------------------------- @@ -203,6 +222,23 @@ sc::result Session::react(const IntervalChanged&) return transit(); } +std::optional Session::get_reservation_status() const +{ + 
if (!m_reservations) { + return std::nullopt; + } + DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases + const auto req = m_reservations->get_last_sent(); + pg_scrubbing_status_t s; + s.m_osd_to_respond = req ? req->osd : 0; + s.m_ordinal_of_requested_replica = m_reservations->active_requests_cnt(); + s.m_num_to_reserve = scrbr->get_pg()->get_actingset().size() - 1; + s.m_duration_seconds = + duration_cast(context().get_time_scrubbing()) + .count(); + return s; +} + // ----------------------- ReservingReplicas --------------------------------- diff --git a/src/osd/scrubber/scrub_machine.h b/src/osd/scrubber/scrub_machine.h index ad0d3bfba38..7daeb15a437 100644 --- a/src/osd/scrubber/scrub_machine.h +++ b/src/osd/scrubber/scrub_machine.h @@ -2,6 +2,7 @@ // vim: ts=8 sw=2 smarttab #pragma once +#include #include #include @@ -289,9 +290,12 @@ class ScrubMachine : public sc::state_machine { [[nodiscard]] bool is_accepting_updates() const; [[nodiscard]] bool is_primary_idle() const; - // elapsed time for the currently active scrub.session + /// elapsed time for the currently active scrub.session ceph::timespan get_time_scrubbing() const; + /// replica reservation process status + std::optional get_reservation_status() const; + // ///////////////// aux declarations & functions //////////////////////// // @@ -555,6 +559,9 @@ struct Session : sc::state, /// abort reason - if known. Determines the delay time imposed on the /// failed scrub target. 
std::optional m_abort_reason{std::nullopt}; + + /// when reserving replicas: fetch the reservation status + std::optional<pg_scrubbing_status_t> get_reservation_status() const; }; struct ReservingReplicas : sc::state, NamedSimply { diff --git a/src/osd/scrubber/scrub_reservations.h b/src/osd/scrubber/scrub_reservations.h index 173b23d7db5..f5eca48b888 100644 --- a/src/osd/scrubber/scrub_reservations.h +++ b/src/osd/scrubber/scrub_reservations.h @@ -157,13 +157,13 @@ class ReplicaReservations { // note: 'public', as accessed via the 'standard' dout_prefix() macro std::ostream& gen_prefix(std::ostream& out, std::string fn) const; + /// The number of requests that have been sent (and not rejected) so far. + size_t active_requests_cnt() const; + private: /// send 'release' messages to all replicas we have managed to reserve void release_all(); - /// The number of requests that have been sent (and not rejected) so far. - size_t active_requests_cnt() const; - /** * Send a reservation request to the next replica. * - if there are no more replicas to send requests to, return true -- 2.39.5