From: Ronen Friedman Date: Mon, 2 Oct 2023 09:43:54 +0000 (-0500) Subject: osd/scrub: modify 'a PG is reserving' to note PG X-Git-Tag: v19.0.0~197^2~9 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=a402e34b625ca3030a36f4621c1924017c964c9b;p=ceph.git osd/scrub: modify 'a PG is reserving' to note PG Only the PG that had set the 'I am in the process of reserving my replicas' flag is allowed to clear that status. That will simplify the follow-up commit, setting this flag from a specific scrub-FSM state. Signed-off-by: Ronen Friedman --- diff --git a/src/osd/scrubber/osd_scrub.cc b/src/osd/scrubber/osd_scrub.cc index 8da75233ebb9..e3a71e262347 100644 --- a/src/osd/scrubber/osd_scrub.cc +++ b/src/osd/scrubber/osd_scrub.cc @@ -466,12 +466,12 @@ int OsdScrub::get_blocked_pgs_count() const return m_queue.get_blocked_pgs_count(); } -bool OsdScrub::set_reserving_now() +bool OsdScrub::set_reserving_now(spg_t reserving_id, utime_t now_is) { - return m_queue.set_reserving_now(); + return m_queue.set_reserving_now(reserving_id, now_is); } -void OsdScrub::clear_reserving_now() +void OsdScrub::clear_reserving_now(spg_t reserving_id) { - m_queue.clear_reserving_now(); + m_queue.clear_reserving_now(reserving_id); } diff --git a/src/osd/scrubber/osd_scrub.h b/src/osd/scrubber/osd_scrub.h index 60e1f45adee9..570430660ed0 100644 --- a/src/osd/scrubber/osd_scrub.h +++ b/src/osd/scrubber/osd_scrub.h @@ -141,9 +141,9 @@ class OsdScrub { * and that PG is trying to acquire replica resources. 
* \retval false if the flag was already set (due to a race) */ - bool set_reserving_now(); + bool set_reserving_now(spg_t reserving_id, utime_t now_is); - void clear_reserving_now(); + void clear_reserving_now(spg_t reserving_id); /** * \returns true if the current time is within the scrub time window diff --git a/src/osd/scrubber/osd_scrub_sched.cc b/src/osd/scrubber/osd_scrub_sched.cc index 691461860148..324899f29ab8 100644 --- a/src/osd/scrubber/osd_scrub_sched.cc +++ b/src/osd/scrubber/osd_scrub_sched.cc @@ -479,18 +479,30 @@ int ScrubQueue::get_blocked_pgs_count() const // ////////////////////////////////////////////////////////////////////////// // // ScrubQueue - maintaining the 'some PG is reserving' flag -bool ScrubQueue::set_reserving_now() +bool ScrubQueue::set_reserving_now(spg_t reserving_id, utime_t now_is) { - auto was_set = a_pg_is_reserving.exchange(true); - return !was_set; + std::unique_lock l{reserving_lock}; + + if (!reserving_pg.has_value()) { + reserving_pg = reserving_id; + reserving_since = now_is; + return true; + } + ceph_assert(reserving_id != *reserving_pg); + return false; } -void ScrubQueue::clear_reserving_now() +void ScrubQueue::clear_reserving_now(spg_t was_reserving_id) { - a_pg_is_reserving = false; + std::unique_lock l{reserving_lock}; + if (reserving_pg && (*reserving_pg == was_reserving_id)) { + reserving_pg.reset(); + } + // otherwise - ignore silently } bool ScrubQueue::is_reserving_now() const { - return a_pg_is_reserving; + // no lock needed, as set_reserving_now() will recheck + return reserving_pg.has_value(); } diff --git a/src/osd/scrubber/osd_scrub_sched.h b/src/osd/scrubber/osd_scrub_sched.h index 9e222718c509..bd6de1c93478 100644 --- a/src/osd/scrubber/osd_scrub_sched.h +++ b/src/osd/scrubber/osd_scrub_sched.h @@ -246,8 +246,13 @@ class ScrubQueue { * (which is a possible result of a race between the check in OsdScrub and * the initiation of a scrub by some other PG) */ - bool set_reserving_now(); - void 
clear_reserving_now(); + bool set_reserving_now(spg_t reserving_id, utime_t now_is); + + /** + * silently ignore attempts to clear the flag if it was not set by + * the named pg. + */ + void clear_reserving_now(spg_t reserving_id); bool is_reserving_now() const; /// counting the number of PGs stuck while scrubbing, waiting for objects @@ -331,9 +336,11 @@ class ScrubQueue { * trying to secure its replicas' resources. We will refrain from initiating * any other scrub sessions until this one is done. * - * \todo keep the ID of the reserving PG; possibly also the time it started. + * \todo replace the local lock with regular osd-service locking */ - std::atomic_bool a_pg_is_reserving{false}; + ceph::mutex reserving_lock = ceph::make_mutex("ScrubQueue::reserving_lock"); + std::optional<spg_t> reserving_pg; + utime_t reserving_since; /** * If the scrub job was not explicitly requested, we postpone it by some diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 22f5606d454b..e27e50b70d41 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -1687,14 +1687,14 @@ void PgScrubber::on_replica_reservation_timeout() } } -bool PgScrubber::set_reserving_now() -{ - return m_osds->get_scrub_services().set_reserving_now(); +bool PgScrubber::set_reserving_now() { + return m_osds->get_scrub_services().set_reserving_now(m_pg_id, + ceph_clock_now()); } void PgScrubber::clear_reserving_now() { - m_osds->get_scrub_services().clear_reserving_now(); + m_osds->get_scrub_services().clear_reserving_now(m_pg_id); } void PgScrubber::set_queued_or_active()