From e0ed0122526791547a317c6ca19ed081a92dfe69 Mon Sep 17 00:00:00 2001 From: David Zafman Date: Thu, 14 Jan 2021 20:39:32 -0800 Subject: [PATCH] osd: Try other PGs when reservation failures occur Fixes: https://tracker.ceph.com/issues/48843 Signed-off-by: David Zafman (cherry picked from commit 08c3ede0844507074de2b43638423c094976b493) --- PendingReleaseNotes | 3 +++ src/osd/OSD.cc | 34 ++++++++++++++++++++++++++++++++-- src/osd/PG.cc | 1 + src/osd/PG.h | 4 ++++ src/osd/pg_scrubber.h | 7 +++++++ src/osd/scrub_machine.cc | 2 ++ src/osd/scrub_machine_lstnr.h | 4 ++++ src/osd/scrubber_common.h | 4 ++++ 8 files changed, 57 insertions(+), 2 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index fc75ba0d20c3d..bd0fbb3eab406 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -67,6 +67,9 @@ to property reflect the upper cache utilization before space is reclaimed. The default ``immutable_object_cache_watermark`` now is ``0.9``. If the capacity reaches 90% the daemon will delete cold cache. +* Scubs are more aggressive in trying to find more simultaneous possible PGs within osd_max_scrubs limitation. + It is possible that increasing osd_scrub_sleep may be necessary to maintain client responsiveness. + >=15.0.0 -------- diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index c53ab0a08564c..1f699ecc9aad5 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7611,6 +7611,13 @@ void OSD::sched_scrub() continue; } + // If this one couldn't reserve, skip for now + if (pg->get_reserve_failed()) { + pg->unlock(); + dout(20) << __func__ << " pg " << scrub_job.pgid << " reserve failed, skipped" << dendl; + continue; + } + // This has already started, so go on to the next scrub job if (pg->is_scrub_active()) { pg->unlock(); @@ -7630,7 +7637,7 @@ void OSD::sched_scrub() if (pg->m_scrubber->is_reserving()) { pg->unlock(); dout(10) << __func__ << ": reserve in progress pgid " << scrub_job.pgid << dendl; - break; + goto out; } dout(15) << "sched_scrub scrubbing " << scrub_job.pgid << " at " << scrub_job.sched_time << (pg->get_must_scrub() ? ", explicitly requested" : @@ -7639,11 +7646,34 @@ void OSD::sched_scrub() if (pg->sched_scrub()) { pg->unlock(); dout(10) << __func__ << " scheduled a scrub!" << " (~" << scrub_job.pgid << "~)" << dendl; - break; + goto out; } + // If this is set now we must have had a local reserve failure, so can't scrub anything right now + if (pg->get_reserve_failed()) { + pg->unlock(); + dout(20) << __func__ << " pg " << scrub_job.pgid << " local reserve failed, nothing to be done now" << dendl; + goto out; + } + pg->unlock(); } while (service.next_scrub_stamp(scrub_job, &scrub_job)); + + // Clear reserve_failed from all pending PGs, so we try again + if (service.first_scrub_stamp(&scrub_job)) { + do { + if (scrub_job.sched_time > now) + break; + PGRef pg = _lookup_lock_pg(scrub_job.pgid); + // If we can't lock, it's ok we can get it next time + if (!pg) + continue; + pg->clear_reserve_failed(); + pg->unlock(); + } while (service.next_scrub_stamp(scrub_job, &scrub_job)); + } } + +out: dout(20) << "sched_scrub done" << dendl; } diff --git a/src/osd/PG.cc b/src/osd/PG.cc index df8c4f3451d4a..7aff77ca99e4b 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1365,6 +1365,7 @@ bool PG::sched_scrub() // be retried by the OSD later on. if (!m_scrubber->reserve_local()) { dout(10) << __func__ << ": failed to reserve locally" << dendl; + set_reserve_failed(); return false; } diff --git a/src/osd/PG.h b/src/osd/PG.h index cad031c10861f..85bbfe858497c 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -186,6 +186,10 @@ public: /// scrubbing state for both Primary & replicas bool is_scrub_active() const { return m_scrubber->is_scrub_active(); } + bool get_reserve_failed() const { return m_scrubber->get_reserve_failed(); } + void set_reserve_failed() { m_scrubber->set_reserve_failed(); } + void clear_reserve_failed() { m_scrubber->clear_reserve_failed(); } + public: // -- members -- const coll_t coll; diff --git a/src/osd/pg_scrubber.h b/src/osd/pg_scrubber.h index bbbac38ca7565..69fa6b0fcebdb 100644 --- a/src/osd/pg_scrubber.h +++ b/src/osd/pg_scrubber.h @@ -412,6 +412,10 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener { [[nodiscard]] bool is_scrub_active() const final { return m_active; } + [[nodiscard]] bool get_reserve_failed() const final { return m_reserve_failed; } + void set_reserve_failed() final { m_reserve_failed = true; } + void clear_reserve_failed() final { m_reserve_failed = false; } + private: void reset_internal_state(); @@ -536,6 +540,9 @@ class PgScrubber : public ScrubPgIF, public ScrubMachineListener { bool m_active{false}; + // This PG could not get all the scrub reservations + bool m_reserve_failed{false}; + eversion_t m_subset_last_update{}; std::unique_ptr m_store; diff --git a/src/osd/scrub_machine.cc b/src/osd/scrub_machine.cc index 2a2ee8732bda9..64d79e7236732 100644 --- a/src/osd/scrub_machine.cc +++ b/src/osd/scrub_machine.cc @@ -91,6 +91,8 @@ sc::result ReservingReplicas::react(const ReservationFailure&) DECLARE_LOCALS; // 'scrbr' & 'pg_id' aliases dout(10) << "ReservingReplicas::react(const ReservationFailure&)" << dendl; + // Mark PG so that we will try other PGs, before coming back to this one + scrbr->set_reserve_failed(); // the Scrubber must release all resources and abort the scrubbing scrbr->clear_pgscrub_state(); return transit(); diff --git a/src/osd/scrub_machine_lstnr.h b/src/osd/scrub_machine_lstnr.h index 2b96161215474..b2139773b4d59 100644 --- a/src/osd/scrub_machine_lstnr.h +++ b/src/osd/scrub_machine_lstnr.h @@ -114,6 +114,10 @@ struct ScrubMachineListener { virtual void unreserve_replicas() = 0; + [[nodiscard]] virtual bool get_reserve_failed() const = 0; + virtual void set_reserve_failed() = 0; + virtual void clear_reserve_failed() = 0; + /** * the FSM interface into the "are we waiting for maps, either our own or from * replicas" state. diff --git a/src/osd/scrubber_common.h b/src/osd/scrubber_common.h index 15a6cdf4dede4..3f3a618f8249b 100644 --- a/src/osd/scrubber_common.h +++ b/src/osd/scrubber_common.h @@ -150,6 +150,10 @@ struct ScrubPgIF { */ [[nodiscard]] virtual bool is_scrub_active() const = 0; + [[nodiscard]] virtual bool get_reserve_failed() const = 0; + virtual void set_reserve_failed() = 0; + virtual void clear_reserve_failed() = 0; + /// are we waiting for resource reservation grants form our replicas? [[nodiscard]] virtual bool is_reserving() const = 0; -- 2.39.5