From 0985ae71bce32c4d9e0e9e9f68bed38eb3c26897 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Tue, 17 Jun 2014 13:49:44 -0700
Subject: [PATCH] osd: prioritize backfill based on *how* degraded

Instead of prioritizing all degraded PGs equally, prioritize those that
have the fewest acting members (the most degraded).

Drop the priority constants.  Define our max priority independently of
AsyncReserver, which uses a host unsigned.

Signed-off-by: Sage Weil
---
Review notes (not part of the commit message):
 - get_backfill_priority(): the assert guarding the unsigned subtraction
   is replaced by a saturating computation, and the result is capped at
   OSD_RECOVERY_PRIORITY_MAX.  Hunk counts and the diffstat are updated.
 - Indentation and template argument lists were reconstructed from a
   mangled copy; apply with --ignore-whitespace if context mismatches.

 src/osd/OSD.h       |  5 -----
 src/osd/PG.cc       | 41 ++++++++++++++++++++++++++++++++++-------
 src/osd/PG.h        |  5 +++++
 src/osd/osd_types.h |  4 ++++
 4 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index ac1c2e712915..775f45da8ed6 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -689,11 +689,6 @@ public:
   }
 
   // -- backfill_reservation --
-  enum {
-    BACKFILL_LOW = 0,   // backfill non-degraded PGs
-    BACKFILL_HIGH = 1,  // backfill degraded PGs
-    RECOVERY = AsyncReserver<spg_t>::MAX_PRIORITY  // log based recovery
-  };
   Finisher reserver_finisher;
   AsyncReserver<spg_t> local_reserver;
   AsyncReserver<spg_t> remote_reserver;
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index b77706e4a393..63841b14573c 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -1874,6 +1874,32 @@ void PG::mark_clean()
   dirty_info = true;
 }
 
+unsigned PG::get_recovery_priority()
+{
+  // a higher value -> a higher priority
+  return OSD_RECOVERY_PRIORITY_MAX;
+}
+
+unsigned PG::get_backfill_priority()
+{
+  // a higher value -> a higher priority
+
+  // degraded: 200 + num missing replicas, capped so the result never
+  // exceeds OSD_RECOVERY_PRIORITY_MAX
+  if (is_degraded()) {
+    // count replicas as missing only if acting is really short of the
+    // pool size; the unsigned subtraction must not wrap
+    const unsigned want = pool.info.size;
+    const unsigned have = acting.size();
+    const unsigned missing = want > have ? want - have : 0;
+    const unsigned headroom = OSD_RECOVERY_PRIORITY_MAX - 200;
+    return 200 + (missing < headroom ? missing : headroom);
+  }
+
+  // baseline
+  return 1;
+}
+
 void PG::finish_recovery(list<Context*>& tfin)
 {
   dout(10) << "finish_recovery" << dendl;
@@ -5521,13 +5547,12 @@ PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteBackfillReserve
         backfill_osd_it->osd, pg->get_osdmap()->get_epoch());
       if (con) {
         if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) {
-          unsigned priority = pg->is_degraded() ? OSDService::BACKFILL_HIGH
-            : OSDService::BACKFILL_LOW;
           pg->osd->send_message_osd_cluster(
             new MBackfillReserve(
               MBackfillReserve::REQUEST,
               spg_t(pg->info.pgid.pgid, backfill_osd_it->shard),
-              pg->get_osdmap()->get_epoch(), priority),
+              pg->get_osdmap()->get_epoch(),
+              pg->get_backfill_priority()),
             con.get());
         } else {
           post_event(RemoteBackfillReserved());
@@ -5596,8 +5621,8 @@ PG::RecoveryState::WaitLocalBackfillReserved::WaitLocalBackfillReserved(my_conte
     pg->info.pgid,
     new QueuePeeringEvt<LocalBackfillReserved>(
       pg, pg->get_osdmap()->get_epoch(),
-      LocalBackfillReserved()), pg->is_degraded() ? OSDService::BACKFILL_HIGH
-         : OSDService::BACKFILL_LOW);
+      LocalBackfillReserved()),
+    pg->get_backfill_priority());
 }
 
 void PG::RecoveryState::WaitLocalBackfillReserved::exit()
@@ -5652,7 +5677,8 @@ PG::RecoveryState::RepWaitRecoveryReserved::RepWaitRecoveryReserved(my_context c
     pg->info.pgid,
     new QueuePeeringEvt<RemoteRecoveryReserved>(
       pg, pg->get_osdmap()->get_epoch(),
-      RemoteRecoveryReserved()), OSDService::RECOVERY);
+      RemoteRecoveryReserved()),
+    pg->get_recovery_priority());
 }
 
 boost::statechart::result
@@ -5793,7 +5819,8 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte
     pg->info.pgid,
     new QueuePeeringEvt<LocalRecoveryReserved>(
       pg, pg->get_osdmap()->get_epoch(),
-      LocalRecoveryReserved()), OSDService::RECOVERY);
+      LocalRecoveryReserved()),
+    pg->get_recovery_priority());
 }
 
 void PG::RecoveryState::WaitLocalRecoveryReserved::exit()
diff --git a/src/osd/PG.h b/src/osd/PG.h
index dad7ab95e609..4aa208cae87e 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -710,6 +710,11 @@ public:
   bool needs_recovery() const;
   bool needs_backfill() const;
 
+  /// get log recovery reservation priority
+  unsigned get_recovery_priority();
+  /// get backfill reservation priority
+  unsigned get_backfill_priority();
+
   void mark_clean();  ///< mark an active pg clean
 
   bool _calc_past_interval_range(epoch_t *start, epoch_t *end);
diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h
index 3cdd92107fa8..d7ed516b7d28 100644
--- a/src/osd/osd_types.h
+++ b/src/osd/osd_types.h
@@ -56,6 +56,10 @@
 #define CEPH_OSD_FEATURE_INCOMPAT_SHARDS CompatSet::Feature(11, "sharded objects")
 
 
+/// max recovery priority for MBackfillReserve
+#define OSD_RECOVERY_PRIORITY_MAX 255u
+
+
 typedef hobject_t collection_list_handle_t;
 
 /// convert a single CPEH_OSD_FLAG_* to a string
-- 
2.47.3