From: Sage Weil Date: Tue, 17 Jun 2014 20:49:44 +0000 (-0700) Subject: osd: prioritize backfill based on *how* degraded X-Git-Tag: v0.83~65^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F1979%2Fhead;p=ceph.git osd: prioritize backfill based on *how* degraded Instead of prioritizing all degraded PGs equally, prioritize those that have the fewest acting members (the most degraded). Drop the priority constants. Define our max priority independently of AsyncReserver, which uses a host unsigned. Signed-off-by: Sage Weil --- diff --git a/src/osd/OSD.h b/src/osd/OSD.h index ac1c2e71291..775f45da8ed 100644 --- a/src/osd/OSD.h +++ b/src/osd/OSD.h @@ -689,11 +689,6 @@ public: } // -- backfill_reservation -- - enum { - BACKFILL_LOW = 0, // backfill non-degraded PGs - BACKFILL_HIGH = 1, // backfill degraded PGs - RECOVERY = AsyncReserver::MAX_PRIORITY // log based recovery - }; Finisher reserver_finisher; AsyncReserver local_reserver; AsyncReserver remote_reserver; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index b77706e4a39..63841b14573 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1874,6 +1874,26 @@ void PG::mark_clean() dirty_info = true; } +unsigned PG::get_recovery_priority() +{ + // a higher value -> a higher priority + return OSD_RECOVERY_PRIORITY_MAX; +} + +unsigned PG::get_backfill_priority() +{ + // a higher value -> a higher priority + + // degraded: 200 + num missing replicas + if (is_degraded()) { + assert(pool.info.size > acting.size()); + return 200 + (pool.info.size - acting.size()); + } + + // baseline + return 1; +} + void PG::finish_recovery(list& tfin) { dout(10) << "finish_recovery" << dendl; @@ -5521,13 +5541,12 @@ PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteBackfillReserve backfill_osd_it->osd, pg->get_osdmap()->get_epoch()); if (con) { if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) { - unsigned priority = pg->is_degraded() ? OSDService::BACKFILL_HIGH - : OSDService::BACKFILL_LOW; pg->osd->send_message_osd_cluster( new MBackfillReserve( MBackfillReserve::REQUEST, spg_t(pg->info.pgid.pgid, backfill_osd_it->shard), - pg->get_osdmap()->get_epoch(), priority), + pg->get_osdmap()->get_epoch(), + pg->get_backfill_priority()), con.get()); } else { post_event(RemoteBackfillReserved()); @@ -5596,8 +5615,8 @@ PG::RecoveryState::WaitLocalBackfillReserved::WaitLocalBackfillReserved(my_conte pg->info.pgid, new QueuePeeringEvt( pg, pg->get_osdmap()->get_epoch(), - LocalBackfillReserved()), pg->is_degraded() ? OSDService::BACKFILL_HIGH - : OSDService::BACKFILL_LOW); + LocalBackfillReserved()), + pg->get_backfill_priority()); } void PG::RecoveryState::WaitLocalBackfillReserved::exit() @@ -5652,7 +5671,8 @@ PG::RecoveryState::RepWaitRecoveryReserved::RepWaitRecoveryReserved(my_context c pg->info.pgid, new QueuePeeringEvt( pg, pg->get_osdmap()->get_epoch(), - RemoteRecoveryReserved()), OSDService::RECOVERY); + RemoteRecoveryReserved()), + pg->get_recovery_priority()); } boost::statechart::result @@ -5793,7 +5813,8 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte pg->info.pgid, new QueuePeeringEvt( pg, pg->get_osdmap()->get_epoch(), - LocalRecoveryReserved()), OSDService::RECOVERY); + LocalRecoveryReserved()), + pg->get_recovery_priority()); } void PG::RecoveryState::WaitLocalRecoveryReserved::exit() diff --git a/src/osd/PG.h b/src/osd/PG.h index dad7ab95e60..4aa208cae87 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -710,6 +710,11 @@ public: bool needs_recovery() const; bool needs_backfill() const; + /// get log recovery reservation priority + unsigned get_recovery_priority(); + /// get backfill reservation priority + unsigned get_backfill_priority(); + void mark_clean(); ///< mark an active pg clean bool _calc_past_interval_range(epoch_t *start, epoch_t *end); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index 3cdd92107fa..d7ed516b7d2 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -56,6 +56,10 @@ #define CEPH_OSD_FEATURE_INCOMPAT_SHARDS CompatSet::Feature(11, "sharded objects") +/// max recovery priority for MBackfillReserve +#define OSD_RECOVERY_PRIORITY_MAX 255u + + typedef hobject_t collection_list_handle_t; /// convert a single CPEH_OSD_FLAG_* to a string