git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: prioritize backfill based on *how* degraded 1979/head
author Sage Weil <sage@inktank.com>
Tue, 17 Jun 2014 20:49:44 +0000 (13:49 -0700)
committer Sage Weil <sage@inktank.com>
Tue, 17 Jun 2014 21:35:24 +0000 (14:35 -0700)
Instead of prioritizing all degraded PGs equally, prioritize those that
have the fewest acting members (the most degraded).

Drop the priority constants.

Define our max priority independently of AsyncReserver, which uses a host
unsigned.

Signed-off-by: Sage Weil <sage@inktank.com>
src/osd/OSD.h
src/osd/PG.cc
src/osd/PG.h
src/osd/osd_types.h

index ac1c2e71291553eaf0f49800d23f892e84c080a8..775f45da8ed659a5dd023878747b4e08b83d694d 100644 (file)
@@ -689,11 +689,6 @@ public:
   }
 
   // -- backfill_reservation --
-  enum {
-    BACKFILL_LOW = 0,   // backfill non-degraded PGs
-    BACKFILL_HIGH = 1, // backfill degraded PGs
-    RECOVERY = AsyncReserver<spg_t>::MAX_PRIORITY  // log based recovery
-  };
   Finisher reserver_finisher;
   AsyncReserver<spg_t> local_reserver;
   AsyncReserver<spg_t> remote_reserver;
index b77706e4a393c2fd5ee5854032dc2984b38071df..63841b14573ced21fa7f4ef156d1ade4b388d89e 100644 (file)
@@ -1874,6 +1874,26 @@ void PG::mark_clean()
   dirty_info = true;
 }
 
+unsigned PG::get_recovery_priority()
+{
+  // a higher value -> a higher priority
+  return OSD_RECOVERY_PRIORITY_MAX;
+}
+
+unsigned PG::get_backfill_priority()
+{
+  // a higher value -> a higher priority
+
+  // degraded: 200 + num missing replicas
+  if (is_degraded()) {
+    assert(pool.info.size > acting.size());
+    return 200 + (pool.info.size - acting.size());
+  }
+
+  // baseline
+  return 1;
+}
+
 void PG::finish_recovery(list<Context*>& tfin)
 {
   dout(10) << "finish_recovery" << dendl;
@@ -5521,13 +5541,12 @@ PG::RecoveryState::WaitRemoteBackfillReserved::react(const RemoteBackfillReserve
       backfill_osd_it->osd, pg->get_osdmap()->get_epoch());
     if (con) {
       if (con->has_feature(CEPH_FEATURE_BACKFILL_RESERVATION)) {
-        unsigned priority = pg->is_degraded() ? OSDService::BACKFILL_HIGH
-         : OSDService::BACKFILL_LOW;
         pg->osd->send_message_osd_cluster(
           new MBackfillReserve(
          MBackfillReserve::REQUEST,
          spg_t(pg->info.pgid.pgid, backfill_osd_it->shard),
-         pg->get_osdmap()->get_epoch(), priority),
+         pg->get_osdmap()->get_epoch(),
+         pg->get_backfill_priority()),
        con.get());
       } else {
         post_event(RemoteBackfillReserved());
@@ -5596,8 +5615,8 @@ PG::RecoveryState::WaitLocalBackfillReserved::WaitLocalBackfillReserved(my_conte
     pg->info.pgid,
     new QueuePeeringEvt<LocalBackfillReserved>(
       pg, pg->get_osdmap()->get_epoch(),
-      LocalBackfillReserved()), pg->is_degraded() ? OSDService::BACKFILL_HIGH
-        : OSDService::BACKFILL_LOW);
+      LocalBackfillReserved()),
+    pg->get_backfill_priority());
 }
 
 void PG::RecoveryState::WaitLocalBackfillReserved::exit()
@@ -5652,7 +5671,8 @@ PG::RecoveryState::RepWaitRecoveryReserved::RepWaitRecoveryReserved(my_context c
     pg->info.pgid,
     new QueuePeeringEvt<RemoteRecoveryReserved>(
       pg, pg->get_osdmap()->get_epoch(),
-      RemoteRecoveryReserved()), OSDService::RECOVERY);
+      RemoteRecoveryReserved()),
+    pg->get_recovery_priority());
 }
 
 boost::statechart::result
@@ -5793,7 +5813,8 @@ PG::RecoveryState::WaitLocalRecoveryReserved::WaitLocalRecoveryReserved(my_conte
     pg->info.pgid,
     new QueuePeeringEvt<LocalRecoveryReserved>(
       pg, pg->get_osdmap()->get_epoch(),
-      LocalRecoveryReserved()), OSDService::RECOVERY);
+      LocalRecoveryReserved()),
+    pg->get_recovery_priority());
 }
 
 void PG::RecoveryState::WaitLocalRecoveryReserved::exit()
index dad7ab95e60976f6b553a97fde393eba892271f8..4aa208cae87ea6a557fba9a04dd7457bdbe48a91 100644 (file)
@@ -710,6 +710,11 @@ public:
   bool needs_recovery() const;
   bool needs_backfill() const;
 
+  /// get log recovery reservation priority
+  unsigned get_recovery_priority();
+  /// get backfill reservation priority
+  unsigned get_backfill_priority();
+
   void mark_clean();  ///< mark an active pg clean
 
   bool _calc_past_interval_range(epoch_t *start, epoch_t *end);
index 3cdd92107fa88851d20723f28e5a4a68e1d159f4..d7ed516b7d28cd0643ff149156d6d65de3c57598 100644 (file)
 #define CEPH_OSD_FEATURE_INCOMPAT_SHARDS CompatSet::Feature(11, "sharded objects")
 
 
+/// max recovery priority for MBackfillReserve
+#define OSD_RECOVERY_PRIORITY_MAX 255u
+
+
 typedef hobject_t collection_list_handle_t;
 
 /// convert a single CPEH_OSD_FLAG_* to a string