git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Prevent priority from overflowing in the next base level
author David Zafman <dzafman@redhat.com>
Fri, 12 Apr 2019 23:21:12 +0000 (16:21 -0700)
committer Smith Farm <smithfarm@vanguard2.suse.cz>
Tue, 30 Apr 2019 15:57:41 +0000 (17:57 +0200)
Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit 42497780db7f1535407a644fca408985456e2fbc)

src/osd/PG.cc
src/osd/PG.h
src/osd/osd_types.h

index 2ffdcc449cc1da787a9e51ec8067c86c4a7a48b3..b6d3244e18735e41312b7178481966288c4c4ce3 100644 (file)
@@ -2455,11 +2455,13 @@ bool PG::set_force_backfill(bool b)
   return did;
 }
 
-int PG::clamp_recovery_priority(int priority, int pool_recovery_priority)
+int PG::clamp_recovery_priority(int priority, int pool_recovery_priority, int max)
 {
   static_assert(OSD_RECOVERY_PRIORITY_MIN < OSD_RECOVERY_PRIORITY_MAX, "Invalid priority range");
   static_assert(OSD_RECOVERY_PRIORITY_MIN >= 0, "Priority range must match unsigned type");
 
+  ceph_assert(max <= OSD_RECOVERY_PRIORITY_MAX);
+
   // User can't set this too high anymore, but might be a legacy value
   if (pool_recovery_priority > OSD_POOL_PRIORITY_MAX)
     pool_recovery_priority = OSD_POOL_PRIORITY_MAX;
@@ -2472,8 +2474,8 @@ int PG::clamp_recovery_priority(int priority, int pool_recovery_priority)
   priority += pool_recovery_priority;
 
   // Clamp to valid range
-  if (priority > OSD_RECOVERY_PRIORITY_MAX) {
-    return OSD_RECOVERY_PRIORITY_MAX;
+  if (priority > max) {
+    return max;
   } else if (priority < OSD_RECOVERY_PRIORITY_MIN) {
     return OSD_RECOVERY_PRIORITY_MIN;
   } else {
@@ -2485,20 +2487,22 @@ unsigned PG::get_recovery_priority()
 {
   // a higher value -> a higher priority
   int ret = OSD_RECOVERY_PRIORITY_BASE;
+  int base = ret;
 
   if (state & PG_STATE_FORCED_RECOVERY) {
     ret = OSD_RECOVERY_PRIORITY_FORCED;
   } else {
     // XXX: This priority boost isn't so much about inactive, but about data-at-risk
     if (is_degraded() && info.stats.avail_no_missing.size() < pool.info.min_size) {
+      base = OSD_RECOVERY_INACTIVE_PRIORITY_BASE;
       // inactive: no. of replicas < min_size, highest priority since it blocks IO
-      ret = OSD_RECOVERY_INACTIVE_PRIORITY_BASE + (pool.info.min_size - info.stats.avail_no_missing.size());
+      ret = base + (pool.info.min_size - info.stats.avail_no_missing.size());
     }
 
     int64_t pool_recovery_priority = 0;
     pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
 
-    ret = clamp_recovery_priority(ret, pool_recovery_priority);
+    ret = clamp_recovery_priority(ret, pool_recovery_priority, max_prio_map[base]);
   }
   dout(20) << __func__ << " recovery priority is " << ret << dendl;
   return static_cast<unsigned>(ret);
@@ -2508,28 +2512,32 @@ unsigned PG::get_backfill_priority()
 {
   // a higher value -> a higher priority
   int ret = OSD_BACKFILL_PRIORITY_BASE;
+  int base = ret;
+
   if (state & PG_STATE_FORCED_BACKFILL) {
     ret = OSD_BACKFILL_PRIORITY_FORCED;
   } else {
     if (acting.size() < pool.info.min_size) {
+      base = OSD_BACKFILL_INACTIVE_PRIORITY_BASE;
       // inactive: no. of replicas < min_size, highest priority since it blocks IO
-      ret = OSD_BACKFILL_INACTIVE_PRIORITY_BASE + (pool.info.min_size - acting.size());
+      ret = base + (pool.info.min_size - acting.size());
 
     } else if (is_undersized()) {
       // undersized: OSD_BACKFILL_DEGRADED_PRIORITY_BASE + num missing replicas
       ceph_assert(pool.info.size > actingset.size());
-      ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE + (pool.info.size - actingset.size());
+      base = OSD_BACKFILL_DEGRADED_PRIORITY_BASE;
+      ret = base + (pool.info.size - actingset.size());
 
     } else if (is_degraded()) {
       // degraded: baseline degraded
-      ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE;
+      base = ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE;
     }
 
     // Adjust with pool's recovery priority
     int64_t pool_recovery_priority = 0;
     pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
 
-    ret = clamp_recovery_priority(ret, pool_recovery_priority);
+    ret = clamp_recovery_priority(ret, pool_recovery_priority, max_prio_map[base]);
   }
 
   dout(20) << __func__ << " backfill priority is " << ret << dendl;
index 8f844a7c5999168a7f5bec15c03a9662057eddd8..4dca1c9ab1df713e6830be84cbdad11db0aecacd 100644 (file)
@@ -1432,7 +1432,7 @@ protected:
   bool needs_backfill() const;
 
   /// clip calculated priority to reasonable range
-  int clamp_recovery_priority(int prio, int pool_recovery_prio);
+  int clamp_recovery_priority(int prio, int pool_recovery_prio, int max);
   /// get log recovery reservation priority
   unsigned get_recovery_priority();
   /// get backfill reservation priority
index a2a22ab7754f9eb708f1a753e95a1ca4965d258d..2ed7e1cfa71cee50673f0505d79f10f531c80b5b 100644 (file)
 /// priority when more full
 #define OSD_DELETE_PRIORITY_FULL 255
 
+static std::map<int, int> max_prio_map = {  // priority base -> upper bound handed to PG::clamp_recovery_priority() as `max`
+       {OSD_BACKFILL_PRIORITY_BASE, OSD_BACKFILL_DEGRADED_PRIORITY_BASE - 1},  // default backfill base: capped one below the degraded-backfill base
+       {OSD_BACKFILL_DEGRADED_PRIORITY_BASE, OSD_RECOVERY_PRIORITY_BASE - 1},  // undersized/degraded backfill: capped one below the recovery base
+       {OSD_RECOVERY_PRIORITY_BASE, OSD_BACKFILL_INACTIVE_PRIORITY_BASE - 1},  // default recovery base: capped one below the inactive-backfill base
+       {OSD_RECOVERY_INACTIVE_PRIORITY_BASE, OSD_RECOVERY_PRIORITY_MAX},  // inactive recovery: may reach the overall maximum
+       {OSD_BACKFILL_INACTIVE_PRIORITY_BASE, OSD_RECOVERY_PRIORITY_MAX}  // inactive backfill: may reach the overall maximum
+};  // NOTE(review): callers look up with operator[], which inserts a 0 entry for any base not listed here -- confirm every base in use is covered
 
 typedef hobject_t collection_list_handle_t;