git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: Prevent priority from overflowing in the next base level
author David Zafman <dzafman@redhat.com>
Fri, 12 Apr 2019 23:21:12 +0000 (16:21 -0700)
committer David Zafman <dzafman@redhat.com>
Thu, 25 Apr 2019 20:53:27 +0000 (13:53 -0700)
Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/osd_types.h

index ba5a22451342006c32c2d948cb1b1b1af7784102..aaeeb158275ffc406b7be34219d16e8fbf4ac853 100644 (file)
@@ -2453,11 +2453,13 @@ bool PG::set_force_backfill(bool b)
   return did;
 }
 
-int PG::clamp_recovery_priority(int priority, int pool_recovery_priority)
+int PG::clamp_recovery_priority(int priority, int pool_recovery_priority, int max)
 {
   static_assert(OSD_RECOVERY_PRIORITY_MIN < OSD_RECOVERY_PRIORITY_MAX, "Invalid priority range");
   static_assert(OSD_RECOVERY_PRIORITY_MIN >= 0, "Priority range must match unsigned type");
 
+  ceph_assert(max <= OSD_RECOVERY_PRIORITY_MAX);
+
   // User can't set this too high anymore, but might be a legacy value
   if (pool_recovery_priority > OSD_POOL_PRIORITY_MAX)
     pool_recovery_priority = OSD_POOL_PRIORITY_MAX;
@@ -2470,8 +2472,8 @@ int PG::clamp_recovery_priority(int priority, int pool_recovery_priority)
   priority += pool_recovery_priority;
 
   // Clamp to valid range
-  if (priority > OSD_RECOVERY_PRIORITY_MAX) {
-    return OSD_RECOVERY_PRIORITY_MAX;
+  if (priority > max) {
+    return max;
   } else if (priority < OSD_RECOVERY_PRIORITY_MIN) {
     return OSD_RECOVERY_PRIORITY_MIN;
   } else {
@@ -2483,20 +2485,22 @@ unsigned PG::get_recovery_priority()
 {
   // a higher value -> a higher priority
   int ret = OSD_RECOVERY_PRIORITY_BASE;
+  int base = ret;
 
   if (state & PG_STATE_FORCED_RECOVERY) {
     ret = OSD_RECOVERY_PRIORITY_FORCED;
   } else {
     // XXX: This priority boost isn't so much about inactive, but about data-at-risk
     if (is_degraded() && info.stats.avail_no_missing.size() < pool.info.min_size) {
+      base = OSD_RECOVERY_INACTIVE_PRIORITY_BASE;
       // inactive: no. of replicas < min_size, highest priority since it blocks IO
-      ret = OSD_RECOVERY_INACTIVE_PRIORITY_BASE + (pool.info.min_size - info.stats.avail_no_missing.size());
+      ret = base + (pool.info.min_size - info.stats.avail_no_missing.size());
     }
 
     int64_t pool_recovery_priority = 0;
     pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
 
-    ret = clamp_recovery_priority(ret, pool_recovery_priority);
+    ret = clamp_recovery_priority(ret, pool_recovery_priority, max_prio_map[base]);
   }
   dout(20) << __func__ << " recovery priority is " << ret << dendl;
   return static_cast<unsigned>(ret);
@@ -2506,28 +2510,32 @@ unsigned PG::get_backfill_priority()
 {
   // a higher value -> a higher priority
   int ret = OSD_BACKFILL_PRIORITY_BASE;
+  int base = ret;
+
   if (state & PG_STATE_FORCED_BACKFILL) {
     ret = OSD_BACKFILL_PRIORITY_FORCED;
   } else {
     if (acting.size() < pool.info.min_size) {
+      base = OSD_BACKFILL_INACTIVE_PRIORITY_BASE;
       // inactive: no. of replicas < min_size, highest priority since it blocks IO
-      ret = OSD_BACKFILL_INACTIVE_PRIORITY_BASE + (pool.info.min_size - acting.size());
+      ret = base + (pool.info.min_size - acting.size());
 
     } else if (is_undersized()) {
       // undersized: OSD_BACKFILL_DEGRADED_PRIORITY_BASE + num missing replicas
       ceph_assert(pool.info.size > actingset.size());
-      ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE + (pool.info.size - actingset.size());
+      base = OSD_BACKFILL_DEGRADED_PRIORITY_BASE;
+      ret = base + (pool.info.size - actingset.size());
 
     } else if (is_degraded()) {
       // degraded: baseline degraded
-      ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE;
+      base = ret = OSD_BACKFILL_DEGRADED_PRIORITY_BASE;
     }
 
     // Adjust with pool's recovery priority
     int64_t pool_recovery_priority = 0;
     pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
 
-    ret = clamp_recovery_priority(ret, pool_recovery_priority);
+    ret = clamp_recovery_priority(ret, pool_recovery_priority, max_prio_map[base]);
   }
 
   dout(20) << __func__ << " backfill priority is " << ret << dendl;
index 5a423452cb5c53fa2dff8e2fb204dea5a2255d45..869d229954caddee9356b0544272f425a9eae880 100644 (file)
@@ -1432,7 +1432,7 @@ protected:
   bool needs_backfill() const;
 
   /// clip calculated priority to reasonable range
-  int clamp_recovery_priority(int prio, int pool_recovery_prio);
+  int clamp_recovery_priority(int prio, int pool_recovery_prio, int max);
   /// get log recovery reservation priority
   unsigned get_recovery_priority();
   /// get backfill reservation priority
index b329bbdd779c225ba20791f00747350a5fe4aca3..9b9bdd23062de97a17dd7bdbc97ec83e2cdaa901 100644 (file)
 /// priority when more full
 #define OSD_DELETE_PRIORITY_FULL 255
 
+static std::map<int, int> max_prio_map = {  // priority base level -> ceiling passed as `max` to clamp_recovery_priority()
+       {OSD_BACKFILL_PRIORITY_BASE, OSD_BACKFILL_DEGRADED_PRIORITY_BASE - 1},  // capped one below the degraded-backfill base
+       {OSD_BACKFILL_DEGRADED_PRIORITY_BASE, OSD_RECOVERY_PRIORITY_BASE - 1},  // capped one below the recovery base
+       {OSD_RECOVERY_PRIORITY_BASE, OSD_BACKFILL_INACTIVE_PRIORITY_BASE - 1},  // capped one below the inactive-backfill base
+       {OSD_RECOVERY_INACTIVE_PRIORITY_BASE, OSD_RECOVERY_PRIORITY_MAX},  // inactive recovery may use the range up to the overall maximum
+       {OSD_BACKFILL_INACTIVE_PRIORITY_BASE, OSD_RECOVERY_PRIORITY_MAX}  // inactive backfill may use the range up to the overall maximum
+};  // NOTE(review): callers index this with operator[], which default-inserts 0 for an unlisted base — confirm every base used as a key is listed here
 
 typedef hobject_t collection_list_handle_t;