git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd: For recovery track OSDs that have 0 missing to know how degraded we are
author: David Zafman <dzafman@redhat.com>
Thu, 11 Apr 2019 01:45:32 +0000 (18:45 -0700)
committer: David Zafman <dzafman@redhat.com>
Wed, 24 Apr 2019 22:09:23 +0000 (15:09 -0700)
Add backfill priority log message and remove redundancy from the recovery priority log message

Fixes: https://tracker.ceph.com/issues/39099
Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/PG.cc
src/osd/osd_types.cc
src/osd/osd_types.h

index ffdce25ab225ea4f26b57ab5c91b2a45f473a733..b4ad94181d6071fb34dc9f1b92e10bfc668e8792 100644 (file)
@@ -2471,15 +2471,23 @@ inline int PG::clamp_recovery_priority(int priority)
 unsigned PG::get_recovery_priority()
 {
   // a higher value -> a higher priority
-  int64_t ret = 0;
+  int ret = OSD_RECOVERY_PRIORITY_BASE;
 
   if (state & PG_STATE_FORCED_RECOVERY) {
     ret = OSD_RECOVERY_PRIORITY_FORCED;
   } else {
-    pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &ret);
-    ret = clamp_recovery_priority(OSD_RECOVERY_PRIORITY_BASE + ret);
+    // XXX: This priority boost isn't so much about inactive, but about data-at-risk
+    if (is_degraded() && info.stats.avail_no_missing.size() < pool.info.min_size) {
+      // inactive: no. of replicas < min_size, highest priority since it blocks IO
+      ret = OSD_RECOVERY_INACTIVE_PRIORITY_BASE + (pool.info.min_size - info.stats.avail_no_missing.size());
+    }
+
+    int64_t pool_recovery_priority = 0;
+    pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
+
+    ret = clamp_recovery_priority(pool_recovery_priority + ret);
   }
-  dout(20) << __func__ << " recovery priority for " << *this << " is " << ret << ", state is " << state << dendl;
+  dout(20) << __func__ << " recovery priority is " << ret << dendl;
   return static_cast<unsigned>(ret);
 }
 
@@ -2511,6 +2519,7 @@ unsigned PG::get_backfill_priority()
     ret = clamp_recovery_priority(pool_recovery_priority + ret);
   }
 
+  dout(20) << __func__ << " backfill priority is " << ret << dendl;
   return static_cast<unsigned>(ret);
 }
 
@@ -3201,6 +3210,7 @@ void PG::_update_calc_stats()
   info.stats.stats.sum.num_objects_degraded = 0;
   info.stats.stats.sum.num_objects_unfound = 0;
   info.stats.stats.sum.num_objects_misplaced = 0;
+  info.stats.avail_no_missing.clear();
 
   if ((is_remapped() || is_undersized() || !is_clean()) && (is_peered() || is_activating())) {
     dout(20) << __func__ << " actingset " << actingset << " upset "
@@ -3234,6 +3244,8 @@ void PG::_update_calc_stats()
         acting_source_objects.emplace(missing, pg_whoami);
       }
       info.stats.stats.sum.num_objects_missing_on_primary = missing;
+      if (missing == 0)
+        info.stats.avail_no_missing.push_back(pg_whoami);
       dout(20) << __func__ << " shard " << pg_whoami
                << " primary objects " << num_objects
                << " missing " << missing
@@ -3267,6 +3279,8 @@ void PG::_update_calc_stats()
        acting_source_objects.emplace(missing, peer.first);
       }
       peer.second.stats.stats.sum.num_objects_missing = missing;
+      if (missing == 0)
+        info.stats.avail_no_missing.push_back(peer.first);
       dout(20) << __func__ << " shard " << peer.first
                << " objects " << peer_num_objects
                << " missing " << missing
index cc3ec866e433bb455ca47f7b705dfd3430bc02dd..58848a399375cf3fca82d84cb2764da9f1b48495 100644 (file)
@@ -2667,6 +2667,10 @@ void pg_stat_t::dump(Formatter *f) const
   for (auto p = acting.cbegin(); p != acting.cend(); ++p)
     f->dump_int("osd", *p);
   f->close_section();
+  f->open_array_section("avail_no_missing");
+  for (auto p = avail_no_missing.cbegin(); p != avail_no_missing.cend(); ++p)
+    f->dump_stream("shard") << *p;
+  f->close_section();
   f->open_array_section("blocked_by");
   for (auto p = blocked_by.cbegin(); p != blocked_by.cend(); ++p)
     f->dump_int("osd", *p);
@@ -2700,7 +2704,7 @@ void pg_stat_t::dump_brief(Formatter *f) const
 
 void pg_stat_t::encode(ceph::buffer::list &bl) const
 {
-  ENCODE_START(25, 22, bl);
+  ENCODE_START(26, 22, bl);
   encode(version, bl);
   encode(reported_seq, bl);
   encode(reported_epoch, bl);
@@ -2746,6 +2750,7 @@ void pg_stat_t::encode(ceph::buffer::list &bl) const
   encode(top_state, bl);
   encode(purged_snaps, bl);
   encode(manifest_stats_invalid, bl);
+  encode(avail_no_missing, bl);
   ENCODE_FINISH(bl);
 }
 
@@ -2753,7 +2758,7 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl)
 {
   bool tmp;
   uint32_t old_state;
-  DECODE_START(25, bl);
+  DECODE_START(26, bl);
   decode(version, bl);
   decode(reported_seq, bl);
   decode(reported_epoch, bl);
@@ -2816,6 +2821,9 @@ void pg_stat_t::decode(ceph::buffer::list::const_iterator &bl)
     } else {
       manifest_stats_invalid = true;
     }
+    if (struct_v >= 26) {
+      decode(avail_no_missing, bl);
+    }
   }
   DECODE_FINISH(bl);
 }
@@ -2857,6 +2865,7 @@ void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
   a.up.push_back(123);
   a.up_primary = 123;
   a.acting.push_back(456);
+  a.avail_no_missing.push_back(pg_shard_t(456, shard_id_t::NO_SHARD));
   a.acting_primary = 456;
   o.push_back(new pg_stat_t(a));
 
@@ -2901,6 +2910,7 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r)
     l.ondisk_log_size == r.ondisk_log_size &&
     l.up == r.up &&
     l.acting == r.acting &&
+    l.avail_no_missing == r.avail_no_missing &&
     l.mapping_epoch == r.mapping_epoch &&
     l.blocked_by == r.blocked_by &&
     l.last_became_active == r.last_became_active &&
index 1d59a85fd9dc09ba7085bed37fc8d9f027f1f171..85cdd89014bb4c4ea87afd4fcba4fe51299d752e 100644 (file)
@@ -83,6 +83,9 @@
 /// base backfill priority for MBackfillReserve (inactive PG)
 #define OSD_BACKFILL_INACTIVE_PRIORITY_BASE 220
 
+/// base recovery priority for MRecoveryReserve (inactive PG)
+#define OSD_RECOVERY_INACTIVE_PRIORITY_BASE 220
+
 /// max manually/automatically set recovery priority for MBackfillReserve
 #define OSD_RECOVERY_PRIORITY_MAX 253
 
@@ -2076,6 +2079,7 @@ struct pg_stat_t {
   int64_t ondisk_log_size;    // >= active_log_size
 
   std::vector<int32_t> up, acting;
+  std::vector<pg_shard_t> avail_no_missing;
   epoch_t mapping_epoch;
 
   std::vector<int32_t> blocked_by;  ///< osds on which the pg is blocked