git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: For recovery track OSDs that have 0 missing to know how degraded we are
author: David Zafman <dzafman@redhat.com>
Thu, 11 Apr 2019 01:45:32 +0000 (18:45 -0700)
committer: Smith Farm <smithfarm@vanguard2.suse.cz>
Tue, 30 Apr 2019 15:57:40 +0000 (17:57 +0200)
Add backfill priority log message and remove redundancy from the recovery priority log message

Fixes: https://tracker.ceph.com/issues/39099
Signed-off-by: David Zafman <dzafman@redhat.com>
(cherry picked from commit f0e7202cffed6e4adf54513891f1deeea2341d81)

Conflicts:
src/osd/osd_types.h
- nautilus does not have 75014ceb1437c5bb48293574ec6f991e4bec64bb

src/osd/PG.cc
src/osd/osd_types.cc
src/osd/osd_types.h

index f7f536265c433399e09ed9aa3268dc7c802be68e..8e8fceafdc50ee0afbbc889330eb6e9487add807 100644 (file)
@@ -2473,15 +2473,23 @@ inline int PG::clamp_recovery_priority(int priority)
 unsigned PG::get_recovery_priority()
 {
   // a higher value -> a higher priority
-  int64_t ret = 0;
+  int ret = OSD_RECOVERY_PRIORITY_BASE;
 
   if (state & PG_STATE_FORCED_RECOVERY) {
     ret = OSD_RECOVERY_PRIORITY_FORCED;
   } else {
-    pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &ret);
-    ret = clamp_recovery_priority(OSD_RECOVERY_PRIORITY_BASE + ret);
+    // XXX: This priority boost isn't so much about inactive, but about data-at-risk
+    if (is_degraded() && info.stats.avail_no_missing.size() < pool.info.min_size) {
+      // inactive: no. of replicas < min_size, highest priority since it blocks IO
+      ret = OSD_RECOVERY_INACTIVE_PRIORITY_BASE + (pool.info.min_size - info.stats.avail_no_missing.size());
+    }
+
+    int64_t pool_recovery_priority = 0;
+    pool.info.opts.get(pool_opts_t::RECOVERY_PRIORITY, &pool_recovery_priority);
+
+    ret = clamp_recovery_priority(pool_recovery_priority + ret);
   }
-  dout(20) << __func__ << " recovery priority for " << *this << " is " << ret << ", state is " << state << dendl;
+  dout(20) << __func__ << " recovery priority is " << ret << dendl;
   return static_cast<unsigned>(ret);
 }
 
@@ -2513,6 +2521,7 @@ unsigned PG::get_backfill_priority()
     ret = clamp_recovery_priority(pool_recovery_priority + ret);
   }
 
+  dout(20) << __func__ << " backfill priority is " << ret << dendl;
   return static_cast<unsigned>(ret);
 }
 
@@ -3203,6 +3212,7 @@ void PG::_update_calc_stats()
   info.stats.stats.sum.num_objects_degraded = 0;
   info.stats.stats.sum.num_objects_unfound = 0;
   info.stats.stats.sum.num_objects_misplaced = 0;
+  info.stats.avail_no_missing.clear();
 
   if ((is_remapped() || is_undersized() || !is_clean()) && (is_peered() || is_activating())) {
     dout(20) << __func__ << " actingset " << actingset << " upset "
@@ -3236,6 +3246,8 @@ void PG::_update_calc_stats()
         acting_source_objects.insert(make_pair(missing, pg_whoami));
       }
       info.stats.stats.sum.num_objects_missing_on_primary = missing;
+      if (missing == 0)
+        info.stats.avail_no_missing.push_back(pg_whoami);
       dout(20) << __func__ << " shard " << pg_whoami
                << " primary objects " << num_objects
                << " missing " << missing
@@ -3269,6 +3281,8 @@ void PG::_update_calc_stats()
        acting_source_objects.insert(make_pair(missing, peer.first));
       }
       peer.second.stats.stats.sum.num_objects_missing = missing;
+      if (missing == 0)
+        info.stats.avail_no_missing.push_back(peer.first);
       dout(20) << __func__ << " shard " << peer.first
                << " objects " << peer_num_objects
                << " missing " << missing
index d114e1e473ba8d91a30ebfb93955eac6a361e9e6..ba00dd54ba4f040f1bc6f8c7a6aaf67d14f5a316 100644 (file)
@@ -2641,6 +2641,10 @@ void pg_stat_t::dump(Formatter *f) const
   for (vector<int32_t>::const_iterator p = acting.begin(); p != acting.end(); ++p)
     f->dump_int("osd", *p);
   f->close_section();
+  f->open_array_section("avail_no_missing");
+  for (auto p = avail_no_missing.cbegin(); p != avail_no_missing.cend(); ++p)
+    f->dump_stream("shard") << *p;
+  f->close_section();
   f->open_array_section("blocked_by");
   for (vector<int32_t>::const_iterator p = blocked_by.begin();
        p != blocked_by.end(); ++p)
@@ -2677,7 +2681,7 @@ void pg_stat_t::dump_brief(Formatter *f) const
 
 void pg_stat_t::encode(bufferlist &bl) const
 {
-  ENCODE_START(25, 22, bl);
+  ENCODE_START(26, 22, bl);
   encode(version, bl);
   encode(reported_seq, bl);
   encode(reported_epoch, bl);
@@ -2723,6 +2727,7 @@ void pg_stat_t::encode(bufferlist &bl) const
   encode(top_state, bl);
   encode(purged_snaps, bl);
   encode(manifest_stats_invalid, bl);
+  encode(avail_no_missing, bl);
   ENCODE_FINISH(bl);
 }
 
@@ -2730,7 +2735,7 @@ void pg_stat_t::decode(bufferlist::const_iterator &bl)
 {
   bool tmp;
   uint32_t old_state;
-  DECODE_START(25, bl);
+  DECODE_START(26, bl);
   decode(version, bl);
   decode(reported_seq, bl);
   decode(reported_epoch, bl);
@@ -2793,6 +2798,9 @@ void pg_stat_t::decode(bufferlist::const_iterator &bl)
     } else {
       manifest_stats_invalid = true;
     }
+    if (struct_v >= 26) {
+      decode(avail_no_missing, bl);
+    }
   }
   DECODE_FINISH(bl);
 }
@@ -2834,6 +2842,7 @@ void pg_stat_t::generate_test_instances(list<pg_stat_t*>& o)
   a.up.push_back(123);
   a.up_primary = 123;
   a.acting.push_back(456);
+  a.avail_no_missing.push_back(pg_shard_t(456, shard_id_t::NO_SHARD));
   a.acting_primary = 456;
   o.push_back(new pg_stat_t(a));
 
@@ -2878,6 +2887,7 @@ bool operator==(const pg_stat_t& l, const pg_stat_t& r)
     l.ondisk_log_size == r.ondisk_log_size &&
     l.up == r.up &&
     l.acting == r.acting &&
+    l.avail_no_missing == r.avail_no_missing &&
     l.mapping_epoch == r.mapping_epoch &&
     l.blocked_by == r.blocked_by &&
     l.last_became_active == r.last_became_active &&
index c5fa0ca7dc38e2067bb6cae49590ec4ca24c46d9..90b4edea6181a50167a5e9021870765685532c0b 100644 (file)
@@ -82,6 +82,9 @@
 /// base backfill priority for MBackfillReserve (inactive PG)
 #define OSD_BACKFILL_INACTIVE_PRIORITY_BASE 220
 
+/// base recovery priority for MRecoveryReserve (inactive PG)
+#define OSD_RECOVERY_INACTIVE_PRIORITY_BASE 220
+
 /// max manually/automatically set recovery priority for MBackfillReserve
 #define OSD_RECOVERY_PRIORITY_MAX 253
 
@@ -2078,6 +2081,7 @@ struct pg_stat_t {
   int64_t ondisk_log_size;    // >= active_log_size
 
   vector<int32_t> up, acting;
+  vector<pg_shard_t> avail_no_missing;
   epoch_t mapping_epoch;
 
   vector<int32_t> blocked_by;  ///< osds on which the pg is blocked