]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
osd/PG: do not use approx_missing_objects pre-nautilus
authorNeha Ojha <nojha@redhat.com>
Thu, 25 Apr 2019 02:15:27 +0000 (19:15 -0700)
committerNeha Ojha <nojha@redhat.com>
Thu, 25 Apr 2019 21:44:15 +0000 (14:44 -0700)
We changed async recovery cost calculation in nautilus to also take
into account approx_missing_objects in ab241bf7e927cda2d0ed1698383d18dc4a4b601c
This commit depends on https://github.com/ceph/ceph/pull/23663, hence
wasn't backported to mimic.

Mimic only uses the difference in length of logs as the cost. Due to this,
the same OSD might have different costs in a mixed mimic and nautilus(or above)
cluster. This can lead to choose_acting() cycling between OSDs, when trying
to select the acting set and async_recovery_targets.

Fixes: https://tracker.ceph.com/issues/39441
Signed-off-by: Neha Ojha <nojha@redhat.com>
src/osd/PG.cc
src/osd/PG.h

index 88219371434efb57ec276a12fed9f671e09af4e3..e59397eb32396edaf919919da419067d79771a3a 100644 (file)
@@ -1553,7 +1553,8 @@ bool PG::recoverable_and_ge_min_size(const vector<int> &want) const
 void PG::choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
                                   const pg_info_t &auth_info,
                                   vector<int> *want,
-                                  set<pg_shard_t> *async_recovery) const
+                                  set<pg_shard_t> *async_recovery,
+                                  const OSDMapRef osdmap) const
 {
   set<pair<int, pg_shard_t> > candidates_by_cost;
   for (uint8_t i = 0; i < want->size(); ++i) {
@@ -1580,14 +1581,21 @@ void PG::choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
     // past the authoritative last_update the same as those equal to it.
     version_t auth_version = auth_info.last_update.version;
     version_t candidate_version = shard_info.last_update.version;
-    auto approx_missing_objects =
-      shard_info.stats.stats.sum.num_objects_missing;
-    if (auth_version > candidate_version) {
-      approx_missing_objects += auth_version - candidate_version;
-    }
-    if (static_cast<uint64_t>(approx_missing_objects) >
-       cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
-      candidates_by_cost.emplace(approx_missing_objects, shard_i);
+    if (HAVE_FEATURE(osdmap->get_up_osd_features(), SERVER_NAUTILUS)) {
+      auto approx_missing_objects =
+        shard_info.stats.stats.sum.num_objects_missing;
+      if (auth_version > candidate_version) {
+        approx_missing_objects += auth_version - candidate_version;
+      }
+      if (static_cast<uint64_t>(approx_missing_objects) >
+         cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
+        candidates_by_cost.emplace(approx_missing_objects, shard_i);
+      }
+    } else {
+      if (auth_version > candidate_version &&
+          (auth_version - candidate_version) > cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
+        candidates_by_cost.insert(make_pair(auth_version - candidate_version, shard_i));
+      }
     }
   }
 
@@ -1612,7 +1620,8 @@ void PG::choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
 void PG::choose_async_recovery_replicated(const map<pg_shard_t, pg_info_t> &all_info,
                                           const pg_info_t &auth_info,
                                           vector<int> *want,
-                                          set<pg_shard_t> *async_recovery) const
+                                          set<pg_shard_t> *async_recovery,
+                                          const OSDMapRef osdmap) const
 {
   set<pair<int, pg_shard_t> > candidates_by_cost;
   for (auto osd_num : *want) {
@@ -1631,16 +1640,28 @@ void PG::choose_async_recovery_replicated(const map<pg_shard_t, pg_info_t> &all_
     // logs plus historical missing objects as the cost of recovery
     version_t auth_version = auth_info.last_update.version;
     version_t candidate_version = shard_info.last_update.version;
-    auto approx_missing_objects =
-      shard_info.stats.stats.sum.num_objects_missing;
-    if (auth_version > candidate_version) {
-      approx_missing_objects += auth_version - candidate_version;
+    if (HAVE_FEATURE(osdmap->get_up_osd_features(), SERVER_NAUTILUS)) {
+      auto approx_missing_objects =
+        shard_info.stats.stats.sum.num_objects_missing;
+      if (auth_version > candidate_version) {
+        approx_missing_objects += auth_version - candidate_version;
+      } else {
+        approx_missing_objects += candidate_version - auth_version;
+      }
+      if (static_cast<uint64_t>(approx_missing_objects)  >
+         cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
+        candidates_by_cost.emplace(approx_missing_objects, shard_i);
+      }
     } else {
-      approx_missing_objects += candidate_version - auth_version;
-    }
-    if (static_cast<uint64_t>(approx_missing_objects)  >
-       cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
-      candidates_by_cost.emplace(approx_missing_objects, shard_i);
+      size_t approx_entries;
+      if (auth_version > candidate_version) {
+        approx_entries = auth_version - candidate_version;
+      } else {
+        approx_entries = candidate_version - auth_version;
+      }
+      if (approx_entries > cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
+        candidates_by_cost.insert(make_pair(approx_entries, shard_i));
+      }
     }
   }
 
@@ -1758,9 +1779,9 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id,
   set<pg_shard_t> want_async_recovery;
   if (HAVE_FEATURE(get_osdmap()->get_up_osd_features(), SERVER_MIMIC)) {
     if (pool.info.is_erasure()) {
-      choose_async_recovery_ec(all_info, auth_log_shard->second, &want, &want_async_recovery);
+      choose_async_recovery_ec(all_info, auth_log_shard->second, &want, &want_async_recovery, get_osdmap());
     } else {
-      choose_async_recovery_replicated(all_info, auth_log_shard->second, &want, &want_async_recovery);
+      choose_async_recovery_replicated(all_info, auth_log_shard->second, &want, &want_async_recovery, get_osdmap());
     }
   }
   if (want != acting) {
index fd37b4e456ef36fd5fba8ce4b3c8a0052ae48df8..4aab6e34801817a183ba5cc0c0ac3abbcb855621 100644 (file)
@@ -1572,11 +1572,13 @@ protected:
   void choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
                                 const pg_info_t &auth_info,
                                 vector<int> *want,
-                                set<pg_shard_t> *async_recovery) const;
+                                set<pg_shard_t> *async_recovery,
+                                const OSDMapRef osdmap) const;
   void choose_async_recovery_replicated(const map<pg_shard_t, pg_info_t> &all_info,
                                         const pg_info_t &auth_info,
                                         vector<int> *want,
-                                        set<pg_shard_t> *async_recovery) const;
+                                        set<pg_shard_t> *async_recovery,
+                                        const OSDMapRef osdmap) const;
 
   bool recoverable_and_ge_min_size(const vector<int> &want) const;
   bool choose_acting(pg_shard_t &auth_log_shard,