git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PG: do not use approx_missing_objects pre-nautilus 28160/head
author Neha Ojha <nojha@redhat.com>
Thu, 25 Apr 2019 02:15:27 +0000 (19:15 -0700)
committer Prashant D <pdhange@redhat.com>
Fri, 17 May 2019 00:20:40 +0000 (20:20 -0400)
We changed the async recovery cost calculation in nautilus to also take
approx_missing_objects into account, in commit
ab241bf7e927cda2d0ed1698383d18dc4a4b601c. That commit depends on
https://github.com/ceph/ceph/pull/23663, and hence wasn't backported to mimic.

Mimic uses only the difference in log length as the cost. Because of this,
the same OSD might be assigned different costs in a mixed mimic and nautilus
(or above) cluster. This can lead to choose_acting() cycling between OSDs when
trying to select the acting set and async_recovery_targets.

Fixes: https://tracker.ceph.com/issues/39441
Signed-off-by: Neha Ojha <nojha@redhat.com>
(cherry picked from commit 4c617ecf1cf6f25cca42e5d59fa162dbc60f4de8)

Conflicts:
src/osd/PG.cc : Resolved in choose_async_recovery_ec and
choose_async_recovery_replicated

src/osd/PG.cc
src/osd/PG.h

index cbfa2f9c37387a4f216f3469385e4029b3c1dddc..97eabc9967dd93255dbe1ca689648dcaec90eb41 100644 (file)
@@ -1554,7 +1554,8 @@ bool PG::recoverable_and_ge_min_size(const vector<int> &want) const
 void PG::choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
                                   const pg_info_t &auth_info,
                                   vector<int> *want,
-                                  set<pg_shard_t> *async_recovery) const
+                                  set<pg_shard_t> *async_recovery,
+                                  const OSDMapRef osdmap) const
 {
   set<pair<int, pg_shard_t> > candidates_by_cost;
   for (uint8_t i = 0; i < want->size(); ++i) {
@@ -1581,14 +1582,21 @@ void PG::choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
     // past the authoritative last_update the same as those equal to it.
     version_t auth_version = auth_info.last_update.version;
     version_t candidate_version = shard_info.last_update.version;
-    auto approx_missing_objects =
-      shard_info.stats.stats.sum.num_objects_missing;
-    if (auth_version > candidate_version) {
-      approx_missing_objects += auth_version - candidate_version;
-    }
-    if (static_cast<uint64_t>(approx_missing_objects) >
-       cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
-      candidates_by_cost.insert(make_pair(approx_missing_objects, shard_i));
+    if (HAVE_FEATURE(osdmap->get_up_osd_features(), SERVER_NAUTILUS)) {
+      auto approx_missing_objects =
+        shard_info.stats.stats.sum.num_objects_missing;
+      if (auth_version > candidate_version) {
+        approx_missing_objects += auth_version - candidate_version;
+      }
+      if (static_cast<uint64_t>(approx_missing_objects) >
+         cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
+        candidates_by_cost.emplace(approx_missing_objects, shard_i);
+      }
+    } else {
+      if (auth_version > candidate_version &&
+          (auth_version - candidate_version) > cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
+        candidates_by_cost.insert(make_pair(auth_version - candidate_version, shard_i));
+      }
     }
   }
 
@@ -1613,7 +1621,8 @@ void PG::choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
 void PG::choose_async_recovery_replicated(const map<pg_shard_t, pg_info_t> &all_info,
                                           const pg_info_t &auth_info,
                                           vector<int> *want,
-                                          set<pg_shard_t> *async_recovery) const
+                                          set<pg_shard_t> *async_recovery,
+                                          const OSDMapRef osdmap) const
 {
   set<pair<int, pg_shard_t> > candidates_by_cost;
   for (auto osd_num : *want) {
@@ -1632,16 +1641,28 @@ void PG::choose_async_recovery_replicated(const map<pg_shard_t, pg_info_t> &all_
     // logs plus historical missing objects as the cost of recovery
     version_t auth_version = auth_info.last_update.version;
     version_t candidate_version = shard_info.last_update.version;
-    auto approx_missing_objects =
-      shard_info.stats.stats.sum.num_objects_missing;
-    if (auth_version > candidate_version) {
-      approx_missing_objects += auth_version - candidate_version;
+    if (HAVE_FEATURE(osdmap->get_up_osd_features(), SERVER_NAUTILUS)) {
+      auto approx_missing_objects =
+        shard_info.stats.stats.sum.num_objects_missing;
+      if (auth_version > candidate_version) {
+        approx_missing_objects += auth_version - candidate_version;
+      } else {
+        approx_missing_objects += candidate_version - auth_version;
+      }
+      if (static_cast<uint64_t>(approx_missing_objects)  >
+         cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
+        candidates_by_cost.emplace(approx_missing_objects, shard_i);
+      }
     } else {
-      approx_missing_objects += candidate_version - auth_version;
-    }
-    if (static_cast<uint64_t>(approx_missing_objects)  >
-       cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
-      candidates_by_cost.insert(make_pair(approx_missing_objects, shard_i));
+      size_t approx_entries;
+      if (auth_version > candidate_version) {
+        approx_entries = auth_version - candidate_version;
+      } else {
+        approx_entries = candidate_version - auth_version;
+      }
+      if (approx_entries > cct->_conf.get_val<uint64_t>("osd_async_recovery_min_cost")) {
+        candidates_by_cost.insert(make_pair(approx_entries, shard_i));
+      }
     }
   }
 
@@ -1759,9 +1780,9 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id,
   set<pg_shard_t> want_async_recovery;
   if (HAVE_FEATURE(get_osdmap()->get_up_osd_features(), SERVER_MIMIC)) {
     if (pool.info.is_erasure()) {
-      choose_async_recovery_ec(all_info, auth_log_shard->second, &want, &want_async_recovery);
+      choose_async_recovery_ec(all_info, auth_log_shard->second, &want, &want_async_recovery, get_osdmap());
     } else {
-      choose_async_recovery_replicated(all_info, auth_log_shard->second, &want, &want_async_recovery);
+      choose_async_recovery_replicated(all_info, auth_log_shard->second, &want, &want_async_recovery, get_osdmap());
     }
   }
   if (want != acting) {
index 4dca1c9ab1df713e6830be84cbdad11db0aecacd..c7c39a1adc4f5a776a9ee671bbfed2411b04c865 100644 (file)
@@ -1569,11 +1569,13 @@ protected:
   void choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
                                 const pg_info_t &auth_info,
                                 vector<int> *want,
-                                set<pg_shard_t> *async_recovery) const;
+                                set<pg_shard_t> *async_recovery,
+                                const OSDMapRef osdmap) const;
   void choose_async_recovery_replicated(const map<pg_shard_t, pg_info_t> &all_info,
                                         const pg_info_t &auth_info,
                                         vector<int> *want,
-                                        set<pg_shard_t> *async_recovery) const;
+                                        set<pg_shard_t> *async_recovery,
+                                        const OSDMapRef osdmap) const;
 
   bool recoverable_and_ge_min_size(const vector<int> &want) const;
   bool choose_acting(pg_shard_t &auth_log_shard,