git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PG: force auth_log_shard to be primary when appropriate 23663/head
author xie xingguo <xie.xingguo@zte.com.cn>
Tue, 21 Aug 2018 08:37:41 +0000 (16:37 +0800)
committer xie xingguo <xie.xingguo@zte.com.cn>
Fri, 31 Aug 2018 08:29:25 +0000 (16:29 +0800)
So if there are a lot of missing objects on primary, we can
make use of auth_log_shard to restore client I/O quickly.

Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
qa/standalone/osd/osd-recovery-stats.sh
src/common/options.cc
src/osd/PG.cc
src/osd/PG.h

index 5f3c9352e8e422f444a0dfbe44cc2786e7ffc403..01f163ea5aa74d04fa9781e791e7c9cd89c6333e 100755 (executable)
@@ -26,6 +26,8 @@ function run() {
     export CEPH_ARGS
     CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
     CEPH_ARGS+="--mon-host=$CEPH_MON "
+    # so we will not force auth_log_shard to be acting_primary
+    CEPH_ARGS+="--osd_force_auth_primary_missing_objects=1000000 "
     export margin=10
     export objects=200
     export poolname=test
index 233255d48f916a50d3e27079d376af5da8bf2e23..b61bfe2e9f87689f746abd884a1862b8554c0c88 100644 (file)
@@ -3217,6 +3217,10 @@ std::vector<Option> get_global_options() {
     .set_default(100)
     .set_description(""),
 
+    Option("osd_force_auth_primary_missing_objects", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+    .set_default(100)
+    .set_description("Approximate missing objects above which to force auth_log_shard to be primary temporarily"),
+
     Option("osd_async_recovery_min_pg_log_entries", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
     .set_default(100)
     .set_description("Number of entries difference above which to use asynchronous recovery when appropriate"),
index cd7d3e1839e6ff88d45d5d0e6ff8dbd59bdd1a85..51208813f8a1c04084f91291c0d1a7e01821f734 100644 (file)
@@ -1312,6 +1312,7 @@ void PG::calc_ec_acting(
  */
 void PG::calc_replicated_acting(
   map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
+  uint64_t force_auth_primary_missing_objects,
   unsigned size,
   const vector<int> &acting,
   const vector<int> &up,
@@ -1321,6 +1322,7 @@ void PG::calc_replicated_acting(
   vector<int> *want,
   set<pg_shard_t> *backfill,
   set<pg_shard_t> *acting_backfill,
+  const OSDMapRef osdmap,
   ostream &ss)
 {
   pg_shard_t auth_log_shard_id = auth_log_shard->first;
@@ -1335,7 +1337,32 @@ void PG::calc_replicated_acting(
       !primary->second.is_incomplete() &&
       primary->second.last_update >=
         auth_log_shard->second.log_tail) {
-    ss << "up_primary: " << up_primary << ") selected as primary" << std::endl;
+    if (HAVE_FEATURE(osdmap->get_up_osd_features(), SERVER_NAUTILUS)) {
+      auto approx_missing_objects =
+        primary->second.stats.stats.sum.num_objects_missing;
+      auto auth_version = auth_log_shard->second.last_update.version;
+      auto primary_version = primary->second.last_update.version;
+      if (auth_version > primary_version) {
+        approx_missing_objects += auth_version - primary_version;
+      } else {
+        approx_missing_objects += primary_version - auth_version;
+      }
+      if ((uint64_t)approx_missing_objects >
+          force_auth_primary_missing_objects) {
+        primary = auth_log_shard;
+        ss << "up_primary: " << up_primary << ") has approximate "
+           << approx_missing_objects
+           << "(>" << force_auth_primary_missing_objects <<") "
+           << "missing objects, osd." << auth_log_shard_id
+           << " selected as primary instead"
+           << std::endl;
+      } else {
+        ss << "up_primary: " << up_primary << ") selected as primary"
+           << std::endl;
+      }
+    } else {
+      ss << "up_primary: " << up_primary << ") selected as primary" << std::endl;
+    }
   } else {
     ceph_assert(!auth_log_shard->second.is_incomplete());
     ss << "up[0] needs backfill, osd." << auth_log_shard_id
@@ -1670,6 +1697,8 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id,
   if (!pool.info.is_erasure())
     calc_replicated_acting(
       auth_log_shard,
+      cct->_conf.get_val<uint64_t>(
+        "osd_force_auth_primary_missing_objects"),
       get_osdmap()->get_pg_size(info.pgid.pgid),
       acting,
       up,
@@ -1679,6 +1708,7 @@ bool PG::choose_acting(pg_shard_t &auth_log_shard_id,
       &want,
       &want_backfill,
       &want_acting_backfill,
+      get_osdmap(),
       ss);
   else
     calc_ec_acting(
index 37f72c6575499d5106cfc6c18b541cf52d56065b..162141c71c52d72c0a275a451df6e694dbdf5990 100644 (file)
@@ -1441,6 +1441,7 @@ protected:
     ostream &ss);
   static void calc_replicated_acting(
     map<pg_shard_t, pg_info_t>::const_iterator auth_log_shard,
+    uint64_t force_auth_primary_missing_objects,
     unsigned size,
     const vector<int> &acting,
     const vector<int> &up,
@@ -1450,6 +1451,7 @@ protected:
     vector<int> *want,
     set<pg_shard_t> *backfill,
     set<pg_shard_t> *acting_backfill,
+    const OSDMapRef osdmap,
     ostream &ss);
   void choose_async_recovery_ec(const map<pg_shard_t, pg_info_t> &all_info,
                                 const pg_info_t &auth_info,