]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: pg_interval_t::check_new_interval should not rely on pool.min_size to determine... 5093/head
authorGuang G Yang <yguang@renownedground.corp.gq1.yahoo.com>
Thu, 2 Jul 2015 05:29:47 +0000 (05:29 +0000)
committerGuang G Yang <yguang@renownedground.corp.gq1.yahoo.com>
Mon, 13 Jul 2015 17:42:56 +0000 (17:42 +0000)
If the pool's min_size is set improperly, during peering, pg_interval_t::check_new_interval
might wrongly determine the PG's state and cause the PG to stuck at down+peering forever

Fixes: #12162
Signed-off-by: Guang Yang yguang@yahoo-inc.com
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h
src/osd/osd_types.cc
src/osd/osd_types.h
src/test/osd/types.cc

index 7c2d8501a969b6a29ffddc52e7574ebcf935e2f5..ebc2d6d8ee252f3a2de73e699c778db5f7b4a399 100644 (file)
@@ -3049,6 +3049,8 @@ void OSD::build_past_intervals_parallel()
       }
       assert(last_map);
 
+      boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(
+        pg->get_is_recoverable_predicate());
       std::stringstream debug;
       bool new_interval = pg_interval_t::check_new_interval(
        p.primary,
@@ -3061,6 +3063,7 @@ void OSD::build_past_intervals_parallel()
        pg->info.history.last_epoch_clean,
        cur_map, last_map,
        pgid,
+        recoverable.get(),
        &pg->past_intervals,
        &debug);
       if (new_interval) {
index 7bb81bfdca4e62f9046d6b689f4f77708cb158f0..fe653036279f109d4b25a8d4b0e8910785698c09 100644 (file)
@@ -709,6 +709,8 @@ void PG::generate_past_intervals()
       pgid = pgid.get_ancestor(last_map->get_pg_num(pgid.pool()));
     cur_map->pg_to_up_acting_osds(pgid, &up, &up_primary, &acting, &primary);
 
+    boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(
+      get_is_recoverable_predicate());
     std::stringstream debug;
     bool new_interval = pg_interval_t::check_new_interval(
       old_primary,
@@ -724,6 +726,7 @@ void PG::generate_past_intervals()
       cur_map,
       last_map,
       pgid,
+      recoverable.get(),
       &past_intervals,
       &debug);
     if (new_interval) {
@@ -4656,6 +4659,8 @@ void PG::start_peering_interval(
   } else {
     std::stringstream debug;
     assert(info.history.same_interval_since != 0);
+    boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(
+      get_is_recoverable_predicate());
     bool new_interval = pg_interval_t::check_new_interval(
       old_acting_primary.osd,
       new_acting_primary,
@@ -4668,6 +4673,7 @@ void PG::start_peering_interval(
       osdmap,
       lastmap,
       info.pgid.pgid,
+      recoverable.get(),
       &past_intervals,
       &debug);
     dout(10) << __func__ << ": check_new_interval output: "
index b1aff2addc7f4ca05ed712c08ceac531bf2ea150..6c089b316f72cf7a5466710b04600a052bf5c55b 100644 (file)
@@ -197,6 +197,10 @@ public:
   void update_snap_mapper_bits(uint32_t bits) {
     snap_mapper.update_bits(bits);
   }
+  /// get_is_recoverable_predicate: caller owns returned pointer and must delete when done
+  IsPGRecoverablePredicate *get_is_recoverable_predicate() {
+    return get_pgbackend()->get_is_recoverable_predicate();
+  }
 protected:
   // Ops waiting for map, should be queued at back
   Mutex map_lock;
index 4bc95e4dd1852083c640fdbf9873b8b1ac9861ef..dbcbd3dab357a12ef6a4580c7aa5fc1a6da17bad 100644 (file)
@@ -947,6 +947,16 @@ void pg_pool_t::dump(Formatter *f) const
   f->dump_unsigned("expected_num_objects", expected_num_objects);
 }
 
+void pg_pool_t::convert_to_pg_shards(const vector<int> &from, set<pg_shard_t>* to) const {
+  for (size_t i = 0; i < from.size(); ++i) {
+    if (from[i] != CRUSH_ITEM_NONE) {
+      to->insert(
+        pg_shard_t(
+          from[i],
+          ec_pool() ? shard_id_t(i) : shard_id_t::NO_SHARD));
+    }
+  }
+}
 
 int pg_pool_t::calc_bits_of(int t)
 {
@@ -2630,6 +2640,7 @@ bool pg_interval_t::check_new_interval(
   OSDMapRef osdmap,
   OSDMapRef lastmap,
   pg_t pgid,
+  IsPGRecoverablePredicate *could_have_gone_active,
   map<epoch_t, pg_interval_t> *past_intervals,
   std::ostream *out)
 {
@@ -2663,9 +2674,14 @@ bool pg_interval_t::check_new_interval(
       if (*p != CRUSH_ITEM_NONE)
        ++num_acting;
 
+    const pg_pool_t& old_pg_pool = lastmap->get_pools().find(pgid.pool())->second;
+    set<pg_shard_t> old_acting_shards;
+    old_pg_pool.convert_to_pg_shards(old_acting, &old_acting_shards);
+
     if (num_acting &&
        i.primary != -1 &&
-       num_acting >= lastmap->get_pools().find(pgid.pool())->second.min_size) {
+       num_acting >= old_pg_pool.min_size &&
+        (*could_have_gone_active)(old_acting_shards)) {
       if (out)
        *out << "generate_past_intervals " << i
             << ": not rw,"
index 05ae133c13e9645d219c669b3a684f834db0ddfd..0c45f413ebc052b5b57bcdf0681009bb4743dce6 100644 (file)
@@ -961,6 +961,9 @@ struct pg_pool_t {
     return 0;
   }
 
+  /// converts the acting/up vector to a set of pg shards
+  void convert_to_pg_shards(const vector<int> &from, set<pg_shard_t>* to) const;
+
   typedef enum {
     CACHEMODE_NONE = 0,                  ///< no caching
     CACHEMODE_WRITEBACK = 1,             ///< write to cache, flush later
@@ -1978,6 +1981,7 @@ struct pg_interval_t {
     ceph::shared_ptr<const OSDMap> osdmap,  ///< [in] current map
     ceph::shared_ptr<const OSDMap> lastmap, ///< [in] last map
     pg_t pgid,                                  ///< [in] pgid for pg
+    IsPGRecoverablePredicate *could_have_gone_active, /// [in] predicate whether the pg can be active
     map<epoch_t, pg_interval_t> *past_intervals,///< [out] intervals
     ostream *out = 0                            ///< [out] debug ostream
     );
index fae8607af67a79915d4046e89b0f1633ff865662..63b64486217eac34dc1bb0e03b2c5fd0302d39bf 100644 (file)
@@ -21,6 +21,7 @@
 #include "gtest/gtest.h"
 #include "common/Thread.h"
 #include "include/stringify.h"
+#include "osd/ReplicatedBackend.h"
 
 #include <sstream>
 
@@ -140,6 +141,7 @@ TEST(pg_interval_t, check_new_interval)
   int64_t pool_id = 200;
   int pg_num = 4;
   __u8 min_size = 2;
+  boost::scoped_ptr<IsPGRecoverablePredicate> recoverable(new ReplicatedBackend::RPCRecPred());
   {
     OSDMap::Incremental inc(epoch + 1);
     inc.new_pools[pool_id].min_size = min_size;
@@ -184,6 +186,7 @@ TEST(pg_interval_t, check_new_interval)
                                                   osdmap,
                                                   lastmap,
                                                   pgid,
+                                                   recoverable.get(),
                                                   &past_intervals));
     ASSERT_TRUE(past_intervals.empty());
   }
@@ -213,6 +216,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals));
     ASSERT_EQ((unsigned int)1, past_intervals.size());
     ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
@@ -245,6 +249,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals));
     old_primary = new_primary;
     ASSERT_EQ((unsigned int)1, past_intervals.size());
@@ -278,6 +283,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals));
     ASSERT_EQ((unsigned int)1, past_intervals.size());
     ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
@@ -309,6 +315,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals));
     ASSERT_EQ((unsigned int)1, past_intervals.size());
     ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
@@ -347,6 +354,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals));
     ASSERT_EQ((unsigned int)1, past_intervals.size());
     ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
@@ -385,6 +393,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals));
     ASSERT_EQ((unsigned int)1, past_intervals.size());
     ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
@@ -418,6 +427,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals,
                                                  &out));
     ASSERT_EQ((unsigned int)1, past_intervals.size());
@@ -469,6 +479,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals,
                                                  &out));
     ASSERT_EQ((unsigned int)1, past_intervals.size());
@@ -503,6 +514,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals,
                                                  &out));
     ASSERT_EQ((unsigned int)1, past_intervals.size());
@@ -547,6 +559,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals,
                                                  &out));
     ASSERT_EQ((unsigned int)1, past_intervals.size());
@@ -595,6 +608,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  osdmap,
                                                  lastmap,
                                                  pgid,
+                                                  recoverable.get(),
                                                  &past_intervals,
                                                  &out));
     ASSERT_EQ((unsigned int)1, past_intervals.size());