git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/: extend pg_interval_t to include primary
author Samuel Just <sam.just@inktank.com>
Wed, 12 Feb 2014 18:44:45 +0000 (10:44 -0800)
committer Samuel Just <sam.just@inktank.com>
Tue, 18 Feb 2014 04:12:14 +0000 (20:12 -0800)
Otherwise, we cannot correctly determine up_from/up_thru for
old intervals.  Also, we need this information to determine
when a new interval starts due to a new primary without a
change in the acting set.

Signed-off-by: Samuel Just <sam.just@inktank.com>
src/osd/OSD.cc
src/osd/PG.cc
src/osd/osd_types.cc
src/osd/osd_types.h
src/test/osd/types.cc

index 15c38833ade5c0b0d18c0a70bd9b60881f7fe937..7cd40ac775f6ab9103f205fdefa1f3993e7bf8f6 100644 (file)
@@ -2100,6 +2100,7 @@ struct pistate {
   epoch_t start, end;
   vector<int> old_acting, old_up;
   epoch_t same_interval_since;
+  int primary;
 };
 
 void OSD::build_past_intervals_parallel()
@@ -2151,7 +2152,9 @@ void OSD::build_past_intervals_parallel()
        continue;
 
       vector<int> acting, up;
-      cur_map->pg_to_up_acting_osds(pg->info.pgid.pgid, up, acting);
+      int primary;
+      cur_map->pg_to_up_acting_osds(
+       pg->info.pgid.pgid, &up, 0, &acting, &primary);
 
       if (p.same_interval_since == 0) {
        dout(10) << __func__ << " epoch " << cur_epoch << " pg " << pg->info.pgid
@@ -2160,12 +2163,15 @@ void OSD::build_past_intervals_parallel()
        p.same_interval_since = cur_epoch;
        p.old_up = up;
        p.old_acting = acting;
+       p.primary = primary;
        continue;
       }
       assert(last_map);
 
       std::stringstream debug;
       bool new_interval = pg_interval_t::check_new_interval(
+       p.primary,
+       primary,
        p.old_acting, acting,
        p.old_up, up,
        p.same_interval_since,
index 954c0339901f5527c9bf109190963ecd983e571a..1e36d3fd90f06c1992397c8eb9dd4b484f8a3389 100644 (file)
@@ -606,10 +606,13 @@ void PG::generate_past_intervals()
   }
 
   OSDMapRef last_map, cur_map;
+  int primary = -1;
+  int old_primary = -1;
   vector<int> acting, up, old_acting, old_up;
 
   cur_map = osd->get_map(cur_epoch);
-  cur_map->pg_to_up_acting_osds(get_pgid().pgid, up, acting);
+  cur_map->pg_to_up_acting_osds(
+    get_pgid().pgid, &up, 0, &acting, &primary);
   epoch_t same_interval_since = cur_epoch;
   dout(10) << __func__ << " over epochs " << cur_epoch << "-"
           << end_epoch << dendl;
@@ -618,12 +621,16 @@ void PG::generate_past_intervals()
     last_map.swap(cur_map);
     old_up.swap(up);
     old_acting.swap(acting);
+    old_primary = primary;
 
     cur_map = osd->get_map(cur_epoch);
-    cur_map->pg_to_up_acting_osds(get_pgid().pgid, up, acting);
+    cur_map->pg_to_up_acting_osds(
+      get_pgid().pgid, &up, 0, &acting, &primary);
 
     std::stringstream debug;
     bool new_interval = pg_interval_t::check_new_interval(
+      old_primary,
+      primary,
       old_acting,
       acting,
       old_up,
@@ -2093,10 +2100,14 @@ void PG::update_heartbeat_peers()
 
   set<int> new_peers;
   if (is_primary()) {
-    for (unsigned i=0; i<acting.size(); i++)
-      new_peers.insert(acting[i]);
-    for (unsigned i=0; i<up.size(); i++)
-      new_peers.insert(up[i]);
+    for (unsigned i=0; i<acting.size(); i++) {
+      if (acting[i] != CRUSH_ITEM_NONE)
+       new_peers.insert(acting[i]);
+    }
+    for (unsigned i=0; i<up.size(); i++) {
+      if (up[i] != CRUSH_ITEM_NONE)
+       new_peers.insert(up[i]);
+    }
     for (map<pg_shard_t,pg_info_t>::iterator p = peer_info.begin();
         p != peer_info.end();
         ++p)
@@ -4584,6 +4595,8 @@ void PG::start_peering_interval(
   } else {
     std::stringstream debug;
     bool new_interval = pg_interval_t::check_new_interval(
+      oldprimary.osd,
+      new_acting_primary,
       oldacting, newacting,
       oldup, newup,
       info.history.same_interval_since,
index caae5782b646be84d299b4e2248dd8099577c6a6..d8af4c1097f26d5a78c2318726fbd507cd82dccb 100644 (file)
@@ -2056,23 +2056,30 @@ ostream &operator<<(ostream &lhs, const pg_notify_t &notify)
 
 void pg_interval_t::encode(bufferlist& bl) const
 {
-  ENCODE_START(2, 2, bl);
+  ENCODE_START(3, 2, bl);
   ::encode(first, bl);
   ::encode(last, bl);
   ::encode(up, bl);
   ::encode(acting, bl);
   ::encode(maybe_went_rw, bl);
+  ::encode(primary, bl);
   ENCODE_FINISH(bl);
 }
 
 void pg_interval_t::decode(bufferlist::iterator& bl)
 {
-  DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl);
+  DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl);
   ::decode(first, bl);
   ::decode(last, bl);
   ::decode(up, bl);
   ::decode(acting, bl);
   ::decode(maybe_went_rw, bl);
+  if (struct_v >= 3) {
+    ::decode(primary, bl);
+  } else {
+    if (acting.size())
+      primary = acting[0];
+  }
   DECODE_FINISH(bl);
 }
 
@@ -2104,6 +2111,8 @@ void pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
 }
 
 bool pg_interval_t::check_new_interval(
+  int old_primary,
+  int new_primary,
   const vector<int> &old_acting,
   const vector<int> &new_acting,
   const vector<int> &old_up,
@@ -2118,7 +2127,8 @@ bool pg_interval_t::check_new_interval(
   std::ostream *out)
 {
   // remember past interval
-  if (new_acting != old_acting || new_up != old_up ||
+  if (old_primary != new_primary ||
+      new_acting != old_acting || new_up != old_up ||
       (!(lastmap->get_pools().count(pool_id))) ||
       (lastmap->get_pools().find(pool_id)->second.min_size !=
        osdmap->get_pools().find(pool_id)->second.min_size)  ||
@@ -2129,24 +2139,25 @@ bool pg_interval_t::check_new_interval(
     i.last = osdmap->get_epoch() - 1;
     i.acting = old_acting;
     i.up = old_up;
+    i.primary = old_primary;
 
-    if (!i.acting.empty() &&
+    if (!i.acting.empty() && i.primary != -1 &&
        i.acting.size() >=
        lastmap->get_pools().find(pool_id)->second.min_size) {
       if (out)
        *out << "generate_past_intervals " << i
             << ": not rw,"
-            << " up_thru " << lastmap->get_up_thru(i.acting[0])
-            << " up_from " << lastmap->get_up_from(i.acting[0])
+            << " up_thru " << lastmap->get_up_thru(i.primary)
+            << " up_from " << lastmap->get_up_from(i.primary)
             << " last_epoch_clean " << last_epoch_clean
             << std::endl;
-      if (lastmap->get_up_thru(i.acting[0]) >= i.first &&
-         lastmap->get_up_from(i.acting[0]) <= i.first) {
+      if (lastmap->get_up_thru(i.primary) >= i.first &&
+         lastmap->get_up_from(i.primary) <= i.first) {
        i.maybe_went_rw = true;
        if (out)
          *out << "generate_past_intervals " << i
-              << " : primary up " << lastmap->get_up_from(i.acting[0])
-              << "-" << lastmap->get_up_thru(i.acting[0])
+              << " : primary up " << lastmap->get_up_from(i.primary)
+              << "-" << lastmap->get_up_thru(i.primary)
               << " includes interval"
               << std::endl;
       } else if (last_epoch_clean >= i.first &&
@@ -2168,8 +2179,8 @@ bool pg_interval_t::check_new_interval(
        i.maybe_went_rw = false;
        if (out)
          *out << "generate_past_intervals " << i
-              << " : primary up " << lastmap->get_up_from(i.acting[0])
-              << "-" << lastmap->get_up_thru(i.acting[0])
+              << " : primary up " << lastmap->get_up_from(i.primary)
+              << "-" << lastmap->get_up_thru(i.primary)
               << " does not include interval"
               << std::endl;
       }
index fe0493436b5af55391de3969e24c1314461a14e1..c6895f5f3aa7bb97a4c6cd831855067a591a41f7 100644 (file)
@@ -1634,8 +1634,9 @@ struct pg_interval_t {
   vector<int> up, acting;
   epoch_t first, last;
   bool maybe_went_rw;
+  int primary;
 
-  pg_interval_t() : first(0), last(0), maybe_went_rw(false) {}
+  pg_interval_t() : first(0), last(0), maybe_went_rw(false), primary(-1) {}
 
   void encode(bufferlist& bl) const;
   void decode(bufferlist::iterator& bl);
@@ -1647,6 +1648,8 @@ struct pg_interval_t {
    * if an interval was closed out.
    */
   static bool check_new_interval(
+    int old_primary,                            ///< [in] primary as of lastmap
+    int new_primary,                            ///< [in] primary as of osdmap
     const vector<int> &old_acting,              ///< [in] acting as of lastmap
     const vector<int> &new_acting,              ///< [in] acting as of osdmap
     const vector<int> &old_up,                  ///< [in] up as of lastmap
@@ -2685,11 +2688,10 @@ public:
     void dec(list<OpRequestRef> *requeue) {
       assert(count > 0);
       assert(requeue);
-      assert(requeue->empty());
       count--;
       if (count == 0) {
        state = RWNONE;
-       requeue->swap(waiters);
+       requeue->splice(requeue->end(), waiters);
       }
     }
     void put_read(list<OpRequestRef> *requeue) {
index a0d660fa92a5d4981a501985ba129219b35ebb0f..6380211cf811c203db54d858d097614b6dd226ff 100644 (file)
@@ -151,6 +151,8 @@ TEST(pg_interval_t, check_new_interval)
   new_acting.push_back(osd_id);
   new_acting.push_back(osd_id + 1);
   vector<int> old_acting = new_acting;
+  int old_primary = osd_id;
+  int new_primary = osd_id;
   vector<int> new_up;
   new_up.push_back(osd_id);
   vector<int> old_up = new_up;
@@ -166,7 +168,9 @@ TEST(pg_interval_t, check_new_interval)
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_FALSE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_FALSE(pg_interval_t::check_new_interval(old_primary,
+                                                  new_primary,
+                                                  old_acting,
                                                   new_acting,
                                                   old_up,
                                                   new_up,
@@ -192,7 +196,9 @@ TEST(pg_interval_t, check_new_interval)
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,
@@ -215,13 +221,15 @@ TEST(pg_interval_t, check_new_interval)
   //
   {
     vector<int> new_acting;
-    int new_primary = osd_id + 1;
-    new_acting.push_back(new_primary);
+    int _new_primary = osd_id + 1;
+    new_acting.push_back(_new_primary);
 
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,
@@ -232,6 +240,7 @@ TEST(pg_interval_t, check_new_interval)
                                                  pool_id,
                                                  pgid,
                                                  &past_intervals));
+    old_primary = new_primary;
     ASSERT_EQ((unsigned int)1, past_intervals.size());
     ASSERT_EQ(same_interval_since, past_intervals[same_interval_since].first);
     ASSERT_EQ(osdmap->get_epoch() - 1, past_intervals[same_interval_since].last);
@@ -244,13 +253,15 @@ TEST(pg_interval_t, check_new_interval)
   //
   {
     vector<int> new_up;
-    int new_primary = osd_id + 1;
-    new_up.push_back(new_primary);
+    int _new_primary = osd_id + 1;
+    new_up.push_back(_new_primary);
 
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,
@@ -285,7 +296,9 @@ TEST(pg_interval_t, check_new_interval)
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,
@@ -320,7 +333,9 @@ TEST(pg_interval_t, check_new_interval)
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,
@@ -350,7 +365,9 @@ TEST(pg_interval_t, check_new_interval)
     ostringstream out;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,
@@ -398,7 +415,9 @@ TEST(pg_interval_t, check_new_interval)
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,
@@ -429,7 +448,9 @@ TEST(pg_interval_t, check_new_interval)
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,
@@ -470,7 +491,9 @@ TEST(pg_interval_t, check_new_interval)
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,
@@ -515,7 +538,9 @@ TEST(pg_interval_t, check_new_interval)
     map<epoch_t, pg_interval_t> past_intervals;
 
     ASSERT_TRUE(past_intervals.empty());
-    ASSERT_TRUE(pg_interval_t::check_new_interval(old_acting,
+    ASSERT_TRUE(pg_interval_t::check_new_interval(old_primary,
+                                                 new_primary,
+                                                 old_acting,
                                                  new_acting,
                                                  old_up,
                                                  new_up,