git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
PG: check_new_interval now handles adding new maps to past intervals
author: Samuel Just <samuel.just@dreamhost.com>
Mon, 30 Apr 2012 22:09:23 +0000 (15:09 -0700)
committer: Samuel Just <samuel.just@dreamhost.com>
Tue, 1 May 2012 20:12:05 +0000 (13:12 -0700)
Signed-off-by: Samuel Just <samuel.just@dreamhost.com>
src/osd/PG.cc
src/osd/PG.h
src/osd/osd_types.cc
src/osd/osd_types.h

index 8e1e4b232fa80ec72f454beca00c0334598ecf6a..3efacf46faf882829d35d474a43ea4263d07ef08 100644 (file)
@@ -712,87 +712,59 @@ bool PG::needs_recovery() const
 
 void PG::generate_past_intervals()
 {
-  epoch_t first_epoch = 0;
-  epoch_t stop = MAX(info.history.epoch_created, info.history.last_epoch_clean);
-  if (stop < osd->superblock.oldest_map)
-    stop = osd->superblock.oldest_map;   // this is a lower bound on last_epoch_clean cluster-wide.     
-  epoch_t last_epoch = info.history.same_interval_since - 1;
-
-  if (last_epoch < stop) {
-    dout(10) << __func__ << " last_epoch " << last_epoch << " < oldest " << stop
-            << ", nothing to do" << dendl;
-    return;
-  }
-
+  epoch_t end_epoch = info.history.same_interval_since;
   // Do we already have the intervals we want?
   map<epoch_t,pg_interval_t>::const_iterator pif = past_intervals.begin();
   if (pif != past_intervals.end()) {
-    if (pif->first <= stop) {
-      dout(10) << __func__ << " already have past intervals back to "
-              << stop << dendl;
+    if (pif->first <= info.history.last_epoch_clean) {
+      dout(10) << __func__ << ": already have past intervals back to "
+              << info.history.last_epoch_clean << dendl;
       return;
     }
-    dout(10) << __func__ << " only have past intervals back to " << pif->first << dendl;
-    last_epoch = pif->first - 1;
+    end_epoch = past_intervals.begin()->first;
   }
 
-  dout(10) << __func__ << " over epochs " << stop << "-" << last_epoch << dendl;
-
-  OSDMapRef nextmap = osd->get_map(last_epoch);
-  for (;
-       last_epoch >= stop;
-       last_epoch = first_epoch - 1) {
-    OSDMapRef lastmap = nextmap;
-    vector<int> tup, tacting;
-    lastmap->pg_to_up_acting_osds(get_pgid(), tup, tacting);
-    
-    // calc first_epoch, first_map
-    for (first_epoch = last_epoch; first_epoch > stop; first_epoch--) {
-      nextmap = osd->get_map(first_epoch-1);
-      vector<int> t;
-      nextmap->pg_to_acting_osds(get_pgid(), t);
-      if (t != tacting)
-       break;
-    }
-
-    pg_interval_t &i = past_intervals[first_epoch];
-    i.first = first_epoch;
-    i.last = last_epoch;
-    i.up.swap(tup);
-    i.acting.swap(tacting);
-    if (i.acting.size()) {
-      if (lastmap->get_up_thru(i.acting[0]) >= first_epoch &&
-         lastmap->get_up_from(i.acting[0]) <= first_epoch) {
-       i.maybe_went_rw = true;
-       dout(10) << "generate_past_intervals " << i
-                << " : primary up " << lastmap->get_up_from(i.acting[0])
-                << "-" << lastmap->get_up_thru(i.acting[0])
-                << dendl;
-      } else if (info.history.last_epoch_clean >= first_epoch &&
-                info.history.last_epoch_clean <= last_epoch) {
-       // If the last_epoch_clean is included in this interval, then
-       // the pg must have been rw (for recovery to have completed).
-       // This is important because we won't know the _real_
-       // first_epoch because we stop at last_epoch_clean, and we
-       // don't want the oldest interval to randomly have
-       // maybe_went_rw false depending on the relative up_thru vs
-       // last_epoch_clean timing.
-       i.maybe_went_rw = true;
-       dout(10) << "generate_past_intervals " << i
-                << " : includes last_epoch_clean " << info.history.last_epoch_clean
-                << " and presumed to have been rw"
-                << dendl;
-      } else {
-       i.maybe_went_rw = false;
-       dout(10) << "generate_past_intervals " << i
-                << " : primary up " << lastmap->get_up_from(i.acting[0])
-                << "-" << lastmap->get_up_thru(i.acting[0])
-                << " does not include interval"
-                << dendl;
-      }
-    } else {
-      i.maybe_went_rw = false;
-      dout(10) << "generate_past_intervals " << i << " : empty" << dendl;
+  epoch_t cur_epoch = MAX(MAX(info.history.epoch_created,
+                             info.history.last_epoch_clean),
+                         osd->superblock.oldest_map);
+  OSDMapRef last_map, cur_map;
+  if (cur_epoch >= end_epoch) {
+    dout(10) << __func__ << " start epoch " << cur_epoch
+            << " >= end epoch " << end_epoch
+            << ", nothing to do" << dendl;
+    return;
+  }
+  vector<int> acting, up, old_acting, old_up;
+
+  cur_map = osd->get_map(cur_epoch);
+  cur_map->pg_to_up_acting_osds(get_pgid(), up, acting);
+  epoch_t same_interval_since = cur_epoch;
+  dout(10) << __func__ << " over epochs " << cur_epoch << "-"
+          << end_epoch << dendl;
+  ++cur_epoch;
+  for (; cur_epoch <= end_epoch; ++cur_epoch) {
+    last_map.swap(cur_map);
+    old_up.swap(up);
+    old_acting.swap(acting);
+
+    cur_map = osd->get_map(cur_epoch);
+    cur_map->pg_to_up_acting_osds(get_pgid(), up, acting);
+
+    std::stringstream debug;
+    bool new_interval = pg_interval_t::check_new_interval(
+      old_acting,
+      acting,
+      old_up,
+      up,
+      same_interval_since,
+      info.history.last_epoch_clean,
+      cur_map,
+      last_map,
+      &past_intervals,
+      &debug);
+    if (new_interval) {
+      dout(10) << debug.str() << dendl;
+      same_interval_since = cur_epoch;
     }
   }
 
@@ -3557,24 +3529,18 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
   if (!lastmap) {
     dout(10) << " no lastmap" << dendl;
     dirty_info = true;
-  } else if (acting != oldacting || up != oldup) {
-    // remember past interval
-    pg_interval_t& i = past_intervals[info.history.same_interval_since];
-    i.first = info.history.same_interval_since;
-    i.last = osdmap->get_epoch() - 1;
-    i.acting = oldacting;
-    i.up = oldup;
-
-    if (i.acting.size()) {
-      i.maybe_went_rw =
-       lastmap->get_up_thru(i.acting[0]) >= i.first &&
-       lastmap->get_up_from(i.acting[0]) <= i.first;
-    } else {
-      i.maybe_went_rw = 0;
+  } else {
+    bool new_interval = pg_interval_t::check_new_interval(
+      oldacting, newacting,
+      oldup, newup,
+      info.history.same_interval_since,
+      info.history.last_epoch_clean,
+      osdmap,
+      lastmap, &past_intervals);
+    if (new_interval) {
+      dout(10) << " noting past " << past_intervals.rbegin()->second << dendl;
+      dirty_info = true;
     }
-
-    dout(10) << " noting past " << i << dendl;
-    dirty_info = true;
   }
 
   if (oldacting != acting || oldup != up) {
index bb7491199864bae132724708268eb35f5373476e..39508d787a24815730f5afea794b5a64aee7be78 100644 (file)
@@ -399,7 +399,6 @@ public:
   bool dirty_info, dirty_log;
 
 public:
-
   // pg state
   pg_info_t        info;
   const coll_t coll;
index 7c019a0af95059720327387e7e948c62fb1040ea..e9ce2cda6547e4fad4a5c4adecf528a78ba0b589 100644 (file)
@@ -14,6 +14,8 @@
 
 #include "osd_types.h"
 #include "include/ceph_features.h"
+#include "PG.h"
+#include "OSDMap.h"
 
 // -- osd_reqid_t --
 void osd_reqid_t::encode(bufferlist &bl) const
@@ -1358,6 +1360,70 @@ void pg_interval_t::generate_test_instances(list<pg_interval_t*>& o)
   o.back()->maybe_went_rw = true;
 }
 
+bool pg_interval_t::check_new_interval(
+  const vector<int> &old_acting,
+  const vector<int> &new_acting,
+  const vector<int> &old_up,
+  const vector<int> &new_up,
+  epoch_t same_interval_since,  // used as both the map key and i.first of the recorded interval
+  epoch_t last_epoch_clean,
+  OSDMapRef osdmap,  // only get_epoch() is read from this map
+  OSDMapRef lastmap,  // queried for up_from/up_thru of old_acting[0]
+  map<epoch_t, pg_interval_t> *past_intervals,  // dereferenced unconditionally when a change is detected
+  std::ostream *out)  // optional debug sink; every write below is guarded by a null check
+{
+  // Compare old vs. new acting/up sets; on any difference, record the interval that just ended and return true.
+  if (new_acting != old_acting || new_up != old_up) {
+    pg_interval_t& i = (*past_intervals)[same_interval_since];  // operator[]: creates the entry, or reuses an existing one for this epoch
+    i.first = same_interval_since;
+    i.last = osdmap->get_epoch() - 1;  // the epoch immediately before osdmap's
+    i.acting = old_acting;
+    i.up = old_up;
+
+    if (i.acting.size()) {  // non-empty acting set: acting[0] is what the debug text below calls the primary
+      if (lastmap->get_up_thru(i.acting[0]) >= i.first &&
+         lastmap->get_up_from(i.acting[0]) <= i.first) {  // i.first lies within [up_from, up_thru] reported by lastmap
+       i.maybe_went_rw = true;
+       if (out)
+         *out << "generate_past_intervals " << i
+              << " : primary up " << lastmap->get_up_from(i.acting[0])
+              << "-" << lastmap->get_up_thru(i.acting[0])
+              << std::endl;
+      } else if (last_epoch_clean >= i.first &&
+                last_epoch_clean <= i.last) {  // last_epoch_clean falls inside [i.first, i.last]
+       // If the last_epoch_clean is included in this interval, then
+       // the pg must have been rw (for recovery to have completed).
+       // This is important because we won't know the _real_
+       // first_epoch because we stop at last_epoch_clean, and we
+       // don't want the oldest interval to randomly have
+       // maybe_went_rw false depending on the relative up_thru vs
+       // last_epoch_clean timing.
+       i.maybe_went_rw = true;
+       if (out)
+         *out << "generate_past_intervals " << i
+              << " : includes last_epoch_clean " << last_epoch_clean
+              << " and presumed to have been rw"
+              << std::endl;
+      } else {  // neither the up_from/up_thru test nor the last_epoch_clean test matched
+       i.maybe_went_rw = false;
+       if (out)
+         *out << "generate_past_intervals " << i
+              << " : primary up " << lastmap->get_up_from(i.acting[0])
+              << "-" << lastmap->get_up_thru(i.acting[0])
+              << " does not include interval"
+              << std::endl;
+      }
+    } else {  // empty acting set: no acting[0] to test, so the interval is marked not-rw
+      i.maybe_went_rw = false;
+      if (out)
+       *out << "generate_past_intervals " << i << " : empty" << std::endl;
+    }
+    return true;  // an interval entry was written to *past_intervals
+  } else {
+    return false;  // acting and up both unchanged; *past_intervals is not touched
+  }
+}
+
 ostream& operator<<(ostream& out, const pg_interval_t& i)
 {
   out << "interval(" << i.first << "-" << i.last << " " << i.up << "/" << i.acting;
index d85ecb314bf0125c3ea7bf353b7c8dd5e031edc1..58c6a552b38e7c4a93c7bb4058fc108b1a7824e4 100644 (file)
@@ -18,6 +18,7 @@
 #include <sstream>
 #include <stdio.h>
 #include <stdexcept>
+#include <memory>
 
 #include "msg/msg_types.h"
 #include "include/types.h"
@@ -1042,6 +1043,7 @@ inline ostream& operator<<(ostream& out, const pg_info_t& pgi)
 /**
  * pg_interval_t - information about a past interval
  */
+class OSDMap;
 struct pg_interval_t {
   vector<int> up, acting;
   epoch_t first, last;
@@ -1053,6 +1055,23 @@ struct pg_interval_t {
   void decode(bufferlist::iterator& bl);
   void dump(Formatter *f) const;
   static void generate_test_instances(list<pg_interval_t*>& o);
+
+  /**
+   * Integrates a new map into *past_intervals, returns true
+   * if an interval was closed out.
+   */
+  static bool check_new_interval(
+    const vector<int> &old_acting,              ///< [in] acting as of lastmap
+    const vector<int> &new_acting,              ///< [in] acting as of osdmap
+    const vector<int> &old_up,                  ///< [in] up as of lastmap
+    const vector<int> &new_up,                  ///< [in] up as of osdmap
+    epoch_t same_interval_since,                ///< [in] as of osdmap
+    epoch_t last_epoch_clean,                   ///< [in] current
+    std::tr1::shared_ptr<const OSDMap> osdmap,  ///< [in] current map
+    std::tr1::shared_ptr<const OSDMap> lastmap, ///< [in] last map
+    map<epoch_t, pg_interval_t> *past_intervals,///< [out] intervals
+    ostream *out = 0                            ///< [out] debug ostream
+    );
 };
 WRITE_CLASS_ENCODER(pg_interval_t)