git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: restart peering if a requested acting osd goes down
author: Sage Weil <sage@newdream.net>
Tue, 31 Jan 2012 17:53:32 +0000 (09:53 -0800)
committer: Sage Weil <sage@newdream.net>
Tue, 31 Jan 2012 19:44:08 +0000 (11:44 -0800)
If we request an acting set, we need to restart peering if one of the
requested nodes goes down.  This prevents a deadlock where we get stuck
in WaitActingChange: we have [a,b] and want [a,b,c], but c is down, so
our up and acting sets don't actually change.

Signed-off-by: Sage Weil <sage@newdream.net>
src/osd/PG.cc
src/osd/PG.h

index c00068171d2a0cdb705f2f288d1aa697b513bbe4..b2da722d1f1b9a19b0a902ed5d627f1081d47301 100644 (file)
@@ -854,6 +854,20 @@ bool PG::adjust_need_up_thru(const OSDMapRef osdmap)
   return false;
 }
 
+void PG::remove_down_peer_info(const OSDMapRef osdmap)
+{
+  // Remove any downed osds from peer_info
+  map<int,PG::Info>::iterator p = peer_info.begin();
+  while (p != peer_info.end()) {
+    if (!osdmap->is_up(p->first)) {
+      dout(10) << " dropping down osd." << p->first << " info " << p->second << dendl;
+      peer_missing.erase(p->first);
+      peer_info.erase(p++);
+    } else
+      p++;
+  }
+}
+
 /*
  * Returns true unless there is a non-lost OSD in might_have_unfound.
  */
@@ -1144,6 +1158,7 @@ bool PG::choose_acting(int& newest_update_osd)
   if (want != acting) {
     dout(10) << "choose_acting want " << want << " != acting " << acting
             << ", requesting pg_temp change" << dendl;
+    want_acting = want;
     if (want == up) {
       vector<int> empty;
       osd->queue_want_pg_temp(info.pgid, empty);
@@ -3346,6 +3361,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap,
 
   up = newup;
   acting = newacting;
+  want_acting.clear();
 
   int role = osdmap->calc_pg_role(osd->whoami, acting, acting.size());
   set_role(role);
@@ -3860,18 +3876,7 @@ boost::statechart::result PG::RecoveryState::Primary::react(const ActMap&)
 boost::statechart::result PG::RecoveryState::Primary::react(const AdvMap& advmap) 
 {
   PG *pg = context< RecoveryMachine >().pg;
-  OSDMapRef osdmap = advmap.osdmap;
-
-  // Remove any downed osds from peer_info
-  map<int,PG::Info>::iterator p = pg->peer_info.begin();
-  while (p != pg->peer_info.end()) {
-    if (!osdmap->is_up(p->first)) {
-      dout(10) << " dropping down osd." << p->first << " info " << p->second << dendl;
-      pg->peer_missing.erase(p->first);
-      pg->peer_info.erase(p++);
-    } else
-      p++;
-  }
+  pg->remove_down_peer_info(advmap.osdmap);
   return forward_event();
 }
 
@@ -4446,6 +4451,24 @@ PG::RecoveryState::WaitActingChange::WaitActingChange(my_context ctx)
   context< RecoveryMachine >().log_enter(state_name);
 }
 
+boost::statechart::result PG::RecoveryState::WaitActingChange::react(const AdvMap& advmap)
+{
+  PG *pg = context< RecoveryMachine >().pg;
+  OSDMapRef osdmap = advmap.osdmap;
+
+  pg->remove_down_peer_info(osdmap);
+
+  dout(10) << "verifying no want_acting " << pg->want_acting << " targets didn't go down" << dendl;
+  for (vector<int>::iterator p = pg->want_acting.begin(); p != pg->want_acting.end(); ++p) {
+    if (!osdmap->is_up(*p)) {
+      dout(10) << " want_acting target osd." << *p << " went down, resetting" << dendl;
+      post_event(advmap);
+      return transit< Reset >();
+    }
+  }
+  return forward_event();
+}
+
 boost::statechart::result PG::RecoveryState::WaitActingChange::react(const MLogRec& logevt)
 {
   dout(10) << "In WaitActingChange, ignoring MLocRec" << dendl;
index d37e67a1902b33729e0539bba4f202cf8ab1db08..dcf9341ef247f6d9faef1b682991a1bc475148fb 100644 (file)
@@ -910,7 +910,7 @@ public:
 
   // primary state
  public:
-  vector<int> up, acting;
+  vector<int> up, acting, want_acting;
   map<int,eversion_t> peer_last_complete_ondisk;
   eversion_t  min_last_complete_ondisk;  // up: min over last_complete_ondisk, peer_last_complete_ondisk
   eversion_t  pg_trim_to;
@@ -1209,11 +1209,13 @@ public:
     struct WaitActingChange : boost::statechart::state< WaitActingChange, Primary>,
                              NamedState {
       typedef boost::mpl::list <
+       boost::statechart::custom_reaction< AdvMap >,
        boost::statechart::custom_reaction< MLogRec >,
        boost::statechart::custom_reaction< MInfoRec >,
        boost::statechart::custom_reaction< MNotifyRec >
        > reactions;
       WaitActingChange(my_context ctx);
+      boost::statechart::result react(const AdvMap&);
       boost::statechart::result react(const MLogRec&);
       boost::statechart::result react(const MInfoRec&);
       boost::statechart::result react(const MNotifyRec&);
@@ -1524,7 +1526,8 @@ public:
   void generate_past_intervals();
   void trim_past_intervals();
   void build_prior(std::auto_ptr<PriorSet> &prior_set);
-  void clear_prior();
+
+  void remove_down_peer_info(const OSDMapRef osdmap);
 
   bool adjust_need_up_thru(const OSDMapRef osdmap);