git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: peering: move to Incomplete when.. incomplete
author Sage Weil <sage@inktank.com>
Fri, 27 Jul 2012 22:39:40 +0000 (15:39 -0700)
committer Sage Weil <sage@inktank.com>
Sat, 28 Jul 2012 16:04:29 +0000 (09:04 -0700)
PG::choose_acting() may return false and *not* request an acting set change
if it can't find any suitable peers with enough info to recover.  In that
case, we should move to Incomplete, not WaitActingChange, just like we do
a bit lower in GetLog() if we have non-contiguous logs.  The state name is
more accurate, and this is also needed to fix bug #2860.

Signed-off-by: Sage Weil <sage@inktank.com>
src/osd/PG.cc

index f923af6424c8a7074fa95e4bb8f9b209a167a11a..c9e5045e6bb854b583c68c41360adf3c7b3d7e09 100644 (file)
@@ -1188,8 +1188,8 @@ bool PG::choose_acting(int& newest_update_osd)
   vector<int> want;
 
   if (!calc_acting(newest_update_osd, want)) {
-    dout(10) << "choose_acting failed, marking pg down" << dendl;
-    state_set(PG_STATE_DOWN);
+    dout(10) << "choose_acting failed" << dendl;
+    assert(want_acting.empty());
     return false;
   }
 
@@ -4727,6 +4727,7 @@ boost::statechart::result PG::RecoveryState::Active::react(const RecoveryComplet
   // adjust acting set?  (e.g. because backfill completed...)
   if (pg->acting != pg->up &&
       !pg->choose_acting(newest_update_osd)) {
+    assert(pg->want_acting.size());
     post_event(NeedActingChange());
     return discard_event();
   }
@@ -5169,7 +5170,11 @@ PG::RecoveryState::GetLog::GetLog(my_context ctx) :
 
   // adjust acting?
   if (!pg->choose_acting(newest_update_osd)) {
-    post_event(NeedActingChange());
+    if (pg->want_acting.size()) {
+      post_event(NeedActingChange());
+    } else {
+      post_event(IsIncomplete());
+    }
     return;
   }
 
@@ -5318,6 +5323,7 @@ PG::RecoveryState::Incomplete::Incomplete(my_context ctx)
   pg->state_set(PG_STATE_INCOMPLETE);
   pg->update_stats();
 }
+
 void PG::RecoveryState::Incomplete::exit()
 {
   context< RecoveryMachine >().log_exit(state_name, enter_time);