]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: Don't start recovery for missing until active pg state set
authorDavid Zafman <dzafman@redhat.com>
Wed, 10 Jan 2018 21:30:41 +0000 (13:30 -0800)
committerDavid Zafman <dzafman@redhat.com>
Mon, 15 Jan 2018 02:17:23 +0000 (18:17 -0800)
I was seeing recovery hang when it is started before _activate_committed().
The state machine passes into "Active" but this transitions to activating
pg state and after committed into "active" pg state.

Signed-off-by: David Zafman <dzafman@redhat.com>
src/osd/PG.cc

index ac1bfe6a987448baf3f489e87be693c05940c4f5..e873fa6087a6b64b19c02503faa77a3a703954d7 100644 (file)
@@ -7093,6 +7093,7 @@ PG::RecoveryState::Recovering::Recovering(my_context ctx)
   pg->state_clear(PG_STATE_RECOVERY_WAIT);
   pg->state_clear(PG_STATE_RECOVERY_TOOFULL);
   pg->state_set(PG_STATE_RECOVERING);
+  assert(!pg->state_test(PG_STATE_ACTIVATING));
   pg->publish_stats_to_osd();
   pg->queue_recovery();
 }
@@ -7549,7 +7550,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const MLogRec& logevt
     pg->peer_missing[logevt.from],
     logevt.from,
     context< RecoveryMachine >().get_recovery_ctx());
-  if (got_missing) {
+  // If there are missing AND we are "fully" active then start recovery now
+  if (got_missing && pg->state_test(PG_STATE_ACTIVE)) {
     post_event(DoRecovery());
   }
   return discard_event();