git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
osd/PG: populate blocked_by with peers we are trying to activate 3350/head
author Sage Weil <sage@redhat.com>
Thu, 15 Jan 2015 00:41:45 +0000 (16:41 -0800)
committer Sage Weil <sage@redhat.com>
Thu, 15 Jan 2015 00:41:45 +0000 (16:41 -0800)
Once peering finishes, all osds need to persist their info and ack before
we are fully active.  Populate blocked_by with those peers so we can tell
when they are stalling the process.

Fixes: #10477
Signed-off-by: Sage Weil <sage@redhat.com>
src/osd/PG.cc

index 9fb255f2468a91294956f7855c20317d4f9e4910..185af98f3325e3ae34e139bc6cad90478b9fd5d9 100644 (file)
@@ -1847,6 +1847,7 @@ void PG::all_activated_and_committed()
   assert(is_primary());
   assert(peer_activated.size() == actingbackfill.size());
   assert(!actingbackfill.empty());
+  assert(blocked_by.empty());
 
   // info.last_epoch_started is set during activate()
   info.history.last_epoch_started = info.last_epoch_started;
@@ -6244,6 +6245,17 @@ PG::RecoveryState::Active::Active(my_context ctx)
               *context< RecoveryMachine >().get_query_map(),
               context< RecoveryMachine >().get_info_map(),
               context< RecoveryMachine >().get_recovery_ctx());
+
+  // everyone has to commit/ack before we are truly active
+  pg->blocked_by.clear();
+  for (set<pg_shard_t>::iterator p = pg->actingbackfill.begin();
+       p != pg->actingbackfill.end();
+       ++p) {
+    if (p->shard != pg->pg_whoami.shard) {
+      pg->blocked_by.insert(p->shard);
+    }
+  }
+  pg->publish_stats_to_osd();
   dout(10) << "Activate Finished" << dendl;
 }
 
@@ -6373,7 +6385,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const MInfoRec& infoe
     dout(10) << " peer osd." << infoevt.from << " activated and committed" 
             << dendl;
     pg->peer_activated.insert(infoevt.from);
-
+    pg->blocked_by.erase(infoevt.from.shard);
+    pg->publish_stats_to_osd();
     if (pg->peer_activated.size() == pg->actingbackfill.size()) {
       pg->all_activated_and_committed();
     }
@@ -6480,6 +6493,7 @@ void PG::RecoveryState::Active::exit()
   PG *pg = context< RecoveryMachine >().pg;
   pg->osd->local_reserver.cancel_reservation(pg->info.pgid);
 
+  pg->blocked_by.clear();
   pg->backfill_reserved = false;
   pg->backfill_reserving = false;
   pg->state_clear(PG_STATE_DEGRADED);