From: Sage Weil Date: Thu, 15 Jan 2015 00:41:45 +0000 (-0800) Subject: osd/PG: populate blocked_by with peers we are trying to activate X-Git-Tag: v0.93~213^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6986ec1ceaa58054cffe14b87aacb3f9b3ab7db4;p=ceph.git osd/PG: populate blocked_by with peers we are trying to activate Once peering finishes, all osds need to persist their info and ack before we are fully active. Populate blocked_by with those peers so we can tell when they are stalling the process. Fixes: #10477 Signed-off-by: Sage Weil --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 9fb255f2468..185af98f332 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1847,6 +1847,7 @@ void PG::all_activated_and_committed() assert(is_primary()); assert(peer_activated.size() == actingbackfill.size()); assert(!actingbackfill.empty()); + assert(blocked_by.empty()); // info.last_epoch_started is set during activate() info.history.last_epoch_started = info.last_epoch_started; @@ -6244,6 +6245,17 @@ PG::RecoveryState::Active::Active(my_context ctx) *context< RecoveryMachine >().get_query_map(), context< RecoveryMachine >().get_info_map(), context< RecoveryMachine >().get_recovery_ctx()); + + // everyone has to commit/ack before we are truly active + pg->blocked_by.clear(); + for (set<pg_shard_t>::iterator p = pg->actingbackfill.begin(); + p != pg->actingbackfill.end(); + ++p) { + if (p->shard != pg->pg_whoami.shard) { + pg->blocked_by.insert(p->shard); + } + } + pg->publish_stats_to_osd(); dout(10) << "Activate Finished" << dendl; } @@ -6373,7 +6385,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const MInfoRec& infoe dout(10) << " peer osd." 
<< infoevt.from << " activated and committed" << dendl; pg->peer_activated.insert(infoevt.from); - + pg->blocked_by.erase(infoevt.from.shard); + pg->publish_stats_to_osd(); if (pg->peer_activated.size() == pg->actingbackfill.size()) { pg->all_activated_and_committed(); } @@ -6480,6 +6493,7 @@ void PG::RecoveryState::Active::exit() PG *pg = context< RecoveryMachine >().pg; pg->osd->local_reserver.cancel_reservation(pg->info.pgid); + pg->blocked_by.clear(); pg->backfill_reserved = false; pg->backfill_reserving = false; pg->state_clear(PG_STATE_DEGRADED);