From: Samuel Just Date: Tue, 12 Jun 2012 19:53:02 +0000 (-0700) Subject: PG: track purged pgs during active X-Git-Tag: v0.48argonaut~73 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f822c0257e4c7fad181332cd149205ad15a8b9db;p=ceph.git PG: track purged pgs during active See bug #2462. The following sequence could cause the primary to send a log assuming a non-empty pg to an empty replica: 1. primary sends query to stray 2. stray sends notify to primary 3. primary sends purge to stray removing stray from peer_info 4. stray receives query and sends a notify 5. stray receives purge and purges its pg 6. primary receives notify from stray and adds it to peer_info note: peer_info[stray] is now wrong 7. acting set changes, primary is still primary, stray is replica 8. primary sends log to replica based on incorrect info from 6. This patch adds a peer_purged set which is populated during purge_strays and cleared during start_peering_interval. The primary will ignore notifies from the peer once the peer is in this set. Signed-off-by: Samuel Just --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 0a0c8234485..abf33d204ba 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1707,6 +1707,7 @@ void PG::purge_strays() dout(10) << "not sending PGRemove to down osd." << *p << dendl; } peer_info.erase(*p); + peer_purged.insert(*p); removed = true; } @@ -3596,6 +3597,7 @@ void PG::start_peering_interval(const OSDMapRef lastmap, state_clear(PG_STATE_RECOVERING); peer_missing.clear(); + peer_purged.clear(); // reset primary state? 
if (oldrole == 0 || get_role() == 0) @@ -4147,6 +4149,10 @@ boost::statechart::result PG::RecoveryState::Active::react(const MNotifyRec& not dout(10) << "Active: got notify from " << notevt.from << ", already have info from that osd, ignoring" << dendl; + } else if (pg->peer_purged.count(notevt.from)) { + dout(10) << "Active: got notify from " << notevt.from + << ", already purged that peer, ignoring" + << dendl; } else { dout(10) << "Active: got notify from " << notevt.from << ", calling proc_replica_info and discover_all_missing" diff --git a/src/osd/PG.h b/src/osd/PG.h index eec356775be..fa4be911298 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -508,6 +508,7 @@ protected: set stray_set; // non-acting osds that have PG data. eversion_t oldest_update; // acting: lowest (valid) last_update in active set map peer_info; // info from peers (stray or prior) + set peer_purged; // peers purged map peer_missing; set peer_log_requested; // logs i've requested (and start stamps) set peer_missing_requested;