From: Samuel Just Date: Mon, 21 Sep 2015 19:00:49 +0000 (-0700) Subject: PG: ignore info from down osd X-Git-Tag: v9.1.0~44^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=930d8eb1fb0daacd692d2e80b5ff473448bd4e8d;p=ceph.git PG: ignore info from down osd Fixes: #12990 Backport: firefly, hammer Signed-off-by: Samuel Just --- diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 3bdb13cbbd48..1ba7f262508d 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -316,7 +316,8 @@ void PG::proc_replica_log( peer_missing[from].swap(omissing); } -bool PG::proc_replica_info(pg_shard_t from, const pg_info_t &oinfo) +bool PG::proc_replica_info( + pg_shard_t from, const pg_info_t &oinfo, epoch_t send_epoch) { map<pg_shard_t, pg_info_t>::iterator p = peer_info.find(from); if (p != peer_info.end() && p->second.last_update == oinfo.last_update) { @@ -324,6 +325,12 @@ bool PG::proc_replica_info(pg_shard_t from, const pg_info_t &oinfo) return false; } + if (!get_osdmap()->has_been_up_since(from.osd, send_epoch)) { + dout(10) << " got info " << oinfo << " from down osd." << from + << " discarding" << dendl; + return false; + } + dout(10) << " got osd." << from << " " << oinfo << dendl; assert(is_primary()); peer_info[from] = oinfo; @@ -5312,7 +5319,8 @@ boost::statechart::result PG::RecoveryState::Initial::react(const Load& l) boost::statechart::result PG::RecoveryState::Initial::react(const MNotifyRec& notify) { PG *pg = context< RecoveryMachine >().pg; - pg->proc_replica_info(notify.from, notify.notify.info); + pg->proc_replica_info( + notify.from, notify.notify.info, notify.notify.epoch_sent); pg->update_heartbeat_peers(); pg->set_last_peering_reset(); return transit< Primary >(); @@ -5545,7 +5553,8 @@ boost::statechart::result PG::RecoveryState::Primary::react(const MNotifyRec& no dout(10) << *pg << " got dup osd." 
<< notevt.from << " info " << notevt.notify.info << ", identical to ours" << dendl; } else { - pg->proc_replica_info(notevt.from, notevt.notify.info); + pg->proc_replica_info( + notevt.from, notevt.notify.info, notevt.notify.epoch_sent); } return discard_event(); } @@ -6388,7 +6397,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const MNotifyRec& not dout(10) << "Active: got notify from " << notevt.from << ", calling proc_replica_info and discover_all_missing" << dendl; - pg->proc_replica_info(notevt.from, notevt.notify.info); + pg->proc_replica_info( + notevt.from, notevt.notify.info, notevt.notify.epoch_sent); if (pg->have_unfound()) { pg->discover_all_missing(*context< RecoveryMachine >().get_query_map()); } @@ -6825,7 +6835,8 @@ boost::statechart::result PG::RecoveryState::GetInfo::react(const MNotifyRec& in } epoch_t old_start = pg->info.history.last_epoch_started; - if (pg->proc_replica_info(infoevt.from, infoevt.notify.info)) { + if (pg->proc_replica_info( + infoevt.from, infoevt.notify.info, infoevt.notify.epoch_sent)) { // we got something new ... unique_ptr<PriorSet> &prior_set = context< Peering >().prior_set; if (old_start < pg->info.history.last_epoch_started) { @@ -7182,7 +7193,8 @@ boost::statechart::result PG::RecoveryState::Incomplete::react(const MNotifyRec& << ", identical to ours" << dendl; return discard_event(); } else { - pg->proc_replica_info(notevt.from, notevt.notify.info); + pg->proc_replica_info( + notevt.from, notevt.notify.info, notevt.notify.epoch_sent); // try again! 
return transit< GetLog >(); } diff --git a/src/osd/PG.h b/src/osd/PG.h index 7859f1a5a3c0..575b82e0cf26 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -872,7 +872,8 @@ public: pg_missing_t& omissing, pg_shard_t from); void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, pg_missing_t& omissing, pg_shard_t from); - bool proc_replica_info(pg_shard_t from, const pg_info_t &info); + bool proc_replica_info( + pg_shard_t from, const pg_info_t &info, epoch_t send_epoch); struct LogEntryTrimmer : public ObjectModDesc::Visitor {