From 7161a2c927a6ded0d6ffe7d7621b7abd7f18acc3 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Mon, 21 Sep 2015 12:00:49 -0700 Subject: [PATCH] PG: ignore info from down osd Fixes: #12990 Backport: firefly, hammer Signed-off-by: Samuel Just (cherry picked from commit 930d8eb1fb0daacd692d2e80b5ff473448bd4e8d) --- src/osd/PG.cc | 24 ++++++++++++++++++------ src/osd/PG.h | 3 ++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 7b91bf8c97d93..63f3a076b6593 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -313,7 +313,8 @@ void PG::proc_replica_log( peer_missing[from].swap(omissing); } -bool PG::proc_replica_info(pg_shard_t from, const pg_info_t &oinfo) +bool PG::proc_replica_info( + pg_shard_t from, const pg_info_t &oinfo, epoch_t send_epoch) { map<pg_shard_t, pg_info_t>::iterator p = peer_info.find(from); if (p != peer_info.end() && p->second.last_update == oinfo.last_update) { @@ -321,6 +322,12 @@ bool PG::proc_replica_info(pg_shard_t from, const pg_info_t &oinfo) return false; } + if (!get_osdmap()->has_been_up_since(from.osd, send_epoch)) { + dout(10) << " got info " << oinfo << " from down osd." << from + << " discarding" << dendl; + return false; + } + dout(10) << " got osd." << from << " " << oinfo << dendl; assert(is_primary()); peer_info[from] = oinfo; @@ -5377,7 +5384,8 @@ boost::statechart::result PG::RecoveryState::Initial::react(const Load& l) boost::statechart::result PG::RecoveryState::Initial::react(const MNotifyRec& notify) { PG *pg = context< RecoveryMachine >().pg; - pg->proc_replica_info(notify.from, notify.notify.info); + pg->proc_replica_info( + notify.from, notify.notify.info, notify.notify.epoch_sent); pg->update_heartbeat_peers(); pg->set_last_peering_reset(); return transit< Primary >(); @@ -5610,7 +5618,8 @@ boost::statechart::result PG::RecoveryState::Primary::react(const MNotifyRec& no dout(10) << *pg << " got dup osd." 
<< notevt.from << " info " << notevt.notify.info << ", identical to ours" << dendl; } else { - pg->proc_replica_info(notevt.from, notevt.notify.info); + pg->proc_replica_info( + notevt.from, notevt.notify.info, notevt.notify.epoch_sent); } return discard_event(); } @@ -6467,7 +6476,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const MNotifyRec& not dout(10) << "Active: got notify from " << notevt.from << ", calling proc_replica_info and discover_all_missing" << dendl; - pg->proc_replica_info(notevt.from, notevt.notify.info); + pg->proc_replica_info( + notevt.from, notevt.notify.info, notevt.notify.epoch_sent); if (pg->have_unfound()) { pg->discover_all_missing(*context< RecoveryMachine >().get_query_map()); } @@ -6904,7 +6914,8 @@ boost::statechart::result PG::RecoveryState::GetInfo::react(const MNotifyRec& in } epoch_t old_start = pg->info.history.last_epoch_started; - if (pg->proc_replica_info(infoevt.from, infoevt.notify.info)) { + if (pg->proc_replica_info( + infoevt.from, infoevt.notify.info, infoevt.notify.epoch_sent)) { // we got something new ... auto_ptr<PriorSet> &prior_set = context< Peering >().prior_set; if (old_start < pg->info.history.last_epoch_started) { @@ -7259,7 +7270,8 @@ boost::statechart::result PG::RecoveryState::Incomplete::react(const MNotifyRec& << ", identical to ours" << dendl; return discard_event(); } else { - pg->proc_replica_info(notevt.from, notevt.notify.info); + pg->proc_replica_info( + notevt.from, notevt.notify.info, notevt.notify.epoch_sent); // try again! 
return transit< GetLog >(); } diff --git a/src/osd/PG.h b/src/osd/PG.h index 41de9d6d14a6a..30fdf2f70a4b6 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -843,7 +843,8 @@ public: pg_missing_t& omissing, pg_shard_t from); void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog, pg_missing_t& omissing, pg_shard_t from); - bool proc_replica_info(pg_shard_t from, const pg_info_t &info); + bool proc_replica_info( + pg_shard_t from, const pg_info_t &info, epoch_t send_epoch); struct LogEntryTrimmer : public ObjectModDesc::Visitor { -- 2.47.3