git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
PG: ignore info from down osd (refs: 6016/head, 6117/head)
author Samuel Just <sjust@redhat.com>
Mon, 21 Sep 2015 19:00:49 +0000 (12:00 -0700)
committer Samuel Just <sjust@redhat.com>
Mon, 21 Sep 2015 19:14:02 +0000 (12:14 -0700)
Fixes: #12990
Backport: firefly, hammer
Signed-off-by: Samuel Just <sjust@redhat.com>
src/osd/PG.cc
src/osd/PG.h

diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 3bdb13cbbd48b4d58025d34168fe0478f3c34fcf..1ba7f262508d1e0b0c139a9aa6cf4b975a670860 100644
@@ -316,7 +316,8 @@ void PG::proc_replica_log(
   peer_missing[from].swap(omissing);
 }
 
-bool PG::proc_replica_info(pg_shard_t from, const pg_info_t &oinfo)
+bool PG::proc_replica_info(
+  pg_shard_t from, const pg_info_t &oinfo, epoch_t send_epoch)
 {
   map<pg_shard_t, pg_info_t>::iterator p = peer_info.find(from);
   if (p != peer_info.end() && p->second.last_update == oinfo.last_update) {
@@ -324,6 +325,12 @@ bool PG::proc_replica_info(pg_shard_t from, const pg_info_t &oinfo)
     return false;
   }
 
+  if (!get_osdmap()->has_been_up_since(from.osd, send_epoch)) {
+    dout(10) << " got info " << oinfo << " from down osd." << from
+            << " discarding" << dendl;
+    return false;
+  }
+
   dout(10) << " got osd." << from << " " << oinfo << dendl;
   assert(is_primary());
   peer_info[from] = oinfo;
@@ -5312,7 +5319,8 @@ boost::statechart::result PG::RecoveryState::Initial::react(const Load& l)
 boost::statechart::result PG::RecoveryState::Initial::react(const MNotifyRec& notify)
 {
   PG *pg = context< RecoveryMachine >().pg;
-  pg->proc_replica_info(notify.from, notify.notify.info);
+  pg->proc_replica_info(
+    notify.from, notify.notify.info, notify.notify.epoch_sent);
   pg->update_heartbeat_peers();
   pg->set_last_peering_reset();
   return transit< Primary >();
@@ -5545,7 +5553,8 @@ boost::statechart::result PG::RecoveryState::Primary::react(const MNotifyRec& no
     dout(10) << *pg << " got dup osd." << notevt.from << " info " << notevt.notify.info
             << ", identical to ours" << dendl;
   } else {
-    pg->proc_replica_info(notevt.from, notevt.notify.info);
+    pg->proc_replica_info(
+      notevt.from, notevt.notify.info, notevt.notify.epoch_sent);
   }
   return discard_event();
 }
@@ -6388,7 +6397,8 @@ boost::statechart::result PG::RecoveryState::Active::react(const MNotifyRec& not
     dout(10) << "Active: got notify from " << notevt.from 
             << ", calling proc_replica_info and discover_all_missing"
             << dendl;
-    pg->proc_replica_info(notevt.from, notevt.notify.info);
+    pg->proc_replica_info(
+      notevt.from, notevt.notify.info, notevt.notify.epoch_sent);
     if (pg->have_unfound()) {
       pg->discover_all_missing(*context< RecoveryMachine >().get_query_map());
     }
@@ -6825,7 +6835,8 @@ boost::statechart::result PG::RecoveryState::GetInfo::react(const MNotifyRec& in
   }
 
   epoch_t old_start = pg->info.history.last_epoch_started;
-  if (pg->proc_replica_info(infoevt.from, infoevt.notify.info)) {
+  if (pg->proc_replica_info(
+       infoevt.from, infoevt.notify.info, infoevt.notify.epoch_sent)) {
     // we got something new ...
     unique_ptr<PriorSet> &prior_set = context< Peering >().prior_set;
     if (old_start < pg->info.history.last_epoch_started) {
@@ -7182,7 +7193,8 @@ boost::statechart::result PG::RecoveryState::Incomplete::react(const MNotifyRec&
             << ", identical to ours" << dendl;
     return discard_event();
   } else {
-    pg->proc_replica_info(notevt.from, notevt.notify.info);
+    pg->proc_replica_info(
+      notevt.from, notevt.notify.info, notevt.notify.epoch_sent);
     // try again!
     return transit< GetLog >();
   }
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 7859f1a5a3c0de7343382e43eb6e25daacc18000..575b82e0cf26db2662f53f83eeaf2b9fe2712c78 100644
@@ -872,7 +872,8 @@ public:
                        pg_missing_t& omissing, pg_shard_t from);
   void proc_master_log(ObjectStore::Transaction& t, pg_info_t &oinfo, pg_log_t &olog,
                       pg_missing_t& omissing, pg_shard_t from);
-  bool proc_replica_info(pg_shard_t from, const pg_info_t &info);
+  bool proc_replica_info(
+    pg_shard_t from, const pg_info_t &info, epoch_t send_epoch);
 
 
   struct LogEntryTrimmer : public ObjectModDesc::Visitor {