git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
OSD, PG: ignore peering messages from before the last peering restart
author Josh Durgin <josh.durgin@dreamhost.com>
Fri, 20 May 2011 00:19:59 +0000 (17:19 -0700)
committer Josh Durgin <josh.durgin@dreamhost.com>
Fri, 20 May 2011 01:02:55 +0000 (18:02 -0700)
Check them before entering the state machine so we can
safely enter the Crashed state on unexpected messages
from the current interval.

Signed-off-by: Josh Durgin <josh.durgin@dreamhost.com>
src/osd/OSD.cc
src/osd/PG.cc
src/osd/PG.h

index cd6dee2ab2032e28ee45b52193f29f95abc99488..369e138d553b9d1363ec81a2d99561addcf16f49 100644 (file)
@@ -3979,6 +3979,12 @@ void OSD::handle_pg_notify(MOSDPGNotify *m)
     if (!pg)
       continue;
 
+    if (pg->old_peering_msg(m->get_epoch())) {
+      dout(10) << "ignoring old peering message " << *m << dendl;
+      pg->unlock();
+      continue;
+    }
+
     PG::RecoveryCtx rctx(&query_map, &info_map, 0, &fin->contexts, t);
     pg->handle_notify(from, *it, &rctx);
 
@@ -4018,6 +4024,12 @@ void OSD::handle_pg_log(MOSDPGLog *m)
     return;
   }
 
+  if (pg->old_peering_msg(m->get_epoch())) {
+    dout(10) << "ignoring old peering message " << *m << dendl;
+    pg->unlock();
+    return;
+  }
+
   map< int, map<pg_t,PG::Query> > query_map;
   map< int, MOSDPGInfo* > info_map;
   PG::RecoveryCtx rctx(&query_map, &info_map, 0, &fin->contexts, t);
@@ -4057,6 +4069,13 @@ void OSD::handle_pg_info(MOSDPGInfo *m)
     PG::RecoveryCtx rctx(0, &info_map, 0, &fin->contexts, t);
     if (!pg)
       continue;
+
+    if (pg->old_peering_msg(m->get_epoch())) {
+      dout(10) << "ignoring old peering message " << *m << dendl;
+      pg->unlock();
+      continue;
+    }
+
     pg->handle_info(from, *p, &rctx);
 
     int tr = store->queue_transaction(&pg->osr, t, new ObjectStore::C_DeleteTransaction(t), fin);
@@ -4207,6 +4226,12 @@ void OSD::handle_pg_query(MOSDPGQuery *m)
       continue;
     }
 
+    if (pg->old_peering_msg(m->get_epoch())) {
+      dout(10) << "ignoring old peering message " << *m << dendl;
+      pg->unlock();
+      continue;
+    }
+
     if (pg->deleting) {
       /*
        * We cancel deletion on pg change.  And the primary will never
index 0c40157c0a72656ed21cf58067cb80b0f5f83fbe..975cb73692ce12357632c8c7db3b78b2c86ca955 100644 (file)
@@ -3425,6 +3425,11 @@ bool PG::acting_up_affected(const vector<int>& newup, const vector<int>& newacti
   }
 }
 
+bool PG::old_peering_msg(const epoch_t &msg_epoch)
+{  // Reports whether msg_epoch is strictly older than the epoch saved by the last warm_restart().
+  return (last_warm_restart > msg_epoch);  // strict '>': a message from the restart epoch itself is not old
+}
+
 /* Called before initializing peering during advance_map */
 void PG::warm_restart(const OSDMap& lastmap, const vector<int>& newup, const vector<int>& newacting)
 {
@@ -3433,6 +3438,8 @@ void PG::warm_restart(const OSDMap& lastmap, const vector<int>& newup, const vec
   // -- there was a change! --
   kick();
 
+  last_warm_restart = osdmap.get_epoch();
+
   vector<int> oldacting, oldup;
   int oldrole = get_role();
   int oldprimary = get_primary();
@@ -4613,7 +4620,7 @@ void PG::RecoveryState::RecoveryMachine::log_exit(const char *state_name, utime_
 }
 
 
-/*----RecoverState Methods-----*/
+/*----RecoveryState Methods-----*/
 #undef dout_prefix
 #define dout_prefix *_dout << machine.pg->gen_prefix() 
 
index ffa847703ce6ef3fbc684ae5a8d64ab76ab86d98..1045519c1a3bb91e307fa3cbe12ab713f8a95dcc 100644 (file)
@@ -1277,6 +1277,9 @@ protected:
   // primary-only, recovery-only state
   set<int>             might_have_unfound;  // These osds might have objects on them
                                            // which are unfound on the primary
+
+  epoch_t last_warm_restart;
+
   friend class OSD;
 
 
@@ -1481,6 +1484,7 @@ public:
     have_master_log(true),
     recovery_state(this),
     need_up_thru(false),
+    last_warm_restart(0),
     pg_stats_lock("PG::pg_stats_lock"),
     pg_stats_valid(false),
     finish_sync_event(NULL),
@@ -1563,7 +1567,8 @@ public:
                    pair<int, Info> &notify_info);
   void fulfill_log(int from, const Query &query);
   bool acting_up_affected(const vector<int>& newup, const vector<int>& newacting);
-    
+  bool old_peering_msg(const epoch_t &msg_epoch);
+
   // recovery bits
   void handle_notify(int from, PG::Info& i, RecoveryCtx *rctx) {
     recovery_state.handle_notify(from, i, rctx);