mds: handle state change race 26051/head
author     Yan, Zheng <zyan@redhat.com>
           Mon, 29 Oct 2018 03:03:00 +0000 (11:03 +0800)
committer  Yan, Zheng <zyan@redhat.com>
           Mon, 21 Jan 2019 08:34:48 +0000 (16:34 +0800)
In a multi-mds cluster, a recovering mds may receive the mdsmap that changes
its state later than the other mds do. Furthermore, the recovering mds may
receive messages from other mds, triggered by its state change, before it
receives the corresponding mdsmap.

Fixes: http://tracker.ceph.com/issues/37594
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
(cherry picked from commit d3a444473abc98e5ce8121af24538a141a292777)

 Conflicts:
src/mds/Locker.cc
src/mds/MDCache.cc
src/mds/MDSRank.h

src/mds/Locker.cc
src/mds/MDCache.cc
src/mds/MDSRank.cc
src/mds/MDSRank.h
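
The hunks below all apply the same defer-and-retry guard: when a message arrives
for a state this rank has not reached yet, but want_state from the latest mdsmap
shows the transition is already underway, the message is queued and replayed after
the transition instead of tripping an assert. A minimal, self-contained sketch of
that pattern, using hypothetical stand-in types (RankSim, Msg) rather than the real
MDSRank/MDSMap/C_MDS_RetryMessage classes:

// Sketch only: stand-in types, not the real MDS code.
#include <cassert>
#include <functional>
#include <iostream>
#include <list>
#include <memory>

enum class State { Replay, Resolve, Reconnect, Rejoin, ClientReplay, Active };

struct Msg { const char *what; };

struct RankSim {
  State state = State::Replay;       // state this rank has actually reached
  State want_state = State::Replay;  // state requested by the latest mdsmap
  std::list<std::function<void()>> waiting_for_rejoin;

  // A handler that is only valid once the rank is in (or past) Rejoin.
  void handle_rejoin_message(std::shared_ptr<Msg> m) {
    if (state < State::Rejoin) {
      if (want_state >= State::Rejoin) {
        // The sender saw the new mdsmap before we did: defer the message
        // and retry it once our own state transition completes.
        waiting_for_rejoin.push_back([this, m] { handle_rejoin_message(m); });
        return;
      }
      assert(!"got unexpected message during recovery");
    }
    std::cout << "handled: " << m->what << "\n";
  }

  // Called when this rank actually enters Rejoin (cf. MDSRank::rejoin_start).
  void rejoin_start() {
    state = State::Rejoin;
    auto waiters = std::move(waiting_for_rejoin);
    for (auto &retry : waiters)
      retry();                       // replay the deferred messages
  }
};

int main() {
  RankSim rank;
  rank.want_state = State::Rejoin;   // the mdsmap transition is underway
  rank.handle_rejoin_message(std::make_shared<Msg>(Msg{"strong rejoin"}));
  rank.rejoin_start();               // prints "handled: strong rejoin"
}
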

src/mds/Locker.cc
index ae95f0257439c43c717f43ca12052821c4400440..7bf717ed4b9e962c4198d0c38e621bbaf4915a40 100644 (file)
@@ -2273,7 +2273,13 @@ void Locker::request_inode_file_caps(CInode *in)
 void Locker::handle_inode_file_caps(MInodeFileCaps *m)
 {
   // nobody should be talking to us during recovery.
-  assert(mds->is_clientreplay() || mds->is_active() || mds->is_stopping());
+  if (mds->get_state() < MDSMap::STATE_CLIENTREPLAY) {
+    if (mds->get_want_state() >= MDSMap::STATE_CLIENTREPLAY) {
+      mds->wait_for_replay(new C_MDS_RetryMessage(mds, m));
+      return;
+    }
+    ceph_assert(!"got unexpected message during recovery");
+  }
 
   // ok
   CInode *in = mdcache->get_inode(m->get_ino());
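
The new guard queues a C_MDS_RetryMessage, a context that re-injects the saved
message into the dispatcher once it fires. A simplified, hypothetical illustration
of that idiom (Dispatcher, Message, Context and RetryMessage here are stand-ins,
not the real Ceph declarations):

// Sketch only: simplified stand-ins for the retry-context idiom.
#include <iostream>
#include <memory>

struct Message { const char *name; };

struct Dispatcher {
  void dispatch(const std::shared_ptr<Message> &m) {
    std::cout << "re-dispatching " << m->name << "\n";
  }
};

struct Context {
  virtual ~Context() = default;
  virtual void finish(int r) = 0;
  void complete(int r) { finish(r); delete this; }   // one-shot completion
};

struct RetryMessage : Context {
  Dispatcher *disp;
  std::shared_ptr<Message> msg;
  RetryMessage(Dispatcher *d, std::shared_ptr<Message> m)
    : disp(d), msg(std::move(m)) {}
  void finish(int) override { disp->dispatch(msg); } // hand the message back
};

int main() {
  Dispatcher d;
  Context *c = new RetryMessage(&d, std::make_shared<Message>(Message{"MInodeFileCaps"}));
  // Later, when the rank reaches the awaited state, the queued context fires:
  c->complete(0);   // prints "re-dispatching MInodeFileCaps"
}
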
src/mds/MDCache.cc
index 9ec17384adbe062879757afc65e66877fbc435de..124bf6d3f4b3bbc95f83c3b8eeb1d6ac55bb83a4 100644 (file)
@@ -4729,7 +4729,13 @@ void MDCache::handle_cache_rejoin_strong(MMDSCacheRejoin *strong)
   mds_rank_t from = mds_rank_t(strong->get_source().num());
 
   // only a recovering node will get a strong rejoin.
-  assert(mds->is_rejoin());
+  if (!mds->is_rejoin()) {
+    if (mds->get_want_state() == MDSMap::STATE_REJOIN) {
+      mds->wait_for_rejoin(new C_MDS_RetryMessage(mds, strong));
+      return;
+    }
+    ceph_assert(!"got unexpected rejoin message during recovery");
+  }
 
   // assimilate any potentially dirty scatterlock state
   for (map<inodeno_t,MMDSCacheRejoin::lock_bls>::iterator p = strong->inode_scatterlocks.begin();
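
Note the guard differs slightly from the Locker.cc hunk above: here the rank defers
only when want_state is exactly STATE_REJOIN and queues on the new waiting_for_rejoin
list, presumably because a strong rejoin message is only expected while the rank is
entering rejoin itself, whereas inode file caps messages remain valid in clientreplay,
active and stopping, so that handler accepts any want_state at or beyond
STATE_CLIENTREPLAY.
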
src/mds/MDSRank.cc
index 9ef53b14f5002869f33ddcd5f1548b30c6eed792..00e82041a8622c03837314ddb4e015eb934e93f6 100644 (file)
@@ -1881,6 +1881,7 @@ void MDSRank::rejoin_start()
 {
   dout(1) << "rejoin_start" << dendl;
   mdcache->rejoin_start(new C_MDS_VoidFn(this, &MDSRank::rejoin_done));
+  finish_contexts(g_ceph_context, waiting_for_rejoin);
 }
 void MDSRank::rejoin_done()
 {
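
The finish_contexts() call completes every context queued on waiting_for_rejoin now
that the rank has entered rejoin, which re-dispatches any messages deferred by the
new guards. A rough, simplified stand-in for what that helper does (the real one
lives in Ceph's Context.h and, as shown above, also takes a CephContext* for logging):

// Sketch only: simplified drain-then-complete idiom, not the real helper.
#include <list>

struct Context {
  virtual ~Context() = default;
  virtual void finish(int r) = 0;
  void complete(int r) { finish(r); delete this; }
};

inline void finish_contexts_sim(std::list<Context*> &waiters, int result = 0) {
  std::list<Context*> ls;
  ls.swap(waiters);              // detach first; the member list is empty again
  for (Context *c : ls)
    c->complete(result);         // complete() deletes each context
}
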
src/mds/MDSRank.h
index 93fd933b23c1890448367607163023ae6720667b..3837536ddfafd3607e3f6663fd19f9d4d00f22bc 100644 (file)
@@ -273,7 +273,8 @@ class MDSRank {
 
     ceph_tid_t last_tid;    // for mds-initiated requests (e.g. stray rename)
 
-    list<MDSInternalContextBase*> waiting_for_active, waiting_for_replay, waiting_for_reconnect, waiting_for_resolve;
+    list<MDSInternalContextBase*> waiting_for_active, waiting_for_replay, waiting_for_rejoin,
+                                 waiting_for_reconnect, waiting_for_resolve;
     list<MDSInternalContextBase*> waiting_for_any_client_connection;
     list<MDSInternalContextBase*> replay_queue;
     map<mds_rank_t, list<MDSInternalContextBase*> > waiting_for_active_peer;
@@ -413,6 +414,9 @@ class MDSRank {
     void wait_for_replay(MDSInternalContextBase *c) { 
       waiting_for_replay.push_back(c); 
     }
+    void wait_for_rejoin(MDSInternalContextBase *c) {
+      waiting_for_rejoin.push_back(c);
+    }
     void wait_for_reconnect(MDSInternalContextBase *c) {
       waiting_for_reconnect.push_back(c);
     }
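
The MDSRank.h hunks add the new waiting_for_rejoin queue and a wait_for_rejoin()
accessor alongside the existing wait_for_replay()/wait_for_reconnect() helpers; the
queue is drained by the finish_contexts() call added to rejoin_start() above, which
is what finally retries any deferred rejoin messages.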