mds: check for session import race
author     Patrick Donnelly <pdonnell@redhat.com>
           Mon, 14 May 2018 02:58:17 +0000 (19:58 -0700)
committer  Patrick Donnelly <pdonnell@redhat.com>
           Mon, 14 May 2018 20:31:22 +0000 (13:31 -0700)
Credit to Yan Zheng for identifying the race condition [1].

[1] https://bugzilla.redhat.com/show_bug.cgi?id=1572555#c7

Test-for: http://tracker.ceph.com/issues/24072

Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
(cherry picked from commit fbc25b44865f6c13c9a9c4710049f9e37169747b)

qa/cephfs/clusters/3-mds.yaml
qa/cephfs/clusters/9-mds.yaml
qa/tasks/cephfs/test_exports.py
src/common/options.cc
src/mds/MDSDaemon.cc
src/mds/MDSRank.h
src/mds/Migrator.cc
src/mds/Migrator.h
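
As a reading aid for the hunks that follow: the change wires up a developer-only injection point for reproducing the race. A new boolean option, mds_inject_migrator_session_race, makes the importing rank park an incoming MSG_MDS_EXPORTDIR message on a waiter list instead of handling it, and a newly authorized client connection kicks that list so the parked message is re-dispatched (via C_MDS_RetryMessage). The standalone C++ sketch below models that control flow with simplified, hypothetical names; none of these types come from the Ceph tree.

// Hypothetical, self-contained model of the injection gate added by this
// commit. The real pieces are Migrator::dispatch(),
// MDSRank::wait_for_any_client_connection() /
// MDSRank::kick_waiters_for_any_client_connection(), and
// MDSDaemon::ms_verify_authorizer().
#include <functional>
#include <iostream>
#include <list>
#include <string>

struct Rank {
  // stands in for Migrator::inject_session_race (driven by the config option)
  bool inject_session_race = false;
  // stands in for MDSRank::waiting_for_any_client_connection
  std::list<std::function<void()>> waiting_for_any_client_connection;

  // Like Migrator::dispatch() for MSG_MDS_EXPORTDIR: handle the export now,
  // or defer it until some client connection shows up.
  void dispatch_export_dir(const std::string &msg) {
    if (inject_session_race) {
      std::cout << "deferring " << msg << " until a client connects\n";
      waiting_for_any_client_connection.push_back(
          [this, msg] { dispatch_export_dir(msg); });  // retry later
      return;
    }
    std::cout << "handling " << msg << "\n";
  }

  // Like MDSDaemon::ms_verify_authorizer(): a brand-new client session kicks
  // everything parked on the waiter list.
  void on_new_client_connection() {
    // Simplification: in the test the option is switched back off (via the
    // admin socket) before the new client connection is triggered.
    inject_session_race = false;
    for (auto &w : waiting_for_any_client_connection)
      w();
    waiting_for_any_client_connection.clear();
  }
};

int main() {
  Rank rank1;
  rank1.inject_session_race = true;  // config set mds_inject_migrator_session_race true
  rank1.dispatch_export_dir("MExportDir(/b)");  // parked, not handled yet
  rank1.on_new_client_connection();             // parked message is replayed
  return 0;
}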

qa/cephfs/clusters/3-mds.yaml
index 71f39af08954812c3151d356d2920cc7a385b7d9..c0d463a90d6120654dc6710ed03861d082e2c9d6 100644 (file)
@@ -1,7 +1,7 @@
 roles:
 - [mon.a, mon.c, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3]
 - [mon.b, mgr.x, mds.b, mds.c, osd.4, osd.5, osd.6, osd.7]
-- [client.0]
+- [client.0, client.1]
 openstack:
 - volumes: # attached to each instance
     count: 4
qa/cephfs/clusters/9-mds.yaml
index 86be381ee6e1f32d85abee57687695bf29a0f4b9..0bf240272bc9942c087c2281480e8a433c91b316 100644 (file)
@@ -1,7 +1,7 @@
 roles:
 - [mon.a, mon.c, mgr.y, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2, osd.3]
 - [mon.b, mgr.x, mds.e, mds.f, mds.g, mds.h, mds.i, osd.4, osd.5, osd.6, osd.7]
-- [client.0]
+- [client.0, client.1]
 openstack:
 - volumes: # attached to each instance
     count: 4
qa/tasks/cephfs/test_exports.py
index 913999db7733b7a835bc0e89b25883e66840d082..692403d3ac213ee609e1ffe712c8ef6194b5562f 100644 (file)
@@ -7,6 +7,7 @@ log = logging.getLogger(__name__)
 
 class TestExports(CephFSTestCase):
     MDSS_REQUIRED = 2
+    CLIENTS_REQUIRED = 2
 
     def _wait_subtrees(self, status, rank, test):
         timeout = 30
@@ -105,3 +106,41 @@ class TestExports(CephFSTestCase):
         self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/aa/bb', 0)])
         self.mount_a.run_shell(["mv", "aa", "a/b/"])
         self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/a/b/aa/bb', 0)])
+
+    def test_session_race(self):
+        """
+        Test session creation race.
+
+        See: https://tracker.ceph.com/issues/24072#change-113056
+        """
+
+        self.fs.set_max_mds(2)
+        status = self.fs.wait_for_daemons()
+
+        rank1 = self.fs.get_rank(rank=1, status=status)
+        name1 = 'mds.'+rank1['name']
+
+        # Create a directory that is pre-exported to rank 1
+        self.mount_a.run_shell(["mkdir", "-p", "a/aa"])
+        self.mount_a.setfattr("a", "ceph.dir.pin", "1")
+        self._wait_subtrees(status, 1, [('/a', 1)])
+
+        # Now set the mds config to allow the race
+        self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1)
+
+        # Now create another directory and try to export it
+        self.mount_b.run_shell(["mkdir", "-p", "b/bb"])
+        self.mount_b.setfattr("b", "ceph.dir.pin", "1")
+
+        time.sleep(5)
+
+        # Now turn off the race so that it doesn't wait again
+        self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1)
+
+        # Now try to create a session with rank 1 by accessing a dir known to
+        # be there; if the bug is present, this should cause rank 1 to crash:
+        self.mount_b.run_shell(["ls", "a"])
+
+        # Check whether rank 1 changed (i.e. a standby took over)
+        new_rank1 = self.fs.get_rank(rank=1)
+        self.assertEqual(rank1['gid'], new_rank1['gid'])
src/common/options.cc
index e7f5eac92bff2ce9995c35324efd866edc0319c8..f9c9f061c5b2d76dc2342a12fa8f580726af5c69 100644 (file)
@@ -6940,6 +6940,9 @@ std::vector<Option> get_mds_options() {
     Option("mds_hack_allow_loading_invalid_metadata", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
      .set_default(0)
     .set_description("INTENTIONALLY CAUSE DATA LOSS by bypassing checks for invalid metadata on disk. Allows testing repair tools."),
+
+    Option("mds_inject_migrator_session_race", Option::TYPE_BOOL, Option::LEVEL_DEV)
+     .set_default(false),
   });
 }
 
src/mds/MDSDaemon.cc
index 7c752919a2b7ab70eae568c76f54c3ac8db8d082..f5c024a1089be64fc401079a2c22d5d41f154e65 100644 (file)
@@ -373,6 +373,7 @@ const char** MDSDaemon::get_tracked_conf_keys() const
     "mds_max_purge_ops",
     "mds_max_purge_ops_per_pg",
     "mds_max_purge_files",
+    "mds_inject_migrator_session_race",
     "clog_to_graylog",
     "clog_to_graylog_host",
     "clog_to_graylog_port",
@@ -1357,6 +1358,9 @@ bool MDSDaemon::ms_verify_authorizer(Connection *con, int peer_type,
       dout(10) << " new session " << s << " for " << s->info.inst << " con " << con << dendl;
       con->set_priv(s);
       s->connection = con;
+      if (mds_rank) {
+        mds_rank->kick_waiters_for_any_client_connection();
+      }
     } else {
       dout(10) << " existing session " << s << " for " << s->info.inst << " existing con " << s->connection
               << ", new/authorizing con " << con << dendl;
src/mds/MDSRank.h
index 1f7331a633495d4b80b266e6369d2c101010420d..60a624bebf86e9867b7f3df60d636a9e3ac7272d 100644 (file)
@@ -224,6 +224,7 @@ class MDSRank {
     void handle_conf_change(const struct md_config_t *conf,
                             const std::set <std::string> &changed)
     {
+      mdcache->migrator->handle_conf_change(conf, changed, *mdsmap);
       purge_queue.handle_conf_change(conf, changed, *mdsmap);
     }
 
@@ -267,6 +268,7 @@ class MDSRank {
     ceph_tid_t last_tid;    // for mds-initiated requests (e.g. stray rename)
 
     list<MDSInternalContextBase*> waiting_for_active, waiting_for_replay, waiting_for_reconnect, waiting_for_resolve;
+    list<MDSInternalContextBase*> waiting_for_any_client_connection;
     list<MDSInternalContextBase*> replay_queue;
     map<mds_rank_t, list<MDSInternalContextBase*> > waiting_for_active_peer;
     map<epoch_t, list<MDSInternalContextBase*> > waiting_for_mdsmap;
@@ -381,6 +383,12 @@ class MDSRank {
       waiting_for_active_peer[MDS_RANK_NONE].push_back(c);
     }
 
+    void wait_for_any_client_connection(MDSInternalContextBase *c) {
+      waiting_for_any_client_connection.push_back(c);
+    }
+    void kick_waiters_for_any_client_connection(void) {
+      finish_contexts(g_ceph_context, waiting_for_any_client_connection);
+    }
     void wait_for_active(MDSInternalContextBase *c) {
       waiting_for_active.push_back(c);
     }
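
The wait_for_any_client_connection() / kick_waiters_for_any_client_connection() pair above follows the same waiter-list shape as the existing waiting_for_active and friends: callers park a context on the list, and the kick hands the whole list to finish_contexts(), which completes and clears it. A generic, self-contained version of that drain idiom, using a hypothetical WaiterList and plain std::function instead of MDSInternalContextBase, looks roughly like this:

// Detach the pending callbacks before running them, so a callback that
// queues a new waiter lands on the (now empty) member list rather than
// growing the list currently being iterated.
#include <functional>
#include <list>
#include <utility>

class WaiterList {
  std::list<std::function<void()>> waiting;
public:
  void wait(std::function<void()> c) { waiting.push_back(std::move(c)); }
  void kick() {
    std::list<std::function<void()>> to_run;
    to_run.swap(waiting);
    for (auto &c : to_run)
      c();  // safe even if the callback calls wait() again
  }
};
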
src/mds/Migrator.cc
index 2cf9cff9d1c12614b55e8912936f2e702864d312..3c6019d972e7b1b596d980e464e05a5a7128bc35 100644 (file)
@@ -27,6 +27,7 @@
 #include "Mutation.h"
 
 #include "include/filepath.h"
+#include "common/likely.h"
 
 #include "events/EExport.h"
 #include "events/EImportStart.h"
@@ -118,7 +119,12 @@ void Migrator::dispatch(Message *m)
     handle_export_prep(static_cast<MExportDirPrep*>(m));
     break;
   case MSG_MDS_EXPORTDIR:
-    handle_export_dir(static_cast<MExportDir*>(m));
+    if (unlikely(inject_session_race)) {
+      dout(0) << "waiting for inject_session_race" << dendl;
+      mds->wait_for_any_client_connection(new C_MDS_RetryMessage(mds, m));
+    } else {
+      handle_export_dir(static_cast<MExportDir*>(m));
+    }
     break;
   case MSG_MDS_EXPORTDIRFINISH:
     handle_export_finish(static_cast<MExportDirFinish*>(m));
@@ -3387,3 +3393,13 @@ void Migrator::logged_import_caps(CInode *in,
   mds->locker->eval(in, CEPH_CAP_LOCKS, true);
   in->auth_unpin(this);
 }
+
+void Migrator::handle_conf_change(const struct md_config_t *conf,
+                                  const std::set <std::string> &changed,
+                                  const MDSMap &mds_map)
+{
+  if (changed.count("mds_inject_migrator_session_race")) {
+    inject_session_race = conf->get_val<bool>("mds_inject_migrator_session_race");
+    dout(0) << "mds_inject_migrator_session_race is " << inject_session_race << dendl;
+  }
+}
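
Migrator::handle_conf_change() above is a standard config observer: it only reacts when the changed-key set mentions the option it caches, then re-reads the value. A stripped-down, self-contained version of that pattern, with a hypothetical FakeConf standing in for the real md_config_t interface, might look like:

#include <iostream>
#include <map>
#include <set>
#include <string>

// Hypothetical stand-in for the config store; the real code calls
// conf->get_val<bool>(...) on md_config_t instead.
struct FakeConf {
  std::map<std::string, bool> values;
  bool get_bool(const std::string &key) const {
    auto it = values.find(key);
    return it != values.end() && it->second;
  }
};

struct Observer {
  bool inject_session_race = false;

  // Only touch the cached flag when this particular key actually changed.
  void handle_conf_change(const FakeConf &conf,
                          const std::set<std::string> &changed) {
    if (changed.count("mds_inject_migrator_session_race")) {
      inject_session_race = conf.get_bool("mds_inject_migrator_session_race");
      std::cout << "mds_inject_migrator_session_race is "
                << std::boolalpha << inject_session_race << "\n";
    }
  }
};

int main() {
  FakeConf conf{{{"mds_inject_migrator_session_race", true}}};
  Observer migrator;
  migrator.handle_conf_change(conf, {"mds_inject_migrator_session_race"});
  return 0;
}

For the cached value to track the live config, the rest of the commit has to cooperate: the key is added to MDSDaemon::get_tracked_conf_keys() so the daemon is notified when it changes, MDSRank::handle_conf_change() forwards the notification down to the Migrator, and the Migrator constructor reads the initial value directly so a setting made before startup is still honoured.
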
src/mds/Migrator.h
index 6e689d7bfe62fde13b6b6388716162ff2ee97d07..148b2fb4fd2c01422109e9f36455929c5269a291 100644 (file)
@@ -102,9 +102,13 @@ public:
   }
 
   // -- cons --
-  Migrator(MDSRank *m, MDCache *c) : mds(m), cache(c) {}
-
+  Migrator(MDSRank *m, MDCache *c) : mds(m), cache(c) {
+    inject_session_race = g_conf->get_val<bool>("mds_inject_migrator_session_race");
+  }
 
+  void handle_conf_change(const struct md_config_t *conf,
+                          const std::set <std::string> &changed,
+                          const MDSMap &mds_map);
 
 protected:
   // export fun
@@ -347,6 +351,7 @@ public:
 private:
   MDSRank *mds;
   MDCache *cache;
+  bool inject_session_race = false;
 };
 
 #endif