From 9f2c66d850fbe48ade0022646b328318f79dbf4f Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Sun, 13 May 2018 19:58:17 -0700
Subject: [PATCH] mds: check for session import race

Credit to Yan Zheng for identifying the race condition [1].

[1] https://bugzilla.redhat.com/show_bug.cgi?id=1572555#c7

Test-for: http://tracker.ceph.com/issues/24072

Signed-off-by: Patrick Donnelly
(cherry picked from commit fbc25b44865f6c13c9a9c4710049f9e37169747b)
---
 qa/cephfs/clusters/3-mds.yaml   |  2 +-
 qa/cephfs/clusters/9-mds.yaml   |  2 +-
 qa/tasks/cephfs/test_exports.py | 39 +++++++++++++++++++++++++++++++++
 src/common/options.cc           |  3 +++
 src/mds/MDSDaemon.cc            |  4 ++++
 src/mds/MDSRank.h               |  8 +++++++
 src/mds/Migrator.cc             | 18 ++++++++++++++-
 src/mds/Migrator.h              |  9 ++++++--
 8 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/qa/cephfs/clusters/3-mds.yaml b/qa/cephfs/clusters/3-mds.yaml
index 71f39af089548..c0d463a90d612 100644
--- a/qa/cephfs/clusters/3-mds.yaml
+++ b/qa/cephfs/clusters/3-mds.yaml
@@ -1,7 +1,7 @@
 roles:
 - [mon.a, mon.c, mgr.y, mds.a, osd.0, osd.1, osd.2, osd.3]
 - [mon.b, mgr.x, mds.b, mds.c, osd.4, osd.5, osd.6, osd.7]
-- [client.0]
+- [client.0, client.1]
 openstack:
 - volumes: # attached to each instance
     count: 4
diff --git a/qa/cephfs/clusters/9-mds.yaml b/qa/cephfs/clusters/9-mds.yaml
index 86be381ee6e1f..0bf240272bc99 100644
--- a/qa/cephfs/clusters/9-mds.yaml
+++ b/qa/cephfs/clusters/9-mds.yaml
@@ -1,7 +1,7 @@
 roles:
 - [mon.a, mon.c, mgr.y, mds.a, mds.b, mds.c, mds.d, osd.0, osd.1, osd.2, osd.3]
 - [mon.b, mgr.x, mds.e, mds.f, mds.g, mds.h, mds.i, osd.4, osd.5, osd.6, osd.7]
-- [client.0]
+- [client.0, client.1]
 openstack:
 - volumes: # attached to each instance
     count: 4
diff --git a/qa/tasks/cephfs/test_exports.py b/qa/tasks/cephfs/test_exports.py
index 913999db7733b..692403d3ac213 100644
--- a/qa/tasks/cephfs/test_exports.py
+++ b/qa/tasks/cephfs/test_exports.py
@@ -7,6 +7,7 @@ log = logging.getLogger(__name__)
 
 class TestExports(CephFSTestCase):
     MDSS_REQUIRED = 2
+    CLIENTS_REQUIRED = 2
 
     def _wait_subtrees(self, status, rank, test):
         timeout = 30
@@ -105,3 +106,41 @@ class TestExports(CephFSTestCase):
         self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/aa/bb', 0)])
         self.mount_a.run_shell(["mv", "aa", "a/b/"])
         self._wait_subtrees(status, 0, [('/1', 0), ('/1/4/5', 1), ('/1/2/3', 2), ('/a', 1), ('/a/b/aa/bb', 0)])
+
+    def test_session_race(self):
+        """
+        Test session creation race.
+
+        See: https://tracker.ceph.com/issues/24072#change-113056
+        """
+
+        self.fs.set_max_mds(2)
+        status = self.fs.wait_for_daemons()
+
+        rank1 = self.fs.get_rank(rank=1, status=status)
+        name1 = 'mds.'+rank1['name']
+
+        # Create a directory that is pre-exported to rank 1
+        self.mount_a.run_shell(["mkdir", "-p", "a/aa"])
+        self.mount_a.setfattr("a", "ceph.dir.pin", "1")
+        self._wait_subtrees(status, 1, [('/a', 1)])
+
+        # Now set the mds config to allow the race
+        self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "true"], rank=1)
+
+        # Now create another directory and try to export it
+        self.mount_b.run_shell(["mkdir", "-p", "b/bb"])
+        self.mount_b.setfattr("b", "ceph.dir.pin", "1")
+
+        time.sleep(5)
+
+        # Now turn off the race so that it doesn't wait again
+        self.fs.rank_asok(["config", "set", "mds_inject_migrator_session_race", "false"], rank=1)
+
+        # Now try to create a session with rank 1 by accessing a dir known to
+        # be there; if buggy, this should cause rank 1 to crash:
+        self.mount_b.run_shell(["ls", "a"])
+
+        # Check whether rank 1 changed (did a standby take over?)
+        new_rank1 = self.fs.get_rank(rank=1)
+        self.assertEqual(rank1['gid'], new_rank1['gid'])
diff --git a/src/common/options.cc b/src/common/options.cc
index e7f5eac92bff2..f9c9f061c5b2d 100644
--- a/src/common/options.cc
+++ b/src/common/options.cc
@@ -6940,6 +6940,9 @@ std::vector