From: John Spray <john.spray@redhat.com>
Date: Thu, 26 May 2016 13:23:32 +0000 (+0100)
Subject: tasks/cephfs: reproducer for #16022
X-Git-Tag: v11.1.1~58^2^2~177^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=b75e2a7da12e11f230d1842b16d26fa3c764b89a;p=ceph.git

tasks/cephfs: reproducer for #16022

Even though check_subs was broken, the multiclient
test was still passing because it didn't test that
clients survived a failover in the multi-filesystem
case.

Fixes: http://tracker.ceph.com/issues/16022
Signed-off-by: John Spray <john.spray@redhat.com>
---

diff --git a/tasks/cephfs/test_failover.py b/tasks/cephfs/test_failover.py
index f26fae8b2d2b..5d45378438ec 100644
--- a/tasks/cephfs/test_failover.py
+++ b/tasks/cephfs/test_failover.py
@@ -250,7 +250,6 @@ class TestMultiFilesystems(CephFSTestCase):
         self.mount_a.write_n_mb("test.bin", 2)
         a_created_ino = self.mount_a.path_to_ino("test.bin")
         self.mount_a.create_files()
-        self.mount_a.umount_wait()
 
         # Mount a client on fs_b
         fs_b.set_ceph_conf(
@@ -261,6 +260,26 @@ class TestMultiFilesystems(CephFSTestCase):
         self.mount_b.write_n_mb("test.bin", 1)
         b_created_ino = self.mount_b.path_to_ino("test.bin")
         self.mount_b.create_files()
+
+        # Check that a non-default filesystem mount survives an MDS
+        # failover (i.e. that the map subscription is continuous, not
+        # just made the first time).  This reproduces #16022.
+        old_fs_b_mds = fs_b.get_active_names()[0]
+        self.mds_cluster.mds_stop(old_fs_b_mds)
+        self.mds_cluster.mds_fail(old_fs_b_mds)
+        fs_b.wait_for_daemons()
+        background = self.mount_b.write_background()
+        # Raise an exception if the write doesn't finish (i.e. if the
+        # client has not kept up with the MDS failure)
+        try:
+            self.wait_until_true(lambda: background.finished, timeout=30)
+        except RuntimeError:
+            # The mount is stuck, we'll have to force it to fail cleanly
+            background.stdin.close()
+            self.mount_b.umount_wait(force=True)
+            raise
+
+        self.mount_a.umount_wait()
         self.mount_b.umount_wait()
 
         # See that the client's files went into the correct pool