From: Michael Sevilla <mikesevilla3@gmail.com>
Date: Fri, 26 Aug 2016 04:55:20 +0000 (-0700)
Subject: mds: add tests for mantle (programmable balancer)
X-Git-Tag: v11.1.1~58^2^2~66^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=de749df5618ad7afd43b8f36a34da6240a5dc9a7;p=ceph.git

mds: add tests for mantle (programmable balancer)
- rebased to add block rados pull test

Signed-off-by: Michael Sevilla <mikesevilla3@gmail.com>
---

diff --git a/tasks/cephfs/cephfs_test_case.py b/tasks/cephfs/cephfs_test_case.py
index 55529528ac0f..b9228e269ad4 100644
--- a/tasks/cephfs/cephfs_test_case.py
+++ b/tasks/cephfs/cephfs_test_case.py
@@ -348,7 +348,7 @@ class CephFSTestCase(unittest.TestCase):
         else:
             raise AssertionError("MDS daemon '{0}' did not crash as expected".format(daemon_id))
 
-    def assert_cluster_log(self, expected_pattern, invert_match=False):
+    def assert_cluster_log(self, expected_pattern, invert_match=False, timeout=10):
         """
         Context manager.  Assert that during execution, or up to 5 seconds later,
         the Ceph cluster log emits a message matching the expected pattern.
@@ -378,7 +378,7 @@ class CephFSTestCase(unittest.TestCase):
                         log.debug("No log hits yet, waiting...")
                         # Default monc tick interval is 10s, so wait that long and
                         # then some grace
-                        time.sleep(15)
+                        time.sleep(5 + timeout)
 
                 self.watcher_process.stdin.close()
                 try:
diff --git a/tasks/cephfs/test_mantle.py b/tasks/cephfs/test_mantle.py
new file mode 100644
index 000000000000..723af47b118d
--- /dev/null
+++ b/tasks/cephfs/test_mantle.py
@@ -0,0 +1,111 @@
+from tasks.cephfs.cephfs_test_case import CephFSTestCase
+import json
+import logging
+
+log = logging.getLogger(__name__)
+failure = "using old balancer; mantle failed for balancer="
+success = "mantle balancer version changed: "
+
+class TestMantle(CephFSTestCase):
+    def start_mantle(self):
+        self.wait_for_health_clear(timeout=30)
+        self.fs.mon_manager.raw_cluster_cmd_result('mds', 'set', "allow_multimds",
+                                                   "true", "--yes-i-really-mean-it")
+        self.fs.mon_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "2")
+        self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30,
+                              reject_fn=lambda v: v > 2 or v < 1)
+
+        for m in self.fs.get_active_names():
+            self.fs.mds_asok(['config', 'set', 'debug_objecter', '20'], mds_id=m)
+            self.fs.mds_asok(['config', 'set', 'debug_ms', '0'], mds_id=m)
+            self.fs.mds_asok(['config', 'set', 'debug_mds', '0'], mds_id=m)
+            self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m)
+
+    def push_balancer(self, obj, lua_code, expect):
+        self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj)
+        self.fs.rados(["put", obj, "-"], stdin_data=lua_code)
+        with self.assert_cluster_log(failure + obj + " " + expect):
+            log.info("run a " + obj + " balancer that expects=" + expect)
+
+    def test_version_empty(self):
+        self.start_mantle()
+        expect = " : (2) No such file or directory"
+
+        ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer')
+        assert(ret == 22) # EINVAL
+
+        self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ")
+        with self.assert_cluster_log(failure + " " + expect): pass
+
+    def test_version_not_in_rados(self):
+        self.start_mantle()
+        expect = failure + "ghost.lua : (2) No such file or directory"
+        self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua")
+        with self.assert_cluster_log(expect): pass
+
+    def test_balancer_invalid(self):
+        self.start_mantle()
+        expect = ": (22) Invalid argument"
+
+        lua_code = "this is invalid lua code!"
+        self.push_balancer("invalid.lua", lua_code, expect)
+
+        lua_code = "BAL_LOG()"
+        self.push_balancer("invalid_log.lua", lua_code, expect)
+
+        lua_code = "BAL_LOG(0)"
+        self.push_balancer("invalid_log_again.lua", lua_code, expect)
+
+    def test_balancer_valid(self):
+        self.start_mantle()
+        lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}"
+        self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua")
+        self.fs.rados(["put", "valid.lua", "-"], stdin_data=lua_code)
+        with self.assert_cluster_log(success + "valid.lua"):
+            log.info("run a valid.lua balancer")
+
+    def test_return_invalid(self):
+        self.start_mantle()
+        expect = ": (22) Invalid argument"
+
+        lua_code = "return \"hello\""
+        self.push_balancer("string.lua", lua_code, expect)
+
+        lua_code = "return 3"
+        self.push_balancer("number.lua", lua_code, expect)
+
+        lua_code = "return {}"
+        self.push_balancer("dict_empty.lua", lua_code, expect)
+
+        lua_code = "return {\"this\", \"is\", \"a\", \"test\"}"
+        self.push_balancer("dict_of_strings.lua", lua_code, expect)
+
+        lua_code = "return {3, \"test\"}"
+        self.push_balancer("dict_of_mixed.lua", lua_code, expect)
+
+        lua_code = "return {3}"
+        self.push_balancer("not_enough_numbers.lua", lua_code, expect)
+
+        lua_code = "return {3, 4, 5, 6, 7, 8, 9}"
+        self.push_balancer("too_many_numbers.lua", lua_code, expect)
+
+    def test_dead_osd(self):
+        self.start_mantle()
+        expect = " : (110) Connection timed out"
+
+        # kill the OSDs so that the balancer pull from RADOS times out
+        osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty'))
+        for i in range(0, len(osd_map['osds'])):
+          self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i))
+          self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i))
+
+        # trigger a pull from RADOS
+        self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua")
+
+        # make the timeout a little longer since dead OSDs spam ceph -w
+        with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30):
+            log.info("run a balancer that should timeout")
+
+        # cleanup
+        for i in range(0, len(osd_map['osds'])):
+          self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i))