From: Michael Sevilla Date: Fri, 26 Aug 2016 04:55:20 +0000 (-0700) Subject: mds: add tests for mantle (programmable balancer) X-Git-Tag: v11.1.1~58^2^2~66^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=de749df5618ad7afd43b8f36a34da6240a5dc9a7;p=ceph.git mds: add tests for mantle (programmable balancer) - rebased to add block rados pull test Signed-off-by: Michael Sevilla --- diff --git a/tasks/cephfs/cephfs_test_case.py b/tasks/cephfs/cephfs_test_case.py index 55529528ac0f..b9228e269ad4 100644 --- a/tasks/cephfs/cephfs_test_case.py +++ b/tasks/cephfs/cephfs_test_case.py @@ -348,7 +348,7 @@ class CephFSTestCase(unittest.TestCase): else: raise AssertionError("MDS daemon '{0}' did not crash as expected".format(daemon_id)) - def assert_cluster_log(self, expected_pattern, invert_match=False): + def assert_cluster_log(self, expected_pattern, invert_match=False, timeout=10): """ Context manager. Assert that during execution, or up to 5 seconds later, the Ceph cluster log emits a message matching the expected pattern. @@ -378,7 +378,7 @@ class CephFSTestCase(unittest.TestCase): log.debug("No log hits yet, waiting...") # Default monc tick interval is 10s, so wait that long and # then some grace - time.sleep(15) + time.sleep(5 + timeout) self.watcher_process.stdin.close() try: diff --git a/tasks/cephfs/test_mantle.py b/tasks/cephfs/test_mantle.py new file mode 100644 index 000000000000..723af47b118d --- /dev/null +++ b/tasks/cephfs/test_mantle.py @@ -0,0 +1,111 @@ +from tasks.cephfs.cephfs_test_case import CephFSTestCase +import json +import logging + +log = logging.getLogger(__name__) +failure = "using old balancer; mantle failed for balancer=" +success = "mantle balancer version changed: " + +class TestMantle(CephFSTestCase): + def start_mantle(self): + self.wait_for_health_clear(timeout=30) + self.fs.mon_manager.raw_cluster_cmd_result('mds', 'set', "allow_multimds", + "true", "--yes-i-really-mean-it") + self.fs.mon_manager.raw_cluster_cmd_result('mds', 'set', "max_mds", "2") + self.wait_until_equal(lambda: len(self.fs.get_active_names()), 2, 30, + reject_fn=lambda v: v > 2 or v < 1) + + for m in self.fs.get_active_names(): + self.fs.mds_asok(['config', 'set', 'debug_objecter', '20'], mds_id=m) + self.fs.mds_asok(['config', 'set', 'debug_ms', '0'], mds_id=m) + self.fs.mds_asok(['config', 'set', 'debug_mds', '0'], mds_id=m) + self.fs.mds_asok(['config', 'set', 'debug_mds_balancer', '5'], mds_id=m) + + def push_balancer(self, obj, lua_code, expect): + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', obj) + self.fs.rados(["put", obj, "-"], stdin_data=lua_code) + with self.assert_cluster_log(failure + obj + " " + expect): + log.info("run a " + obj + " balancer that expects=" + expect) + + def test_version_empty(self): + self.start_mantle() + expect = " : (2) No such file or directory" + + ret = self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer') + assert(ret == 22) # EINVAL + + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', " ") + with self.assert_cluster_log(failure + " " + expect): pass + + def test_version_not_in_rados(self): + self.start_mantle() + expect = failure + "ghost.lua : (2) No such file or directory" + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "ghost.lua") + with self.assert_cluster_log(expect): pass + + def test_balancer_invalid(self): + self.start_mantle() + expect = ": (22) Invalid argument" + + lua_code = "this is invalid lua code!" + self.push_balancer("invalid.lua", lua_code, expect) + + lua_code = "BAL_LOG()" + self.push_balancer("invalid_log.lua", lua_code, expect) + + lua_code = "BAL_LOG(0)" + self.push_balancer("invalid_log_again.lua", lua_code, expect) + + def test_balancer_valid(self): + self.start_mantle() + lua_code = "BAL_LOG(0, \"test\")\nreturn {3, 4}" + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + self.fs.rados(["put", "valid.lua", "-"], stdin_data=lua_code) + with self.assert_cluster_log(success + "valid.lua"): + log.info("run a valid.lua balancer") + + def test_return_invalid(self): + self.start_mantle() + expect = ": (22) Invalid argument" + + lua_code = "return \"hello\"" + self.push_balancer("string.lua", lua_code, expect) + + lua_code = "return 3" + self.push_balancer("number.lua", lua_code, expect) + + lua_code = "return {}" + self.push_balancer("dict_empty.lua", lua_code, expect) + + lua_code = "return {\"this\", \"is\", \"a\", \"test\"}" + self.push_balancer("dict_of_strings.lua", lua_code, expect) + + lua_code = "return {3, \"test\"}" + self.push_balancer("dict_of_mixed.lua", lua_code, expect) + + lua_code = "return {3}" + self.push_balancer("not_enough_numbers.lua", lua_code, expect) + + lua_code = "return {3, 4, 5, 6, 7, 8, 9}" + self.push_balancer("too_many_numbers.lua", lua_code, expect) + + def test_dead_osd(self): + self.start_mantle() + expect = " : (110) Connection timed out" + + # kill the OSDs so that the balancer pull from RADOS times out + osd_map = json.loads(self.fs.mon_manager.raw_cluster_cmd('osd', 'dump', '--format=json-pretty')) + for i in range(0, len(osd_map['osds'])): + self.fs.mon_manager.raw_cluster_cmd_result('osd', 'down', str(i)) + self.fs.mon_manager.raw_cluster_cmd_result('osd', 'out', str(i)) + + # trigger a pull from RADOS + self.fs.mon_manager.raw_cluster_cmd_result('fs', 'set', self.fs.name, 'balancer', "valid.lua") + + # make the timeout a little longer since dead OSDs spam ceph -w + with self.assert_cluster_log(failure + "valid.lua" + expect, timeout=30): + log.info("run a balancer that should timeout") + + # cleanup + for i in range(0, len(osd_map['osds'])): + self.fs.mon_manager.raw_cluster_cmd_result('osd', 'in', str(i))