From 6c26c073dec90452ea09c4f8268047aa386afdc2 Mon Sep 17 00:00:00 2001
From: Greg Farnum
Date: Mon, 13 Oct 2014 12:38:31 -0700
Subject: [PATCH] mds_scrub_checks: Run scrub and flush commands against the MDS.

Most of the commands we issue are expected to succeed, but we also
corrupt the backing RADOS objects with the rados tool and verify that
the expected error codes come back.

Includes a yaml fragment so the task gets run as part of the fs/basic
suite.

Signed-off-by: Greg Farnum
---
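These checks drive the flush_path/scrub_path MDS admin socket commands,
so they exercise roughly what you would get by hand with the ceph CLI.
The session below is only a sketch; the exact JSON shape is an
assumption inferred from the validators in the task, which inspect
nothing beyond the "return_code" and "performed_validation" fields:

    $ ceph daemon mds.a flush_path /scrub/test/path/ceph-qa-suite/suites
    {"return_code": 0}
    $ ceph daemon mds.a scrub_path /scrub/test/path/i/dont/exist
    {"return_code": -2}
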
 suites/fs/basic/tasks/cephfs_scrub_tests.yaml |  21 ++
 tasks/mds_scrub_checks.py                     | 234 ++++++++++++++++++
 2 files changed, 255 insertions(+)
 create mode 100644 suites/fs/basic/tasks/cephfs_scrub_tests.yaml
 create mode 100644 tasks/mds_scrub_checks.py

diff --git a/suites/fs/basic/tasks/cephfs_scrub_tests.yaml b/suites/fs/basic/tasks/cephfs_scrub_tests.yaml
new file mode 100644
index 0000000000000..ebb85fc4c92b6
--- /dev/null
+++ b/suites/fs/basic/tasks/cephfs_scrub_tests.yaml
@@ -0,0 +1,21 @@
+overrides:
+  ceph:
+    conf:
+      mds:
+        mds log max segments: 1
+        mds cache max size: 1000
+tasks:
+- ceph-fuse:
+- mds_scrub_checks:
+    mds_id: a
+    path: /scrub/test/path
+    client: 0
+    run_seq: 0
+- workunit:
+    clients:
+      client.0: [suites/pjd.sh]
+- mds_scrub_checks:
+    mds_id: a
+    path: /scrub/test/path
+    client: 0
+    run_seq: 1
diff --git a/tasks/mds_scrub_checks.py b/tasks/mds_scrub_checks.py
new file mode 100644
index 0000000000000..a47b94dceebb4
--- /dev/null
+++ b/tasks/mds_scrub_checks.py
@@ -0,0 +1,234 @@
+"""
+MDS admin socket scrubbing-related tests.
+"""
+from cStringIO import StringIO
+import json
+import logging
+
+from teuthology.orchestra import run
+from teuthology import misc as teuthology
+
+log = logging.getLogger(__name__)
+
+
+def task(ctx, config):
+    """
+    Run flush and scrub commands on the specified files in the filesystem.
+    This task runs through a sequence of operations, but it is not
+    comprehensive on its own -- it doesn't manipulate the MDS cache state to
+    test on both in- and out-of-memory parts of the hierarchy. It is
+    therefore designed to be run multiple times within a single test run, so
+    that the tasks in between can manipulate memory state.
+
+    Usage:
+    mds_scrub_checks:
+      mds_id: a
+      path: path/to/test/dir
+      client: 0
+      run_seq: [0-9]+
+
+    Increment run_seq on subsequent invocations within a single test run;
+    the task uses it to generate unique directory and file names.
+    """
+
+    mds_id = config.get("mds_id")
+    test_path = config.get("path")
+    run_seq = config.get("run_seq")
+    client_id = config.get("client")
+
+    if None in (mds_id, test_path, run_seq, client_id):
+        raise ValueError("Must specify each of mds_id, path, run_seq, "
+                         "and client in config!")
+
+    teuthdir = teuthology.get_testdir(ctx)
+    client_path = "{teuthdir}/mnt.{id_}/{test_path}".\
+                  format(teuthdir=teuthdir,
+                         id_=client_id,
+                         test_path=test_path)
+
+    log.info("Cloning repo into place (if not present)")
+    repo_path = clone_repo(ctx, client_id, client_path)
+
+    log.info("Initiating mds_scrub_checks on mds.{id_}, "
+             "test_path {path}, run_seq {seq}".format(
+                 id_=mds_id, path=test_path, seq=run_seq))
+
+    def json_validator(jout, rc, element, expected_value):
+        # don't bother inspecting the JSON if the asok command itself failed
+        if rc != 0:
+            return False, "asok command returned error {rc}".format(rc=rc)
+        element_value = jout.get(element)
+        if element_value != expected_value:
+            return False, "unexpectedly got {jv} instead of {ev}!".format(
+                jv=element_value, ev=expected_value)
+        return True, "Succeeded"
+
+    success_validator = lambda j, r: json_validator(j, r, "return_code", 0)
+
+    # flushing or scrubbing a nonexistent path should fail with -ENOENT (-2)
+    nep = "{test_path}/i/dont/exist".format(test_path=test_path)
+    command = "flush_path {nep}".format(nep=nep)
+    asok_command(ctx, mds_id, command,
+                 lambda j, r: json_validator(j, r, "return_code", -2))
+
+    command = "scrub_path {nep}".format(nep=nep)
+    asok_command(ctx, mds_id, command,
+                 lambda j, r: json_validator(j, r, "return_code", -2))
+
+    test_repo_path = "{test_path}/ceph-qa-suite".format(test_path=test_path)
+    dirpath = "{repo_path}/suites".format(repo_path=test_repo_path)
+
+    if run_seq == 0:
+        log.info("First run: flushing {dirpath}".format(dirpath=dirpath))
+        command = "flush_path {dirpath}".format(dirpath=dirpath)
+        asok_command(ctx, mds_id, command, success_validator)
+    command = "scrub_path {dirpath}".format(dirpath=dirpath)
+    asok_command(ctx, mds_id, command, success_validator)
+
+    filepath = "{repo_path}/suites/fs/verify/validater/valgrind.yaml".format(
+        repo_path=test_repo_path)
+    if run_seq == 0:
+        log.info("First run: flushing {filepath}".format(filepath=filepath))
+        command = "flush_path {filepath}".format(filepath=filepath)
+        asok_command(ctx, mds_id, command, success_validator)
+    command = "scrub_path {filepath}".format(filepath=filepath)
+    asok_command(ctx, mds_id, command, success_validator)
+
+    filepath = "{repo_path}/suites/fs/basic/clusters/fixed-3-cephfs.yaml".\
+               format(repo_path=test_repo_path)
+    command = "scrub_path {filepath}".format(filepath=filepath)
+    asok_command(ctx, mds_id, command,
+                 lambda j, r: json_validator(j, r, "performed_validation",
+                                             False))
+
+    if run_seq == 0:
+        log.info("First run: flushing base dir /")
+        command = "flush_path /"
+        asok_command(ctx, mds_id, command, success_validator)
+    command = "scrub_path /"
+    asok_command(ctx, mds_id, command, success_validator)
+
+    client = ctx.manager.find_remote("client", client_id)
+    new_dir = "{repo_path}/new_dir_{i}".format(repo_path=repo_path, i=run_seq)
+    test_new_dir = "{repo_path}/new_dir_{i}".format(repo_path=test_repo_path,
+                                                    i=run_seq)
+    client.run(args=[
+        "mkdir", new_dir])
+    command = "flush_path {dir}".format(dir=test_new_dir)
+    asok_command(ctx, mds_id, command, success_validator)
+
+    new_file = "{repo_path}/new_file_{i}".format(repo_path=repo_path,
+                                                 i=run_seq)
+    test_new_file = "{repo_path}/new_file_{i}".format(repo_path=test_repo_path,
+                                                      i=run_seq)
+    client.run(args=[
+        "echo", "hello", run.Raw('>'), new_file])
+    command = "flush_path {file}".format(file=test_new_file)
+    asok_command(ctx, mds_id, command, success_validator)
+
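+    # Now check that scrub catches on-disk damage. CephFS stores a file's
+    # backtrace in the "parent" xattr of the file's first object in the
+    # data pool, named <inode number in hex>.00000000. Stripping that xattr
+    # should make scrub_path fail with -ENODATA (-61); deleting the object
+    # outright should make it fail with -ENOENT (-2).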
{file}".format(file=test_new_file) + asok_command(ctx, mds_id, command, success_validator) + + # check that scrub fails on errors. First, get ino + client = ctx.manager.find_remote("client", 0) + proc = client.run( + args=[ + "ls", "-li", new_file, run.Raw('|'), + "grep", "-o", run.Raw('"^[0-9]*"') + ], + wait=False, + stdout=StringIO() + ) + proc.wait() + ino = int(proc.stdout.getvalue().strip()) + rados_obj_name = "{ino}.00000000".format(ino=hex(ino).split('x')[1]) + client.run( + args=[ + "rados", "-p", "data", "rmxattr", + rados_obj_name, "parent" + ] + ) + command = "scrub_path {file}".format(file=test_new_file) + asok_command(ctx, mds_id, command, + lambda j, r: json_validator(j, r, "return_code", -61)) + client.run( + args=[ + "rados", "-p", "data", "rm", rados_obj_name + ] + ) + asok_command(ctx, mds_id, command, + lambda j, r: json_validator(j, r, "return_code", -2)) + + command = "flush_path /" + asok_command(ctx, mds_id, command, success_validator) + + +class AsokCommandFailedError(Exception): + """ + Exception thrown when we get an unexpected response + on an admin socket command + """ + def __init__(self, command, rc, json, errstring): + self.command = command + self.rc = rc + self.json = json + self.errstring = errstring + + def __str__(self): + return "Admin socket: {command} failed with rc={rc}," + "json output={json}, because '{es}'".format( + command=self.command, rc=self.rc, + json=self.json, es=self.errstring) + + +def asok_command(ctx, mds_id, command, validator): + log.info("Running command '{command}'".format(command=command)) + + command_list = command.split() + + proc = ctx.manager.admin_socket('mds', mds_id, + command_list, check_status=False) + rout = proc.exitstatus + sout = proc.stdout.getvalue() + + if sout.strip(): + jout = json.loads(sout) + else: + jout = None + + log.info("command '{command}' got response code " + "'{rout}' and stdout '{sout}'".format( + command=command, rout=rout, sout=sout)) + + success, errstring = validator(jout, rout) + + if not success: + raise AsokCommandFailedError(command, rout, jout, errstring) + + return jout + + +def clone_repo(ctx, client_id, path): + repo = "ceph-qa-suite" + repo_path = "{path}/{repo}".format(path=path, repo=repo) + + client = ctx.manager.find_remote("client", client_id) + client.run( + args=[ + "mkdir", "-p", path + ] + ) + client.run( + args=[ + "ls", repo_path, run.Raw('||'), + "git", "clone", '--branch', 'giant', + "http://github.com/ceph/{repo}".format(repo=repo), + "{path}/{repo}".format(path=path, repo=repo) + ] + ) + + return repo_path -- 2.47.3