From 503626a97917a0890e1a94593b5658626adc248f Mon Sep 17 00:00:00 2001
From: Zack Cerza
Date: Thu, 6 Oct 2016 11:45:52 -0600
Subject: [PATCH] suite: Add rerun capability

Using a new --rerun argument for teuthology-suite, optionally paired with
--rerun-statuses, we can now ask teuthology to re-schedule a suite run. The
main use case for the feature is to re-schedule only those jobs from a given
run which failed.

Fixes: #10945 http://tracker.ceph.com/issues/10945

Signed-off-by: Zack Cerza
---
 docs/intro_testers.rst       |  8 ++++++++
 scripts/suite.py             | 19 +++++++++++++++++++
 teuthology/suite/__init__.py | 34 ++++++++++++++++++++++++++++++----
 3 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/docs/intro_testers.rst b/docs/intro_testers.rst
index a54e3e45cf..2438fbb685 100644
--- a/docs/intro_testers.rst
+++ b/docs/intro_testers.rst
@@ -64,3 +64,11 @@ get an email when the test run completes.
 `__ that will display the current status of each job. The Sepia
 lab's pulpito instance is `here
 `__.
+
+There may be times when, after scheduling a run containing a large number of
+jobs, you want to reschedule only those jobs which have failed or died for
+some other reason. For that use case, `teuthology-suite` has a `--rerun`/`-r`
+flag and an optional `--rerun-statuses`/`-R` flag. An example of its usage
+is::
+
+  teuthology-suite -v -m vps -r teuthology-2016-10-06_05:00:03-smoke-master-testing-basic-vps -R pass,running,queued
diff --git a/scripts/suite.py b/scripts/suite.py
index 5a1ff7cc5b..c5a49bc537 100644
--- a/scripts/suite.py
+++ b/scripts/suite.py
@@ -7,6 +7,7 @@ from teuthology.config import config
 doc = """
 usage: teuthology-suite --help
        teuthology-suite [-v | -vv ] --suite <suite> [options] [...]
+       teuthology-suite [-v | -vv ] --rerun <name> [options] [...]
 
 Run a suite of ceph integration tests. A suite is a directory containing
 facets. A facet is a directory containing config snippets. Running a suite
@@ -98,6 +99,24 @@ Scheduler arguments:
                              Useful to avoid bursts that may be too hard on
                              the underlying infrastructure or exceed OpenStack
                              API limits (server creation per minute for
                              instance).
+ -r, --rerun <name>          Attempt to reschedule a run, selecting only those
+                             jobs whose statuses are mentioned by
+                             --rerun-statuses.
+                             Note that this is implemented by scheduling an
+                             entirely new suite and including only jobs whose
+                             descriptions match the selected ones. It does so
+                             using the same logic as --filter.
+                             Of all the flags that were passed when scheduling
+                             the original run, the resulting one will only
+                             inherit the suite value. Any others must be
+                             passed as normal while scheduling with this
+                             feature.
+ -R, --rerun-statuses <statuses>
+                             A comma-separated list of statuses to be used
+                             with --rerun. Supported statuses are: 'dead',
+                             'fail', 'pass', 'queued', 'running', 'waiting'
+                             [default: fail,dead]
+
 """.format(default_machine_type=config.default_machine_type,
            default_results_timeout=config.results_timeout)
diff --git a/teuthology/suite/__init__.py b/teuthology/suite/__init__.py
index d3be09a185..3949e4fb61 100644
--- a/teuthology/suite/__init__.py
+++ b/teuthology/suite/__init__.py
@@ -33,19 +33,26 @@ def process_args(args):
         key = key.lstrip('--').replace('-', '_')
         # Rename the key if necessary
         key = rename_args.get(key) or key
-        if key == 'suite':
-            value = value.replace('/', ':')
+        if key == 'suite' and value is not None:
+            value = normalize_suite_name(value)
         elif key in ('limit', 'priority', 'num', 'newest'):
             value = int(value)
         elif key == 'subset' and value is not None:
             # take input string '2/3' and turn into (2, 3)
             value = tuple(map(int, value.split('/')))
-        elif key in ('filter_in', 'filter_out'):
-            value = [x.strip() for x in value.split(',')]
+        elif key in ('filter_in', 'filter_out', 'rerun_statuses'):
+            if not value:
+                value = []
+            else:
+                value = [x.strip() for x in value.split(',')]
         conf[key] = value
     return conf
 
 
+def normalize_suite_name(name):
+    return name.replace('/', ':')
+
+
 def main(args):
     conf = process_args(args)
     if conf.verbose:
@@ -63,6 +70,12 @@ def main(args):
         config.archive_upload = conf.archive_upload
         log.info('Will upload archives to ' + conf.archive_upload)
 
+    if conf.rerun:
+        rerun_filters = get_rerun_filters(conf.rerun, conf.rerun_statuses)
+        print rerun_filters
+        conf.filter_in.extend(rerun_filters['descriptions'])
+        conf.suite = normalize_suite_name(rerun_filters['suite'])
+
     run = Run(conf)
     name = run.name
     run.prepare_and_schedule()
@@ -71,6 +84,19 @@ def main(args):
                     conf.archive_upload_url)
 
 
+def get_rerun_filters(name, statuses):
+    reporter = ResultsReporter()
+    run = reporter.get_run(name)
+    filters = dict()
+    filters['suite'] = run['suite']
+    jobs = []
+    for job in run['jobs']:
+        if job['status'] in statuses:
+            jobs.append(job)
+    filters['descriptions'] = [job['description'] for job in jobs]
+    return filters
+
+
 class WaitException(Exception):
     pass
-- 
2.39.5
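
Note on the selection logic: the sketch below mirrors what `get_rerun_filters`
does, but on a hand-built run dictionary instead of a live
`ResultsReporter.get_run()` query, so it can be read and run on its own. The
`select_rerun_filters` helper name and the sample run data are illustrative
only and not part of the patch; the assumed run shape (a dict with a 'suite'
key and a 'jobs' list whose entries carry 'status' and 'description') is taken
from the code above::

    # Minimal, self-contained sketch of the rerun selection logic. Assumes a
    # run record shaped like the one get_rerun_filters() gets back from
    # ResultsReporter.get_run(): a dict with 'suite' and a 'jobs' list whose
    # entries carry 'status' and 'description'. Names below are illustrative.

    def select_rerun_filters(run, statuses):
        """Return the suite name plus the descriptions of jobs whose status
        is in `statuses`; the descriptions become --filter values."""
        return {
            'suite': run['suite'],
            'descriptions': [
                job['description']
                for job in run['jobs']
                if job['status'] in statuses
            ],
        }


    sample_run = {
        'suite': 'smoke',
        'jobs': [
            {'status': 'pass', 'description': 'smoke/basic/tasks/rados_bench.yaml'},
            {'status': 'fail', 'description': 'smoke/basic/tasks/cfuse_workunit.yaml'},
            {'status': 'dead', 'description': 'smoke/basic/tasks/rbd_python_api.yaml'},
        ],
    }

    # Default --rerun-statuses is fail,dead: only the failed and dead jobs'
    # descriptions are selected and later fed to --filter.
    print(select_rerun_filters(sample_run, ['fail', 'dead']))

In the patch itself, the returned descriptions are appended to
`conf.filter_in` and `conf.suite` is reset from the original run's suite, so
the rescheduled suite only includes the jobs whose statuses were requested.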