]> git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
suite: Add rerun capability 963/head
authorZack Cerza <zack@redhat.com>
Thu, 6 Oct 2016 17:45:52 +0000 (11:45 -0600)
committerZack Cerza <zack@redhat.com>
Thu, 6 Oct 2016 21:12:49 +0000 (15:12 -0600)
Using a new --rerun argument for teuthology-suite, optionally paired
with --rerun-statuses, we can now ask teuthology to re-schedule a
suite-run. The main use-case for the feature is to re-schedule only
those jobs from a given run which failed.

Fixes: #10945
http://tracker.ceph.com/issues/10945

Signed-off-by: Zack Cerza <zack@redhat.com>
docs/intro_testers.rst
scripts/suite.py
teuthology/suite/__init__.py

index a54e3e45cfef95aa0553e176e6b334e91e104f90..2438fbb685142f0982b78e3d5fc2a3a8ddf9b4ab 100644 (file)
@@ -64,3 +64,11 @@ get an email when the test run completes.
 <https://github.com/ceph/pulpito/>`__ that will display the current status of
 each job. The Sepia lab's pulpito instance is `here
 <http://pulpito.ceph.com/>`__.
+
+There may be times when, after scheduling a run containing a large number of
+jobs, you want to reschedule only those jobs which have failed or died for
+some other reason. For that use-case, `teuthology-suite` has a `--rerun`/`-r`
+flag, and an optional `--rerun-statuses`/`-R` flag. An example of its usage
+is::
+
+    teuthology-suite -v -m vps -r teuthology-2016-10-06_05:00:03-smoke-master-testing-basic-vps -R pass,running,queued
index 5a1ff7cc5bbe42cc08ae98b6708bc2dc7902fcf6..c5a49bc537ac749dcbfcb71b1239cbaba55d3a6f 100644 (file)
@@ -7,6 +7,7 @@ from teuthology.config import config
 doc = """
 usage: teuthology-suite --help
        teuthology-suite [-v | -vv ] --suite <suite> [options] [<config_yaml>...]
+       teuthology-suite [-v | -vv ] --rerun <name>  [options] [<config_yaml>...]
 
 Run a suite of ceph integration tests. A suite is a directory containing
 facets. A facet is a directory containing config snippets. Running a suite
@@ -98,6 +99,24 @@ Scheduler arguments:
                               Useful to avoid bursts that may be too hard on
                               the underlying infrastructure or exceed OpenStack API
                               limits (server creation per minute for instance).
+  -r, --rerun <name>          Attempt to reschedule a run, selecting only those
+                              jobs whose statuses are mentioned by
+                              --rerun-statuses.
+                              Note that this is implemented by scheduling an
+                              entirely new suite and including only jobs whose
+                              descriptions match the selected ones. It does so
+                              using the same logic as --filter.
+                              Of all the flags that were passed when scheduling
+                              the original run, the resulting one will only
+                              inherit the suite value. Any others must be
+                              passed as normal while scheduling with this
+                              feature.
+  -R, --rerun-statuses <statuses>
+                              A comma-separated list of statuses to be used
+                              with --rerun. Supported statuses are: 'dead',
+                              'fail', 'pass', 'queued', 'running', 'waiting'
+                              [default: fail,dead]
+
 """.format(default_machine_type=config.default_machine_type,
            default_results_timeout=config.results_timeout)
 
index d3be09a18508b4fa601b4d6012a28ee8e9f90769..3949e4fb61c1e03adca8f14ef8c9eb1066d210ed 100644 (file)
@@ -33,19 +33,26 @@ def process_args(args):
         key = key.lstrip('--').replace('-', '_')
         # Rename the key if necessary
         key = rename_args.get(key) or key
-        if key == 'suite':
-            value = value.replace('/', ':')
+        if key == 'suite' and value is not None:
+            value = normalize_suite_name(value)
         elif key in ('limit', 'priority', 'num', 'newest'):
             value = int(value)
         elif key == 'subset' and value is not None:
             # take input string '2/3' and turn into (2, 3)
             value = tuple(map(int, value.split('/')))
-        elif key in ('filter_in', 'filter_out'):
-            value = [x.strip() for x in value.split(',')]
+        elif key in ('filter_in', 'filter_out', 'rerun_statuses'):
+            if not value:
+                value = []
+            else:
+                value = [x.strip() for x in value.split(',')]
         conf[key] = value
     return conf
 
 
+def normalize_suite_name(name):
+    return name.replace('/', ':')
+
+
 def main(args):
     conf = process_args(args)
     if conf.verbose:
@@ -63,6 +70,12 @@ def main(args):
         config.archive_upload = conf.archive_upload
         log.info('Will upload archives to ' + conf.archive_upload)
 
+    if conf.rerun:
+        rerun_filters = get_rerun_filters(conf.rerun, conf.rerun_statuses)
+        print rerun_filters
+        conf.filter_in.extend(rerun_filters['descriptions'])
+        conf.suite = normalize_suite_name(rerun_filters['suite'])
+
     run = Run(conf)
     name = run.name
     run.prepare_and_schedule()
@@ -71,6 +84,19 @@ def main(args):
                     conf.archive_upload_url)
 
 
+def get_rerun_filters(name, statuses):
+    reporter = ResultsReporter()
+    run = reporter.get_run(name)
+    filters = dict()
+    filters['suite'] = run['suite']
+    jobs = []
+    for job in run['jobs']:
+        if job['status'] in statuses:
+            jobs.append(job)
+    filters['descriptions'] = [job['description'] for job in jobs]
+    return filters
+
+
 class WaitException(Exception):
     pass