]> git.apps.os.sepia.ceph.com Git - teuthology.git/commitdiff
suite: Add rerun capability 963/head
authorZack Cerza <zack@redhat.com>
Thu, 6 Oct 2016 17:45:52 +0000 (11:45 -0600)
committerZack Cerza <zack@redhat.com>
Thu, 6 Oct 2016 21:12:49 +0000 (15:12 -0600)
Using a new --rerun argument for teuthology-suite, optionally paired
with --rerun-statuses, we can now ask teuthology to re-schedule a
suite-run. The main use-case for the feature is to re-schedule only
those jobs from a given run which failed.

Fixes: #10945
http://tracker.ceph.com/issues/10945

Signed-off-by: Zack Cerza <zack@redhat.com>
docs/intro_testers.rst
scripts/suite.py
teuthology/suite/__init__.py

index a54e3e45cfef95aa0553e176e6b334e91e104f90..2438fbb685142f0982b78e3d5fc2a3a8ddf9b4ab 100644 (file)
@@ -64,3 +64,11 @@ get an email when the test run completes.
 <https://github.com/ceph/pulpito/>`__ that will display the current status of
 each job. The Sepia lab's pulpito instance is `here
 <http://pulpito.ceph.com/>`__.
+
+There may be times when, after scheduling a run containing a large number of
+jobs, you want to reschedule only those jobs which have failed or died for
+some other reason. For that use-case, `teuthology-suite` has a `--rerun`/`-r`
+flag, and an optional `--rerun-statuses`/`-R` flag. An example of its usage
+is::
+
+    teuthology-suite -v -m vps -r teuthology-2016-10-06_05:00:03-smoke-master-testing-basic-vps -R pass,running,queued
index 5a1ff7cc5bbe42cc08ae98b6708bc2dc7902fcf6..c5a49bc537ac749dcbfcb71b1239cbaba55d3a6f 100644 (file)
@@ -7,6 +7,7 @@ from teuthology.config import config
 doc = """
 usage: teuthology-suite --help
        teuthology-suite [-v | -vv ] --suite <suite> [options] [<config_yaml>...]
+       teuthology-suite [-v | -vv ] --rerun <name>  [options] [<config_yaml>...]
 
 Run a suite of ceph integration tests. A suite is a directory containing
 facets. A facet is a directory containing config snippets. Running a suite
@@ -98,6 +99,24 @@ Scheduler arguments:
                               Useful to avoid bursts that may be too hard on
                               the underlying infrastructure or exceed OpenStack API
                               limits (server creation per minute for instance).
+  -r, --rerun <name>          Attempt to reschedule a run, selecting only those
+                              jobs whose statuses are mentioned by
+                              --rerun-statuses.
+                              Note that this is implemented by scheduling an
+                              entirely new suite and including only jobs whose
+                              descriptions match the selected ones. It does so
+                              using the same logic as --filter.
+                              Of all the flags that were passed when scheduling
+                              the original run, the resulting one will only
+                              inherit the suite value. Any others must be
+                              passed as normal while scheduling with this
+                              feature.
+  -R, --rerun-statuses <statuses>
+                              A comma-separated list of statuses to be used
+                              with --rerun. Supported statuses are: 'dead',
+                              'fail', 'pass', 'queued', 'running', 'waiting'
+                              [default: fail,dead]
+
 """.format(default_machine_type=config.default_machine_type,
            default_results_timeout=config.results_timeout)
 
index d3be09a18508b4fa601b4d6012a28ee8e9f90769..3949e4fb61c1e03adca8f14ef8c9eb1066d210ed 100644 (file)
@@ -33,19 +33,26 @@ def process_args(args):
         key = key.lstrip('--').replace('-', '_')
         # Rename the key if necessary
         key = rename_args.get(key) or key
-        if key == 'suite':
-            value = value.replace('/', ':')
+        if key == 'suite' and value is not None:
+            value = normalize_suite_name(value)
         elif key in ('limit', 'priority', 'num', 'newest'):
             value = int(value)
         elif key == 'subset' and value is not None:
             # take input string '2/3' and turn into (2, 3)
             value = tuple(map(int, value.split('/')))
-        elif key in ('filter_in', 'filter_out'):
-            value = [x.strip() for x in value.split(',')]
+        elif key in ('filter_in', 'filter_out', 'rerun_statuses'):
+            if not value:
+                value = []
+            else:
+                value = [x.strip() for x in value.split(',')]
         conf[key] = value
     return conf
 
 
+def normalize_suite_name(name):
+    return name.replace('/', ':')
+
+
 def main(args):
     conf = process_args(args)
     if conf.verbose:
@@ -63,6 +70,12 @@ def main(args):
         config.archive_upload = conf.archive_upload
         log.info('Will upload archives to ' + conf.archive_upload)
 
+    if conf.rerun:
+        rerun_filters = get_rerun_filters(conf.rerun, conf.rerun_statuses)
+        print rerun_filters
+        conf.filter_in.extend(rerun_filters['descriptions'])
+        conf.suite = normalize_suite_name(rerun_filters['suite'])
+
     run = Run(conf)
     name = run.name
     run.prepare_and_schedule()
@@ -71,6 +84,19 @@ def main(args):
                     conf.archive_upload_url)
 
 
+def get_rerun_filters(name, statuses):
+    reporter = ResultsReporter()
+    run = reporter.get_run(name)
+    filters = dict()
+    filters['suite'] = run['suite']
+    jobs = []
+    for job in run['jobs']:
+        if job['status'] in statuses:
+            jobs.append(job)
+    filters['descriptions'] = [job['description'] for job in jobs]
+    return filters
+
+
 class WaitException(Exception):
     pass