suite: add --subset option to allow us to schedule subsets of a suite

author Samuel Just <sjust@redhat.com>

Fri, 27 Mar 2015 17:20:28 +0000 (10:20 -0700)

committer Samuel Just <sjust@redhat.com>

Mon, 4 May 2015 22:04:13 +0000 (15:04 -0700)
author Samuel Just <sjust@redhat.com>
Fri, 27 Mar 2015 17:20:28 +0000 (10:20 -0700)
committer Samuel Just <sjust@redhat.com>
Mon, 4 May 2015 22:04:13 +0000 (15:04 -0700)
diff --git a/scripts/suite.py b/scripts/suite.py

index 088b20c23f976346629ebc5d1deb8c85fac26321..013650c1aacca9145238161e7ec617a0abec4ac7 100644 (file)
--- a/scripts/suite.py
+++ b/scripts/suite.py
@@ -59,6 +59,12 @@ Scheduler arguments:
                                [default: 1]
    -l <jobs>, --limit <jobs>   Queue at most this many jobs
                                [default: 0]
+  --subset <index/outof>      Instead of scheduling the entire suite, break the
+                              set of jobs into <outof> pieces (each of which will
+                              contain each facet at least once) and schedule
+                              piece <index>.  Scheduling 0/<outof>, 1/<outof>,
+                              2/<outof> ... <outof>-1/<outof> will schedule all
+                              jobs in the suite (many more than once).
    -p <priority>, --priority <priority>
                                Job priority (lower is sooner)
                                [default: 1000]
diff --git a/teuthology/matrix.py b/teuthology/matrix.py

new file mode 100644 (file)

index 0000000..3f76cea
--- /dev/null
+++ b/teuthology/matrix.py
@@ -0,0 +1,276 @@
+import os
+from fractions import gcd
+
+
+class Matrix:
+    """
+    Interface for sets
+    """
+    def size(self):
+        pass
+
+    def index(self, i):
+        """
+        index() should return a recursive structure representing the paths
+        to concatenate for index i:
+
+        Result :: (PathSegment, Result) | {Result}
+        PathSegment :: string
+
+        {Result} is a frozenset of Results indicating that
+        the set of paths resulting from each of the contained
+        Results should be concatenated.  (PathSegment, Result)
+        indicates that PathSegment should be prepended to the
+        paths resulting from Result.
+        """
+        pass
+
+    def minscanlen(self):
+        """
+        Minimum run length required to get a good sample
+        """
+        pass
+
+    def cyclicity(self):
+        """
+        A cyclicity of N means that the set represented by the Matrix
+        can be chopped into N good subsets of sequential indices.
+        """
+        return self.size() / self.minscanlen()
+
+
+class Cycle(Matrix):
+    """
+    Run a matrix multiple times
+    """
+    def __init__(self, num, mat):
+        self.mat = mat
+        self.num = num
+
+    def size(self):
+        return self.mat.size() * self.num
+
+    def index(self, i):
+        return self.mat.index(i % self.mat.size())
+
+    def minscanlen(self):
+        return self.mat.minscanlen()
+
+
+class Base(Matrix):
+    """
+    Just a single item.
+    """
+    def __init__(self, item):
+        self.item = item
+
+    def size(self):
+        return 1
+
+    def index(self, i):
+        return self.item
+
+    def minscanlen(self):
+        return 1
+
+
+class Product(Matrix):
+    """
+    Builds items by taking one item from each submatrix.  Contiguous
+    subsequences should move through all dimensions.
+    """
+    def __init__(self, item, _submats):
+        assert len(_submats) > 0, \
+            "Product requires child submats to be passed in"
+        self.item = item
+
+        submats = sorted(
+            [((i.size(), ind), i) for (i, ind) in
+             zip(_submats, range(len(_submats)))], reverse=True)
+        self.submats = []
+        self._size = 1
+        for ((size, _), submat) in submats:
+            self.submats.append((self._size, submat))
+            self._size *= size
+        self.submats.reverse()
+
+        self._minscanlen = max([i.minscanlen() for i in _submats])
+
+    def minscanlen(self):
+        return self._minscanlen
+
+    def size(self):
+        return self._size
+
+    def _index(self, i, submats):
+        """
+        We recursively reduce the N dimension problem to a two
+        dimension problem.
+
+        index(i) = (lmat.index(i % lmat.size()), rmat.index(i %
+        rmat.size())) would simply work if lmat.size() and rmat.size()
+        are relatively prime.
+
+        In general, if the gcd(lmat.size(), rmat.size()) == N,
+        index(i) would be periodic on the interval (lmat.size() *
+        rmat.size()) / N.  To adjust, we increment the lmat index
+        number on each repeat.  Each of the N repeats must therefore
+        be distinct from the previous ones resulting in lmat.size() *
+        rmat.size() combinations.
+        """
+        assert len(submats) > 0, \
+            "_index requires non-empty submats"
+        if len(submats) == 1:
+            return frozenset([submats[0][1].index(i)])
+
+        lmat = submats[0][1]
+        lsize = lmat.size()
+
+        rsize = submats[0][0]
+
+        cycles = gcd(rsize, lsize)
+        clen = (rsize * lsize) / cycles
+        off = (i / clen) % cycles
+
+        def combine(r, s=frozenset()):
+            if type(r) is frozenset:
+                return s | r
+            return s | frozenset([r])
+
+        litems = lmat.index(i + off)
+        ritems = self._index(i, submats[1:])
+        return combine(litems, combine(ritems))
+
+    def index(self, i):
+        items = self._index(i, self.submats)
+        return (self.item, items)
+
+class Concat(Matrix):
+    """
+    Concatenates all items in child matrices
+    """
+    def __init__(self, item, submats):
+        self.submats = submats
+        self.item = item
+
+    def size(self):
+        return 1
+
+    def minscanlen(self):
+        return 1
+
+    def index(self, i):
+        out = frozenset()
+        for submat in self.submats:
+            for i in range(submat.size()):
+                out = out | frozenset([submat.index(i)])
+        return out
+
+class Sum(Matrix):
+    """
+    We want to mix the subsequences proportionately to their size.
+    """
+    def __init__(self, item, _submats):
+        assert len(_submats) > 0, \
+            "Sum requires non-empty _submats"
+        self.item = item
+
+        submats = sorted(
+            [((i.size(), ind), i) for (i, ind) in
+             zip(_submats, range(len(_submats)))], reverse=True)
+        self.submats = []
+        self._size = 0
+        for ((size, ind), submat) in submats:
+            self.submats.append((self._size, submat))
+            self._size += size
+        self.submats.reverse()
+
+        self._minscanlen = max(
+            [(self._size / i.size()) *
+             i.minscanlen() for i in _submats])
+
+    def minscanlen(self):
+        return self._minscanlen
+
+    def size(self):
+        return self._size
+
+    def _index(self, _i, submats):
+        """
+        We reduce the N sequence problem to a two sequence problem recursively.
+
+        If we have two sequences M and N of length m and n (n > m wlog), we
+        want to mix an M item into the stream every N / M items.  Once we run
+        out of M, we want to simply finish the N stream.
+        """
+        assert len(submats) > 0, \
+            "_index requires non-empty submats"
+        if len(submats) == 1:
+            return submats[0][1].index(_i)
+        lmat = submats[0][1]
+        lsize = lmat.size()
+
+        rsize = submats[0][0]
+
+        mult = rsize / lsize
+        clen = mult + 1
+        thresh = lsize * clen
+        i = _i % (rsize + lsize)
+        base = (_i / (rsize + lsize))
+        if i < thresh:
+            if i % clen == 0:
+                return lmat.index((i / clen) + (base * lsize))
+            else:
+                return self._index(((i / clen) * mult + ((i % clen) - 1)) +
+                                   (base * rsize),
+                                   submats[1:])
+        else:
+            return self._index(i - lsize, submats[1:])
+
+    def index(self, i):
+        return (self.item, self._index(i, self.submats))
+
+
+def generate_lists(result):
+    """
+    Generates a set of tuples representing paths to concatenate
+    """
+    if type(result) is frozenset:
+        ret = []
+        for i in result:
+            ret.extend(generate_lists(i))
+        return frozenset(ret)
+    elif type(result) is tuple:
+        ret = []
+        (item, children) = result
+        for f in generate_lists(children):
+            nf = [item]
+            nf.extend(f)
+            ret.append(tuple(nf))
+        return frozenset(ret)
+    else:
+        return frozenset([(result,)])
+
+
+def generate_paths(path, result, joinf=os.path.join):
+    """
+    Generates from the result set a list of sorted paths to concatenate
+    """
+    return [reduce(joinf, i, path) for i in sorted(generate_lists(result))]
+
+
+def generate_desc(joinf, result):
+    """
+    Generates the text description of the test represented by result
+    """
+    if type(result) is frozenset:
+        ret = []
+        for i in sorted(result):
+            ret.append(generate_desc(joinf, i))
+        return '{' + ' '.join(ret) + '}'
+    elif type(result) is tuple:
+        (item, children) = result
+        cdesc = generate_desc(joinf, children)
+        return joinf(str(item), cdesc)
+    else:
+        return str(result)
diff --git a/teuthology/suite.py b/teuthology/suite.py

index 214d2a5c9340e6007593357bfc6a979b47321830..2db71e6d313fe3b2727d454886852061e209e661 100644 (file)
--- a/teuthology/suite.py
+++ b/teuthology/suite.py
@@ -4,7 +4,6 @@
  
  import copy
  from datetime import datetime
-import itertools
  import logging
  import os
  import requests
@@ -14,10 +13,12 @@ import smtplib
  import socket
  import sys
  import yaml
+import math
  from email.mime.text import MIMEText
  from tempfile import NamedTemporaryFile
  
  import teuthology
+import matrix
  from . import lock
  from .config import config, JobConfig
  from .exceptions import BranchNotFoundError, ScheduleFailError
@@ -59,6 +60,11 @@ def main(args):
      filter_in = args['--filter']
      filter_out = args['--filter-out']
  
+    subset = None
+    if args['--subset']:
+        # take input string '2/3' and turn into (2, 3)
+        subset = tuple(map(int, args['--subset'].split('/')))
+
      name = make_run_name(suite, ceph_branch, kernel_branch, kernel_flavor,
                           machine_type)
  
@@ -102,6 +108,7 @@ def main(args):
                           verbose=verbose,
                           filter_in=filter_in,
                           filter_out=filter_out,
+                         subset=subset,
                           )
      os.remove(base_yaml_path)
  
@@ -240,7 +247,9 @@ def create_initial_config(suite, suite_branch, ceph_branch, teuthology_branch,
  
  def prepare_and_schedule(job_config, suite_repo_path, base_yaml_paths, limit,
                           num, timeout, dry_run, verbose,
-                         filter_in, filter_out):
+                         filter_in,
+                         filter_out,
+                         subset):
      """
      Puts together some "base arguments" with which to execute
      teuthology-schedule for each job, then passes them and other parameters to
@@ -281,6 +290,7 @@ def prepare_and_schedule(job_config, suite_repo_path, base_yaml_paths, limit,
          dry_run=dry_run,
          filter_in=filter_in,
          filter_out=filter_out,
+        subset=subset
      )
  
      if job_config.email and num_jobs:
@@ -456,6 +466,7 @@ def schedule_suite(job_config,
                     dry_run=True,
                     filter_in=None,
                     filter_out=None,
+                   subset=None
                     ):
      """
      schedule one suite.
@@ -464,7 +475,7 @@ def schedule_suite(job_config,
      suite_name = job_config.suite
      log.debug('Suite %s in %s' % (suite_name, path))
      configs = [(combine_path(suite_name, item[0]), item[1]) for item in
-               build_matrix(path)]
+               build_matrix(path, subset=subset)]
      log.info('Suite %s in %s generated %d jobs (not yet filtered)' % (
          suite_name, path, len(configs)))
  
@@ -675,8 +686,7 @@ def combine_path(left, right):
      return left
  
  
-def build_matrix(path, _isfile=os.path.isfile, _isdir=os.path.isdir,
-                 _listdir=os.listdir):
+def generate_combinations(path, mat, generate_from, generate_to):
      """
      Return a list of items describe by path
  
@@ -695,6 +705,51 @@ def build_matrix(path, _isfile=os.path.isfile, _isdir=os.path.isdir,
      for each item in the directory, and then do a product to generate
      a result list with all combinations.
  
+    The final description (after recursion) for each item will look
+    like a relative path.  If there was a % product, that path
+    component will appear as a file with braces listing the selection
+    of chosen subitems.
+    """
+    ret = []
+    for i in range(generate_from, generate_to):
+        output = mat.index(i)
+        ret.append((
+            matrix.generate_desc(combine_path, output),
+            matrix.generate_paths(path, output, combine_path)))
+    return ret
+
+
+def build_matrix(path, _isfile=os.path.isfile,
+                 _isdir=os.path.isdir,
+                 _listdir=os.listdir,
+                 subset=None):
+    """
+    Return a list of items described by path such that if the list of
+    items is chunked into mincyclicity pieces, each piece is still a
+    good subset of the suite.
+
+    A good subset of a product ensures that each facet member appears
+    at least once.  A good subset of a sum ensures that the subset of
+    each sub collection reflected in the subset is a good subset.
+
+    A mincyclicity of 0 does not attempt to enforce the good subset
+    property.
+
+    The input is just a path.  The output is an array of (description,
+    [file list]) tuples.
+
+    For a normal file we generate a new item for the result list.
+
+    For a directory, we (recursively) generate a new item for each
+    file/dir (A Sum).
+
+    For a directory with a magic '+' file, we generate a single item
+    that concatenates all files/subdirs (A Concat).
+
+    For a directory with a magic '%' file, we generate a result set
+    for each item in the directory, and then do a product to generate
+    a result list with all combinations (A Product).
+
      The final description (after recursion) for each item will look
      like a relative path.  If there was a % product, that path
      component will appear as a file with braces listing the selection
@@ -704,56 +759,82 @@ def build_matrix(path, _isfile=os.path.isfile, _isdir=os.path.isdir,
      :param _isfile:     Custom os.path.isfile(); for testing only
      :param _isdir:      Custom os.path.isdir(); for testing only
      :param _listdir:   Custom os.listdir(); for testing only
-    """
+    :param subset:     (index, outof)
+    """
+    mat = None
+    first = None
+    matlimit = None
+    if subset:
+        (index, outof) = subset
+        mat = _build_matrix(path, _isfile, _isdir, _listdir, mincyclicity=outof)
+        first = (mat.size() / outof) * index
+        if index == outof or index == outof - 1:
+            matlimit = mat.size()
+        else:
+            matlimit = (mat.size() / outof) * (index + 1)
+    else:
+        first = 0
+        mat = _build_matrix(path, _isfile, _isdir, _listdir)
+        matlimit = mat.size()
+    return generate_combinations(path, mat, first, matlimit)
+
+def _build_matrix(path, _isfile=os.path.isfile,
+                  _isdir=os.path.isdir, _listdir=os.listdir, mincyclicity=0, item=''):
      if _isfile(path):
          if path.endswith('.yaml'):
-            return [(None, [path])]
-        return []
+            return matrix.Base(item)
+        assert False, "Invalid file seen in _build_matrix"
+        return None
      if _isdir(path):
          files = sorted(_listdir(path))
          if '+' in files:
              # concatenate items
              files.remove('+')
-            raw = []
-            for fn in files:
-                raw.extend(
-                    build_matrix(os.path.join(path, fn),
-                                 _isfile, _isdir, _listdir)
-                )
-            out = [(
-                '{' + ' '.join(files) + '}',
-                [a[1][0] for a in raw]
-            )]
-            return out
+            submats = []
+            for fn in sorted(files):
+                submats.append(
+                    _build_matrix(
+                        os.path.join(path, fn),
+                        _isfile,
+                        _isdir,
+                        _listdir,
+                        mincyclicity,
+                        fn))
+            return matrix.Concat(item, submats)
          elif '%' in files:
              # convolve items
              files.remove('%')
-            sublists = []
-            for fn in files:
-                raw = build_matrix(os.path.join(path, fn),
-                                   _isfile, _isdir, _listdir)
-                if raw:
-                    sublists.append([(combine_path(fn, item[0]), item[1])
-                                     for item in raw])
-            out = []
-            if sublists:
-                for sublist in itertools.product(*sublists):
-                    name = '{' + ' '.join([item[0] for item in sublist]) + '}'
-                    val = []
-                    for item in sublist:
-                        val.extend(item[1])
-                    out.append((name, val))
-            return out
+            submats = []
+            for fn in sorted(files):
+                submat = _build_matrix(
+                    os.path.join(path, fn),
+                    _isfile,
+                    _isdir,
+                    _listdir,
+                    mincyclicity=0,
+                    item=fn)
+                submats.append(submat)
+            return matrix.Product(item, submats)
          else:
              # list items
-            out = []
-            for fn in files:
-                raw = build_matrix(os.path.join(path, fn),
-                                   _isfile, _isdir, _listdir)
-                out.extend([(combine_path(fn, item[0]), item[1])
-                           for item in raw])
-            return out
-    return []
+            submats = []
+            for fn in sorted(files):
+                submat = _build_matrix(
+                    os.path.join(path, fn),
+                    _isfile,
+                    _isdir,
+                    _listdir,
+                    mincyclicity,
+                    fn)
+                if submat.cyclicity() < mincyclicity:
+                    submat = matrix.Cycle(
+                        int(math.ceil(
+                            mincyclicity / submat.cyclicity())),
+                        submat)
+                submats.append(submat)
+            return matrix.Sum(item, submats)
+    assert False, "Invalid path seen in _build_matrix"
+    return None
  
  
  def get_arch(machine_type):
diff --git a/teuthology/test/test_matrix.py b/teuthology/test/test_matrix.py

new file mode 100644 (file)

index 0000000..bad5efc
--- /dev/null
+++ b/teuthology/test/test_matrix.py
@@ -0,0 +1,71 @@
+from .. import matrix
+
+def verify_matrix_output_diversity(res):
+    """
+    Verifies that the size of the matrix passed matches the number of unique
+    outputs from res.index
+    """
+    sz = res.size()
+    s = frozenset([matrix.generate_lists(res.index(i)) for i in range(sz)])
+    for i in range(res.size()):
+        assert sz == len(s)
+
+def mbs(num, l):
+    return matrix.Sum(num*10, [matrix.Base(i + (100*num)) for i in l])
+
+class TestMatrix(object):
+    def test_simple(self):
+        verify_matrix_output_diversity(mbs(1, range(6)))
+
+    def test_simple2(self):
+        verify_matrix_output_diversity(mbs(1, range(5)))
+
+    # The test_product* tests differ by the degree by which dimension
+    # sizes share prime factors
+    def test_product_simple(self):
+        verify_matrix_output_diversity(
+            matrix.Product(1, [mbs(1, range(6)), mbs(2, range(2))]))
+
+    def test_product_3_facets_2_prime_factors(self):
+        verify_matrix_output_diversity(matrix.Product(1, [
+                    mbs(1, range(6)),
+                    mbs(2, range(2)),
+                    mbs(3, range(3)),
+                    ]))
+
+    def test_product_3_facets_2_prime_factors_one_larger(self):
+        verify_matrix_output_diversity(matrix.Product(1, [
+                    mbs(1, range(2)),
+                    mbs(2, range(5)),
+                    mbs(4, range(4)),
+                    ]))
+
+    def test_product_4_facets_2_prime_factors(self):
+        verify_matrix_output_diversity(matrix.Sum(1, [
+                    mbs(1, range(6)),
+                    mbs(3, range(3)),
+                    mbs(2, range(2)),
+                    mbs(4, range(9)),
+                    ]))
+
+    def test_product_2_facets_2_prime_factors(self):
+        verify_matrix_output_diversity(matrix.Sum(1, [
+                    mbs(1, range(2)),
+                    mbs(2, range(5)),
+                    ]))
+
+    def test_product_with_sum(self):
+        verify_matrix_output_diversity(matrix.Sum(
+                9,
+                [
+                    mbs(10, range(6)),
+                    matrix.Product(1, [
+                            mbs(1, range(2)),
+                            mbs(2, range(5)),
+                            mbs(4, range(4))]),
+                    matrix.Product(8, [
+                            mbs(7, range(2)),
+                            mbs(6, range(5)),
+                            mbs(5, range(4))])
+                    ]
+                ))
author	Samuel Just <sjust@redhat.com>
	Fri, 27 Mar 2015 17:20:28 +0000 (10:20 -0700)
committer	Samuel Just <sjust@redhat.com>
	Mon, 4 May 2015 22:04:13 +0000 (15:04 -0700)
scripts/suite.py		patch \| blob \| history
teuthology/matrix.py	[new file with mode: 0644]	patch \| blob
teuthology/suite.py		patch \| blob \| history
teuthology/test/test_matrix.py	[new file with mode: 0644]	patch \| blob