From 9a57759ec3df7a8b9490920538214ac189418c49 Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Fri, 27 Mar 2015 10:20:28 -0700 Subject: [PATCH] suite: add --subset option to allow us to schedule subsets of a suite First, the old logic tended to do the following: [0,1,2,3] x [a,b,c,d] -> [(0, a), (0, b), (0, c), ...] which is a bummer since if we only schedule the first 1/4, we only get combinations with 0. Instead, we want: [0,1,2,3] x [a,b,c,d] -> [(0, a), (1, b), (2, c), ...] Second, the old logic tended to do the following: [0,1,2,3] + [a,b,c,d] -> [0,1,2,3,a,b,c,d] Which is a bummer since we need to all the way through the first set before we get examples from the second. Instead, we want: [0,1,2,3] + [a,b,c,d] -> [0,a,1,b,2,c,3,d] Lastly, if we have: [0,1,2] + ([0,1,2],[a,b,c]) and want to schedule 1/3 of the suite, we don't want to just schedule 0, we probably want to schedule all of [0,1,2] and 1/3 of the second combo. Thus, we want to detect such cases and multiply out [0,1,2] into [0,1,2,0,1,2,0,1,2]. Signed-off-by: Samuel Just --- scripts/suite.py | 6 + teuthology/matrix.py | 276 +++++++++++++++++++++++++++++++++ teuthology/suite.py | 167 +++++++++++++++----- teuthology/test/test_matrix.py | 71 +++++++++ 4 files changed, 477 insertions(+), 43 deletions(-) create mode 100644 teuthology/matrix.py create mode 100644 teuthology/test/test_matrix.py diff --git a/scripts/suite.py b/scripts/suite.py index 088b20c23f..013650c1aa 100644 --- a/scripts/suite.py +++ b/scripts/suite.py @@ -59,6 +59,12 @@ Scheduler arguments: [default: 1] -l , --limit Queue at most this many jobs [default: 0] + --subset Instead of scheduling the entire suite, break the + set of jobs into pieces (each of which will + contain each facet at least once) and schedule + piece . Scheduling 0/, 1/, + 2/ ... -1/ will schedule all + jobs in the suite (many more than once). -p , --priority Job priority (lower is sooner) [default: 1000] diff --git a/teuthology/matrix.py b/teuthology/matrix.py new file mode 100644 index 0000000000..3f76cea193 --- /dev/null +++ b/teuthology/matrix.py @@ -0,0 +1,276 @@ +import os +from fractions import gcd + + +class Matrix: + """ + Interface for sets + """ + def size(self): + pass + + def index(self, i): + """ + index() should return a recursive structure represending the paths + to concatenate for index i: + + Result :: (PathSegment, Result) | {Result} + Path :: string + + {Result} is a frozen_set of Results indicating that + the set of paths resulting from each of the contained + Results should be concatenated. (PathSegment, Result) + indicates that PathSegment should be prepended to the + paths resulting from Result. + """ + pass + + def minscanlen(self): + """ + min run require to get a good sample + """ + pass + + def cyclicity(self): + """ + A cyclicity of N means that the set represented by the Matrix + can be chopped into N good subsets of sequential indices. + """ + return self.size() / self.minscanlen() + + +class Cycle(Matrix): + """ + Run a matrix multiple times + """ + def __init__(self, num, mat): + self.mat = mat + self.num = num + + def size(self): + return self.mat.size() * self.num + + def index(self, i): + return self.mat.index(i % self.mat.size()) + + def minscanlen(self): + return self.mat.minscanlen() + + +class Base(Matrix): + """ + Just a single item. + """ + def __init__(self, item): + self.item = item + + def size(self): + return 1 + + def index(self, i): + return self.item + + def minscanlen(self): + return 1 + + +class Product(Matrix): + """ + Builds items by taking one item from each submatrix. Contiguous + subsequences should move through all dimensions. + """ + def __init__(self, item, _submats): + assert len(_submats) > 0, \ + "Product requires child submats to be passed in" + self.item = item + + submats = sorted( + [((i.size(), ind), i) for (i, ind) in + zip(_submats, range(len(_submats)))], reverse=True) + self.submats = [] + self._size = 1 + for ((size, _), submat) in submats: + self.submats.append((self._size, submat)) + self._size *= size + self.submats.reverse() + + self._minscanlen = max([i.minscanlen() for i in _submats]) + + def minscanlen(self): + return self._minscanlen + + def size(self): + return self._size + + def _index(self, i, submats): + """ + We recursively reduce the N dimension problem to a two + dimension problem. + + index(i) = (lmat.index(i % lmat.size()), rmat.index(i % + rmat.size())) would simply work if lmat.size() and rmat.size() + are relatively prime. + + In general, if the gcd(lmat.size(), rmat.size()) == N, + index(i) would be periodic on the interval (lmat.size() * + rmat.size()) / N. To adjust, we increment the lmat index + number on each repeat. Each of the N repeats must therefore + be distinct from the previous ones resulting in lmat.size() * + rmat.size() combinations. + """ + assert len(submats) > 0, \ + "_index requires non-empty submats" + if len(submats) == 1: + return frozenset([submats[0][1].index(i)]) + + lmat = submats[0][1] + lsize = lmat.size() + + rsize = submats[0][0] + + cycles = gcd(rsize, lsize) + clen = (rsize * lsize) / cycles + off = (i / clen) % cycles + + def combine(r, s=frozenset()): + if type(r) is frozenset: + return s | r + return s | frozenset([r]) + + litems = lmat.index(i + off) + ritems = self._index(i, submats[1:]) + return combine(litems, combine(ritems)) + + def index(self, i): + items = self._index(i, self.submats) + return (self.item, items) + +class Concat(Matrix): + """ + Concatenates all items in child matrices + """ + def __init__(self, item, submats): + self.submats = submats + self.item = item + + def size(self): + return 1 + + def minscanlen(self): + return 1 + + def index(self, i): + out = frozenset() + for submat in self.submats: + for i in range(submat.size()): + out = out | frozenset([submat.index(i)]) + return out + +class Sum(Matrix): + """ + We want to mix the subsequences proportionately to their size. + """ + def __init__(self, item, _submats): + assert len(_submats) > 0, \ + "Sum requires non-empty _submats" + self.item = item + + submats = sorted( + [((i.size(), ind), i) for (i, ind) in + zip(_submats, range(len(_submats)))], reverse=True) + self.submats = [] + self._size = 0 + for ((size, ind), submat) in submats: + self.submats.append((self._size, submat)) + self._size += size + self.submats.reverse() + + self._minscanlen = max( + [(self._size / i.size()) * + i.minscanlen() for i in _submats]) + + def minscanlen(self): + return self._minscanlen + + def size(self): + return self._size + + def _index(self, _i, submats): + """ + We reduce the N sequence problem to a two sequence problem recursively. + + If we have two sequences M and N of length m and n (n > m wlog), we + want to mix an M item into the stream every N / M items. Once we run + out of N, we want to simply finish the M stream. + """ + assert len(submats) > 0, \ + "_index requires non-empty submats" + if len(submats) == 1: + return submats[0][1].index(_i) + lmat = submats[0][1] + lsize = lmat.size() + + rsize = submats[0][0] + + mult = rsize / lsize + clen = mult + 1 + thresh = lsize * clen + i = _i % (rsize + lsize) + base = (_i / (rsize + lsize)) + if i < thresh: + if i % clen == 0: + return lmat.index((i / clen) + (base * lsize)) + else: + return self._index(((i / clen) * mult + ((i % clen) - 1)) + + (base * rsize), + submats[1:]) + else: + return self._index(i - lsize, submats[1:]) + + def index(self, i): + return (self.item, self._index(i, self.submats)) + + +def generate_lists(result): + """ + Generates a set of tuples representing paths to concatenate + """ + if type(result) is frozenset: + ret = [] + for i in result: + ret.extend(generate_lists(i)) + return frozenset(ret) + elif type(result) is tuple: + ret = [] + (item, children) = result + for f in generate_lists(children): + nf = [item] + nf.extend(f) + ret.append(tuple(nf)) + return frozenset(ret) + else: + return frozenset([(result,)]) + + +def generate_paths(path, result, joinf=os.path.join): + """ + Generates from the result set a list of sorted paths to concatenate + """ + return [reduce(joinf, i, path) for i in sorted(generate_lists(result))] + + +def generate_desc(joinf, result): + """ + Generates the text description of the test represented by result + """ + if type(result) is frozenset: + ret = [] + for i in sorted(result): + ret.append(generate_desc(joinf, i)) + return '{' + ' '.join(ret) + '}' + elif type(result) is tuple: + (item, children) = result + cdesc = generate_desc(joinf, children) + return joinf(str(item), cdesc) + else: + return str(result) diff --git a/teuthology/suite.py b/teuthology/suite.py index 214d2a5c93..2db71e6d31 100644 --- a/teuthology/suite.py +++ b/teuthology/suite.py @@ -4,7 +4,6 @@ import copy from datetime import datetime -import itertools import logging import os import requests @@ -14,10 +13,12 @@ import smtplib import socket import sys import yaml +import math from email.mime.text import MIMEText from tempfile import NamedTemporaryFile import teuthology +import matrix from . import lock from .config import config, JobConfig from .exceptions import BranchNotFoundError, ScheduleFailError @@ -59,6 +60,11 @@ def main(args): filter_in = args['--filter'] filter_out = args['--filter-out'] + subset = None + if args['--subset']: + # take input string '2/3' and turn into (2, 3) + subset = tuple(map(int, args['--subset'].split('/'))) + name = make_run_name(suite, ceph_branch, kernel_branch, kernel_flavor, machine_type) @@ -102,6 +108,7 @@ def main(args): verbose=verbose, filter_in=filter_in, filter_out=filter_out, + subset=subset, ) os.remove(base_yaml_path) @@ -240,7 +247,9 @@ def create_initial_config(suite, suite_branch, ceph_branch, teuthology_branch, def prepare_and_schedule(job_config, suite_repo_path, base_yaml_paths, limit, num, timeout, dry_run, verbose, - filter_in, filter_out): + filter_in, + filter_out, + subset): """ Puts together some "base arguments" with which to execute teuthology-schedule for each job, then passes them and other parameters to @@ -281,6 +290,7 @@ def prepare_and_schedule(job_config, suite_repo_path, base_yaml_paths, limit, dry_run=dry_run, filter_in=filter_in, filter_out=filter_out, + subset=subset ) if job_config.email and num_jobs: @@ -456,6 +466,7 @@ def schedule_suite(job_config, dry_run=True, filter_in=None, filter_out=None, + subset=None ): """ schedule one suite. @@ -464,7 +475,7 @@ def schedule_suite(job_config, suite_name = job_config.suite log.debug('Suite %s in %s' % (suite_name, path)) configs = [(combine_path(suite_name, item[0]), item[1]) for item in - build_matrix(path)] + build_matrix(path, subset=subset)] log.info('Suite %s in %s generated %d jobs (not yet filtered)' % ( suite_name, path, len(configs))) @@ -675,8 +686,7 @@ def combine_path(left, right): return left -def build_matrix(path, _isfile=os.path.isfile, _isdir=os.path.isdir, - _listdir=os.listdir): +def generate_combinations(path, mat, generate_from, generate_to): """ Return a list of items describe by path @@ -695,6 +705,51 @@ def build_matrix(path, _isfile=os.path.isfile, _isdir=os.path.isdir, for each item in the directory, and then do a product to generate a result list with all combinations. + The final description (after recursion) for each item will look + like a relative path. If there was a % product, that path + component will appear as a file with braces listing the selection + of chosen subitems. + """ + ret = [] + for i in range(generate_from, generate_to): + output = mat.index(i) + ret.append(( + matrix.generate_desc(combine_path, output), + matrix.generate_paths(path, output, combine_path))) + return ret + + +def build_matrix(path, _isfile=os.path.isfile, + _isdir=os.path.isdir, + _listdir=os.listdir, + subset=None): + """ + Return a list of items descibed by path such that if the list of + items is chunked into mincyclicity pieces, each piece is still a + good subset of the suite. + + A good subset of a product ensures that each facet member appears + at least once. A good subset of a sum ensures that the subset of + each sub collection reflected in the subset is a good subset. + + A mincyclicity of 0 does not attempt to enforce the good subset + property. + + The input is just a path. The output is an array of (description, + [file list]) tuples. + + For a normal file we generate a new item for the result list. + + For a directory, we (recursively) generate a new item for each + file/dir. + + For a directory with a magic '+' file, we generate a single item + that concatenates all files/subdirs (A Sum). + + For a directory with a magic '%' file, we generate a result set + for each item in the directory, and then do a product to generate + a result list with all combinations (A Product). + The final description (after recursion) for each item will look like a relative path. If there was a % product, that path component will appear as a file with braces listing the selection @@ -704,56 +759,82 @@ def build_matrix(path, _isfile=os.path.isfile, _isdir=os.path.isdir, :param _isfile: Custom os.path.isfile(); for testing only :param _isdir: Custom os.path.isdir(); for testing only :param _listdir: Custom os.listdir(); for testing only - """ + :param subset: (index, outof) + """ + mat = None + first = None + matlimit = None + if subset: + (index, outof) = subset + mat = _build_matrix(path, _isfile, _isdir, _listdir, mincyclicity=outof) + first = (mat.size() / outof) * index + if index == outof or index == outof - 1: + matlimit = mat.size() + else: + matlimit = (mat.size() / outof) * (index + 1) + else: + first = 0 + mat = _build_matrix(path, _isfile, _isdir, _listdir) + matlimit = mat.size() + return generate_combinations(path, mat, first, matlimit) + +def _build_matrix(path, _isfile=os.path.isfile, + _isdir=os.path.isdir, _listdir=os.listdir, mincyclicity=0, item=''): if _isfile(path): if path.endswith('.yaml'): - return [(None, [path])] - return [] + return matrix.Base(item) + assert False, "Invalid file seen in _build_matrix" + return None if _isdir(path): files = sorted(_listdir(path)) if '+' in files: # concatenate items files.remove('+') - raw = [] - for fn in files: - raw.extend( - build_matrix(os.path.join(path, fn), - _isfile, _isdir, _listdir) - ) - out = [( - '{' + ' '.join(files) + '}', - [a[1][0] for a in raw] - )] - return out + submats = [] + for fn in sorted(files): + submats.append( + _build_matrix( + os.path.join(path, fn), + _isfile, + _isdir, + _listdir, + mincyclicity, + fn)) + return matrix.Concat(item, submats) elif '%' in files: # convolve items files.remove('%') - sublists = [] - for fn in files: - raw = build_matrix(os.path.join(path, fn), - _isfile, _isdir, _listdir) - if raw: - sublists.append([(combine_path(fn, item[0]), item[1]) - for item in raw]) - out = [] - if sublists: - for sublist in itertools.product(*sublists): - name = '{' + ' '.join([item[0] for item in sublist]) + '}' - val = [] - for item in sublist: - val.extend(item[1]) - out.append((name, val)) - return out + submats = [] + for fn in sorted(files): + submat = _build_matrix( + os.path.join(path, fn), + _isfile, + _isdir, + _listdir, + mincyclicity=0, + item=fn) + submats.append(submat) + return matrix.Product(item, submats) else: # list items - out = [] - for fn in files: - raw = build_matrix(os.path.join(path, fn), - _isfile, _isdir, _listdir) - out.extend([(combine_path(fn, item[0]), item[1]) - for item in raw]) - return out - return [] + submats = [] + for fn in sorted(files): + submat = _build_matrix( + os.path.join(path, fn), + _isfile, + _isdir, + _listdir, + mincyclicity, + fn) + if submat.cyclicity() < mincyclicity: + submat = matrix.Cycle( + int(math.ceil( + mincyclicity / submat.cyclicity())), + submat) + submats.append(submat) + return matrix.Sum(item, submats) + assert False, "Invalid path seen in _build_matrix" + return None def get_arch(machine_type): diff --git a/teuthology/test/test_matrix.py b/teuthology/test/test_matrix.py new file mode 100644 index 0000000000..bad5efc494 --- /dev/null +++ b/teuthology/test/test_matrix.py @@ -0,0 +1,71 @@ +from .. import matrix + +def verify_matrix_output_diversity(res): + """ + Verifies that the size of the matrix passed matches the number of unique + outputs from res.index + """ + sz = res.size() + s = frozenset([matrix.generate_lists(res.index(i)) for i in range(sz)]) + for i in range(res.size()): + assert sz == len(s) + +def mbs(num, l): + return matrix.Sum(num*10, [matrix.Base(i + (100*num)) for i in l]) + +class TestMatrix(object): + def test_simple(self): + verify_matrix_output_diversity(mbs(1, range(6))) + + def test_simple2(self): + verify_matrix_output_diversity(mbs(1, range(5))) + + # The test_product* tests differ by the degree by which dimension + # sizes share prime factors + def test_product_simple(self): + verify_matrix_output_diversity( + matrix.Product(1, [mbs(1, range(6)), mbs(2, range(2))])) + + def test_product_3_facets_2_prime_factors(self): + verify_matrix_output_diversity(matrix.Product(1, [ + mbs(1, range(6)), + mbs(2, range(2)), + mbs(3, range(3)), + ])) + + def test_product_3_facets_2_prime_factors_one_larger(self): + verify_matrix_output_diversity(matrix.Product(1, [ + mbs(1, range(2)), + mbs(2, range(5)), + mbs(4, range(4)), + ])) + + def test_product_4_facets_2_prime_factors(self): + verify_matrix_output_diversity(matrix.Sum(1, [ + mbs(1, range(6)), + mbs(3, range(3)), + mbs(2, range(2)), + mbs(4, range(9)), + ])) + + def test_product_2_facets_2_prime_factors(self): + verify_matrix_output_diversity(matrix.Sum(1, [ + mbs(1, range(2)), + mbs(2, range(5)), + ])) + + def test_product_with_sum(self): + verify_matrix_output_diversity(matrix.Sum( + 9, + [ + mbs(10, range(6)), + matrix.Product(1, [ + mbs(1, range(2)), + mbs(2, range(5)), + mbs(4, range(4))]), + matrix.Product(8, [ + mbs(7, range(2)), + mbs(6, range(5)), + mbs(5, range(4))]) + ] + )) -- 2.39.5