From: Samuel Just Date: Mon, 30 Nov 2015 19:13:48 +0000 (-0800) Subject: matrix: reimpliment Sum X-Git-Tag: 1.1.0~680^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F773%2Fhead;p=teuthology.git matrix: reimpliment Sum See the docstring for details on the new implementation. The old one didn't really satisfy the minscanlen properties at the tail of the sequence. Signed-off-by: Samuel Just --- diff --git a/teuthology/matrix.py b/teuthology/matrix.py index aa7339ee0..f4d91a584 100644 --- a/teuthology/matrix.py +++ b/teuthology/matrix.py @@ -1,6 +1,11 @@ import os +import heapq from fractions import gcd +def lcm(a, b): + return a*b // gcd(a, b) +def lcml(l): + return reduce(lcm, l) class Matrix: """ @@ -195,25 +200,77 @@ class Concat(Matrix): class Sum(Matrix): """ We want to mix the subsequences proportionately to their size. + + The intuition is that we map all of the subsequences uniformly + onto rational numbers in [0, 1). The ith subsequence with length + l will have index k map onto i*epsilon + k*(1/l). i*epsilon + ensures that no two subsequences have an index which shares a + mapping in [0, 1) as long as epsilon is chosen to be small + enough. + + Rather than actually dealing with rational numbers, however, we'll + instead map onto whole numbers in [0, pseudo_size) where + pseudo_size is the lcm of the subsequence lengths * the number of + subsequences. Including the number of subsequences in the product + allows us to use 1 as epsilon. For each subsequence, we designate + an offset (position in input list) and a multiple (pseudo_size / size) + such that the psuedo_index for index i is offset + i*multiple. + + I don't have a good way to map index to pseudo index, so we'll + precompute a mapping in the constructor (self._i_so_sis) from + index to (subset_index, subset). 
""" def __init__(self, item, _submats): assert len(_submats) > 0, \ "Sum requires non-empty _submats" self.item = item - submats = sorted( - [((i.size(), ind), i) for (i, ind) in - zip(_submats, range(len(_submats)))], reverse=True) - self.submats = [] - self._size = 0 - for ((size, ind), submat) in submats: - self.submats.append((self._size, submat)) - self._size += size - self.submats.reverse() + self._pseudo_size = lcml((i.size() for i in _submats)) * len(_submats) + self._size = sum((i.size() for i in _submats)) + self._submats = [ + ((i, self._pseudo_size / s.size()), s) for (i, s) in \ + zip(range(len(_submats)), _submats) + ] + + def sm_to_pmsl(((offset, multiple), submat)): + """ + submat tuple to pseudo minscanlen + """ + return submat.minscanlen() * multiple + + def index_to_pindex_generator(submats): + assert len(submats) > 0, "submats must be non-empty" + h = [] + for (offset, multiple), submat in submats: + heapq.heappush(h, (offset, 0, multiple, submat)) + while True: + cur, si, multiple, submat = heapq.heappop(h) + heapq.heappush( + h, + (cur + multiple, si + 1, multiple, submat)) + yield si, submat + + self._i_to_sis = dict( + zip(range(self._size), index_to_pindex_generator(self._submats)) + ) + + self._minscanlen = self.pseudo_index_to_index( + max(map(sm_to_pmsl, self._submats))) + + def pi_to_sis(self, pi, (offset, multiple)): + """ + max(i) s.t. 
offset + i*multiple <= pi + """ + if pi < offset: + return -1 + return (pi - offset) / multiple + + def pseudo_index_to_index(self, pi): + """ + Count all pseudoindex values <= pi with corresponding subset indices + """ + return sum((self.pi_to_sis(pi, i) + 1 for i, _ in self._submats)) - 1 - self._minscanlen = max( - [(self._size / i.size()) * - i.minscanlen() for i in _submats]) def tostr(self, depth): ret = '\t'*depth + "Sum({item}):\n".format(item=self.item) return ret + ''.join([i[1].tostr(depth+1) for i in self._submats]) @@ -224,41 +281,9 @@ class Sum(Matrix): def size(self): return self._size - def _index(self, _i, submats): - """ - We reduce the N sequence problem to a two sequence problem recursively. - - If we have two sequences M and N of length m and n (n > m wlog), we - want to mix an M item into the stream every N / M items. Once we run - out of N, we want to simply finish the M stream. - """ - assert len(submats) > 0, \ - "_index requires non-empty submats" - if len(submats) == 1: - return submats[0][1].index(_i) - lmat = submats[0][1] - lsize = lmat.size() - - rsize = submats[0][0] - - mult = rsize / lsize - clen = mult + 1 - thresh = lsize * clen - i = _i % (rsize + lsize) - base = (_i / (rsize + lsize)) - if i < thresh: - if i % clen == 0: - return lmat.index((i / clen) + (base * lsize)) - else: - return self._index(((i / clen) * mult + ((i % clen) - 1)) + - (base * rsize), - submats[1:]) - else: - return self._index(i - lsize, submats[1:]) - def index(self, i): - return (self.item, self._index(i, self.submats)) - + si, submat = self._i_to_sis[i % self._size] + return (self.item, submat.index(si)) def generate_lists(result): """