]> git.apps.os.sepia.ceph.com Git - ceph.git/commit
interval_set: optimize intersection_of
authorZac Medico <zmedico@gmail.com>
Thu, 31 Aug 2017 03:59:32 +0000 (20:59 -0700)
committerIgor Fedotov <ifedotov@suse.com>
Thu, 12 Apr 2018 18:34:27 +0000 (21:34 +0300)
commit4aefab0364064930d13370f4f8cc8e9e1e964024
treefb1862853aa730b04bb38e9c93112f0ea201fa5f
parent719e13ccfcc6cdf163d03ddfb0f55dbe0e338f48
interval_set: optimize intersection_of

Iterate over all elements of the smaller set, and use find_inc to
locate elements from the larger set in logarithmic time. This greatly
improves performance when one set is much larger than the other:

     2 +-+--+----+----+----+----+----+----+----+----+--+-+
P      +*                                                +
E      |*                                                |
R  1.8 +*                                                +
F      | *                                               |
O      | *                                               |
R  1.6 + *                                               +
M      | *                                               |
A      | *                                               |
N  1.4 + *                                               +
C      |  *                                              |
E      |  *                                              |
   1.2 +   *                                             +
R      |    *                                            |
A      |     *                                           |
T    1 +      ***                                        +
I      |         ******                                  |
O      +               ***********************************
   0.8 +-+--+----+----+----+----+----+----+----+----+--+-+
       0   0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9   1

                          SET SIZE RATIO

The above plot compares performance of the new intersection_size_asym
function to the existing intersection_of function. The performance of
intersection_size_asym gets worse as the set size ratio approaches 1.
For set size ratios where the performance ratio is greater than 1, the
performance of intersection_size_asym is superior. Therefore, this
patch only uses intersection_size_asym when the set size ratio is less
than or equal to 0.1 (code uses the reciprocal which is 10).

The plot was generated using benchmark results produced by the
following program:

#include <iostream>
#include <sys/timeb.h>
#include "include/interval_set.h"

int main()
{
  const int interval_count = 100000;
  const int interval_distance = 4;
  const int interval_size = 2;
  const int sample_count = 8;
  const int max_offset = interval_count * interval_distance;
  interval_set<int> a, b, intersection;

  for (int i = 0; i < max_offset; i+=interval_distance) {
    a.insert(i, interval_size);
  }

  for (int m = 1; m < 100; m++) {
    float ratio = 1 / float(m);

    for (int i = 0; i < max_offset; i+=interval_distance*m) {
      b.insert(i, interval_size);
    }

    struct timeb start, end;
    int ms = 0;
    for (int i = 0; i < sample_count; i++) {
      ftime(&start);
      intersection.intersection_of(a, b);
      ftime(&end);
      ms += (int) (1000.0 * (end.time - start.time)
        + (end.millitm - start.millitm));
      intersection.clear();
    }
    b.clear();

    std::cout << ratio << "\t" << ms << std::endl << std::flush;
  }
}

Signed-off-by: Zac Medico <zmedico@gmail.com>
(cherry picked from commit 825470fcf9190c94422716454dbf11b24350a748)
src/include/interval_set.h