]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
interval_set: optimize intersect_of insert operations
authorZac Medico <zmedico@gmail.com>
Fri, 25 Aug 2017 16:41:07 +0000 (09:41 -0700)
committerNathan Cutler <ncutler@suse.com>
Wed, 6 Sep 2017 08:40:39 +0000 (10:40 +0200)
Use the std::map insert method with hint iterator to optimize
inserts. This increases performance more than 3.5 times for
large numbers of intervals. This will help performance
especially in the PGPool::update method, where profiling data
has shown that intersection operations are a hot spot. The
following benchmark data is for 400000 intervals:

    4 +-+--+----+----+----+----+----+----+----+----+--+-+
P     +    +    +    +    +    +    +    +  *************
E     |                             ********            |
R 3.5 +-+                       ****                  +-+
F     |                   ******                        |
O     |                 **                              |
R   3 +-+           ****                              +-+
M     |          ***                                    |
A     |        **                                       |
N 2.5 +-+     *                                       +-+
C     |     **                                          |
E     |     *                                           |
    2 +-+ **                                          +-+
R     |  **                                             |
A     | **                                              |
T 1.5 +**                                             +-+
I     |**                                               |
O     +*   +    +    +    +    +    +    +    +    +    +
    1 +*+--+----+----+----+----+----+----+----+----+--+-+
      0   0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9

                        SET SIZE RATIO

The above chart was generated using benchmark results
from the following program:

#include <iostream>
#include <sys/timeb.h>
#include "include/interval_set.h"

int main(int argc, char *argv[])
{
  const int interval_count = std::stoi(argv[1]);
  const int interval_distance = 4;
  const int interval_size = 2;
  const int sample_count = 8;
  const int max_offset = interval_count * interval_distance;
  interval_set<int> a, b, intersection;

  for (int i = 0; i < max_offset; i+=interval_distance) {
    a.insert(i, interval_size);
  }

  for (int m = 1; m < 100; m++) {
    float ratio = 1 / float(m);

    for (int i = 0; i < max_offset; i+=interval_distance*m) {
      b.insert(i, interval_size);
    }

    struct timeb start, end;
    int ms = 0;
    for (int i = 0; i < sample_count; i++) {
      ftime(&start);
      intersection.intersection_of(a, b);
      ftime(&end);
      ms += (int) (1000.0 * (end.time - start.time)
        + (end.millitm - start.millitm));
      intersection.clear();
    }
    b.clear();

    std::cout << ratio << "\t" << ms << std::endl << std::flush;
  }
}

Signed-off-by: Zac Medico <zmedico@gmail.com>
(cherry picked from commit 32bc0430f70b057d1bba623252e92ab9f279028d)

src/include/interval_set.h

index c903dc33311ac48b87d42c684afcb8f71a534df7..6a7dcc6d12734c43920dc8ea2022be75fb3ba735 100644 (file)
@@ -447,7 +447,8 @@ class interval_set {
 
     typename std::map<T,T>::const_iterator pa = a.m.begin();
     typename std::map<T,T>::const_iterator pb = b.m.begin();
-    
+    typename decltype(m)::iterator mi = m.begin();
+
     while (pa != a.m.end() && pb != b.m.end()) {
       // passing?
       if (pa->first + pa->second <= pb->first) 
@@ -457,7 +458,9 @@ class interval_set {
       T start = MAX(pa->first, pb->first);
       T en = MIN(pa->first+pa->second, pb->first+pb->second);
       assert(en > start);
-      insert(start, en-start);
+      typename decltype(m)::value_type i{start, en - start};
+      mi = m.insert(mi, i);
+      _size += i.second;
       if (pa->first+pa->second > pb->first+pb->second)
         pb++;
       else