From 34976bae148aec013b53e1376b45a7af98e69b93 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Stephan=20M=C3=BCller?= <smueller@suse.com>
Date: Fri, 14 Jun 2019 15:45:53 +0200
Subject: [PATCH] mgr: Add get_rates_from_data to mgr_util.py
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

The function calculates the difference between the values
in a time series list.

The previous location of it was in the dashboard, that's why the
function has doc tests. In order to be able to test them as part of the
manager utilities a bit of tox magic was added.

Fixes: https://tracker.ceph.com/issues/40365
Signed-off-by: Stephan MÃ¼ller <smueller@suse.com>
---
 src/pybind/mgr/dashboard/controllers/osd.py   |   8 +-
 .../mgr/dashboard/services/ceph_service.py    |  58 +--------
 .../mgr/dashboard/services/tcmu_service.py    |   9 +-
 src/pybind/mgr/mgr_util.py                    | 122 ++++++++++++++++++
 4 files changed, 139 insertions(+), 58 deletions(-)

diff --git a/src/pybind/mgr/dashboard/controllers/osd.py b/src/pybind/mgr/dashboard/controllers/osd.py
index 189da4bb63484..064a48be0d8e8 100644
--- a/src/pybind/mgr/dashboard/controllers/osd.py
+++ b/src/pybind/mgr/dashboard/controllers/osd.py
@@ -2,6 +2,9 @@
 from __future__ import absolute_import
 import json
 import logging
+
+from mgr_util import get_most_recent_rate
+
 from . import ApiController, RESTController, Endpoint, ReadPermission, UpdatePermission
 from .. import mgr
 from ..security import Scope
@@ -48,8 +51,9 @@ class Osd(RESTController):
                 continue
             for stat in ['osd.op_w', 'osd.op_in_bytes', 'osd.op_r', 'osd.op_out_bytes']:
                 prop = stat.split('.')[1]
-                osd['stats'][prop] = CephService.get_rate('osd', osd_spec, stat)
-                osd['stats_history'][prop] = CephService.get_rates('osd', osd_spec, stat)
+                rates = CephService.get_rates('osd', osd_spec, stat)
+                osd['stats'][prop] = get_most_recent_rate(rates)
+                osd['stats_history'][prop] = rates
             # Gauge stats
             for stat in ['osd.numpg', 'osd.stat_bytes', 'osd.stat_bytes_used']:
                 osd['stats'][stat.split('.')[1]] = mgr.get_latest('osd', osd_spec, stat)
diff --git a/src/pybind/mgr/dashboard/services/ceph_service.py b/src/pybind/mgr/dashboard/services/ceph_service.py
index 11f19090db7ae..170fc128e0c97 100644
--- a/src/pybind/mgr/dashboard/services/ceph_service.py
+++ b/src/pybind/mgr/dashboard/services/ceph_service.py
@@ -7,15 +7,7 @@ from six.moves import reduce
 import rados
 
 from mgr_module import CommandResult
-
-try:
-    from more_itertools import pairwise
-except ImportError:
-    def pairwise(iterable):
-        from itertools import tee
-        a, b = tee(iterable)
-        next(b, None)
-        return zip(a, b)
+from mgr_util import get_time_series_rates, get_most_recent_rate
 
 from .. import mgr
 
@@ -116,16 +108,12 @@ class CephService(object):
             stats = pool_stats[pool['pool']]
             s = {}
 
-            def get_rate(series):
-                if len(series) >= 2:
-                    return differentiate(*list(series)[-2:])
-                return 0
-
             for stat_name, stat_series in stats.items():
+                rates = get_time_series_rates(stat_series)
                 s[stat_name] = {
                     'latest': stat_series[0][1],
-                    'rate': get_rate(stat_series),
-                    'rates': get_rates_from_data(stat_series)
+                    'rate': get_most_recent_rate(rates),
+                    'rates': rates
                 }
             pool['stats'] = s
             pools_w_stats.append(pool)
@@ -225,16 +213,12 @@ class CephService(object):
         :return: the derivative of mgr.get_counter()
         :rtype: list[tuple[int, float]]"""
         data = mgr.get_counter(svc_type, svc_name, path)[path]
-        return get_rates_from_data(data)
+        return get_time_series_rates(data)
 
     @classmethod
     def get_rate(cls, svc_type, svc_name, path):
         """returns most recent rate"""
-        data = mgr.get_counter(svc_type, svc_name, path)[path]
-
-        if data and len(data) > 1:
-            return differentiate(*data[-2:])
-        return 0.0
+        return get_most_recent_rate(cls.get_rates(svc_type, svc_name, path))
 
     @classmethod
     def get_client_perf(cls):
@@ -300,33 +284,3 @@ class CephService(object):
             'statuses': pg_summary['all'],
             'pgs_per_osd': pgs_per_osd,
         }
-
-
-def get_rates_from_data(data):
-    """
-    >>> get_rates_from_data([])
-    [(0, 0.0)]
-    >>> get_rates_from_data([[1, 42]])
-    [(1, 0.0)]
-    >>> get_rates_from_data([[0, 100], [2, 101], [3, 100], [4, 100]])
-    [(2, 0.5), (3, 1.0), (4, 0.0)]
-    """
-    if not data:
-        return [(0, 0.0)]
-    if len(data) == 1:
-        return [(data[0][0], 0.0)]
-    return [(data2[0], differentiate(data1, data2)) for data1, data2 in pairwise(data)]
-
-
-def differentiate(data1, data2):
-    """
-    >>> times = [0, 2]
-    >>> values = [100, 101]
-    >>> differentiate(*zip(times, values))
-    0.5
-    >>> times = [0, 2]
-    >>> values = [100, 99]
-    >>> differentiate(*zip(times, values))
-    0.5
-    """
-    return abs((data2[1] - data1[1]) / float(data2[0] - data1[0]))
diff --git a/src/pybind/mgr/dashboard/services/tcmu_service.py b/src/pybind/mgr/dashboard/services/tcmu_service.py
index 67d2118aa541c..1f3e4d8d78b49 100644
--- a/src/pybind/mgr/dashboard/services/tcmu_service.py
+++ b/src/pybind/mgr/dashboard/services/tcmu_service.py
@@ -1,3 +1,5 @@
+from mgr_util import get_most_recent_rate
+
 from dashboard.services.ceph_service import CephService
 from .. import mgr
 
@@ -60,10 +62,9 @@ class TcmuService(object):
                     image['stats_history'] = {}
                     for s in ['rd', 'wr', 'rd_bytes', 'wr_bytes']:
                         perf_key = "{}{}".format(perf_key_prefix, s)
-                        image['stats'][s] = CephService.get_rate(
-                            'tcmu-runner', service_id, perf_key)
-                        image['stats_history'][s] = CephService.get_rates(
-                            'tcmu-runner', service_id, perf_key)
+                        rates = CephService.get_rates('tcmu-runner', service_id, perf_key)
+                        image['stats'][s] = get_most_recent_rate(rates)
+                        image['stats_history'][s] = rates
             else:
                 daemon['non_optimized_paths'] += 1
                 image['non_optimized_paths'].append(hostname)
diff --git a/src/pybind/mgr/mgr_util.py b/src/pybind/mgr/mgr_util.py
index da9051b0aa0ce..9edffc2e9719e 100644
--- a/src/pybind/mgr/mgr_util.py
+++ b/src/pybind/mgr/mgr_util.py
@@ -183,3 +183,125 @@ def verify_tls_files(cert_fname, pkey_fname):
         logger.warning(
             'Private key {} and certificate {} do not match up: {}'.format(
                 pkey_fname, cert_fname, str(e)))
+
+def get_most_recent_rate(rates):
+    """ Get most recent rate from rates
+
+    :param rates: The derivative between all time series data points [time in seconds, value]
+    :type rates: list[tuple[int, float]]
+
+    :return: The last derivative or 0.0 if none exists
+    :rtype: float
+
+    >>> get_most_recent_rate(None)
+    0.0
+    >>> get_most_recent_rate([])
+    0.0
+    >>> get_most_recent_rate([(1, -2.0)])
+    -2.0
+    >>> get_most_recent_rate([(1, 2.0), (2, 1.5), (3, 5.0)])
+    5.0
+    """
+    if not rates:
+        return 0.0
+    return rates[-1][1]
+
+def get_time_series_rates(data):
+    """ Rates from time series data
+
+    :param data: Time series data [time in seconds, value]
+    :type data: list[tuple[int, float]]
+
+    :return: The derivative between all time series data points [time in seconds, value]
+    :rtype: list[tuple[int, float]]
+
+    >>> logger.debug = lambda s,x,y: print(s % (x,y))
+    >>> get_time_series_rates([])
+    []
+    >>> get_time_series_rates([[0, 1], [1, 3]])
+    [(1, 2.0)]
+    >>> get_time_series_rates([[0, 2], [0, 3], [0, 1], [1, 2], [1, 3]])
+    Duplicate timestamp in time series data: [0, 2], [0, 3]
+    Duplicate timestamp in time series data: [0, 3], [0, 1]
+    Duplicate timestamp in time series data: [1, 2], [1, 3]
+    [(1, 2.0)]
+    >>> get_time_series_rates([[1, 1], [2, 3], [4, 11], [5, 16], [6, 22]])
+    [(2, 2.0), (4, 4.0), (5, 5.0), (6, 6.0)]
+    """
+    data = _filter_time_series(data)
+    if not data:
+        return []
+    return [(data2[0], _derivative(data1, data2)) for data1, data2 in
+            _pairwise(data)]
+
+def _filter_time_series(data):
+    """ Filters time series data
+
+    Filters out samples with the same timestamp in given time series data.
+    It also enforces the list to contain at least two samples.
+
+    All filtered values will be shown in the debug log. If values were filtered it's a bug in the
+    time series data collector, please report it.
+
+    :param data: Time series data [time in seconds, value]
+    :type data: list[tuple[int, float]]
+
+    :return: Filtered time series data [time in seconds, value]
+    :rtype: list[tuple[int, float]]
+
+    >>> logger.debug = lambda s,x,y: print(s % (x,y))
+    >>> _filter_time_series([])
+    []
+    >>> _filter_time_series([[1, 42]])
+    []
+    >>> _filter_time_series([[10, 2], [10, 3]])
+    Duplicate timestamp in time series data: [10, 2], [10, 3]
+    []
+    >>> _filter_time_series([[0, 1], [1, 2]])
+    [[0, 1], [1, 2]]
+    >>> _filter_time_series([[0, 2], [0, 3], [0, 1], [1, 2], [1, 3]])
+    Duplicate timestamp in time series data: [0, 2], [0, 3]
+    Duplicate timestamp in time series data: [0, 3], [0, 1]
+    Duplicate timestamp in time series data: [1, 2], [1, 3]
+    [[0, 1], [1, 3]]
+    >>> _filter_time_series([[1, 1], [2, 3], [4, 11], [5, 16], [6, 22]])
+    [[1, 1], [2, 3], [4, 11], [5, 16], [6, 22]]
+    """
+    filtered = []
+    for i in range(len(data) - 1):
+        if data[i][0] == data[i + 1][0]:  # Same timestamp
+            logger.debug("Duplicate timestamp in time series data: %s, %s", data[i], data[i + 1])
+            continue
+        filtered.append(data[i])
+    if not filtered:
+        return []
+    filtered.append(data[-1])
+    return filtered
+
+def _derivative(p1, p2):
+    """ Derivative between two time series data points
+
+    :param p1: Time series data [time in seconds, value]
+    :type p1: tuple[int, float]
+    :param p2: Time series data [time in seconds, value]
+    :type p2: tuple[int, float]
+
+    :return: Derivative between both points
+    :rtype: float
+
+    >>> _derivative([0, 0], [2, 1])
+    0.5
+    >>> _derivative([0, 1], [2, 0])
+    -0.5
+    >>> _derivative([0, 0], [3, 1])
+    0.3333333333333333
+    """
+    return (p2[1] - p1[1]) / float(p2[0] - p1[0])
+
+def _pairwise(iterable):
+    it = iter(iterable)
+    a = next(it, None)
+
+    for b in it:
+        yield (a, b)
+        a = b
-- 
2.39.5