From: Yaarit Hatuka Date: Mon, 27 Jan 2020 13:57:55 +0000 (-0500) Subject: mgr/devicehealth: fix telemetry stops sending device reports after 48 hours X-Git-Tag: v15.1.0~19^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=5f7e4a980a73e8cacb2c9bde47d822a32fb8c440;p=ceph-ci.git mgr/devicehealth: fix telemetry stops sending device reports after 48 hours The telemetry module fetches device metrics that were scraped within the last "telemetry interval"*2 (=48 hours by default) by calling _get_device_metrics() with min_sample. _get_device_metrics() fetches the metrics from omap and breaks on the first one that is older than min_sample. However, because the metrics were fetched in ascending order (from oldest to newest), the loop broke on the very first one it received whenever that one was older than the interval above, so no newer metrics were returned. We need to pass min_sample to get_omap_vals() as the start key so that it starts fetching from that value. Fixes: https://tracker.ceph.com/issues/43837 Signed-off-by: Yaarit Hatuka --- diff --git a/src/pybind/mgr/devicehealth/module.py b/src/pybind/mgr/devicehealth/module.py index 406d93ec52e..e28ebf84018 100644 --- a/src/pybind/mgr/devicehealth/module.py +++ b/src/pybind/mgr/devicehealth/module.py @@ -429,7 +429,7 @@ class Module(MgrModule): return {} with ioctx: with rados.ReadOpCtx() as op: - omap_iter, ret = ioctx.get_omap_vals(op, "", sample or '', + omap_iter, ret = ioctx.get_omap_vals(op, min_sample or '', sample or '', MAX_SAMPLES) # fixme assert ret == 0 try: