From a5f86cb85fcbf8a9003bf511b57a6d6b4a0e1f41 Mon Sep 17 00:00:00 2001 From: Patrick Seidensal Date: Tue, 9 Jun 2020 15:58:54 +0200 Subject: [PATCH] mgr/prometheus: log time it takes to collect metrics When debug is enabled, the time it takes to gather the various types of metrics is logged by type. Fixes: https://tracker.ceph.com/issues/46444 Signed-off-by: Patrick Seidensal --- src/pybind/mgr/mgr_module.py | 2 ++ src/pybind/mgr/mgr_util.py | 24 +++++++++++++++++++++++- src/pybind/mgr/prometheus/module.py | 15 +++++++++++++-- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/pybind/mgr/mgr_module.py b/src/pybind/mgr/mgr_module.py index 8dedef44d95e9..df1100c0ec5bf 100644 --- a/src/pybind/mgr/mgr_module.py +++ b/src/pybind/mgr/mgr_module.py @@ -13,6 +13,7 @@ from collections import defaultdict, namedtuple import rados import re import time +from mgr_util import profile_method PG_STATES = [ "active", @@ -1379,6 +1380,7 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin): else: return 0, 0 + @profile_method() def get_all_perf_counters(self, prio_limit=PRIO_USEFUL, services=("mds", "mon", "osd", "rbd-mirror", "rgw", "tcmu-runner")): diff --git a/src/pybind/mgr/mgr_util.py b/src/pybind/mgr/mgr_util.py index 550df62a34c6e..38dc86245679c 100644 --- a/src/pybind/mgr/mgr_util.py +++ b/src/pybind/mgr/mgr_util.py @@ -9,13 +9,14 @@ import logging import sys from threading import Lock, Condition, Event from typing import no_type_check +from functools import wraps if sys.version_info >= (3, 3): from threading import Timer else: from threading import _Timer as Timer try: - from typing import Tuple + from typing import Tuple, Any, Callable except ImportError: TYPE_CHECKING = False # just for type checking @@ -627,3 +628,24 @@ def to_pretty_timedelta(n): if n < datetime.timedelta(days=365*2): return str(n.days // 30) + 'M' return str(n.days // 365) + 'y' + + +def profile_method(skip_attribute=False): + """ + Decorator for methods of the Module class. Logs the name of the given + function f with the time it takes to execute it. + """ + def outer(f): + @wraps(f) + def wrapper(*args, **kwargs): + self = args[0] + t = time.time() + self.log.debug('Starting method {}.'.format(f.__name__)) + result = f(*args, **kwargs) + duration = time.time() - t + if not skip_attribute: + wrapper._execution_duration = duration # type: ignore + self.log.debug('Method {} ran {:.3f} seconds.'.format(f.__name__, duration)) + return result + return wrapper + return outer diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index 1fdf08debab33..2ed5d1714d27a 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -9,9 +9,8 @@ import socket import threading import time from mgr_module import MgrModule, MgrStandbyModule, CommandResult, PG_STATES -from mgr_util import get_default_addr +from mgr_util import get_default_addr, profile_method from rbd import RBD - try: from typing import Optional, Dict, Any, Set except: @@ -434,12 +433,14 @@ class Module(MgrModule): return metrics + @profile_method() def get_health(self): health = json.loads(self.get('health')['json']) self.metrics['health_status'].set( health_status_to_number(health['status']) ) + @profile_method() def get_pool_stats(self): # retrieve pool stats to provide per pool recovery metrics # (osd_pool_stats moved to mgr in Mimic) @@ -451,6 +452,7 @@ class Module(MgrModule): (pool['pool_id'],) ) + @profile_method() def get_df(self): # maybe get the to-be-exported metrics from a config? df = self.get('df') @@ -464,6 +466,7 @@ class Module(MgrModule): (pool['id'],) ) + @profile_method() def get_fs(self): fs_map = self.get('fs_map') servers = self.get_service_list() @@ -497,6 +500,7 @@ class Module(MgrModule): daemon['rank'], host_version[1] )) + @profile_method() def get_quorum_status(self): mon_status = json.loads(self.get('mon_status')['json']) servers = self.get_service_list() @@ -514,6 +518,7 @@ class Module(MgrModule): 'mon.{}'.format(id_), )) + @profile_method() def get_mgr_status(self): mgr_map = self.get('mgr_map') servers = self.get_service_list() @@ -559,6 +564,7 @@ class Module(MgrModule): self.metrics['mgr_module_status'].set(_state, (mod_name,)) self.metrics['mgr_module_can_run'].set(_can_run, (mod_name,)) + @profile_method() def get_pg_status(self): pg_summary = self.get('pg_summary') @@ -578,6 +584,7 @@ class Module(MgrModule): except KeyError: self.log.warning("skipping pg in unknown state {}".format(state)) + @profile_method() def get_osd_stats(self): osd_stats = self.get('osd_stats') for osd in osd_stats['osd_stats']: @@ -597,6 +604,7 @@ class Module(MgrModule): ret.update({(service['id'], service['type']): (host, version)}) return ret + @profile_method() def get_metadata_and_osd_status(self): osd_map = self.get('osd_map') osd_flags = osd_map['flags'].split(',') @@ -720,12 +728,14 @@ class Module(MgrModule): for k in RBD_MIRROR_METADATA) ) + @profile_method() def get_num_objects(self): pg_sum = self.get('pg_summary')['pg_stats_sum']['stat_sum'] for obj in NUM_OBJECTS: stat = 'num_objects_{}'.format(obj) self.metrics[stat].set(pg_sum[stat]) + @profile_method() def get_rbd_stats(self): # Per RBD image stats is collected by registering a dynamic osd perf # stats query that tells OSDs to group stats for requests associated @@ -998,6 +1008,7 @@ class Module(MgrModule): self.metrics.update(new_metrics) + @profile_method(True) def collect(self): # Clear the metrics before scraping for k in self.metrics.keys(): -- 2.39.5