From c00dd55f40ab795db3f43522755e49758c9cc737 Mon Sep 17 00:00:00 2001 From: Boris Ranto Date: Thu, 12 Jul 2018 19:01:50 +0200 Subject: [PATCH] prometheus: Remove the Metrics class The class was introduced to account for the 'pending' mechanism but that is no longer being used by the code so we no longer need it since it serves a purpose of a simple dictionary, now. This simplifies the code and brings a bit of performance improvements since the Metric classes in the collect method are created only if they did not exist before. Signed-off-by: Boris Ranto --- src/pybind/mgr/prometheus/module.py | 343 +++++++++++++--------------- 1 file changed, 163 insertions(+), 180 deletions(-) diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index edc68b65806..c140816dc42 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -75,32 +75,103 @@ DISK_OCCUPATION = ( 'ceph_daemon', 'device','instance') NUM_OBJECTS = ['degraded', 'misplaced', 'unfound'] -class Metrics(object): - def __init__(self): - self.metrics = self._setup_static_metrics() +class Metric(object): + def __init__(self, mtype, name, desc, labels=None): + self.mtype = mtype + self.name = name + self.desc = desc + self.labelnames = labels # tuple if present + self.value = {} # indexed by label values def clear(self): - ''' - Clear all the metrics data. This does not remove the initiated Metric classes. - ''' - for k in self.metrics.keys(): - self.metrics[k].clear() + self.value = {} + + def set(self, value, labelvalues=None): + # labelvalues must be a tuple + labelvalues = labelvalues or ('',) + self.value[labelvalues] = value - def set(self, key, value, labels=('',)): - ''' - Set the value of a single Metric (with labels). Use this to set the value of any metric. - ''' - self.metrics[key].set(value, labels) + def str_expfmt(self): - def all(self): - ''' - Return the dict of all the metrics. - ''' - return self.metrics + def promethize(path): + ''' replace illegal metric name characters ''' + result = path.replace('.', '_').replace('+', '_plus').replace('::', '_') + + # Hyphens usually turn into underscores, unless they are + # trailing + if result.endswith("-"): + result = result[0:-1] + "_minus" + else: + result = result.replace("-", "_") + + return "ceph_{0}".format(result) + + def floatstr(value): + ''' represent as Go-compatible float ''' + if value == float('inf'): + return '+Inf' + if value == float('-inf'): + return '-Inf' + if math.isnan(value): + return 'NaN' + return repr(float(value)) - def add_metric(self, path, metric): - if path not in self.metrics: - self.metrics[path] = metric + name = promethize(self.name) + expfmt = ''' +# HELP {name} {desc} +# TYPE {name} {mtype}'''.format( + name=name, + desc=self.desc, + mtype=self.mtype, + ) + + for labelvalues, value in self.value.items(): + if self.labelnames: + labels = zip(self.labelnames, labelvalues) + labels = ','.join('%s="%s"' % (k, v) for k, v in labels) + else: + labels = '' + if labels: + fmtstr = '\n{name}{{{labels}}} {value}' + else: + fmtstr = '\n{name} {value}' + expfmt += fmtstr.format( + name=name, + labels=labels, + value=floatstr(value), + ) + return expfmt + + +class Module(MgrModule): + COMMANDS = [ + { + "cmd": "prometheus self-test", + "desc": "Run a self test on the prometheus module", + "perm": "rw" + }, + { + "cmd": "prometheus file_sd_config", + "desc": "Return file_sd compatible prometheus config for mgr cluster", + "perm": "r" + }, + ] + + OPTIONS = [ + {'name': 'server_addr'}, + {'name': 'server_port'}, + {'name': 'scrape_interval'}, + ] + + def __init__(self, *args, **kwargs): + super(Module, self).__init__(*args, **kwargs) + self.metrics = self._setup_static_metrics() + self.shutdown_event = threading.Event() + self.collect_lock = threading.RLock() + self.collect_time = 0 + self.collect_timeout = 5.0 + self.collect_cache = None + _global_instance['plugin'] = self def _setup_static_metrics(self): metrics = {} @@ -225,122 +296,24 @@ class Metrics(object): return metrics - -class Metric(object): - def __init__(self, mtype, name, desc, labels=None): - self.mtype = mtype - self.name = name - self.desc = desc - self.labelnames = labels # tuple if present - self.value = {} # indexed by label values - - def clear(self): - self.value = {} - - def set(self, value, labelvalues=None): - # labelvalues must be a tuple - labelvalues = labelvalues or ('',) - self.value[labelvalues] = value - - def str_expfmt(self): - - def promethize(path): - ''' replace illegal metric name characters ''' - result = path.replace('.', '_').replace('+', '_plus').replace('::', '_') - - # Hyphens usually turn into underscores, unless they are - # trailing - if result.endswith("-"): - result = result[0:-1] + "_minus" - else: - result = result.replace("-", "_") - - return "ceph_{0}".format(result) - - def floatstr(value): - ''' represent as Go-compatible float ''' - if value == float('inf'): - return '+Inf' - if value == float('-inf'): - return '-Inf' - if math.isnan(value): - return 'NaN' - return repr(float(value)) - - name = promethize(self.name) - expfmt = ''' -# HELP {name} {desc} -# TYPE {name} {mtype}'''.format( - name=name, - desc=self.desc, - mtype=self.mtype, - ) - - for labelvalues, value in self.value.items(): - if self.labelnames: - labels = zip(self.labelnames, labelvalues) - labels = ','.join('%s="%s"' % (k, v) for k, v in labels) - else: - labels = '' - if labels: - fmtstr = '\n{name}{{{labels}}} {value}' - else: - fmtstr = '\n{name} {value}' - expfmt += fmtstr.format( - name=name, - labels=labels, - value=floatstr(value), - ) - return expfmt - - -class Module(MgrModule): - COMMANDS = [ - { - "cmd": "prometheus self-test", - "desc": "Run a self test on the prometheus module", - "perm": "rw" - }, - { - "cmd": "prometheus file_sd_config", - "desc": "Return file_sd compatible prometheus config for mgr cluster", - "perm": "r" - }, - ] - - OPTIONS = [ - {'name': 'server_addr'}, - {'name': 'server_port'}, - {'name': 'scrape_interval'}, - ] - - def __init__(self, *args, **kwargs): - super(Module, self).__init__(*args, **kwargs) - self.metrics = Metrics() - self.shutdown_event = threading.Event() - self.collect_lock = threading.RLock() - self.collect_time = 0 - self.collect_timeout = 5.0 - self.collect_cache = None - _global_instance['plugin'] = self - def get_health(self): health = json.loads(self.get('health')['json']) - self.metrics.set('health_status', - health_status_to_number(health['status']) + self.metrics['health_status'].set( + health_status_to_number(health['status']) ) def get_df(self): # maybe get the to-be-exported metrics from a config? df = self.get('df') for stat in DF_CLUSTER: - self.metrics.set('cluster_{}'.format(stat), df['stats'][stat]) + self.metrics['cluster_{}'.format(stat)].set(df['stats'][stat]) for pool in df['pools']: for stat in DF_POOL: - self.metrics.set('pool_{}'.format(stat), - pool['stats'][stat], - (pool['id'],)) + self.metrics['pool_{}'.format(stat)].set( + pool['stats'][stat], + (pool['id'],) + ) def get_fs(self): fs_map = self.get('fs_map') @@ -349,19 +322,21 @@ class Module(MgrModule): for fs in fs_map['filesystems']: # collect fs metadata data_pools = ",".join([str(pool) for pool in fs['mdsmap']['data_pools']]) - self.metrics.set('fs_metadata', 1, - (data_pools, - fs['id'], - fs['mdsmap']['metadata_pool'], - fs['mdsmap']['fs_name'])) + self.metrics['fs_metadata'].set(1, ( + data_pools, + fs['id'], + fs['mdsmap']['metadata_pool'], + fs['mdsmap']['fs_name'] + )) self.log.debug('mdsmap: {}'.format(fs['mdsmap'])) for gid, daemon in fs['mdsmap']['info'].items(): id_ = daemon['name'] host_version = servers.get((id_, 'mds'), ('','')) - self.metrics.set('mds_metadata', 1, - ('mds.{}'.format(id_), fs['id'], - host_version[0], daemon['addr'], - daemon['rank'], host_version[1])) + self.metrics['mds_metadata'].set(1, ( + 'mds.{}'.format(id_), fs['id'], + host_version[0], daemon['addr'], + daemon['rank'], host_version[1] + )) def get_quorum_status(self): mon_status = json.loads(self.get('mon_status')['json']) @@ -370,20 +345,22 @@ class Module(MgrModule): rank = mon['rank'] id_ = mon['name'] host_version = servers.get((id_, 'mon'), ('','')) - self.metrics.set('mon_metadata', 1, - ('mon.{}'.format(id_), host_version[0], - mon['public_addr'].split(':')[0], rank, - host_version[1])) + self.metrics['mon_metadata'].set(1, ( + 'mon.{}'.format(id_), host_version[0], + mon['public_addr'].split(':')[0], rank, + host_version[1] + )) in_quorum = int(rank in mon_status['quorum']) - self.metrics.set('mon_quorum_status', in_quorum, - ('mon.{}'.format(id_),)) + self.metrics['mon_quorum_status'].set(in_quorum, ( + 'mon.{}'.format(id_), + )) def get_pg_status(self): # TODO add per pool status? pg_status = self.get('pg_status') # Set total count of PGs, first - self.metrics.set('pg_total', pg_status['num_pgs']) + self.metrics['pg_total'].set(pg_status['num_pgs']) reported_states = {} for pg in pg_status['pgs_by_state']: @@ -393,14 +370,14 @@ class Module(MgrModule): for state in reported_states: path = 'pg_{}'.format(state) try: - self.metrics.set(path, reported_states[state]) + self.metrics[path].set(reported_states[state]) except KeyError: self.log.warn("skipping pg in unknown state {}".format(state)) for state in PG_STATES: if state not in reported_states: try: - self.metrics.set('pg_{}'.format(state), 0) + self.metrics['pg_{}'.format(state)].set(0) except KeyError: self.log.warn("skipping pg in unknown state {}".format(state)) @@ -410,8 +387,9 @@ class Module(MgrModule): id_ = osd['osd'] for stat in OSD_STATS: val = osd['perf_stat'][stat] - self.metrics.set('osd_{}'.format(stat), val, - ('osd.{}'.format(id_),)) + self.metrics['osd_{}'.format(stat)].set(val, ( + 'osd.{}'.format(id_), + )) def get_service_list(self): ret = {} @@ -426,8 +404,9 @@ class Module(MgrModule): osd_map = self.get('osd_map') osd_flags = osd_map['flags'].split(',') for flag in OSD_FLAGS: - self.metrics.set('osd_flag_{}'.format(flag), - int(flag in osd_flags)) + self.metrics['osd_flag_{}'.format(flag)].set( + int(flag in osd_flags) + ) osd_devices = self.get('osd_map_crush')['devices'] servers = self.get_service_list() @@ -458,7 +437,7 @@ class Module(MgrModule): host_version = servers.get((str(id_), 'osd'), ('','')) - self.metrics.set('osd_metadata', 1, ( + self.metrics['osd_metadata'].set(1, ( 'osd.{}'.format(id_), c_addr, dev_class, @@ -469,8 +448,9 @@ class Module(MgrModule): # collect osd status for state in OSD_STATUS: status = osd[state] - self.metrics.set('osd_{}'.format(state), status, - ('osd.{}'.format(id_),)) + self.metrics['osd_{}'.format(state)].set(status, ( + 'osd.{}'.format(id_), + )) # collect disk occupation metadata osd_metadata = self.get_metadata("osd", str(id_)) @@ -487,7 +467,7 @@ class Module(MgrModule): if osd_dev_node and osd_hostname: self.log.debug("Got dev for osd {0}: {1}/{2}".format( id_, osd_hostname, osd_dev_node)) - self.metrics.set('disk_occupation', 1, ( + self.metrics['disk_occupation'].set(1, ( "osd.{0}".format(id_), osd_dev_node, osd_hostname @@ -498,7 +478,7 @@ class Module(MgrModule): pool_meta = [] for pool in osd_map['pools']: - self.metrics.set('pool_metadata', 1, (pool['pool'], pool['pool_name'])) + self.metrics['pool_metadata'].set(1, (pool['pool'], pool['pool_name'])) # Populate rgw_metadata for key, value in servers.items(): @@ -506,8 +486,7 @@ class Module(MgrModule): if service_type != 'rgw': continue hostname, version = value - self.metrics.set( - 'rgw_metadata', + self.metrics['rgw_metadata'].set( 1, ('{}.{}'.format(service_type, service_id), hostname, version) ) @@ -516,11 +495,12 @@ class Module(MgrModule): pg_sum = self.get('pg_summary')['pg_stats_sum']['stat_sum'] for obj in NUM_OBJECTS: stat = 'num_objects_{}'.format(obj) - self.metrics.set(stat, pg_sum[stat]) + self.metrics[stat].set(pg_sum[stat]) def collect(self): # Clear the metrics before scraping - self.metrics.clear() + for k in self.metrics.keys(): + self.metrics[k].clear() self.get_health() self.get_df() @@ -545,32 +525,35 @@ class Module(MgrModule): # Represent the long running avgs as sum/count pairs if counter_info['type'] & self.PERFCOUNTER_LONGRUNAVG: _path = path + '_sum' - self.metrics.add_metric(_path, Metric( - stattype, - _path, - counter_info['description'] + ' Total', - ("ceph_daemon",), - )) - self.metrics.set(_path, value, (daemon,)) + if _path not in self.metrics: + self.metrics[_path] = Metric( + stattype, + _path, + counter_info['description'] + ' Total', + ("ceph_daemon",), + ) + self.metrics[_path].set(value, (daemon,)) _path = path + '_count' - self.metrics.add_metric(_path, Metric( - 'counter', - _path, - counter_info['description'] + ' Count', - ("ceph_daemon",), - )) - self.metrics.set(_path, counter_info['count'], (daemon,)) + if _path not in self.metrics: + self.metrics[_path] = Metric( + 'counter', + _path, + counter_info['description'] + ' Count', + ("ceph_daemon",), + ) + self.metrics[_path].set(counter_info['count'], (daemon,)) else: - self.metrics.add_metric(path, Metric( - stattype, - path, - counter_info['description'], - ("ceph_daemon",), - )) - self.metrics.set(path, value, (daemon,)) - - return self.metrics.all() + if path not in self.metrics: + self.metrics[path] = Metric( + stattype, + path, + counter_info['description'], + ("ceph_daemon",), + ) + self.metrics[path].set(value, (daemon,)) + + return self.metrics def get_file_sd_config(self): servers = self.list_servers() -- 2.39.5