import time
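+ # Placement group state names; the prometheus module later in this patch looks
+ # up metrics named pg_<state> for each of these (unknown states are skipped).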
PG_STATES = [
- "active",
- "clean",
- "down",
- "recovery_unfound",
- "backfill_unfound",
- "scrubbing",
- "degraded",
- "inconsistent",
- "peering",
- "repair",
- "recovering",
- "forced_recovery",
- "backfill_wait",
- "incomplete",
- "stale",
- "remapped",
- "deep",
- "backfilling",
- "forced_backfill",
- "backfill_toofull",
- "recovery_wait",
- "recovery_toofull",
- "undersized",
- "activating",
- "peered",
- "snaptrim",
- "snaptrim_wait",
- "snaptrim_error",
- "creating",
- "unknown"]
+ "active",
+ "clean",
+ "down",
+ "recovery_unfound",
+ "backfill_unfound",
+ "scrubbing",
+ "degraded",
+ "inconsistent",
+ "peering",
+ "repair",
+ "recovering",
+ "forced_recovery",
+ "backfill_wait",
+ "incomplete",
+ "stale",
+ "remapped",
+ "deep",
+ "backfilling",
+ "forced_backfill",
+ "backfill_toofull",
+ "recovery_wait",
+ "recovery_toofull",
+ "undersized",
+ "activating",
+ "peered",
+ "snaptrim",
+ "snaptrim_wait",
+ "snaptrim_error",
+ "creating",
+ "unknown"]
+
class CPlusPlusHandler(logging.Handler):
def __init__(self, module_inst):
def configure_logger(module_inst, name):
logger = logging.getLogger(name)
-
# Don't filter any logs at the python level, leave it to C++
logger.setLevel(logging.DEBUG)
def unconfigure_logger(module_inst, name):
logger = logging.getLogger(name)
- rm_handlers = [h for h in logger.handlers if isinstance(h, CPlusPlusHandler)]
+ rm_handlers = [
+ h for h in logger.handlers if isinstance(h, CPlusPlusHandler)]
for h in rm_handlers:
logger.removeHandler(h)
+
class CommandResult(object):
"""
Use with MgrModule.send_command
"""
+
def __init__(self, tag=None):
self.ev = threading.Event()
self.outs = ""
return self._get_pools_by_take(take).get('pools', [])
def calc_pg_upmaps(self, inc,
- max_deviation=.01, max_iterations=10, pools=[]):
+ max_deviation=.01, max_iterations=10, pools=None):
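+ # A mutable default ([]) would be shared across calls; default to None and
+ # build a fresh list per invocation instead.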
+ if pools is None:
+ pools = []
return self._calc_pg_upmaps(
inc,
max_deviation, max_iterations, pools)
def get_take_weight_osd_map(self, root):
uglymap = self._get_take_weight_osd_map(root)
- return { int(k): v for k, v in six.iteritems(uglymap.get('weights', {})) }
+ return {int(k): v for k, v in six.iteritems(uglymap.get('weights', {}))}
@staticmethod
def have_default_choose_args(dump):
r = default
return r
+
class MgrModule(ceph_module.BaseMgrModule):
COMMANDS = []
MODULE_OPTIONS = []
elif unit == self.BYTES:
return "B/s"
- def to_pretty_iec(self, n):
+ @staticmethod
+ def to_pretty_iec(n):
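+ # e.g. to_pretty_iec(20 << 30) -> '20 Gi'; values below 10 of a unit are
+ # rendered in the next smaller unit (5 GiB -> '5120 Mi').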
for bits, suffix in [(60, 'Ei'), (50, 'Pi'), (40, 'Ti'), (30, 'Gi'),
- (20, 'Mi'), (10, 'Ki')]:
+ (20, 'Mi'), (10, 'Ki')]:
if n > 10 << bits:
return str(n >> bits) + ' ' + suffix
return str(n) + ' '
- def get_pretty_row(self, elems, width):
+ @staticmethod
+ def get_pretty_row(elems, width):
"""
Takes an array of elements and returns a string with those elements
formatted as a table row. Useful for polling modules.
This is information that ceph-mgr has gleaned from the daemon metadata
reported by daemons running on a particular server.
- :param hostname: a hostame
+ :param hostname: a hostname
"""
return self._ceph_get_server(hostname)
Like ``get_server``, but gives information about all servers (i.e. all
unique hostnames that have been mentioned in daemon metadata)
- :return: a list of infomration about all servers
+ :return: a list of information about all servers
:rtype: list
"""
return self._ceph_get_server(None)
in their schema.
"""
if key not in [o['name'] for o in self.MODULE_OPTIONS]:
- raise RuntimeError("Config option '{0}' is not in {1}.MODULE_OPTIONS".\
- format(key, self.__class__.__name__))
+ raise RuntimeError("Config option '{0}' is not in {1}.MODULE_OPTIONS".
+ format(key, self.__class__.__name__))
def _get_module_option(self, key, default):
r = self._ceph_get_module_option(key)
Retrieve the value of a persistent configuration setting
:param str key:
+ :param str default:
:return: str
"""
self._validate_module_option(key)
"""
Set localized configuration for this ceph-mgr instance
:param str key:
- :param str default:
+ :param str val:
:return: str
"""
self._validate_module_option(key)
def set_localized_store(self, key, val):
return self._set_localized(key, val, self.set_store)
-
def self_test(self):
"""
Run a self-test on the module. Override this function and implement
return self._ceph_get_osdmap()
def get_latest(self, daemon_type, daemon_name, counter):
- data = self.get_latest_counter(daemon_type, daemon_name, counter)[counter]
+ data = self.get_latest_counter(
+ daemon_type, daemon_name, counter)[counter]
if data:
return data[1]
else:
return 0
def get_latest_avg(self, daemon_type, daemon_name, counter):
- data = self.get_latest_counter(daemon_type, daemon_name, counter)[counter]
+ data = self.get_latest_counter(
+ daemon_type, daemon_name, counter)[counter]
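+ # data[1]/data[2] of the newest sample form the (sum, count) pair that the
+ # prometheus module below exports for long-running averages.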
if data:
- return (data[1], data[2])
+ return data[1], data[2]
else:
- return (0, 0)
+ return 0, 0
def get_all_perf_counters(self, prio_limit=PRIO_USEFUL,
services=("mds", "mon", "osd",
result = defaultdict(dict)
-
for server in self.list_servers():
for service in server['services']:
if service['type'] not in services:
# Value is returned in a potentially-multi-service format,
# get just the service we're asking about
- svc_full_name = "{0}.{1}".format(service['type'], service['id'])
+ svc_full_name = "{0}.{1}".format(
+ service['type'], service['id'])
schema = schema[svc_full_name]
# Populate latest values
os._exit = os_exit_noop
-
# to access things in class Module from subclass Root. Because
# it's a dict, the writer doesn't need to declare 'global' for access
def health_status_to_number(status):
-
if status == 'HEALTH_OK':
return 0
elif status == 'HEALTH_WARN':
elif status == 'HEALTH_ERR':
return 2
+
DF_CLUSTER = ['total_bytes', 'total_used_bytes', 'total_used_raw_bytes']
DF_POOL = ['max_avail', 'stored', 'stored_raw', 'objects', 'dirty',
MDS_METADATA = ('ceph_daemon', 'fs_id', 'hostname', 'public_addr', 'rank',
'ceph_version')
-MON_METADATA = ('ceph_daemon', 'hostname', 'public_addr', 'rank', 'ceph_version')
+MON_METADATA = ('ceph_daemon', 'hostname',
+ 'public_addr', 'rank', 'ceph_version')
OSD_METADATA = ('back_iface', 'ceph_daemon', 'cluster_addr', 'device_class',
'front_iface', 'hostname', 'objectstore', 'public_addr',
RBD_MIRROR_METADATA = ('ceph_daemon', 'id', 'instance_id', 'hostname',
'ceph_version')
-DISK_OCCUPATION = ('ceph_daemon', 'device', 'db_device', 'wal_device', 'instance')
+DISK_OCCUPATION = ('ceph_daemon', 'device', 'db_device',
+ 'wal_device', 'instance')
NUM_OBJECTS = ['degraded', 'misplaced', 'unfound']
def promethize(path):
''' replace illegal metric name characters '''
- result = path.replace('.', '_').replace('+', '_plus').replace('::', '_')
+ result = path.replace('.', '_').replace(
+ '+', '_plus').replace('::', '_')
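+ # e.g. promethize('osd.op_r_latency::sum') -> 'osd_op_r_latency_sum'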
# Hyphens usually turn into underscores, unless they are
# trailing
]
MODULE_OPTIONS = [
- {'name': 'server_addr'},
- {'name': 'server_port'},
- {'name': 'scrape_interval'},
- {'name': 'rbd_stats_pools'},
- {'name': 'rbd_stats_pools_refresh_interval'},
+ {'name': 'server_addr'},
+ {'name': 'server_port'},
+ {'name': 'scrape_interval'},
+ {'name': 'rbd_stats_pools'},
+ {'name': 'rbd_stats_pools_refresh_interval'},
]
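+ # Read via get_localized_module_option(), so each option can be set per
+ # mgr instance, e.g. (assumed CLI, exact form may vary by release):
+ #   ceph config set mgr mgr/prometheus/server_port 9283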
def __init__(self, *args, **kwargs):
self.collect_timeout = 5.0
self.collect_cache = None
self.rbd_stats = {
- 'pools' : {},
- 'pools_refresh_time' : 0,
- 'counters_info' : {
- 'write_ops' : {'type' : self.PERFCOUNTER_COUNTER,
- 'desc' : 'RBD image writes count'},
- 'read_ops' : {'type' : self.PERFCOUNTER_COUNTER,
- 'desc' : 'RBD image reads count'},
- 'write_bytes' : {'type' : self.PERFCOUNTER_COUNTER,
- 'desc' : 'RBD image bytes written'},
- 'read_bytes' : {'type' : self.PERFCOUNTER_COUNTER,
- 'desc' : 'RBD image bytes read'},
- 'write_latency' : {'type' : self.PERFCOUNTER_LONGRUNAVG,
- 'desc' : 'RBD image writes latency (msec)'},
- 'read_latency' : {'type' : self.PERFCOUNTER_LONGRUNAVG,
- 'desc' : 'RBD image reads latency (msec)'},
+ 'pools': {},
+ 'pools_refresh_time': 0,
+ 'counters_info': {
+ 'write_ops': {'type': self.PERFCOUNTER_COUNTER,
+ 'desc': 'RBD image writes count'},
+ 'read_ops': {'type': self.PERFCOUNTER_COUNTER,
+ 'desc': 'RBD image reads count'},
+ 'write_bytes': {'type': self.PERFCOUNTER_COUNTER,
+ 'desc': 'RBD image bytes written'},
+ 'read_bytes': {'type': self.PERFCOUNTER_COUNTER,
+ 'desc': 'RBD image bytes read'},
+ 'write_latency': {'type': self.PERFCOUNTER_LONGRUNAVG,
+ 'desc': 'RBD image writes latency (msec)'},
+ 'read_latency': {'type': self.PERFCOUNTER_LONGRUNAVG,
+ 'desc': 'RBD image reads latency (msec)'},
},
}
_global_instance['plugin'] = self
active_daemons = []
for fs in fs_map['filesystems']:
# collect fs metadata
- data_pools = ",".join([str(pool) for pool in fs['mdsmap']['data_pools']])
+ data_pools = ",".join([str(pool)
+ for pool in fs['mdsmap']['data_pools']])
self.metrics['fs_metadata'].set(1, (
data_pools,
fs['id'],
self.log.debug('mdsmap: {}'.format(fs['mdsmap']))
for gid, daemon in fs['mdsmap']['info'].items():
id_ = daemon['name']
- host_version = servers.get((id_, 'mds'), ('',''))
+ host_version = servers.get((id_, 'mds'), ('', ''))
self.metrics['mds_metadata'].set(1, (
'mds.{}'.format(id_), fs['id'],
host_version[0], daemon['addr'],
for mon in mon_status['monmap']['mons']:
rank = mon['rank']
id_ = mon['name']
- host_version = servers.get((id_, 'mon'), ('',''))
+ host_version = servers.get((id_, 'mon'), ('', ''))
self.metrics['mon_metadata'].set(1, (
'mon.{}'.format(id_), host_version[0],
mon['public_addr'].split(':')[0], rank,
reported_states = {}
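+ # pgs_by_state reports compound names like 'active+clean'; split them and
+ # tally each component state separately.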
for pg in pg_status['pgs_by_state']:
for state in pg['state_name'].split('+'):
- reported_states[state] = reported_states.get(state, 0) + pg['count']
+ reported_states[state] = reported_states.get(
+ state, 0) + pg['count']
for state in reported_states:
path = 'pg_{}'.format(state)
try:
self.metrics['pg_{}'.format(state)].set(0)
except KeyError:
- self.log.warn("skipping pg in unknown state {}".format(state))
+ self.log.warning(
+ "skipping pg in unknown state {}".format(state))
def get_osd_stats(self):
osd_stats = self.get('osd_stats')
id_))
continue
- host_version = servers.get((str(id_), 'osd'), ('',''))
+ host_version = servers.get((str(id_), 'osd'), ('', ''))
# collect disk occupation metadata
osd_metadata = self.get_metadata("osd", str(id_))
osd_objectstore = osd_metadata.get('osd_objectstore', None)
if osd_objectstore == "filestore":
- # collect filestore backend device
- osd_dev_node = osd_metadata.get('backend_filestore_dev_node', None)
- # collect filestore journal device
+ # collect filestore backend device
+ osd_dev_node = osd_metadata.get(
+ 'backend_filestore_dev_node', None)
+ # collect filestore journal device
osd_wal_dev_node = osd_metadata.get('osd_journal', '')
osd_db_dev_node = ''
elif osd_objectstore == "bluestore":
- # collect bluestore backend device
- osd_dev_node = osd_metadata.get('bluestore_bdev_dev_node', None)
- # collect bluestore wal backend
+ # collect bluestore backend device
+ osd_dev_node = osd_metadata.get(
+ 'bluestore_bdev_dev_node', None)
+ # collect bluestore wal backend
osd_wal_dev_node = osd_metadata.get('bluefs_wal_dev_node', '')
- # collect bluestore db backend
+ # collect bluestore db backend
osd_db_dev_node = osd_metadata.get('bluefs_db_dev_node', '')
if osd_dev_node and osd_dev_node == "unknown":
osd_dev_node = None
))
else:
self.log.info("Missing dev node metadata for osd {0}, skipping "
- "occupation record for this osd".format(id_))
+ "occupation record for this osd".format(id_))
pool_meta = []
for pool in osd_map['pools']:
- self.metrics['pool_metadata'].set(1, (pool['pool'], pool['pool_name']))
+ self.metrics['pool_metadata'].set(
+ 1, (pool['pool'], pool['pool_name']))
# Populate other servers metadata
for key, value in servers.items():
mirror_metadata['ceph_daemon'] = '{}.{}'.format(service_type,
service_id)
self.metrics['rbd_mirror_metadata'].set(
- 1, (mirror_metadata.get(k, '') for k in RBD_MIRROR_METADATA)
+ 1, (mirror_metadata.get(k, '')
+ for k in RBD_MIRROR_METADATA)
)
def get_num_objects(self):
if pools:
next_refresh = self.rbd_stats['pools_refresh_time'] + \
self.get_localized_module_option(
- 'rbd_stats_pools_refresh_interval', 300)
+ 'rbd_stats_pools_refresh_interval', 300)
if rbd_stats_pools != pools or time.time() >= next_refresh:
self.refresh_rbd_stats_pools(pools)
pools_refreshed = True
break
if nspace_names:
namespace_regex = '^(' + \
- '|'.join([re.escape(x) for x in set(nspace_names)]) + ')$'
+ "|".join([re.escape(x)
+ for x in set(nspace_names)]) + ')$'
else:
namespace_regex = '^(.*)$'
if 'query' in self.rbd_stats and \
(pool_id_regex != self.rbd_stats['query']['key_descriptor'][0]['regex'] or
- namespace_regex != self.rbd_stats['query']['key_descriptor'][1]['regex']):
+ namespace_regex != self.rbd_stats['query']['key_descriptor'][1]['regex']):
self.remove_osd_perf_query(self.rbd_stats['query_id'])
del self.rbd_stats['query_id']
del self.rbd_stats['query']
label_names,
)
self.metrics[path].set(counters[i][1], labels)
- i += 1;
+ i += 1
def refresh_rbd_stats_pools(self, pools):
self.log.debug('refreshing rbd pools %s' % (pools))
pool_id = self.rados.pool_lookup(pool_name)
with self.rados.open_ioctx(pool_name) as ioctx:
if pool_id not in self.rbd_stats['pools']:
- self.rbd_stats['pools'][pool_id] = {'images' : {}}
+ self.rbd_stats['pools'][pool_id] = {'images': {}}
pool = self.rbd_stats['pools'][pool_id]
pool['name'] = pool_name
pool['ns_names'] = cfg_ns_names
del pool['images'][nspace_name]
for nspace_name in nspace_names:
if (nspace_name and
- not rbd.namespace_exists(ioctx, nspace_name)):
+ not rbd.namespace_exists(ioctx, nspace_name)):
self.log.debug('unknown namespace %s for pool %s' %
(nspace_name, pool_name))
continue
namespace = pool['images'][nspace_name]
images = {}
for image_meta in RBD().list2(ioctx):
- image = {'n' : image_meta['name']}
+ image = {'n': image_meta['name']}
image_id = image_meta['id']
if image_id in namespace:
image['c'] = namespace[image_id]['c']
continue
# Get the value of the counter
- value = self._perfvalue_to_value(counter_info['type'], counter_info['value'])
+ value = self._perfvalue_to_value(
+ counter_info['type'], counter_info['value'])
# Represent the long running avgs as sum/count pairs
if counter_info['type'] & self.PERFCOUNTER_LONGRUNAVG:
)
self.metrics[path].set(value, (daemon,))
- self.get_rbd_stats();
+ self.get_rbd_stats()
# Return formatted metrics and clear no longer used data
_metrics = [m.str_expfmt() for m in self.metrics.values()]
"prefix": "config-key get",
'key': "config/mgr/mgr/prometheus/{}/server_port".format(id_),
}),
- "")
+ "")
r, outb, outs = result.wait()
if r != 0:
global_instance().log.error("Failed to retrieve port for mgr {}: {}".format(id_, outs))
finally:
instance.collect_lock.release()
- def _metrics(self, instance):
+ @staticmethod
+ def _metrics(instance):
# Return cached data if available and collected before the cache times out
- if instance.collect_cache and time.time() - instance.collect_time < instance.collect_timeout:
+ if instance.collect_cache and \
+ time.time() - instance.collect_time < instance.collect_timeout:
cherrypy.response.headers['Content-Type'] = 'text/plain'
return instance.collect_cache
raise cherrypy.HTTPError(503, 'No MON connection')
# Make the cache timeout for collecting configurable
- self.collect_timeout = self.get_localized_module_option('scrape_interval', 5.0)
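+ # Assumption: this is meant to match the Prometheus scrape interval, so
+ # scrapes arriving within one interval are served from the collect cache.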
+ self.collect_timeout = self.get_localized_module_option(
+ 'scrape_interval', 5.0)
- server_addr = self.get_localized_module_option('server_addr', DEFAULT_ADDR)
- server_port = self.get_localized_module_option('server_port', DEFAULT_PORT)
+ server_addr = self.get_localized_module_option(
+ 'server_addr', DEFAULT_ADDR)
+ server_port = self.get_localized_module_option(
+ 'server_port', DEFAULT_PORT)
self.log.info(
"server_addr: %s server_port: %s" %
(server_addr, server_port)
def serve(self):
server_addr = self.get_localized_module_option('server_addr', '::')
- server_port = self.get_localized_module_option('server_port', DEFAULT_PORT)
- self.log.info("server_addr: %s server_port: %s" % (server_addr, server_port))
+ server_port = self.get_localized_module_option(
+ 'server_port', DEFAULT_PORT)
+ self.log.info("server_addr: %s server_port: %s" %
+ (server_addr, server_port))
cherrypy.config.update({
'server.socket_host': server_addr,
'server.socket_port': int(server_port),
module = self
class Root(object):
-
@cherrypy.expose
def index(self):
active_uri = module.get_active_uri()