From a6bc96dfe93f7cbcefa8030a3b6830117516931e Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 12 Sep 2017 08:05:28 -0400 Subject: [PATCH] mgr/influx: remove file-based config ...and also trim down the configuration to what's really needed. In general users don't need to pick and choose metrics. We could add it back if there was a strong motivation. Signed-off-by: John Spray (cherry picked from commit 6776d4645afc49a4bfb4b62673c91384239037f4) --- doc/mgr/influx.rst | 119 ++++++++++++------------------ src/pybind/mgr/influx/influx.conf | 16 ---- src/pybind/mgr/influx/module.py | 56 ++++++-------- 3 files changed, 72 insertions(+), 119 deletions(-) delete mode 100644 src/pybind/mgr/influx/influx.conf diff --git a/doc/mgr/influx.rst b/doc/mgr/influx.rst index 6d8a2f0e8ebb1..bdb579dd2b09a 100644 --- a/doc/mgr/influx.rst +++ b/doc/mgr/influx.rst @@ -2,73 +2,58 @@ Influx Plugin ============= -The influx plugin continuously collects and sends time series data to an influxdb database. Users have the option to specify what type of stats they want to collect. -Some default counters are already set. However, users will have the option to choose some additional counters to collect. +The influx plugin continuously collects and sends time series data to an +influxdb database. -------------- -Configuration -------------- - -In order for this module to work, the following configuration should be created ``/etc/ceph/influx.conf``. +The influx plugin was introduced in the 13.x *Mimic* release. -^^^^^^^^ -Required -^^^^^^^^ +-------- +Enabling +-------- -The configurations must include the following under the header ``[influx]``. 
+To enable the module, use the following command: -:Configuration: **Description** -:interval: Sets how often the module will collect the stats and send it to influx -:hostname: Influx host -:username: Influx username -:password: Influx password -:database: Influx database (if a database does not already exist in influx, the module will create one) -:port: Influx port -:stats: Stats about the osd, pool, and cluster can be collected. Specify as many as you would like, but seperate each type by a comma. +:: + ceph mgr module enable influx -^^^^^^^^ -Optional -^^^^^^^^ +If you wish to subsequently disable the module, you can use the equivalent +*disable* command: -Users have the ability to collect additional counters for each osd or each cluster under the the header ``[extended]``. -More information on the extended option can be found below under the *extended* section. Seperate each additional configurations with a comma. +:: -Example config file: + ceph mgr module disable influx -:: +------------- +Configuration +------------- - [influx] - interval = 10 - hostname = samplehost - username = admin - password = pass - database = default - port = 8086 - stats = osd, pool, cluster +For the influx module to send statistics to an InfluxDB server, it +is necessary to configure the server's address and some authentication +credentials. - [extended] - osd = op_latency, recovery_ops - cluster = op_latency +Set configuration values using the following command: --------- -Enabling --------- +:: -To enable the module, the following should be performed: + ceph config-key set mgr/influx/<key> <value> -- Load module by including this in the ceph.conf file.:: - [mgr] - mgr_modules = influx +The most important settings are ``hostname``, ``username`` and ``password``. +For example, a typical configuration might look like this: -- Initialize the module to run every set interval ``ceph mgr module enable influx``. 
+:: ---------- -Disabling ---------- + ceph config-key set mgr/influx/hostname influx.mydomain.com + ceph config-key set mgr/influx/username admin123 + ceph config-key set mgr/influx/password p4ssw0rd + +Additional optional configuration settings are: -``ceph mgr module disable influx`` +:interval: Time between reports to InfluxDB. Default 5 seconds. +:database: InfluxDB database name. Default "ceph". +:port: InfluxDB server port. Default 8086. + --------- Debugging @@ -85,15 +70,16 @@ To make use of the debugging option in the module: - Use this command ``ceph tell mgr.<mymonitor> influx self-test``. - Check the log files. Users may find it easier to filter the log files using *mgr[influx]*. ------ -Usage ------ +-------------------- +Interesting counters +-------------------- -^^^^^^^^^^^^^^^^ -Default Counters -^^^^^^^^^^^^^^^^ +The following tables describe a subset of the values output by +this module. -**pool** +^^^^^ +Pools +^^^^^ +---------------+-----------------------------------------------------+ |Counter | Description | @@ -113,7 +99,9 @@ Default Counters |raw_bytes_used | Bytes used in pool including copies made | +---------------+-----------------------------------------------------+ -**osd** +^^^^ +OSDs +^^^^ +------------+------------------------------------+ |Counter | Description | @@ -128,20 +116,6 @@ Default Counters +------------+------------------------------------+ -**cluster** -The cluster will collect the same type of data as the osd by default but instead of collecting per osd, it will sum up the performance counter -for all osd. - -^^^^^^^^ -extended -^^^^^^^^ -There are many other counters that can be collected by configuring the module such as operational counters and suboperational counters. 
A couple of counters are listed and described below, but additional counters -can be found here https://github.com/ceph/ceph/blob/5a197c5817f591fc514f55b9929982e90d90084e/src/osd/OSD.cc - -**Operations** - -- Latency counters are measured in microseconds unless otherwise specified in the description. - +------------------------+--------------------------------------------------------------------------+ |Counter | Description | +========================+==========================================================================+ @@ -183,3 +157,6 @@ can be found here https://github.com/ceph/ceph/blob/5a197c5817f591fc514f55b99299 +------------------------+--------------------------------------------------------------------------+ |op_before_dequeue_op_lat| Latency of IO before calling dequeue_op(already dequeued and get PG lock)| +------------------------+--------------------------------------------------------------------------+ + +Latency counters are measured in microseconds unless otherwise specified in the description. 
+ diff --git a/src/pybind/mgr/influx/influx.conf b/src/pybind/mgr/influx/influx.conf deleted file mode 100644 index 8561b737a1775..0000000000000 --- a/src/pybind/mgr/influx/influx.conf +++ /dev/null @@ -1,16 +0,0 @@ -[influx] -interval = 10 -hostname = samplehost -username = admin -password = pass -database = default -port = 8086 -stats = osd, pool, cluster - -[extended] -osd = op_latency -cluster = op_latency, recovery_ops - - - - diff --git a/src/pybind/mgr/influx/module.py b/src/pybind/mgr/influx/module.py index 7268394425b4d..ca18f6e2117ef 100644 --- a/src/pybind/mgr/influx/module.py +++ b/src/pybind/mgr/influx/module.py @@ -1,7 +1,6 @@ from datetime import datetime from threading import Event -from ConfigParser import SafeConfigParser import json import errno @@ -98,35 +97,28 @@ class Module(MgrModule): return data def send_to_influx(self): - config = SafeConfigParser() - config.read('/etc/ceph/influx.conf') - host = config.get('influx','hostname') - username = config.get('influx', 'username') - password = config.get('influx', 'password') - database = config.get('influx', 'database') - port = int(config.get('influx','port')) - stats = config.get('influx', 'stats').replace(' ', '').split(',') - client = InfluxDBClient(host, port, username, password, database) + host = self.get_config("hostname") + if not host: + self.log.error("No InfluxDB server configured, please set" + "`hostname` configuration key.") + return + + port = int(self.get_config("port", default="8086")) + database = self.get_config("database", default="ceph") + + # If influx server has authentication turned off then + # missing username/password is valid. 
+ username = self.get_config("username", default="") + password = self.get_config("password", default="") + + client = InfluxDBClient(host, port, username, password, database) databases_avail = client.get_list_database() - daemon_stats = self.get_daemon_stats() - for database_avail in databases_avail: - if database_avail == database: - break - else: - client.create_database(database) - - for stat in stats: - if stat == "pool": - client.write_points(self.get_df_stats(), 'ms') - - elif stat == "osd": - client.write_points(daemon_stats, 'ms') - self.log.debug("wrote osd stats") - - elif stat == "cluster": - self.log.debug("wrote cluster stats") - else: - self.log.error("invalid stat") + if database not in databases_avail: + self.log.info("Creating database '{0}'".format(database)) + client.create_database(database) + + client.write_points(self.get_df_stats(), 'ms') + client.write_points(self.get_daemon_stats(), 'ms') def shutdown(self): self.log.info('Stopping influx module') @@ -155,12 +147,12 @@ class Module(MgrModule): self.log.info('Starting influx module') self.run = True - config = SafeConfigParser() - config.read('/etc/ceph/influx.conf') while self.run: self.send_to_influx() self.log.debug("Running interval loop") - interval = int(config.get('influx','interval')) + interval = self.get_config("interval") + if interval is None: + interval = 5 self.log.debug("sleeping for %d seconds",interval) self.event.wait(interval) \ No newline at end of file -- 2.39.5