]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/telemetry: Add Ceph Telemetry module to send reports back to project 21970/head
authorWido den Hollander <wido@42on.com>
Fri, 27 Apr 2018 07:14:05 +0000 (09:14 +0200)
committerWido den Hollander <wido@42on.com>
Mon, 14 May 2018 09:03:46 +0000 (11:03 +0200)
This Manager Module will send statistics and version information from
a Ceph cluster back to telemetry.ceph.com if the user has opted-in on sending
this information.

Additionally a user can tell that the information is allowed to be made
public which then allows other users to see this information.

Signed-off-by: Wido den Hollander <wido@42on.com>
13 files changed:
doc/mgr/index.rst
doc/mgr/telemetry.rst [new file with mode: 0644]
qa/tasks/mgr/test_module_selftest.py
src/pybind/mgr/telemetry/__init__.py [new file with mode: 0644]
src/pybind/mgr/telemetry/module.py [new file with mode: 0644]
src/telemetry/README.md [new file with mode: 0644]
src/telemetry/server/app.wsgi [new file with mode: 0644]
src/telemetry/server/ceph_telemetry/__init__.py [new file with mode: 0644]
src/telemetry/server/ceph_telemetry/app.py [new file with mode: 0755]
src/telemetry/server/ceph_telemetry/rest/__init__.py [new file with mode: 0644]
src/telemetry/server/ceph_telemetry/rest/index.py [new file with mode: 0644]
src/telemetry/server/ceph_telemetry/rest/report.py [new file with mode: 0644]
src/telemetry/server/requirements.txt [new file with mode: 0644]

index 21fe2bed9927600d8732cde12118210b8e47887a..5737b0211b032b4f7d6a88c31557277232f59ad6 100644 (file)
@@ -35,4 +35,4 @@ sensible.
     Prometheus plugin <prometheus>
     Influx plugin <influx>
     Hello plugin <hello>
-
+    Telemetry plugin <telemetry>
diff --git a/doc/mgr/telemetry.rst b/doc/mgr/telemetry.rst
new file mode 100644 (file)
index 0000000..8bb194b
--- /dev/null
@@ -0,0 +1,36 @@
+Telemetry plugin
+================
+The telemetry plugin sends anonymous data about the cluster, in which it is running, back to the Ceph project.
+
+The data being sent back to the project does not contain any sensitive data like pool names, object names, object contents or hostnames.
+
+It contains counters and statistics on how the cluster has been deployed, the version of Ceph, the distribition of the hosts and other parameters which help the project to gain a better understanding of the way Ceph is used.
+
+Data is sent over HTTPS to *telemetry.ceph.com*
+
+Enabling
+--------
+
+The *telemetry* module is enabled with::
+
+  ceph mgr module enable telemetry
+
+
+Interval
+--------
+The module compiles and sends a new report every 72 hours by default.
+
+Contact and Description
+-----------------------
+A contact and description can be added to the report, this is optional.
+
+  ceph telemetry config-set contact 'John Doe <john.doe@example.com>'
+  ceph telemetry config-set description 'My first Ceph cluster'
+
+Show report
+-----------
+The report is sent in JSON format, and can be printed::
+
+  ceph telemetry show
+
+So you can inspect the content if you have privacy concerns.
index d13f67920431d8100a9392557a6defb28825dbaa..c96ee47b04459a062ba908c1b203169b63b846f0 100644 (file)
@@ -52,6 +52,9 @@ class TestModuleSelftest(MgrTestCase):
         self._load_module("selftest")
         self.mgr_cluster.mon_manager.raw_cluster_cmd("mgr", "self-test", "run")
 
+    def test_telemetry(self):
+        self._selftest_plugin("telemetry")
+
     def test_selftest_config_update(self):
         """
         That configuration updates are seen by running mgr modules
diff --git a/src/pybind/mgr/telemetry/__init__.py b/src/pybind/mgr/telemetry/__init__.py
new file mode 100644 (file)
index 0000000..8f210ac
--- /dev/null
@@ -0,0 +1 @@
+from .module import Module
diff --git a/src/pybind/mgr/telemetry/module.py b/src/pybind/mgr/telemetry/module.py
new file mode 100644 (file)
index 0000000..d1b91fd
--- /dev/null
@@ -0,0 +1,374 @@
+"""
+Telemetry module for ceph-mgr
+
+Collect statistics from Ceph cluster and send this back to the Ceph project
+when user has opted-in
+"""
+import errno
+import json
+import re
+import requests
+import uuid
+import time
+from datetime import datetime
+from threading import Event
+from collections import defaultdict
+
+from mgr_module import MgrModule
+
+
+class Module(MgrModule):
+    config = dict()
+
+    metadata_keys = [
+            "arch",
+            "ceph_version",
+            "os",
+            "cpu",
+            "kernel_description",
+            "kernel_version",
+            "distro_description",
+            "distro"
+    ]
+
+    OPTIONS = [
+        {
+            'name': 'url',
+            'default': 'https://telemetry.ceph.com/report'
+        },
+        {
+            'name': 'enabled',
+            'default': True
+        },
+        {
+            'name': 'leaderboard',
+            'default': False
+        },
+        {
+            'name': 'description',
+            'default': None
+        },
+        {
+            'name': 'contact',
+            'default': None
+        },
+        {
+            'name': 'organization',
+            'default': None
+        },
+        {
+            'name': 'proxy',
+            'default': None
+        },
+        {
+            'name': 'interval',
+            'default': 72
+        }
+    ]
+
+    COMMANDS = [
+        {
+            "cmd": "telemetry config-set name=key,type=CephString "
+                   "name=value,type=CephString",
+            "desc": "Set a configuration value",
+            "perm": "rw"
+        },
+        {
+            "cmd": "telemetry config-show",
+            "desc": "Show current configuration",
+            "perm": "r"
+        },
+        {
+            "cmd": "telemetry send",
+            "desc": "Force sending data to Ceph telemetry",
+            "perm": "rw"
+        },
+        {
+            "cmd": "telemetry show",
+            "desc": "Show last report or report to be sent",
+            "perm": "r"
+        },
+        {
+            "cmd": "telemetry self-test",
+            "desc": "Perform a self-test",
+            "perm": "r"
+        }
+    ]
+
+    @property
+    def config_keys(self):
+        return dict((o['name'], o.get('default', None)) for o in self.OPTIONS)
+
+    def __init__(self, *args, **kwargs):
+        super(Module, self).__init__(*args, **kwargs)
+        self.event = Event()
+        self.run = False
+        self.last_upload = None
+        self.last_report = dict()
+        self.report_id = None
+
+    @staticmethod
+    def str_to_bool(string):
+        return str(string).lower() in ['true', 'yes', 'on']
+
+    @staticmethod
+    def is_valid_email(email):
+        regexp = "^.+@([?)[a-zA-Z0-9-.]+.([a-zA-Z]{2,3}|[0-9]{1,3})(]?))$"
+        try:
+            if len(email) <= 7 or len(email) > 255:
+                return False
+
+            if not re.match(regexp, email):
+                return False
+
+            return True
+        except:
+            pass
+
+        return False
+
+    @staticmethod
+    def parse_timestamp(timestamp):
+        return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S.%f')
+
+    def set_config_option(self, option, value):
+        if option not in self.config_keys.keys():
+            raise RuntimeError('{0} is a unknown configuration '
+                               'option'.format(option))
+
+        if option == 'interval':
+            try:
+                value = int(value)
+            except (ValueError, TypeError):
+                raise RuntimeError('invalid interval. Please provide a valid '
+                                   'integer')
+
+            if value < 24:
+                raise RuntimeError('interval should be set to at least 24 hours')
+
+        if option in ['leaderboard', 'enabled']:
+            value = self.str_to_bool(value)
+
+        if option == 'contact':
+            if value and not self.is_valid_email(value):
+                raise RuntimeError('%s is not a valid e-mail address as a '
+                                   'contact', value)
+
+        if option in ['description', 'organization']:
+            if value and len(value) > 256:
+                raise RuntimeError('%s should be limited to 256 '
+                                   'characters', option)
+
+        self.config[option] = value
+        return True
+
+    def init_module_config(self):
+        for key, default in self.config_keys.items():
+            self.set_config_option(key, self.get_config(key, default))
+
+        self.last_upload = self.get_store('last_upload', None)
+        if self.last_upload is not None:
+            self.last_upload = int(self.last_upload)
+
+        self.report_id = self.get_store('report_id', None)
+        if self.report_id is None:
+            self.report_id = str(uuid.uuid4())
+            self.set_store('report_id', self.report_id)
+
+    def gather_osd_metadata(self, osd_map):
+        keys = ["osd_objectstore", "rotational"]
+        keys += self.metadata_keys
+
+        metadata = dict()
+        for key in keys:
+            metadata[key] = defaultdict(int)
+
+        for osd in osd_map['osds']:
+            for k, v in self.get_metadata('osd', str(osd['osd'])).items():
+                if k not in keys:
+                    continue
+
+                metadata[k][v] += 1
+
+        return metadata
+
+    def gather_mon_metadata(self, mon_map):
+        keys = list()
+        keys += self.metadata_keys
+
+        metadata = dict()
+        for key in keys:
+            metadata[key] = defaultdict(int)
+
+        for mon in mon_map['mons']:
+            for k, v in self.get_metadata('mon', mon['name']).items():
+                if k not in keys:
+                    continue
+
+                metadata[k][v] += 1
+
+        return metadata
+
+    def compile_report(self):
+        report = {'leaderboard': False, 'report_version': 1}
+
+        if self.str_to_bool(self.config['leaderboard']):
+            report['leaderboard'] = True
+
+        for option in ['description', 'contact', 'organization']:
+            report[option] = self.config.get(option, None)
+
+        mon_map = self.get('mon_map')
+        osd_map = self.get('osd_map')
+        service_map = self.get('service_map')
+        fs_map = self.get('fs_map')
+        df = self.get('df')
+
+        report['report_id'] = self.report_id
+        report['created'] = self.parse_timestamp(mon_map['created']).isoformat()
+
+        report['mon'] = {
+            'count': len(mon_map['mons']),
+            'features': mon_map['features']
+        }
+
+        num_pg = 0
+        report['pools'] = list()
+        for pool in osd_map['pools']:
+            num_pg += pool['pg_num']
+            report['pools'].append(
+                {
+                    'pool': pool['pool'],
+                    'type': pool['type'],
+                    'pg_num': pool['pg_num'],
+                    'pgp_num': pool['pg_placement_num'],
+                    'size': pool['size'],
+                    'min_size': pool['min_size'],
+                    'crush_rule': pool['crush_rule']
+                }
+            )
+
+        report['osd'] = {
+            'count': len(osd_map['osds']),
+            'require_osd_release': osd_map['require_osd_release'],
+            'require_min_compat_client': osd_map['require_min_compat_client']
+        }
+
+        report['fs'] = {
+            'count': len(fs_map['filesystems'])
+        }
+
+        report['metadata'] = dict()
+        report['metadata']['osd'] = self.gather_osd_metadata(osd_map)
+        report['metadata']['mon'] = self.gather_mon_metadata(mon_map)
+
+        report['usage'] = {
+            'pools': len(df['pools']),
+            'pg_num:': num_pg,
+            'total_objects': df['stats']['total_objects'],
+            'total_used_bytes': df['stats']['total_used_bytes'],
+            'total_bytes': df['stats']['total_bytes'],
+            'total_avail_bytes': df['stats']['total_avail_bytes']
+        }
+
+        report['services'] = defaultdict(int)
+        for key, value in service_map['services'].items():
+            report['services'][key] += 1
+
+        return report
+
+    def send(self, report):
+        self.log.info('Upload report to: %s', self.config['url'])
+        proxies = dict()
+        if self.config['proxy']:
+            self.log.info('Using HTTP(S) proxy: %s', self.config['proxy'])
+            proxies['http'] = self.config['proxy']
+            proxies['https'] = self.config['proxy']
+
+        requests.put(url=self.config['url'], json=report, proxies=proxies)
+
+    def handle_command(self, command):
+        if command['prefix'] == 'telemetry config-show':
+            return 0, json.dumps(self.config), ''
+        elif command['prefix'] == 'telemetry config-set':
+            key = command['key']
+            value = command['value']
+            if not value:
+                return -errno.EINVAL, '', 'Value should not be empty or None'
+
+            self.log.debug('Setting configuration option %s to %s', key, value)
+            self.set_config_option(key, value)
+            self.set_config(key, value)
+            return 0, 'Configuration option {0} updated'.format(key), ''
+        elif command['prefix'] == 'telemetry send':
+            self.last_report = self.compile_report()
+            self.send(self.last_report)
+            return 0, 'Report send to {0}'.format(self.config['url']), ''
+        elif command['prefix'] == 'telemetry show':
+            report = self.last_report
+            if not report:
+                report = self.compile_report()
+            return 0, json.dumps(report), ''
+        elif command['prefix'] == 'telemetry self-test':
+            self.self_test()
+            return 0, 'Self-test succeeded', ''
+        else:
+            return (-errno.EINVAL, '',
+                    "Command not found '{0}'".format(command['prefix']))
+
+    def self_test(self):
+        report = self.compile_report()
+        if len(report) == 0:
+            raise RuntimeError('Report is empty')
+
+        if 'report_id' not in report:
+            raise RuntimeError('report_id not found in report')
+
+    def shutdown(self):
+        self.run = False
+        self.event.set()
+
+    def serve(self):
+        self.init_module_config()
+        self.run = True
+
+        self.log.debug('Waiting for mgr to warm up')
+        self.event.wait(10)
+
+        while self.run:
+            if self.config['enabled']:
+                self.log.info('Not sending report until configured to do so')
+                self.event.wait(1800)
+                continue
+
+            now = int(time.time())
+            if not self.last_upload or (now - self.last_upload) > \
+                            self.config['interval'] * 3600:
+                self.log.info('Compiling and sending report to %s',
+                              self.config['url'])
+
+                try:
+                    self.last_report = self.compile_report()
+                except:
+                    self.log.exception('Exception while compiling report:')
+
+                try:
+                    self.send(self.last_report)
+                    self.last_upload = now
+                    self.set_store('last_upload', str(now))
+                except:
+                    self.log.exception('Exception while sending report:')
+            else:
+                self.log.info('Interval for sending new report has not expired')
+
+            sleep = 3600
+            self.log.debug('Sleeping for %d seconds', sleep)
+            self.event.wait(sleep)
+
+    def self_test(self):
+        self.compile_report()
+        return True
+
+    @staticmethod
+    def can_run():
+        return True, ''
diff --git a/src/telemetry/README.md b/src/telemetry/README.md
new file mode 100644 (file)
index 0000000..f5cdcca
--- /dev/null
@@ -0,0 +1,4 @@
+# Ceph-telemetry
+This is a renewed version of `ceph-brag`. It enables the Ceph project to collect statistics from users' clusters where the `telemetry` module is enabled.
+
+Statistics are indexed in Elasticsearch.
diff --git a/src/telemetry/server/app.wsgi b/src/telemetry/server/app.wsgi
new file mode 100644 (file)
index 0000000..280a777
--- /dev/null
@@ -0,0 +1,10 @@
+import sys
+import os
+pwd = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, pwd)
+
+from ceph_telemetry import create_app
+
+app = create_app(__name__)
+
+application = app
diff --git a/src/telemetry/server/ceph_telemetry/__init__.py b/src/telemetry/server/ceph_telemetry/__init__.py
new file mode 100644 (file)
index 0000000..b23f70e
--- /dev/null
@@ -0,0 +1 @@
+from .app import create_app
\ No newline at end of file
diff --git a/src/telemetry/server/ceph_telemetry/app.py b/src/telemetry/server/ceph_telemetry/app.py
new file mode 100755 (executable)
index 0000000..f2c2b99
--- /dev/null
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+import argparse
+from flask_restful import Api
+from flask import Flask
+from ceph_telemetry.rest import Index, Report
+
+
+def create_app(name):
+    app = Flask(name)
+    api = Api(app, catch_all_404s=True)
+    api.add_resource(Index, '/')
+    api.add_resource(Report, '/report')
+    return app
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Ceph Telemetry REST API')
+    parser.add_argument("--host", action="store", dest="host",
+                        default="::", help="Host/IP to bind on")
+    parser.add_argument("--port", action="store", dest="port", type=int,
+                        default=9000, help="Port to listen on")
+    args = parser.parse_args()
+    app = create_app(__name__)
+    app.run(debug=True, host=args.host, port=args.port)
diff --git a/src/telemetry/server/ceph_telemetry/rest/__init__.py b/src/telemetry/server/ceph_telemetry/rest/__init__.py
new file mode 100644 (file)
index 0000000..3152144
--- /dev/null
@@ -0,0 +1,2 @@
+from .index import Index
+from .report import Report
diff --git a/src/telemetry/server/ceph_telemetry/rest/index.py b/src/telemetry/server/ceph_telemetry/rest/index.py
new file mode 100644 (file)
index 0000000..413d494
--- /dev/null
@@ -0,0 +1,7 @@
+from flask import jsonify
+from flask_restful import Resource
+
+
+class Index(Resource):
+    def get(self):
+        return jsonify(status=True)
diff --git a/src/telemetry/server/ceph_telemetry/rest/report.py b/src/telemetry/server/ceph_telemetry/rest/report.py
new file mode 100644 (file)
index 0000000..e589e65
--- /dev/null
@@ -0,0 +1,12 @@
+from flask import request, jsonify
+from flask_restful import Resource
+from elasticsearch import Elasticsearch
+
+
+class Report(Resource):
+    def put(self):
+        report = request.get_json(force=True)
+        es = Elasticsearch()
+        es.index(index='telemetry', doc_type='report', id=report['report_id'],
+                 body=report)
+        return jsonify(status=True)
diff --git a/src/telemetry/server/requirements.txt b/src/telemetry/server/requirements.txt
new file mode 100644 (file)
index 0000000..dca9819
--- /dev/null
@@ -0,0 +1,3 @@
+flask
+flask_restful
+elasticsearch