From: Dan Mick
Date: Thu, 25 Apr 2019 23:48:40 +0000 (-0700)
Subject: ceph_telemetry: accept older formats and transform
X-Git-Tag: v15.1.0~2777^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F27802%2Fhead;p=ceph.git

ceph_telemetry: accept older formats and transform

Turn crashes from dict into list, and add a datestamp (either from
the report or from received time) to the Elasticsearch document ID,
remove hostname from crashdumps

Also add test code and requirements.txt for the tests for
ease of virtualenv creation

Signed-off-by: Dan Mick
---

diff --git a/src/telemetry/server/ceph_telemetry/requirements.txt b/src/telemetry/server/ceph_telemetry/requirements.txt
new file mode 100644
index 000000000000..9efdf8468811
--- /dev/null
+++ b/src/telemetry/server/ceph_telemetry/requirements.txt
@@ -0,0 +1,4 @@
+elasticsearch
+requests
+flask
+flask_restful
diff --git a/src/telemetry/server/ceph_telemetry/rest/report.py b/src/telemetry/server/ceph_telemetry/rest/report.py
index e589e65b0917..694526eea608 100644
--- a/src/telemetry/server/ceph_telemetry/rest/report.py
+++ b/src/telemetry/server/ceph_telemetry/rest/report.py
@@ -1,12 +1,104 @@
 from flask import request, jsonify
 from flask_restful import Resource
 from elasticsearch import Elasticsearch
+import datetime
 
 
 class Report(Resource):
+    '''
+    REST resource that accepts telemetry reports, converts older
+    report formats to the current one, and stores them in
+    Elasticsearch.
+    '''
+
+    def __init__(self, report=None):
+        '''
+        report: optional report dict to operate on; put() replaces
+        it with the JSON body of the request.
+        '''
+        super(Report, self).__init__()
+        self.report = report
+
+    def _dots_to_percent(self, obj=None):
+        '''
+        Key names cannot contain '.' in Elasticsearch, so change
+        them to '%', first escaping any existing '%' to '%%'.
+        Only keys that contain a '.' are rewritten.  Don't worry
+        about values.  Modifies keys in-place.
+
+        obj: dict to process; defaults to self.report.  Nested dict
+        values are processed recursively.
+        '''
+
+        # handle first call; others are recursive
+        if obj is None:
+            obj = self.report
+
+        # Iterate over a snapshot of the items: keys are deleted and
+        # re-inserted below, and changing a dict while iterating over
+        # its live view can raise RuntimeError or skip/repeat entries.
+        for k, v in list(obj.items()):
+            if isinstance(v, dict):
+                self._dots_to_percent(v)
+            if '.' in k:
+                del obj[k]
+                newk = k.replace('%', '%%')
+                newk = newk.replace('.', '%')
+                obj[newk] = v
+
+    def _crashes_to_list(self):
+        '''
+        Early versions of telemetry sent crashes as a dict, keyed
+        by crash_id.  This isn't particularly useful, so if we see it,
+        change to the current convention of "a list of crash dicts",
+        which contains the crash_id.  Modifies report in-place.
+        '''
+
+        if ('crashes' in self.report and
+                isinstance(self.report['crashes'], dict)):
+            self.report['crashes'] = list(self.report['crashes'].values())
+
+    def _report_id(self):
+        '''
+        Make a unique Elasticsearch document ID.  Earlier versions
+        of telemetry did not contain a report_timestamp, so use
+        time-of-receipt if not present.
+
+        Returns report_id and the timestamp joined by '.'; raises
+        KeyError if the report has no report_id.
+        '''
+
+        if 'report_timestamp' in self.report:
+            timestamp = self.report['report_timestamp']
+        else:
+            timestamp = datetime.datetime.utcnow().isoformat()
+
+        return '.'.join((self.report['report_id'], timestamp))
+
+    def _purge_hostname_from_crash(self):
+        '''
+        hostname can be FQDN and undesirable to make public.
+        Remove from crashdump data (newer telemetry modules don't
+        submit it at all).  Modifies report in-place.
+        '''
+        if 'crashes' in self.report:
+            if not isinstance(self.report['crashes'], list):
+                self._crashes_to_list()
+            for crash in self.report['crashes']:
+                if 'utsname_hostname' in crash:
+                    del crash['utsname_hostname']
+
     def put(self):
-        report = request.get_json(force=True)
+        '''
+        Normalize the JSON report in the request body and index it
+        in Elasticsearch under the ID built by _report_id().
+        '''
+        self.report = request.get_json(force=True)
+        self._crashes_to_list()
+        self._dots_to_percent()
+        self._purge_hostname_from_crash()
+        es_id = self._report_id()
         es = Elasticsearch()
-        es.index(index='telemetry', doc_type='report', id=report['report_id'],
-                 body=report)
+        es.index(index='telemetry', doc_type='report', id=es_id,
+                 body=self.report)
         return jsonify(status=True)
diff --git a/src/telemetry/server/ceph_telemetry/test_rest.py b/src/telemetry/server/ceph_telemetry/test_rest.py
new file mode 100644
index 000000000000..11381dc675eb
--- /dev/null
+++ b/src/telemetry/server/ceph_telemetry/test_rest.py
@@ -0,0 +1,58 @@
+from copy import deepcopy
+from rest.report import Report
+
+# Shared fixture in the old (crashes-as-dict) format.  Each test works
+# on a deepcopy because the Report methods modify the report in-place.
+report_data = {
+    "crashes": {
+        "crashid1": {"crash_id": "crashreport1"},
+        "crashid2": {
+            "os_name": "TheBestOS",
+            "utsname_hostname": "foo.bar.baz.com",
+        },
+    },
+    "key.with.dots": "value.with.dots.and.%",
+    "key.with.dots.and.%": "value.with.dots.and.%",
+    "key1": {
+        "key2": {
+            "key3.with.dots": "value3",
+        },
+    },
+    "report_timestamp": "2019-04-25T22:42:59.083915",
+    "report_id": "cc74d980-51ba-4c29-8534-fa813e759a7c",
+}
+
+
+def test_dots_to_percent():
+    report = Report(deepcopy(report_data))
+    report._dots_to_percent()
+    assert('key.with.dots' not in report.report)
+    assert('key%with%dots' in report.report)
+    assert('key%with%dots%and%%%' in report.report)
+    assert(report.report['key%with%dots'] == 'value.with.dots.and.%')
+    assert('key3%with%dots' in report.report['key1']['key2'])
+
+
+def test_crashes_to_list():
+    report = Report(deepcopy(report_data))
+    report._crashes_to_list()
+    assert(isinstance(report.report['crashes'], list))
+    assert(len(report.report['crashes']) == 2)
+    assert({'crash_id': 'crashreport1'} in report.report['crashes'])
+    assert({"os_name": "TheBestOS", "utsname_hostname": "foo.bar.baz.com"} in report.report['crashes'])
+
+
+def test_report_id():
+    report = Report(deepcopy(report_data))
+    assert(report._report_id() ==
+           'cc74d980-51ba-4c29-8534-fa813e759a7c.2019-04-25T22:42:59.083915')
+    del report.report['report_timestamp']
+    es_id = report._report_id()
+    assert(es_id.startswith('cc74d980-51ba-4c29-8534-fa813e759a7c'))
+
+
+def test_purge_hostname_from_crash():
+    report = Report(deepcopy(report_data))
+    report._purge_hostname_from_crash()
+    assert({"os_name": "TheBestOS", "utsname_hostname": "foo.bar.baz.com"} not in report.report['crashes'])
+    assert({"os_name": "TheBestOS"} in report.report['crashes'])