From 0222d0d62b278b56011aab977109ee807a797464 Mon Sep 17 00:00:00 2001
From: Dan Mick
Date: Thu, 25 Apr 2019 16:48:40 -0700
Subject: [PATCH] ceph_telemetry: accept older formats and transform

Turn crashes from dict into list, and add a datestamp (either from
the report or from received time) to the Elasticsearch document ID,
remove hostname from crashdumps

Also add test code and requirements.txt for the tests for ease
of virtualenv creation

Signed-off-by: Dan Mick
---
 .../server/ceph_telemetry/requirements.txt    |  4 +
 .../server/ceph_telemetry/rest/report.py      | 76 ++++++++++++++++++-
 .../server/ceph_telemetry/test_rest.py        | 57 ++++++++++++++
 3 files changed, 134 insertions(+), 3 deletions(-)
 create mode 100644 src/telemetry/server/ceph_telemetry/requirements.txt
 create mode 100644 src/telemetry/server/ceph_telemetry/test_rest.py

diff --git a/src/telemetry/server/ceph_telemetry/requirements.txt b/src/telemetry/server/ceph_telemetry/requirements.txt
new file mode 100644
index 00000000000..9efdf846881
--- /dev/null
+++ b/src/telemetry/server/ceph_telemetry/requirements.txt
@@ -0,0 +1,4 @@
+elasticsearch
+requests
+flask
+flask_restful
diff --git a/src/telemetry/server/ceph_telemetry/rest/report.py b/src/telemetry/server/ceph_telemetry/rest/report.py
index e589e65b091..694526eea60 100644
--- a/src/telemetry/server/ceph_telemetry/rest/report.py
+++ b/src/telemetry/server/ceph_telemetry/rest/report.py
@@ -1,12 +1,82 @@
 from flask import request, jsonify
 from flask_restful import Resource
 from elasticsearch import Elasticsearch
+import datetime
 
 
 class Report(Resource):
+    def __init__(self, report=None):
+        super(Report, self).__init__()
+        self.report = report
+
+    def _dots_to_percent(self, obj=None):
+        '''
+        Key names cannot contain '.' in Elasticsearch, so change
+        them to '%', first escaping any existing '%' to '%%'.
+        Don't worry about values. Modifies keys in-place.
+        '''
+
+        # handle first call; others are recursive
+        if obj is None:
+            obj = self.report
+
+        for k, v in list(obj.items()):  # snapshot: keys are replaced below
+            if isinstance(v, dict):
+                self._dots_to_percent(v)
+            if '.' in k:
+                del obj[k]
+                newk = k.replace('%', '%%')
+                newk = newk.replace('.', '%')
+                obj[newk] = v
+
+    def _crashes_to_list(self):
+        '''
+        Early versions of telemetry sent crashes as a dict, keyed
+        by crash_id. This isn't particularly useful, so if we see it,
+        change to the current convention of "a list of crash dicts",
+        which contains the crash_id. Modifies report in-place.
+        '''
+
+        if ('crashes' in self.report and isinstance(self.report['crashes'], dict)):
+            newcrashes = list()
+            for crashinfo in self.report['crashes'].values():
+                newcrashes.append(crashinfo)
+            self.report['crashes'] = newcrashes
+
+    def _report_id(self):
+        '''
+        Make a unique Elasticsearch document ID. Earlier versions
+        of telemetry did not contain a report_timestamp, so use
+        time-of-receipt if not present.
+        '''
+
+        if 'report_timestamp' in self.report:
+            timestamp = self.report['report_timestamp']
+        else:
+            timestamp = datetime.datetime.utcnow().isoformat()
+
+        return '.'.join((self.report['report_id'], timestamp))
+
+    def _purge_hostname_from_crash(self):
+        '''
+        hostname can be FQDN and undesirable to make public.
+        Remove from crashdump data (newer telemetry modules don't
+        submit it at all).
+        '''
+        if 'crashes' in self.report:
+            if not isinstance(self.report['crashes'], list):
+                self._crashes_to_list()
+            for crash in self.report['crashes']:
+                if 'utsname_hostname' in crash:
+                    del crash['utsname_hostname']
+
     def put(self):
-        report = request.get_json(force=True)
+        self.report = request.get_json(force=True)
+        self._crashes_to_list()
+        self._dots_to_percent()
+        self._purge_hostname_from_crash()
+        es_id = self._report_id()
         es = Elasticsearch()
-        es.index(index='telemetry', doc_type='report', id=report['report_id'],
-                 body=report)
+        es.index(index='telemetry', doc_type='report', id=es_id,
+                 body=self.report)
         return jsonify(status=True)
diff --git a/src/telemetry/server/ceph_telemetry/test_rest.py b/src/telemetry/server/ceph_telemetry/test_rest.py
new file mode 100644
index 00000000000..11381dc675e
--- /dev/null
+++ b/src/telemetry/server/ceph_telemetry/test_rest.py
@@ -0,0 +1,57 @@
+from copy import deepcopy
+from rest.report import Report
+
+report_data = {
+    "crashes": {
+        "crashid1": {"crash_id": "crashreport1"},
+        "crashid2": {
+            "os_name": "TheBestOS",
+            "utsname_hostname": "foo.bar.baz.com",
+        },
+    },
+    "key.with.dots": "value.with.dots.and.%",
+    "key.with.dots.and.%": "value.with.dots.and.%",
+    "key1": {
+        "key2": {
+            "key3.with.dots": "value3",
+        },
+    },
+    "report_timestamp": "2019-04-25T22:42:59.083915",
+    "report_id": "cc74d980-51ba-4c29-8534-fa813e759a7c",
+}
+
+
+
+def test_dots_to_percent():
+    report = Report(deepcopy(report_data))
+    report._dots_to_percent()
+    assert('key.with.dots' not in report.report)
+    assert('key%with%dots' in report.report)
+    assert('key%with%dots%and%%%' in report.report)
+    assert(report.report['key%with%dots'] == 'value.with.dots.and.%')
+    assert('key3%with%dots' in report.report['key1']['key2'])
+
+
+def test_crashes_to_list():
+    report = Report(deepcopy(report_data))
+    report._crashes_to_list()
+    assert(isinstance(report.report['crashes'], list))
+    assert(len(report.report['crashes']) == 2)
+    assert({'crash_id' : 'crashreport1'} in report.report['crashes'])
+    assert({"os_name": "TheBestOS", "utsname_hostname": "foo.bar.baz.com"} in report.report['crashes'])
+
+
+def test_report_id():
+    report = Report(deepcopy(report_data))
+    assert(report._report_id() ==
+           'cc74d980-51ba-4c29-8534-fa813e759a7c.2019-04-25T22:42:59.083915')
+    del report.report['report_timestamp']
+    es_id = report._report_id()
+    assert(es_id.startswith('cc74d980-51ba-4c29-8534-fa813e759a7c'))
+
+
+def test_purge_hostname_from_crash():
+    report = Report(deepcopy(report_data))
+    report._purge_hostname_from_crash()
+    assert({"os_name": "TheBestOS", "utsname_hostname": "foo.bar.baz.com"} not in report.report['crashes'])
+    assert({"os_name": "TheBestOS"} in report.report['crashes'])
-- 
2.39.5