git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
ceph_telemetry: accept older formats and transform
author: Dan Mick <dan.mick@redhat.com>
Thu, 25 Apr 2019 23:48:40 +0000 (16:48 -0700)
committer: Dan Mick <dan.mick@redhat.com>
Mon, 29 Apr 2019 22:42:59 +0000 (15:42 -0700)
Turn crashes from a dict into a list, add a datestamp
(either from the report or from the time of receipt) to the
Elasticsearch document ID, and remove hostname from crashdumps.

Also add test code and requirements.txt for the tests for
ease of virtualenv creation

Signed-off-by: Dan Mick <dan.mick@redhat.com>
src/telemetry/server/ceph_telemetry/requirements.txt [new file with mode: 0644]
src/telemetry/server/ceph_telemetry/rest/report.py
src/telemetry/server/ceph_telemetry/test_rest.py [new file with mode: 0644]

diff --git a/src/telemetry/server/ceph_telemetry/requirements.txt b/src/telemetry/server/ceph_telemetry/requirements.txt
new file mode 100644 (file)
index 0000000..9efdf84
--- /dev/null
@@ -0,0 +1,4 @@
+elasticsearch
+requests
+flask
+flask_restful
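diff --git a/src/telemetry/server/ceph_telemetry/rest/report.py b/src/telemetry/server/ceph_telemetry/rest/report.py
index e589e65b0917200bc64670ec39513e5342e57064..694526eea6085ed1f20d15f9909f53f6153a41d9 100644 (file)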
@@ -1,12 +1,82 @@
 from flask import request, jsonify
 from flask_restful import Resource
 from elasticsearch import Elasticsearch
+import datetime
 
 
 class Report(Resource):
+    def __init__(self, report=None):
+        super(Report, self).__init__()
+        self.report = report
+
+    def _dots_to_percent(self, obj=None):
+        '''
+        Key names cannot contain '.' in Elasticsearch, so change
+        them to '%', first escaping any existing '%' to '%%'.
+        Don't worry about values.  Modifies keys in-place.
+        '''
+
+        # handle first call; others are recursive
+        if obj is None:
+            obj = self.report
+
+        for k, v in obj.items():
+            if isinstance(v, dict):
+                self._dots_to_percent(v)
+            if '.' in k:
+                del obj[k]
+                newk = k.replace('%', '%%')
+                newk = newk.replace('.', '%')
+                obj[newk] = v
+
+    def _crashes_to_list(self):
+        '''
+        Early versions of telemetry sent crashes as a dict, keyed
+        by crash_id.  This isn't particularly useful, so if we see it,
+        change to the current convention of "a list of crash dicts",
+        which contains the crash_id.  Modifies report in-place.
+        '''
+
+        if ('crashes' in self.report and isinstance(self.report['crashes'], dict)):
+            newcrashes = list()
+            for crashinfo in self.report['crashes'].values():
+                newcrashes.append(crashinfo)
+            self.report['crashes'] = newcrashes
+
+    def _report_id(self):
+        '''
+        Make a unique Elasticsearch document ID.  Earlier versions
+        of telemetry did not contain a report_timestamp, so use
+        time-of-receipt if not present.
+        '''
+
+        if 'report_timestamp' in self.report:
+            timestamp = self.report['report_timestamp']
+        else:
+            timestamp = datetime.datetime.utcnow().isoformat()
+
+        return '.'.join((self.report['report_id'], timestamp))
+
+    def _purge_hostname_from_crash(self):
+        '''
+        hostname can be FQDN and undesirable to make public.
+        Remove from crashdump data (newer telemetry modules don't
+        submit it at all).
+        '''
+        if 'crashes' in self.report:
+            if not isinstance(self.report['crashes'], list):
+                self._crashes_to_list()
+            for crash in self.report['crashes']:
+                if 'utsname_hostname' in crash:
+                    del crash['utsname_hostname']
+
     def put(self):
-        report = request.get_json(force=True)
+        self.report = request.get_json(force=True)
+        self._crashes_to_list()
+        self._dots_to_percent()
+        self._purge_hostname_from_crash()
+        es_id = self._report_id()
         es = Elasticsearch()
-        es.index(index='telemetry', doc_type='report', id=report['report_id'],
-                 body=report)
+        es.index(index='telemetry', doc_type='report', id=es_id,
+                 body=self.report)
         return jsonify(status=True)
diff --git a/src/telemetry/server/ceph_telemetry/test_rest.py b/src/telemetry/server/ceph_telemetry/test_rest.py
new file mode 100644 (file)
index 0000000..11381dc
--- /dev/null
@@ -0,0 +1,57 @@
+from copy import deepcopy
+from rest.report import Report
+
+# Sample report used by the tests below.  It is in the old format
+# (crashes as a dict keyed by crash id) and has dotted key names at
+# the top level and nested two levels deep.
+report_data = {
+    # old-style crashes dict; the second entry carries a hostname
+    "crashes": {
+        "crashid1": {"crash_id": "crashreport1"},
+        "crashid2": {
+            "os_name": "TheBestOS",
+            "utsname_hostname": "foo.bar.baz.com",
+        },
+    },
+    # dotted keys, one of them also containing a literal '%'
+    "key.with.dots": "value.with.dots.and.%",
+    "key.with.dots.and.%": "value.with.dots.and.%",
+    # dotted key nested inside two dicts
+    "key1": {
+        "key2": {
+            "key3.with.dots": "value3",
+        },
+    },
+    "report_timestamp": "2019-04-25T22:42:59.083915",
+    "report_id": "cc74d980-51ba-4c29-8534-fa813e759a7c",
+}
+
+
+
+def test_dots_to_percent():
+    '''
+    Dotted keys are rewritten at every nesting level, existing '%'
+    is doubled, and values are left alone.
+    '''
+    # work on a copy: _dots_to_percent modifies the report in-place
+    # and report_data is shared by every test in this module
+    report = Report(deepcopy(report_data))
+    report._dots_to_percent()
+    assert 'key.with.dots' not in report.report
+    assert 'key%with%dots' in report.report
+    assert 'key%with%dots%and%%%' in report.report
+    assert report.report['key%with%dots'] == 'value.with.dots.and.%'
+    assert 'key3%with%dots' in report.report['key1']['key2']
+
+
+def test_crashes_to_list():
+    '''
+    An old-style crashes dict becomes a list of its values.
+    '''
+    # work on a copy: _crashes_to_list modifies the report in-place
+    # and report_data is shared by every test in this module
+    report = Report(deepcopy(report_data))
+    report._crashes_to_list()
+    crashes = report.report['crashes']
+    assert isinstance(crashes, list)
+    assert len(crashes) == 2
+    assert {'crash_id': 'crashreport1'} in crashes
+    assert {"os_name": "TheBestOS",
+            "utsname_hostname": "foo.bar.baz.com"} in crashes
+
+
+def test_report_id():
+    '''
+    The document ID is "<report_id>.<report_timestamp>"; without a
+    report_timestamp it still starts with the report_id.
+    '''
+    # work on a copy: the timestamp is deleted below and report_data
+    # is shared by every test in this module
+    report = Report(deepcopy(report_data))
+    assert (report._report_id() ==
+            'cc74d980-51ba-4c29-8534-fa813e759a7c.2019-04-25T22:42:59.083915')
+    del report.report['report_timestamp']
+    es_id = report._report_id()
+    assert es_id.startswith('cc74d980-51ba-4c29-8534-fa813e759a7c')
+
+
+def test_purge_hostname_from_crash():
+    '''
+    utsname_hostname is removed from crash entries; other crash
+    fields are kept.
+    '''
+    # work on a copy: the purge modifies the report in-place and
+    # report_data is shared by every test in this module
+    report = Report(deepcopy(report_data))
+    report._purge_hostname_from_crash()
+    crashes = report.report['crashes']
+    assert {"os_name": "TheBestOS",
+            "utsname_hostname": "foo.bar.baz.com"} not in crashes
+    assert {"os_name": "TheBestOS"} in crashes