From: Yaarit Hatuka
Date: Thu, 27 Aug 2020 03:04:34 +0000 (-0400)
Subject: mgr/telemetry: fix device id splitting when anonymizing serial
X-Git-Tag: v14.2.12~61^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=0dccfc4196900e2f9e03e6d2c4ab3edcc46023aa;p=ceph.git

mgr/telemetry: fix device id splitting when anonymizing serial

Anonymizing the serial number in the device id string fails in rare cases where 'vendor' and 'model' are missing from the device id string.

Ideally, device id is generated (in blkdev.cc) as 'vendor_model_serial', in case all fields were successfully retrieved from the device. In cases where they were not, device id can also be generated as 'model_serial' or 'serial'. Splitting by '_' fails in the latter case (since 'serial' is the only element in the string).

In order to anonymize serial numbers in smartctl reports we now rely on the serial number value as retrieved from the raw smartctl report itself (as opposed to the one in device id). That's in order to avoid possible inconsistencies between the serial retrieved from device id and the one in the report.

Fixes: https://tracker.ceph.com/issues/46977
Signed-off-by: Yaarit Hatuka
(cherry picked from commit e5099a7b58bcf39d80beb908c192c3bf639db1a4)

Conflicts:
  src/pybind/mgr/telemetry/module.py

In master we use Python 3's f-string formatting to create 'anon_devid':
  anon_devid = f"{devid.rsplit('_', 1)[0]}_{uuid.uuid1()}"
The conflict happened since Nautilus still uses Python 2, and 'anon_devid' is created via string concatenation:
anon_devid = devid[:devid.rfind('_')] + '_' + str(uuid.uuid1()) --- diff --git a/src/pybind/mgr/telemetry/module.py b/src/pybind/mgr/telemetry/module.py index 115c8fd14f42..305b59b3a8af 100644 --- a/src/pybind/mgr/telemetry/module.py +++ b/src/pybind/mgr/telemetry/module.py @@ -397,21 +397,29 @@ class Module(MgrModule): if not anon_host: anon_host = str(uuid.uuid1()) self.set_store('host-id/%s' % host, anon_host) + serial = None for dev, rep in m.items(): rep['host_id'] = anon_host + if serial is None and 'serial_number' in rep: + serial = rep['serial_number'] # anonymize device id anon_devid = self.get_store('devid-id/%s' % devid) if not anon_devid: - anon_devid = devid[:devid.rfind('_')] + '_' + str(uuid.uuid1()) + # ideally devid is 'vendor_model_serial', + # but can also be 'model_serial', 'serial' + if '_' in devid: + anon_devid = devid[:devid.rfind('_')] + '_' + str(uuid.uuid1()) + else: + anon_devid = str(uuid.uuid1()) self.set_store('devid-id/%s' % devid, anon_devid) self.log.info('devid %s / %s, host %s / %s' % (devid, anon_devid, host, anon_host)) # anonymize the smartctl report itself - serial = devid.rsplit('_', 1)[1] - m_str = json.dumps(m) - m = json.loads(m_str.replace(serial, 'deleted')) + if serial: + m_str = json.dumps(m) + m = json.loads(m_str.replace(serial, 'deleted')) if anon_host not in res: res[anon_host] = {}