From 6e0548785748e4577a40b265ccee764bf9b234b6 Mon Sep 17 00:00:00 2001
From: Yaarit Hatuka
Date: Wed, 1 Jun 2022 04:46:17 +0000
Subject: [PATCH] mgr/telemetry: add Rook data

Add the first Rook data collection to telemetry's basic channel.

We choose to nag with this collection since we wish to know the volume
of Rook deployments in the wild.

The next Rook collections should have consecutive numbers
(basic_rook_v02, basic_rook_v03, ...).

See tracker below for more details.

Fixes: https://tracker.ceph.com/issues/55740
Signed-off-by: Yaarit Hatuka
(cherry picked from commit 63f5dcdb520ea4f5e0400e9c6d9f0da29998e437)
---
 src/pybind/mgr/telemetry/module.py | 77 +++++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/src/pybind/mgr/telemetry/module.py b/src/pybind/mgr/telemetry/module.py
index ffaa47b8f6b35..ea97df47741ba 100644
--- a/src/pybind/mgr/telemetry/module.py
+++ b/src/pybind/mgr/telemetry/module.py
@@ -68,6 +68,7 @@ class Collection(str, enum.Enum):
     basic_mds_metadata = 'basic_mds_metadata'
     basic_pool_usage = 'basic_pool_usage'
     basic_usage_by_class = 'basic_usage_by_class'
+    basic_rook_v01 = 'basic_rook_v01'
 
 MODULE_COLLECTION : List[Dict] = [
     {
@@ -117,7 +118,42 @@ MODULE_COLLECTION : List[Dict] = [
         "description": "Default device class usage statistics",
         "channel": "basic",
         "nag": False
-    }
+    },
+    {
+        "name": Collection.basic_rook_v01,
+        "description": "Basic Rook deployment data",
+        "channel": "basic",
+        "nag": True
+    },
+]
+
+ROOK_KEYS_BY_COLLECTION : List[Tuple[str, Collection]] = [
+    # Note: a key cannot be both a node and a leaf, e.g.
+    # "rook/a/b"
+    # "rook/a/b/c"
+    ("rook/version", Collection.basic_rook_v01),
+    ("rook/kubernetes/version", Collection.basic_rook_v01),
+    ("rook/csi/version", Collection.basic_rook_v01),
+    ("rook/node/count/kubernetes-total", Collection.basic_rook_v01),
+    ("rook/node/count/with-ceph-daemons", Collection.basic_rook_v01),
+    ("rook/node/count/with-csi-rbd-plugin", Collection.basic_rook_v01),
+    ("rook/node/count/with-csi-cephfs-plugin", Collection.basic_rook_v01),
+    ("rook/node/count/with-csi-nfs-plugin", Collection.basic_rook_v01),
+    ("rook/usage/storage-class/count/total", Collection.basic_rook_v01),
+    ("rook/usage/storage-class/count/rbd", Collection.basic_rook_v01),
+    ("rook/usage/storage-class/count/cephfs", Collection.basic_rook_v01),
+    ("rook/usage/storage-class/count/nfs", Collection.basic_rook_v01),
+    ("rook/usage/storage-class/count/bucket", Collection.basic_rook_v01),
+    ("rook/cluster/storage/device-set/count/total", Collection.basic_rook_v01),
+    ("rook/cluster/storage/device-set/count/portable", Collection.basic_rook_v01),
+    ("rook/cluster/storage/device-set/count/non-portable", Collection.basic_rook_v01),
+    ("rook/cluster/mon/count", Collection.basic_rook_v01),
+    ("rook/cluster/mon/allow-multiple-per-node", Collection.basic_rook_v01),
+    ("rook/cluster/mon/max-id", Collection.basic_rook_v01),
+    ("rook/cluster/mon/pvc/enabled", Collection.basic_rook_v01),
+    ("rook/cluster/mon/stretch/enabled", Collection.basic_rook_v01),
+    ("rook/cluster/network/provider", Collection.basic_rook_v01),
+    ("rook/cluster/external-mode", Collection.basic_rook_v01),
 ]
 
 class Module(MgrModule):
@@ -1165,6 +1201,9 @@ class Module(MgrModule):
                     'active': False
                 }
 
+            # Rook
+            self.get_rook_data(report)
+
         if 'crash' in channels:
             report['crashes'] = self.gather_crashinfo()
 
@@ -1184,6 +1223,42 @@ class Module(MgrModule):
 
         return report
 
+    def get_rook_data(self, report: Dict[str, object]) -> None:
+        r, outb, outs = self.mon_command({
+            'prefix': 'config-key dump',
+            'format': 'json'
+        })
+        if r != 0:
+            return
+        try:
+            config_kv_dump = json.loads(outb)
+        except json.decoder.JSONDecodeError:
+            return
+
+        for elem in ROOK_KEYS_BY_COLLECTION:
+            # elem[0] is the full key path (e.g. "rook/node/count/with-csi-nfs-plugin")
+            # elem[1] is the Collection this key belongs to
+            if self.is_enabled_collection(elem[1]):
+                self.add_kv_to_report(report, elem[0], config_kv_dump.get(elem[0]))
+
+    def add_kv_to_report(self, report: Dict[str, object], key_path: str, value: Any) -> None:
+        last_node = key_path.split('/')[-1]
+        for node in key_path.split('/')[0:-1]:
+            if node not in report:
+                report[node] = {}
+            report = report[node]  # type: ignore
+
+            # sanity check of keys correctness
+            if not isinstance(report, dict):
+                self.log.error(f"'{key_path}' is an invalid key, expected type 'dict' but got {type(report)}")
+                return
+
+        if last_node in report:
+            self.log.error(f"'{key_path}' is an invalid key, last part must not exist at this point")
+            return
+
+        report[last_node] = value
+
     def _try_post(self, what: str, url: str, report: Dict[str, Dict[str, str]]) -> Optional[str]:
         self.log.info('Sending %s to: %s' % (what, url))
         proxies = dict()
-- 
2.39.5