From 713dbc9722888d3bf60d772dbca23e13b0cafc38 Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Fri, 1 Nov 2019 07:58:54 -0500
Subject: [PATCH] mgr/telemetry: add stats about crush map

Signed-off-by: Sage Weil
---
NOTE(review): line breaks and leading indentation in this patch were
reconstructed from a whitespace-collapsed copy. Context-line indentation is
a best guess; verify against the target tree (or apply with
`git apply --ignore-whitespace`) and re-check the nesting of added lines.

 PendingReleaseNotes                |  1 +
 src/pybind/mgr/telemetry/module.py | 43 ++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index de3dfa6e472de..9edf474c8faf4 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -214,6 +214,7 @@
   - whether a separate OSD cluster network is being used
   - how many RBD pools and images are in the cluster, and how many pools have RBD mirroring enabled
   - how many RGW daemons, zones, and zonegroups are present; which RGW frontends are in use
+  - aggregate stats about the CRUSH map, like which algorithms are used, how big buckets are, how many rules are defined, and what tunables are in use

   If you had telemetry enabled, you will need to re-opt-in with::

diff --git a/src/pybind/mgr/telemetry/module.py b/src/pybind/mgr/telemetry/module.py
index c7cc5738d75bb..8eea3b21eda47 100644
--- a/src/pybind/mgr/telemetry/module.py
+++ b/src/pybind/mgr/telemetry/module.py
@@ -57,6 +57,7 @@ REVISION = 3
 # - whether an OSD cluster network is in use
 # - rbd pool and image count, and rbd mirror mode (pool-level)
 # - rgw daemons, zones, zonegroups; which rgw frontends
+# - crush map stats

 class Module(MgrModule):
     config = dict()
@@ -245,6 +246,45 @@ class Module(MgrModule):

         return metadata

+    def gather_crush_info(self):
+        osdmap = self.get_osdmap()
+        crush_raw = osdmap.get_crush()
+        crush = crush_raw.dump()
+
+        def inc(d, k):
+            if k in d:
+                d[k] += 1
+            else:
+                d[k] = 1
+
+        device_classes = {}
+        for dev in crush['devices']:
+            inc(device_classes, dev.get('class', ''))
+
+        bucket_algs = {}
+        bucket_types = {}
+        bucket_sizes = {}
+        for bucket in crush['buckets']:
+            if '~' in bucket['name']: # ignore shadow buckets
+                continue
+            inc(bucket_algs, bucket['alg'])
+            inc(bucket_types, bucket['type_id'])
+            inc(bucket_sizes, len(bucket['items']))
+
+        return {
+            'num_devices': len(crush['devices']),
+            'num_types': len(crush['types']),
+            'num_buckets': len(crush['buckets']),
+            'num_rules': len(crush['rules']),
+            'device_classes': list(device_classes.values()),
+            'tunables': crush['tunables'],
+            'compat_weight_set': '-1' in crush['choose_args'],
+            'num_weight_sets': len(crush['choose_args']),
+            'bucket_algs': bucket_algs,
+            'bucket_sizes': bucket_sizes,
+            'bucket_types': bucket_types,
+        }
+
     def gather_configs(self):
         # cluster config options
         cluster = set()
@@ -472,6 +512,9 @@ class Module(MgrModule):
                 'cluster_network': cluster_network,
             }

+            # crush
+            report['crush'] = self.gather_crush_info()
+
             # cephfs
             report['fs'] = {
                 'count': len(fs_map['filesystems']),
-- 
2.39.5