From: Sage Weil
Date: Fri, 1 Nov 2019 12:58:54 +0000 (-0500)
Subject: mgr/telemetry: add stats about crush map
X-Git-Tag: v15.1.0~1083^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F31226%2Fhead;p=ceph.git

mgr/telemetry: add stats about crush map

Signed-off-by: Sage Weil
---

diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index de3dfa6e472d..9edf474c8faf 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -214,6 +214,7 @@
     - whether a separate OSD cluster network is being used
     - how many RBD pools and images are in the cluster, and how many pools have RBD mirroring enabled
     - how many RGW daemons, zones, and zonegroups are present; which RGW frontends are in use
+    - aggregate stats about the CRUSH map, like which algorithms are used, how big buckets are, how many rules are defined, and what tunables are in use
 
   If you had telemetry enabled, you will need to re-opt-in with::
 
diff --git a/src/pybind/mgr/telemetry/module.py b/src/pybind/mgr/telemetry/module.py
index c7cc5738d75b..8eea3b21eda4 100644
--- a/src/pybind/mgr/telemetry/module.py
+++ b/src/pybind/mgr/telemetry/module.py
@@ -57,6 +57,7 @@ REVISION = 3
 #   - whether an OSD cluster network is in use
 #   - rbd pool and image count, and rbd mirror mode (pool-level)
 #   - rgw daemons, zones, zonegroups; which rgw frontends
+#   - crush map stats
 
 class Module(MgrModule):
     config = dict()
@@ -245,6 +246,45 @@ class Module(MgrModule):
 
         return metadata
 
+    def gather_crush_info(self):
+        osdmap = self.get_osdmap()
+        crush_raw = osdmap.get_crush()
+        crush = crush_raw.dump()
+
+        def inc(d, k):
+            if k in d:
+                d[k] += 1
+            else:
+                d[k] = 1
+
+        device_classes = {}
+        for dev in crush['devices']:
+            inc(device_classes, dev.get('class', ''))
+
+        bucket_algs = {}
+        bucket_types = {}
+        bucket_sizes = {}
+        for bucket in crush['buckets']:
+            if '~' in bucket['name']:  # ignore shadow buckets
+                continue
+            inc(bucket_algs, bucket['alg'])
+            inc(bucket_types, bucket['type_id'])
+            inc(bucket_sizes, len(bucket['items']))
+
+        return {
+            'num_devices': len(crush['devices']),
+            'num_types': len(crush['types']),
+            'num_buckets': len(crush['buckets']),
+            'num_rules': len(crush['rules']),
+            'device_classes': list(device_classes.values()),
+            'tunables': crush['tunables'],
+            'compat_weight_set': '-1' in crush['choose_args'],
+            'num_weight_sets': len(crush['choose_args']),
+            'bucket_algs': bucket_algs,
+            'bucket_sizes': bucket_sizes,
+            'bucket_types': bucket_types,
+        }
+
     def gather_configs(self):
         # cluster config options
         cluster = set()
@@ -472,6 +512,9 @@ class Module(MgrModule):
                 'cluster_network': cluster_network,
             }
 
+            # crush
+            report['crush'] = self.gather_crush_info()
+
             # cephfs
             report['fs'] = {
                 'count': len(fs_map['filesystems']),