git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/telemetry: add stats about crush map
author Sage Weil <sage@redhat.com>
Fri, 1 Nov 2019 12:58:54 +0000 (07:58 -0500)
committer Sage Weil <sage@redhat.com>
Wed, 6 Nov 2019 12:41:50 +0000 (06:41 -0600)
Signed-off-by: Sage Weil <sage@redhat.com>
(cherry picked from commit 713dbc9722888d3bf60d772dbca23e13b0cafc38)

PendingReleaseNotes
src/pybind/mgr/telemetry/module.py

index dac84744fd421d6251c92ce09d7343e9427da489..3d13db9f46cf0e2446ce0f29226291992453482a 100644 (file)
@@ -69,6 +69,7 @@
     - whether a separate OSD cluster network is being used
     - how many RBD pools and images are in the cluster, and how many pools have RBD mirroring enabled
     - how many RGW daemons, zones, and zonegroups are present; which RGW frontends are in use
+    - aggregate stats about the CRUSH map, like which algorithms are used, how big buckets are, how many rules are defined, and what tunables are in use
 
   If you had telemetry enabled, you will need to re-opt-in with::
 
index b31582777724d562f5973b171fc62819e7771b50..65f956a6c76ecf74fae4dd9ae5cd0e021646744f 100644 (file)
@@ -57,6 +57,7 @@ REVISION = 3
 #   - whether an OSD cluster network is in use
 #   - rbd pool and image count, and rbd mirror mode (pool-level)
 #   - rgw daemons, zones, zonegroups; which rgw frontends
+#   - crush map stats
 
 class Module(MgrModule):
     config = dict()
@@ -255,6 +256,45 @@ class Module(MgrModule):
 
         return metadata
 
+    def gather_crush_info(self):  # build the aggregate CRUSH stats dict stored under report['crush']
+        osdmap = self.get_osdmap()
+        crush_raw = osdmap.get_crush()
+        crush = crush_raw.dump()  # dict form of the CRUSH map; the keys read below are all this method relies on
+
+        def inc(d, k):  # count one occurrence of key k in dict d (first occurrence starts at 1)
+            if k in d:
+                d[k] += 1
+            else:
+                d[k] = 1
+
+        device_classes = {}  # device class -> number of devices
+        for dev in crush['devices']:
+            inc(device_classes, dev.get('class', ''))  # devices without a 'class' key are tallied under ''
+
+        bucket_algs = {}  # bucket 'alg' value -> number of buckets
+        bucket_types = {}  # bucket 'type_id' value -> number of buckets
+        bucket_sizes = {}  # number of items in a bucket -> number of buckets of that size
+        for bucket in crush['buckets']:
+            if '~' in bucket['name']:  # ignore shadow buckets
+                continue
+            inc(bucket_algs, bucket['alg'])
+            inc(bucket_types, bucket['type_id'])
+            inc(bucket_sizes, len(bucket['items']))
+
+        return {
+            'num_devices': len(crush['devices']),
+            'num_types': len(crush['types']),
+            'num_buckets': len(crush['buckets']),  # unfiltered: shadow buckets are included here, unlike the histograms
+            'num_rules': len(crush['rules']),
+            'device_classes': list(device_classes.values()),  # only the per-class counts; class names are dropped
+            'tunables': crush['tunables'],  # passed through as dumped
+            'compat_weight_set': '-1' in crush['choose_args'],  # True if choose_args has a '-1' key (presumably the compat weight set; confirm)
+            'num_weight_sets': len(crush['choose_args']),
+            'bucket_algs': bucket_algs,
+            'bucket_sizes': bucket_sizes,
+            'bucket_types': bucket_types,
+        }
+
     def gather_configs(self):
         # cluster config options
         cluster = set()
@@ -485,6 +525,9 @@ class Module(MgrModule):
                 'cluster_network': cluster_network,
             }
 
+            # crush
+            report['crush'] = self.gather_crush_info()
+
             # cephfs
             report['fs'] = {
                 'count': len(fs_map['filesystems']),