From 4dff975047a9a33edeb5577aa275f15c1b627a0e Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 29 Oct 2019 11:08:42 -0500 Subject: [PATCH] mgr/telemetry: add CephFS metadata Signed-off-by: Sage Weil (cherry picked from commit 7f6aad677b76847514f6f9b893827412dfb35a6b) --- PendingReleaseNotes | 27 +++++++++++++++++++++++++++ src/pybind/mgr/telemetry/module.py | 26 +++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index a84635eeb9804..a3ececc3500b3 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -25,6 +25,9 @@ objects and the other deletes them. Read the troubleshooting section of the dynamic resharding docs for details. +14.2.5 +------ + * The telemetry module now has a 'device' channel, enabled by default, that will report anonymized hard disk and SSD health metrics to telemetry.ceph.com in order to build and improve device failure prediction algorithms. Because ceph config set mgr mgr/telemetry/channel_crash false ceph telemetry on + +* The telemetry module now reports more information about CephFS file systems, including: + + - how many MDS daemons (in total and per file system) + - which features are (or have been) enabled + - how many data pools + - approximate file system age (year + month of creation) + + If you had telemetry enabled, you will need to re-opt-in with:: + + ceph telemetry on + + You can view exactly what information will be reported first with:: + + ceph telemetry show # see everything + ceph telemetry show basic # basic cluster info, including the new CephFS info + +* The following invalid settings are no longer tolerated + for the command `ceph osd erasure-code-profile set xxx`. 
+ * invalid `m` for "reed_sol_r6_op" erasure technique + * invalid `m` and invalid `w` for "liber8tion" erasure technique + +## NOTE remove this previous item once these telemetry items are cherry-picked ## \ No newline at end of file diff --git a/src/pybind/mgr/telemetry/module.py b/src/pybind/mgr/telemetry/module.py index b3630bff90417..74d718af5aecb 100644 --- a/src/pybind/mgr/telemetry/module.py +++ b/src/pybind/mgr/telemetry/module.py @@ -48,6 +48,7 @@ REVISION = 3 # # Version 3: # - added device health metrics (i.e., SMART data, minus serial number) +# - added CephFS metadata (how many MDSs, fs features, how many data pools) class Module(MgrModule): config = dict() @@ -416,8 +417,31 @@ class Module(MgrModule): } report['fs'] = { - 'count': len(fs_map['filesystems']) + 'count': len(fs_map['filesystems']), + 'feature_flags': fs_map['feature_flags'], + 'num_standby_mds': len(fs_map['standbys']), + 'filesystems': [], } + num_mds = len(fs_map['standbys']) + for fsm in fs_map['filesystems']: + fs = fsm['mdsmap'] + report['fs']['filesystems'].append({ + 'max_mds': fs['max_mds'], + 'ever_allowed_features': fs['ever_allowed_features'], + 'explicitly_allowed_features': fs['explicitly_allowed_features'], + 'num_in': len(fs['in']), + 'num_up': len(fs['up']), + 'num_standby_replay': len( + [mds for gid, mds in fs['info'].items() + if mds['state'] == 'up:standby-replay']), + 'num_mds': len(fs['info']), + 'balancer_enabled': len(fs['balancer']) > 0, + 'num_data_pools': len(fs['data_pools']), + 'standby_count_wanted': fs['standby_count_wanted'], + 'approx_ctime': fs['created'][0:7], + }) + num_mds += len(fs['info']) + report['fs']['total_num_mds'] = num_mds report['metadata'] = dict() report['metadata']['osd'] = self.gather_osd_metadata(osd_map) -- 2.39.5