From f1670fdafc4bb8ceb7c6cbbc683ace2c0dd08872 Mon Sep 17 00:00:00 2001 From: Prashant D Date: Fri, 30 Oct 2020 06:40:43 -0400 Subject: [PATCH] mon: Log "ceph health detail" periodically in cluster log Change mon_health_to_clog_interval from 1_hr to 10_min so that the health summary or detail is logged more frequently. No HealthMonitor class in nautilus. Fixes: https://tracker.ceph.com/issues/48042 Signed-off-by: Prashant Dhange (cherry picked from commit f45712c19077c5cf5a9938fc3fd17b64ffe3a4ec) Conflicts: PendingReleaseNotes - add and restructure 14.2.16 --- PendingReleaseNotes | 22 +++++++++++++++++++--- qa/tasks/ceph.conf.template | 1 + src/common/legacy_config_opts.h | 1 + src/common/options.cc | 6 +++++- src/mon/Monitor.cc | 11 ++++++++++- 5 files changed, 36 insertions(+), 5 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 1722a43bfaff3..acbe6dcc42979 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -1,9 +1,25 @@ +14.2.16 +------- + +* The structured output of ``ceph status`` or ``ceph -s`` is now more + concise, particularly the ``mgrmap`` and ``monmap`` sections, and the + structure of the ``osdmap`` section has been cleaned up. + +* MON: The cluster log now logs health detail every ``mon_health_to_clog_interval``, + which has been changed from 1hr to 10min. Logging of health detail will be + skipped if there is no change in health summary since last known. + + 14.2.15 ------- * MGR: progress module can now be turned on/off, using the commands: ``ceph progress on`` and ``ceph progress off``. -* The structured output of ``ceph status`` or ``ceph -s`` is now more - concise, particularly the ``mgrmap`` and ``monmap`` sections, and the - structure of the ``osdmap`` section has been cleaned up. + +14.2.13 +------- + +* This release fixes a regression introduced in 14.2.12 which broke deployments + that referred to MON hosts using DNS names instead of IP addresses in the + ``mon_host`` parameter in ``/etc/ceph/ceph.conf``.
diff --git a/qa/tasks/ceph.conf.template b/qa/tasks/ceph.conf.template index 5be8c06bd16d5..0c0f503abfd0a 100644 --- a/qa/tasks/ceph.conf.template +++ b/qa/tasks/ceph.conf.template @@ -36,6 +36,7 @@ mon cluster log file level = debug debug asserts on shutdown = true + mon health detail to clog = false # we see this fail in qa on *nautilus*; bump up retries mon_client_directed_command_retry = 4 diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h index 586b3f6d8e161..d1869cc003180 100644 --- a/src/common/legacy_config_opts.h +++ b/src/common/legacy_config_opts.h @@ -272,6 +272,7 @@ OPTION(mon_reweight_max_change, OPT_DOUBLE) OPTION(mon_health_to_clog, OPT_BOOL) OPTION(mon_health_to_clog_interval, OPT_INT) OPTION(mon_health_to_clog_tick_interval, OPT_DOUBLE) +OPTION(mon_health_detail_to_clog, OPT_BOOL) OPTION(mon_data_avail_crit, OPT_INT) OPTION(mon_data_avail_warn, OPT_INT) OPTION(mon_data_size_warn, OPT_U64) // issue a warning when the monitor's data store goes over 15GB (in bytes) diff --git a/src/common/options.cc b/src/common/options.cc index a4bd01a0f18ac..df16c9457f746 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -1871,7 +1871,7 @@ std::vector