From 9ef298eabe5265efc7913702cb6f8016f4dd760f Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 23 Jun 2017 06:37:53 -0400 Subject: [PATCH] mon: prettify health check log messages Add a "Cluster is now healthy" message to give clarity after a series of "health check cleared" messages that they were the last ones. Convert certain health check messages into well-formed sentences. Don't print severity in the log string (it's already expressed in the severity of the log entry). Signed-off-by: John Spray --- src/mon/Monitor.cc | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 1b999a81b20..9421b4cbf14 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -2545,8 +2545,8 @@ void Monitor::log_health( if (q == previous.checks.end()) { // new ostringstream ss; - ss << p.second.severity << " " << p.first << ": " - << p.second.summary; + ss << "Health check failed: " << p.second.summary << " (" + << p.first << ")"; if (p.second.severity == HEALTH_WARN) clog->warn() << ss.str(); else @@ -2556,8 +2556,7 @@ void Monitor::log_health( p.second.severity != q->second.severity) { // summary or severity changed (ignore detail changes at this level) ostringstream ss; - ss << p.second.severity << " " << p.first << " (update): " - << p.second.summary; + ss << "Health check update: " << p.second.summary << " (" << p.first << ")"; if (p.second.severity == HEALTH_WARN) clog->warn() << ss.str(); else @@ -2569,8 +2568,34 @@ void Monitor::log_health( if (!updated.checks.count(p.first)) { // cleared ostringstream ss; - ss << HEALTH_OK << " " << p.first << ": " << p.second.summary; - clog->info() << ss.str(); + if (p.first == "DEGRADED_OBJECTS") { + clog->info() << "All degraded objects recovered"; + } else if (p.first == "OSD_FLAGS") { + clog->info() << "OSD flags cleared"; + } else { + clog->info() << "Health check cleared: " << p.first << " (was: " + << p.second.summary << ")"; + } + } + } + + if 
(previous.checks.size() && updated.checks.size() == 0) { + // We might be going into a fully healthy state, check + // other subsystems + bool any_checks = false; + for (auto& svc : paxos_service) { + if (&(svc->get_health_checks()) == &(previous)) { + // Ignore the ones we're clearing right now + continue; + } + + if (svc->get_health_checks().checks.size() > 0) { + any_checks = true; + break; + } + } + if (!any_checks) { + clog->info() << "Cluster is now healthy"; } } } -- 2.39.5