From: John Spray Date: Fri, 23 Jun 2017 10:37:53 +0000 (-0400) Subject: mon: prettify health check log messages X-Git-Tag: v12.1.1~58^2~15 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9ef298eabe5265efc7913702cb6f8016f4dd760f;p=ceph.git mon: prettify health check log messages Add a "Cluster is now healthy" message to give clarity, after a series of "health check cleared" messages, that they were the last ones. Convert certain health check messages into well-formed sentences. Don't print severity in the log string (it's already expressed in the severity of the log entry). Signed-off-by: John Spray --- diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 1b999a81b20..9421b4cbf14 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -2545,8 +2545,8 @@ void Monitor::log_health( if (q == previous.checks.end()) { // new ostringstream ss; - ss << p.second.severity << " " << p.first << ": " - << p.second.summary; + ss << "Health check failed: " << p.second.summary << " (" + << p.first << ")"; if (p.second.severity == HEALTH_WARN) clog->warn() << ss.str(); else @@ -2556,8 +2556,7 @@ void Monitor::log_health( p.second.severity != q->second.severity) { // summary or severity changed (ignore detail changes at this level) ostringstream ss; - ss << p.second.severity << " " << p.first << " (update): " - << p.second.summary; + ss << "Health check update: " << p.second.summary << " (" << p.first << ")"; if (p.second.severity == HEALTH_WARN) clog->warn() << ss.str(); else @@ -2569,8 +2568,34 @@ void Monitor::log_health( if (!updated.checks.count(p.first)) { // cleared ostringstream ss; - ss << HEALTH_OK << " " << p.first << ": " << p.second.summary; - clog->info() << ss.str(); + if (p.first == "DEGRADED_OBJECTS") { + clog->info() << "All degraded objects recovered"; + } else if (p.first == "OSD_FLAGS") { + clog->info() << "OSD flags cleared"; + } else { + clog->info() << "Health check cleared: " << p.first << " (was: " + << p.second.summary << ")"; + } + 
} + } + + if (previous.checks.size() && updated.checks.size() == 0) { + // We might be going into a fully healthy state, check + // other subsystems + bool any_checks = false; + for (auto& svc : paxos_service) { + if (&(svc->get_health_checks()) == &(previous)) { + // Ignore the ones we're clearing right now + continue; + } + + if (svc->get_health_checks().checks.size() > 0) { + any_checks = true; + break; + } + } + if (!any_checks) { + clog->info() << "Cluster is now healthy"; } } }