From: Sage Weil Date: Wed, 31 Jul 2019 07:04:20 +0000 (-0500) Subject: mon/HealthMonitor: allow muted alert counts to decrease but not increase X-Git-Tag: v15.1.0~1877^2~12 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=0acff80f64471d4a56e8e17d0c6471ca5d1c8b2d;p=ceph-ci.git mon/HealthMonitor: allow muted alert counts to decrease but not increase If the summary starts with a digit, parse a count. If the count goes up, clear the mute. If the count goes down, update the mute so that we ratchet the threshold down. Signed-off-by: Sage Weil --- diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc index f0ac5412f84..7c51e025baa 100644 --- a/src/mon/HealthMonitor.cc +++ b/src/mon/HealthMonitor.cc @@ -387,15 +387,44 @@ bool HealthMonitor::check_mutes() << " cleared (passed TTL " << p->second.ttl << ")"; p = pending_mutes.erase(p); changed = true; - } else if (!p->second.sticky && - all.checks.count(p->first) == 0) { - mon->clog->info() << "Health alert mute " << p->first - << " cleared (health alert cleared)"; - p = pending_mutes.erase(p); - changed = true; - } else { - ++p; + continue; + } + if (!p->second.sticky) { + auto q = all.checks.find(p->first); + if (q == all.checks.end()) { + mon->clog->info() << "Health alert mute " << p->first + << " cleared (health alert cleared)"; + p = pending_mutes.erase(p); + changed = true; + continue; + } + if (p->second.summary.size() && std::isdigit(p->second.summary[0])) { + int64_t mute_val = atoll(p->second.summary.c_str()); + int64_t cur_val = atoll(q->second.summary.c_str()); + if (cur_val > mute_val) { + mon->clog->info() << "Health alert mute " << p->first + << " cleared (count increased from " << mute_val + << " to " << cur_val << ")"; + p = pending_mutes.erase(p); + changed = true; + continue; + } + if (p->second.summary != q->second.summary) { + // update summary string for good measure + p->second.summary = q->second.summary; + changed = true; + } + } else { + if (p->second.summary != q->second.summary) { + mon->clog->info() << "Health alert mute " << p->first + << " cleared (summary changed)"; + p = pending_mutes.erase(p); + changed = true; + continue; + } + } } + ++p; } return changed; }