From: Kefu Chai Date: Thu, 10 Dec 2020 08:08:50 +0000 (+0800) Subject: mon/HealthMonitor: split prepare_health_checks() into smaller functions X-Git-Tag: v16.1.0~203^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f22e74a8be566325803037e4f784bf559e35d913;p=ceph.git mon/HealthMonitor: split prepare_health_checks() into smaller functions for better readability. and take this opportunity to reformat them. Signed-off-by: Kefu Chai --- diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc index ac36bb45fc68..953164df9e1d 100644 --- a/src/mon/HealthMonitor.cc +++ b/src/mon/HealthMonitor.cc @@ -684,77 +684,98 @@ bool HealthMonitor::check_leader_health() health_check_map_t next; - static utime_t old_version_first_time; - // DAEMON_OLD_VERSION if (g_conf().get_val("mon_warn_on_older_version")) { - utime_t now = ceph_clock_now(); - if (old_version_first_time == utime_t()) - old_version_first_time = now; - if ((now - old_version_first_time) > g_conf().get_val("mon_warn_older_version_delay")) { - std::map > all_versions; - mon.get_all_versions(all_versions); - if (all_versions.size() > 1) { - dout(20) << __func__ << " all_versions=" << all_versions << dendl; - // The last entry has the largest version - dout(20) << __func__ << " highest version daemon count " - << all_versions.rbegin()->second.size() << dendl; - // Erase last element (the highest version running) - all_versions.erase(all_versions.rbegin()->first); - ceph_assert(all_versions.size() > 0); - ostringstream ss; - unsigned daemon_count = 0; - for (auto& g:all_versions) { - daemon_count += g.second.size(); - } - int ver_count = all_versions.size(); - ceph_assert(!(daemon_count == 1 && ver_count != 1)); - ss << "There " << (daemon_count == 1 ? "is a daemon" : "are daemons") - << " running " << (ver_count > 1 ? "multiple old versions" : "an older version") << " of ceph"; - health_status_t status; - if (ver_count > 1) - status = HEALTH_ERR; - else - status = HEALTH_WARN; - auto& d = next.add("DAEMON_OLD_VERSION", status, ss.str(), all_versions.size()); - for (auto& g:all_versions) { - ostringstream ds; - for (auto& i : g.second) { // Daemon list - ds << i << " "; - } - ds << (g.second.size() == 1 ? "is" : "are") - << " running an older version of ceph: " << g.first; - d.detail.push_back(ds.str()); - } - } else { - old_version_first_time = utime_t(); + check_for_older_version(&next); } + // MON_DOWN + check_for_mon_down(&next); + // MON_CLOCK_SKEW + check_for_clock_skew(&next); + // MON_MSGR2_NOT_ENABLED + if (g_conf().get_val("mon_warn_on_msgr2_not_enabled")) { + check_if_msgr2_enabled(&next); } + + if (next != leader_checks) { + changed = true; + leader_checks = next; } + return changed; +} - // MON_DOWN - { - int max = mon.monmap->size(); - int actual = mon.get_quorum().size(); - if (actual < max) { +void HealthMonitor::check_for_older_version(health_check_map_t *checks) +{ + utime_t now = ceph_clock_now(); + static utime_t old_version_first_time; + + if (old_version_first_time == utime_t()) + old_version_first_time = now; + if ((now - old_version_first_time) > g_conf().get_val("mon_warn_older_version_delay")) { + std::map > all_versions; + mon.get_all_versions(all_versions); + if (all_versions.size() > 1) { + dout(20) << __func__ << " all_versions=" << all_versions << dendl; + // The last entry has the largest version + dout(20) << __func__ << " highest version daemon count " + << all_versions.rbegin()->second.size() << dendl; + // Erase last element (the highest version running) + all_versions.erase(all_versions.rbegin()->first); + ceph_assert(all_versions.size() > 0); ostringstream ss; - ss << (max-actual) << "/" << max << " mons down, quorum " - << mon.get_quorum_names(); - auto& d = next.add("MON_DOWN", HEALTH_WARN, ss.str(), max - actual); - set q = mon.get_quorum(); - for (int i=0; iget_name(i) << " (rank " << i - << ") addr " << mon.monmap->get_addrs(i) - << " is down (out of quorum)"; - d.detail.push_back(ss.str()); + unsigned daemon_count = 0; + for (auto& g : all_versions) { + daemon_count += g.second.size(); + } + int ver_count = all_versions.size(); + ceph_assert(!(daemon_count == 1 && ver_count != 1)); + ss << "There " << (daemon_count == 1 ? "is a daemon" : "are daemons") + << " running " << (ver_count > 1 ? "multiple old versions" : "an older version") << " of ceph"; + health_status_t status; + if (ver_count > 1) + status = HEALTH_ERR; + else + status = HEALTH_WARN; + auto& d = checks->add("DAEMON_OLD_VERSION", status, ss.str(), all_versions.size()); + for (auto& g : all_versions) { + ostringstream ds; + for (auto& i : g.second) { // Daemon list + ds << i << " "; } + ds << (g.second.size() == 1 ? "is" : "are") + << " running an older version of ceph: " << g.first; + d.detail.push_back(ds.str()); } + } else { + old_version_first_time = utime_t(); } } +} - // MON_CLOCK_SKEW +void HealthMonitor::check_for_mon_down(health_check_map_t *checks) +{ + int max = mon.monmap->size(); + int actual = mon.get_quorum().size(); + if (actual < max) { + ostringstream ss; + ss << (max-actual) << "/" << max << " mons down, quorum " + << mon.get_quorum_names(); + auto& d = checks->add("MON_DOWN", HEALTH_WARN, ss.str(), max - actual); + set q = mon.get_quorum(); + for (int i=0; iget_name(i) << " (rank " << i + << ") addr " << mon.monmap->get_addrs(i) + << " is down (out of quorum)"; + d.detail.push_back(ss.str()); + } + } + } +} + +void HealthMonitor::check_for_clock_skew(health_check_map_t *checks) +{ if (!mon.timecheck_skews.empty()) { list warns; list details; @@ -781,14 +802,15 @@ bool HealthMonitor::check_leader_health() if (!warns.empty()) ss << ","; } - auto& d = next.add("MON_CLOCK_SKEW", HEALTH_WARN, ss.str(), details.size()); + auto& d = checks->add("MON_CLOCK_SKEW", HEALTH_WARN, ss.str(), details.size()); d.detail.swap(details); } } +} - // MON_MSGR2_NOT_ENABLED +void HealthMonitor::check_if_msgr2_enabled(health_check_map_t *checks) +{ if (g_conf().get_val("ms_bind_msgr2") && - g_conf().get_val("mon_warn_on_msgr2_not_enabled") && mon.monmap->get_required_features().contains_all( ceph::features::mon::FEATURE_NAUTILUS)) { list details; @@ -803,15 +825,9 @@ bool HealthMonitor::check_leader_health() if (!details.empty()) { ostringstream ss; ss << details.size() << " monitors have not enabled msgr2"; - auto& d = next.add("MON_MSGR2_NOT_ENABLED", HEALTH_WARN, ss.str(), - details.size()); + auto &d = checks->add("MON_MSGR2_NOT_ENABLED", HEALTH_WARN, ss.str(), + details.size()); d.detail.swap(details); } } - - if (next != leader_checks) { - changed = true; - leader_checks = next; - } - return changed; } diff --git a/src/mon/HealthMonitor.h b/src/mon/HealthMonitor.h index 9697ba86bf96..c0e79d03375d 100644 --- a/src/mon/HealthMonitor.h +++ b/src/mon/HealthMonitor.h @@ -63,6 +63,10 @@ private: bool prepare_command(MonOpRequestRef op); bool prepare_health_checks(MonOpRequestRef op); + void check_for_older_version(health_check_map_t *checks); + void check_for_mon_down(health_check_map_t *checks); + void check_for_clock_skew(health_check_map_t *checks); + void check_if_msgr2_enabled(health_check_map_t *checks); bool check_leader_health(); bool check_member_health(); bool check_mutes();