From f937604a75e0883f1b5953d9897f534c9724eaf9 Mon Sep 17 00:00:00 2001
From: Kamoltat Sirivadhna
Date: Thu, 8 Aug 2024 20:18:27 +0000
Subject: [PATCH] src/mon/HealthMonitor.cc: Add NONEXISTENT_MON_CRUSH_LOC_STRETCH_MODE

In stretch mode, warn the user when we encounter a MON that has a
nonexistent CRUSH location, with the tiebreaker MON being the only
exception to this.

Fixes: https://tracker.ceph.com/issues/63861

Signed-off-by: Kamoltat Sirivadhna
(cherry picked from commit 9b1e75fa9888b9d13300bbbb6ee5d5c47d7ab69f)
---
 src/mon/HealthMonitor.cc | 33 +++++++++++++++++++++++++++++++++
 src/mon/HealthMonitor.h  |  1 +
 2 files changed, 34 insertions(+)

diff --git a/src/mon/HealthMonitor.cc b/src/mon/HealthMonitor.cc
index 45563f87d3d04..2a78c46af829e 100644
--- a/src/mon/HealthMonitor.cc
+++ b/src/mon/HealthMonitor.cc
@@ -25,6 +25,7 @@
 
 #include "mon/Monitor.h"
 #include "mon/HealthMonitor.h"
+#include "mon/OSDMonitor.h"
 
 #include "messages/MMonHealthChecks.h"
 
@@ -740,6 +741,8 @@ bool HealthMonitor::check_leader_health()
   if (g_conf().get_val<bool>("mon_warn_on_msgr2_not_enabled")) {
     check_if_msgr2_enabled(&next);
   }
+  // STRETCH MODE
+  check_mon_crush_loc_stretch_mode(&next);
 
   if (next != leader_checks) {
     changed = true;
@@ -885,3 +888,33 @@ void HealthMonitor::check_if_msgr2_enabled(health_check_map_t *checks)
     }
   }
 }
+
+void HealthMonitor::check_mon_crush_loc_stretch_mode(health_check_map_t *checks)
+{
+  // Check if the CRUSH location exists for all MONs
+  if (!mon.monmap->stretch_mode_enabled){
+    return;
+  }
+  list<string> details;
+  for (auto& i : mon.monmap->mon_info) {
+    // Skip the tiebreaker monitor
+    if (i.second.name == mon.monmap->tiebreaker_mon) {
+      continue;
+    }
+    for (auto& pair : i.second.crush_loc){
+      if (!mon.osdmon()->osdmap.crush->name_exists(pair.second)) {
+        ostringstream ds;
+        ds << "CRUSH location " << pair.second << " does not exist";
+        details.push_back(ds.str());
+      }
+    }
+  }
+  // WARN in ceph -s if the CRUSH location does not exist
+  if (!details.empty()) {
+    ostringstream ss;
+    ss << details.size() << " monitor(s) have nonexistent CRUSH location";
+    auto &d = checks->add("NONEXISTENT_MON_CRUSH_LOC_STRETCH_MODE", HEALTH_WARN, ss.str(),
+                          details.size());
+    d.detail.swap(details);
+  }
+}
diff --git a/src/mon/HealthMonitor.h b/src/mon/HealthMonitor.h
index c0e79d03375d5..2182b6bbfce58 100644
--- a/src/mon/HealthMonitor.h
+++ b/src/mon/HealthMonitor.h
@@ -66,6 +66,7 @@ private:
   void check_for_older_version(health_check_map_t *checks);
   void check_for_mon_down(health_check_map_t *checks);
   void check_for_clock_skew(health_check_map_t *checks);
+  void check_mon_crush_loc_stretch_mode(health_check_map_t *checks);
   void check_if_msgr2_enabled(health_check_map_t *checks);
   bool check_leader_health();
   bool check_member_health();
-- 
2.39.5