From decee90d40300d92f83999d3068bde2587ea586f Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 18 Mar 2021 11:45:48 -0500 Subject: [PATCH] mon/MgrStatMonitor: ignore MMgrReport from non-active mgr If it's not the active mgr, we should ignore it. Since the mgr instance is best identified by the gid, add that to the message. (We can't use the source_addrs for the message since that is the MgrStandby monc addr, not the active mgr addrs in the MgrMap.) This fixes a problem where a just-demoted mgr report gets processed and a new mgr gets a ServiceMap with an epoch >= its pending map. (At least, that is my theory!) Fixes: https://tracker.ceph.com/issues/48022 Signed-off-by: Sage Weil (cherry picked from commit 4d447092c3542bf57dfb4942db766adf2923c069) --- src/messages/MMonMgrReport.h | 10 ++++++++-- src/mgr/DaemonServer.cc | 1 + src/mon/MgrStatMonitor.cc | 8 ++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/messages/MMonMgrReport.h b/src/messages/MMonMgrReport.h index 0ca37a8ef9dd4..f5a68c7d3989c 100644 --- a/src/messages/MMonMgrReport.h +++ b/src/messages/MMonMgrReport.h @@ -23,7 +23,7 @@ class MMonMgrReport final : public PaxosServiceMessage { private: - static constexpr int HEAD_VERSION = 2; + static constexpr int HEAD_VERSION = 3; static constexpr int COMPAT_VERSION = 1; public: @@ -31,6 +31,7 @@ public: health_check_map_t health_checks; ceph::buffer::list service_map_bl; // encoded ServiceMap std::map<std::string,ProgressEvent> progress_events; + uint64_t gid = 0; MMonMgrReport() : PaxosServiceMessage{MSG_MON_MGR_REPORT, 0, HEAD_VERSION, COMPAT_VERSION} @@ -42,7 +43,8 @@ public: std::string_view get_type_name() const override { return "monmgrreport"; } void print(std::ostream& out) const override { - out << get_type_name() << "(" << health_checks.checks.size() << " checks, " + out << get_type_name() << "(gid " << gid + << ", " << health_checks.checks.size() << " checks, " << progress_events.size() << " progress events)"; } @@ -52,6 +54,7 @@ public: 
encode(health_checks, payload); encode(service_map_bl, payload); encode(progress_events, payload); + encode(gid, payload); if (!HAVE_FEATURE(features, SERVER_NAUTILUS) || !HAVE_FEATURE(features, SERVER_MIMIC)) { @@ -79,6 +82,9 @@ public: if (header.version >= 2) { decode(progress_events, p); } + if (header.version >= 3) { + decode(gid, p); + } } private: template<class T, typename... Args> diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 0d3b07e285c9d..fadc7efb1e3ee 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -2532,6 +2532,7 @@ void DaemonServer::send_report() } auto m = ceph::make_message<MMonMgrReport>(); + m->gid = monc->get_global_id(); py_modules.get_health_checks(&m->health_checks); py_modules.get_progress_events(&m->progress_events); diff --git a/src/mon/MgrStatMonitor.cc b/src/mon/MgrStatMonitor.cc index 40a322d76989e..4996c9b38bf80 100644 --- a/src/mon/MgrStatMonitor.cc +++ b/src/mon/MgrStatMonitor.cc @@ -3,6 +3,7 @@ #include "MgrStatMonitor.h" #include "mon/OSDMonitor.h" +#include "mon/MgrMonitor.h" #include "mon/PGMap.h" #include "messages/MGetPoolStats.h" #include "messages/MGetPoolStatsReply.h" @@ -211,7 +212,14 @@ bool MgrStatMonitor::prepare_update(MonOpRequestRef op) bool MgrStatMonitor::preprocess_report(MonOpRequestRef op) { + auto m = op->get_req<MMonMgrReport>(); mon.no_reply(op); + if (m->gid && + m->gid != mon.mgrmon()->get_map().get_active_gid()) { + dout(10) << "ignoring report from non-active mgr " << m->gid + << dendl; + return true; + } return false; } -- 2.39.5