From 8d53b058ff5c76615cd375d1cc3ac7e2b6c0bba1 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 18 Mar 2021 11:45:48 -0500 Subject: [PATCH] mon/MgrStatMonitor: ignore MMgrReport from non-active mgr If it's not the active mgr, we should ignore it. Since the mgr instance is best identified by the gid, add that to the message. (We can't use the source_addrs for the message since that is the MgrStandby monc addr, not the active mgr addrs in the MgrMap.) This fixes a problem where a just-demoted mgr report gets processed and a new mgr gets a ServiceMap with an epoch >= its pending map. (At least, that is my theory!) Fixes: https://tracker.ceph.com/issues/48022 Signed-off-by: Sage Weil (cherry picked from commit 4d447092c3542bf57dfb4942db766adf2923c069) Conflicts: src/messages/MMonMgrReport.h src/mon/MgrStatMonitor.cc --- src/messages/MMonMgrReport.h | 12 +++++++++--- src/mgr/DaemonServer.cc | 1 + src/mon/MgrStatMonitor.cc | 8 ++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/messages/MMonMgrReport.h b/src/messages/MMonMgrReport.h index f695b78c7af..15ca2ad1727 100644 --- a/src/messages/MMonMgrReport.h +++ b/src/messages/MMonMgrReport.h @@ -23,7 +23,7 @@ class MMonMgrReport : public PaxosServiceMessage { private: - static constexpr int HEAD_VERSION = 2; + static constexpr int HEAD_VERSION = 3; static constexpr int COMPAT_VERSION = 1; public: @@ -31,6 +31,7 @@ public: health_check_map_t health_checks; bufferlist service_map_bl; // encoded ServiceMap std::map progress_events; + uint64_t gid = 0; MMonMgrReport() : PaxosServiceMessage{MSG_MON_MGR_REPORT, 0, HEAD_VERSION, COMPAT_VERSION} @@ -41,8 +42,9 @@ private: public: std::string_view get_type_name() const override { return "monmgrreport"; } - void print(ostream& out) const override { - out << get_type_name() << "(" << health_checks.checks.size() << " checks, " + void print(std::ostream& out) const override { + out << get_type_name() << "(gid " << gid + << ", " << health_checks.checks.size() << " checks, " << progress_events.size() << " progress events)"; } @@ -52,6 +54,7 @@ public: encode(health_checks, payload); encode(service_map_bl, payload); encode(progress_events, payload); + encode(gid, payload); if (!HAVE_FEATURE(features, SERVER_NAUTILUS) || !HAVE_FEATURE(features, SERVER_MIMIC)) { @@ -78,6 +81,9 @@ public: if (header.version >= 2) { decode(progress_events, p); } + if (header.version >= 3) { + decode(gid, p); + } } private: template diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 8f02c121164..9b5af411d39 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -2405,6 +2405,7 @@ void DaemonServer::send_report() } auto m = ceph::make_message(); + m->gid = monc->get_global_id(); py_modules.get_health_checks(&m->health_checks); py_modules.get_progress_events(&m->progress_events); diff --git a/src/mon/MgrStatMonitor.cc b/src/mon/MgrStatMonitor.cc index 854bc0e090a..b27bf5d680a 100644 --- a/src/mon/MgrStatMonitor.cc +++ b/src/mon/MgrStatMonitor.cc @@ -3,6 +3,7 @@ #include "MgrStatMonitor.h" #include "mon/OSDMonitor.h" +#include "mon/MgrMonitor.h" #include "mon/PGMap.h" #include "messages/MGetPoolStats.h" #include "messages/MGetPoolStatsReply.h" @@ -184,7 +185,14 @@ bool MgrStatMonitor::prepare_update(MonOpRequestRef op) bool MgrStatMonitor::preprocess_report(MonOpRequestRef op) { + auto m = op->get_req(); mon->no_reply(op); + if (m->gid && + m->gid != mon->mgrmon()->get_map().get_active_gid()) { + dout(10) << "ignoring report from non-active mgr " << m->gid + << dendl; + return true; + } return false; } -- 2.47.3