From: Greg Farnum Date: Tue, 13 Apr 2021 22:53:03 +0000 (+0000) Subject: mon: MMonProbe: direct MMonJoin messages to the leader, instead of the first mon X-Git-Tag: v17.1.0~2260^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ffa7ff35b4bba48a737b5d81b5b12089dda0323f;p=ceph.git mon: MMonProbe: direct MMonJoin messages to the leader, instead of the first mon When monitors are joining a cluster, they may send an MMonJoin message to place themselves correctly in the map in either handle_probe_reply() or finish_election(). These messages must be sent to the leader -- monitors do not forward each other's messages. Unfortunately, this scenario was missed when converting the monitors to support connectivity-based elections, and they're sending these messages to quorum.begin(). Fix this by including an explicit leader in MMonProbe (that the new monitor may reference in handle_probe_reply) and using the leader value in both locations. Fixes: https://tracker.ceph.com/issues/50345 Signed-off-by: Greg Farnum --- diff --git a/src/messages/MMonProbe.h b/src/messages/MMonProbe.h index bb5012300cf..ae5fc302891 100644 --- a/src/messages/MMonProbe.h +++ b/src/messages/MMonProbe.h @@ -23,7 +23,7 @@ class MMonProbe final : public Message { public: - static constexpr int HEAD_VERSION = 7; + static constexpr int HEAD_VERSION = 8; static constexpr int COMPAT_VERSION = 5; enum { @@ -51,6 +51,7 @@ public: int32_t op = 0; std::string name; std::set<int32_t> quorum; + int leader = -1; ceph::buffer::list monmap_bl; version_t paxos_first_version = 0; version_t paxos_last_version = 0; @@ -79,6 +80,7 @@ public: out << "mon_probe(" << get_opname(op) << " " << fsid << " name " << name; if (quorum.size()) out << " quorum " << quorum; + out << " leader " << leader; if (op == OP_REPLY) { out << " paxos(" << " fc " << paxos_first_version @@ -116,6 +118,7 @@ public: encode(paxos_last_version, payload); encode(required_features, payload); encode(mon_release, payload); + 
encode(leader, payload); } void decode_payload() override { using ceph::decode; @@ -136,6 +139,11 @@ public: decode(mon_release, p); else mon_release = ceph_release_t::unknown; + if (header.version >= 8) { + decode(leader, p); + } else if (quorum.size()) { + leader = *quorum.begin(); + } } private: template<class T, typename... Args> diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 30cd877ff69..be344a617c8 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -1954,6 +1954,7 @@ void Monitor::handle_probe_probe(MonOpRequestRef op) ceph_release()); r->name = name; r->quorum = quorum; + r->leader = leader; monmap->encode(r->monmap_bl, m->get_connection()->get_features()); r->paxos_first_version = paxos->get_first_committed(); r->paxos_last_version = paxos->get_version(); @@ -2121,7 +2122,7 @@ void Monitor::handle_probe_reply(MonOpRequestRef op) send_mon_message(new MMonJoin(monmap->fsid, name, messenger->get_myaddrs(), crush_loc, need_set_crush_loc), - *m->quorum.begin()); + m->leader); } } else { if (monmap->contains(m->name)) { @@ -2396,7 +2397,7 @@ void Monitor::finish_election() << map_crush_loc <<" -> " << name << "/" << crush_loc << dendl; send_mon_message(new MMonJoin(monmap->fsid, name, messenger->get_myaddrs(), crush_loc, need_set_crush_loc), - *quorum.begin()); + leader); return; } do_stretch_mode_election_work();