git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mon: MMonProbe: direct MMonJoin messages to the leader, instead of the first mon 40839/head
author Greg Farnum <gfarnum@redhat.com>
Tue, 13 Apr 2021 22:53:03 +0000 (22:53 +0000)
committer Greg Farnum <gfarnum@redhat.com>
Tue, 13 Apr 2021 23:29:39 +0000 (23:29 +0000)
When monitors are joining a cluster, they may send an MMonJoin message to place
themselves correctly in the map in either handle_probe_reply() or
finish_election(). These messages must be sent to the leader -- monitors do not
forward each other's messages.

Unfortunately, this scenario was missed when converting the monitors to support
connectivity-based elections, and the monitors are still sending these messages
to quorum.begin() -- the lowest-ranked member of the quorum, which is no longer
necessarily the leader. Fix this by including an explicit leader in MMonProbe
(that the new monitor may reference in handle_probe_reply) and using the leader
value in both locations.

Fixes: https://tracker.ceph.com/issues/50345
Signed-off-by: Greg Farnum <gfarnum@redhat.com>
src/messages/MMonProbe.h
src/mon/Monitor.cc

index bb5012300cf932722235202db59b49d53da65672..ae5fc3028911c84b2763b9e5e61cc89b4479ad58 100644 (file)
@@ -23,7 +23,7 @@
 
 class MMonProbe final : public Message {
 public:
-  static constexpr int HEAD_VERSION = 7;
+  static constexpr int HEAD_VERSION = 8;
   static constexpr int COMPAT_VERSION = 5;
 
   enum {
@@ -51,6 +51,7 @@ public:
   int32_t op = 0;
   std::string name;
   std::set<int32_t> quorum;
+  int leader = -1;
   ceph::buffer::list monmap_bl;
   version_t paxos_first_version = 0;
   version_t paxos_last_version = 0;
@@ -79,6 +80,7 @@ public:
     out << "mon_probe(" << get_opname(op) << " " << fsid << " name " << name;
     if (quorum.size())
       out << " quorum " << quorum;
+    out << " leader " << leader;
     if (op == OP_REPLY) {
       out << " paxos("
        << " fc " << paxos_first_version
@@ -116,6 +118,7 @@ public:
     encode(paxos_last_version, payload);
     encode(required_features, payload);
     encode(mon_release, payload);
+    encode(leader, payload);
   }
   void decode_payload() override {
     using ceph::decode;
@@ -136,6 +139,11 @@ public:
       decode(mon_release, p);
     else
       mon_release = ceph_release_t::unknown;
+    if (header.version >= 8) {
+      decode(leader, p);
+    } else if (quorum.size()) {
+      leader = *quorum.begin();
+    }
   }
 private:
   template<class T, typename... Args>
index 30cd877ff698790cbaecd2131607319627d46d61..be344a617c83b36f9d553a358e68f0029b653c67 100644 (file)
@@ -1954,6 +1954,7 @@ void Monitor::handle_probe_probe(MonOpRequestRef op)
                    ceph_release());
   r->name = name;
   r->quorum = quorum;
+  r->leader = leader;
   monmap->encode(r->monmap_bl, m->get_connection()->get_features());
   r->paxos_first_version = paxos->get_first_committed();
   r->paxos_last_version = paxos->get_version();
@@ -2121,7 +2122,7 @@ void Monitor::handle_probe_reply(MonOpRequestRef op)
       send_mon_message(new MMonJoin(monmap->fsid, name,
                                    messenger->get_myaddrs(), crush_loc,
                                    need_set_crush_loc),
-                      *m->quorum.begin());
+                      m->leader);
     }
   } else {
     if (monmap->contains(m->name)) {
@@ -2396,7 +2397,7 @@ void Monitor::finish_election()
             << map_crush_loc <<" -> " << name << "/" << crush_loc << dendl;
     send_mon_message(new MMonJoin(monmap->fsid, name, messenger->get_myaddrs(),
                                  crush_loc, need_set_crush_loc),
-                    *quorum.begin());
+                    leader);
     return;
   }
   do_stretch_mode_election_work();