git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mon/MonClient: respect priority in SRV
author Kefu Chai <kchai@redhat.com>
Fri, 22 Mar 2019 11:32:32 +0000 (19:32 +0800)
committer Kefu Chai <kchai@redhat.com>
Fri, 22 Mar 2019 14:10:26 +0000 (22:10 +0800)
before this change, we always choose the monitors with the lowest
priority and ignore the ones with higher priority. but per
https://www.ietf.org/rfc/rfc2782.txt

> A client MUST attempt to contact the target host with the
> lowest-numbered priority it can reach

so, if a monitor is not reachable, we should try the ones with the
lowest priority among those that are *reachable*.

after this change, MonClient will remember the monitors that have
already been contacted, and will continue hunting among the ones not
yet tried if the last batch fails.

Signed-off-by: Kefu Chai <kchai@redhat.com>
src/mon/MonClient.cc
src/mon/MonClient.h

index 7ca17f31b2e2ca21459e0f6b7378900c85a598f4..ef81f5356e89f7599c7fe7cbd672d6de0e864781 100644 (file)
  */
 
 #include <random>
+#include <boost/range/adaptor/map.hpp>
+#include <boost/range/adaptor/filtered.hpp>
+#include <boost/range/algorithm/copy.hpp>
+#include <boost/range/algorithm_ext/copy_n.hpp>
 #include "common/weighted_shuffle.h"
 
 #include "include/scope_guard.h"
@@ -368,8 +372,8 @@ void MonClient::handle_monmap(MMonMap *m)
   ldout(cct, 10) << __func__ << " " << *m << dendl;
   auto con_addrs = m->get_source_addrs();
   string old_name = monmap.get_name(con_addrs);
+  const auto old_epoch = monmap.get_epoch();
 
-  // NOTE: we're not paying attention to the epoch, here.
   auto p = m->monmapbl.cbegin();
   decode(monmap, p);
 
@@ -381,6 +385,9 @@ void MonClient::handle_monmap(MMonMap *m)
   monmap.print(*_dout);
   *_dout << dendl;
 
+  if (old_epoch != monmap.get_epoch()) {
+    tried.clear();
+  }
   if (old_name.size() == 0) {
     ldout(cct,10) << " can't identify which mon we were connected to" << dendl;
     _reopen_session();
@@ -688,13 +695,34 @@ MonConnection& MonClient::_add_conn(unsigned rank, uint64_t global_id)
 
 void MonClient::_add_conns(uint64_t global_id)
 {
-  map<uint16_t, vector<unsigned>> rank_by_priority;
-  for (const auto& m : monmap.mon_info) {
-    rank_by_priority[m.second.priority].push_back(monmap.get_rank(m.first));
-  }
-  vector<unsigned> ranks;
-  ceph_assert(!rank_by_priority.empty());
-  ranks = rank_by_priority.begin()->second;
+  // collect the next batch of candidates who are listed right next to the ones
+  // already tried
+  auto get_next_batch = [this]() -> vector<unsigned> {
+    multimap<uint16_t, unsigned> ranks_by_priority;
+    boost::copy(monmap.mon_info | boost::adaptors::filtered([this](auto& info) {
+                  auto rank = monmap.get_rank(info.first);
+                  return tried.count(rank) == 0;
+                }) | boost::adaptors::transformed([this](auto& info) {
+                  auto rank = monmap.get_rank(info.first);
+                  return make_pair(info.second.priority, rank);
+                }), std::inserter(ranks_by_priority, end(ranks_by_priority)));
+    if (ranks_by_priority.empty()) {
+      return {};
+    }
+    // only choose the monitors with lowest priority
+    auto cands = boost::make_iterator_range(
+      ranks_by_priority.equal_range(ranks_by_priority.begin()->first));
+    vector<unsigned> ranks;
+    boost::range::copy(cands | boost::adaptors::map_values,
+                      std::back_inserter(ranks));
+    return ranks;
+  };
+  auto ranks = get_next_batch();
+  if (ranks.empty()) {
+    tried.clear();  // start over
+    ranks = get_next_batch();
+  }
+  ceph_assert(!ranks.empty());
   if (ranks.size() > 1) {
     vector<uint16_t> weights;
     for (auto i : ranks) {
@@ -712,6 +740,7 @@ void MonClient::_add_conns(uint64_t global_id)
   }
   for (unsigned i = 0; i < n; i++) {
     _add_conn(ranks[i], global_id);
+    tried.insert(ranks[i]);
   }
 }
 
index e4c018289bee5334e87ac27a569cb592c8be8a6b..3e3f2b0214caa6740d20e2cca5883c5f7e7de923 100644 (file)
@@ -15,6 +15,7 @@
 #define CEPH_MONCLIENT_H
 
 #include <memory>
+#include <set>
 
 #include "msg/Messenger.h"
 
@@ -242,6 +243,7 @@ private:
 
   std::unique_ptr<MonConnection> active_con;
   std::map<entity_addrvec_t, MonConnection> pending_cons;
+  std::set<unsigned> tried;
 
   EntityName entity_name;