From: Kefu Chai Date: Fri, 22 Mar 2019 11:32:32 +0000 (+0800) Subject: mon/MonClient: respect priority in SRV X-Git-Tag: v15.0.0~104^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=77b45ebaaa3de2aaf90fbaf9c6e93469c3e21066;p=ceph.git mon/MonClient: respect priority in SRV before this change, we always choose the monitors with the lowest priority and ignore the ones with higher priority. but per https://www.ietf.org/rfc/rfc2782.txt > A client MUST attempt to contact the target host with the > lowest-numbered priority it can reach so, if a monitor is not reachable, we should try the ones with the lowest priority that are *reachable*. after this change, MonClient will remember the monitors that have been contacted, and will continue hunting the ones not yet tried if the last batch fails. Signed-off-by: Kefu Chai --- diff --git a/src/mon/MonClient.cc b/src/mon/MonClient.cc index 7ca17f31b2e2..ef81f5356e89 100644 --- a/src/mon/MonClient.cc +++ b/src/mon/MonClient.cc @@ -13,6 +13,10 @@ */ #include +#include +#include +#include +#include #include "common/weighted_shuffle.h" #include "include/scope_guard.h" @@ -368,8 +372,8 @@ void MonClient::handle_monmap(MMonMap *m) ldout(cct, 10) << __func__ << " " << *m << dendl; auto con_addrs = m->get_source_addrs(); string old_name = monmap.get_name(con_addrs); + const auto old_epoch = monmap.get_epoch(); - // NOTE: we're not paying attention to the epoch, here. 
auto p = m->monmapbl.cbegin(); decode(monmap, p); @@ -381,6 +385,9 @@ void MonClient::handle_monmap(MMonMap *m) monmap.print(*_dout); *_dout << dendl; + if (old_epoch != monmap.get_epoch()) { + tried.clear(); + } if (old_name.size() == 0) { ldout(cct,10) << " can't identify which mon we were connected to" << dendl; _reopen_session(); @@ -688,13 +695,34 @@ MonConnection& MonClient::_add_conn(unsigned rank, uint64_t global_id) void MonClient::_add_conns(uint64_t global_id) { - map> rank_by_priority; - for (const auto& m : monmap.mon_info) { - rank_by_priority[m.second.priority].push_back(monmap.get_rank(m.first)); - } - vector ranks; - ceph_assert(!rank_by_priority.empty()); - ranks = rank_by_priority.begin()->second; + // collect the next batch of candidates who are listed right next to the ones + // already tried + auto get_next_batch = [this]() -> vector { + multimap ranks_by_priority; + boost::copy(monmap.mon_info | boost::adaptors::filtered([this](auto& info) { + auto rank = monmap.get_rank(info.first); + return tried.count(rank) == 0; + }) | boost::adaptors::transformed([this](auto& info) { + auto rank = monmap.get_rank(info.first); + return make_pair(info.second.priority, rank); + }), std::inserter(ranks_by_priority, end(ranks_by_priority))); + if (ranks_by_priority.empty()) { + return {}; + } + // only choose the monitors with lowest priority + auto cands = boost::make_iterator_range( + ranks_by_priority.equal_range(ranks_by_priority.begin()->first)); + vector ranks; + boost::range::copy(cands | boost::adaptors::map_values, + std::back_inserter(ranks)); + return ranks; + }; + auto ranks = get_next_batch(); + if (ranks.empty()) { + tried.clear(); // start over + ranks = get_next_batch(); + } + ceph_assert(!ranks.empty()); if (ranks.size() > 1) { vector weights; for (auto i : ranks) { @@ -712,6 +740,7 @@ void MonClient::_add_conns(uint64_t global_id) } for (unsigned i = 0; i < n; i++) { _add_conn(ranks[i], global_id); + tried.insert(ranks[i]); } } diff --git 
a/src/mon/MonClient.h b/src/mon/MonClient.h index e4c018289bee..3e3f2b0214ca 100644 --- a/src/mon/MonClient.h +++ b/src/mon/MonClient.h @@ -15,6 +15,7 @@ #define CEPH_MONCLIENT_H #include +#include #include "msg/Messenger.h" @@ -242,6 +243,7 @@ private: std::unique_ptr active_con; std::map pending_cons; + std::set tried; EntityName entity_name;