From: Sage Weil Date: Wed, 23 May 2018 17:52:06 +0000 (-0500) Subject: mon/MonMap: separate rank ordering from entity_addr_t X-Git-Tag: v14.0.1~1257^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=50381d885ff1b10e891e187170ed4f7f24167b08;p=ceph.git mon/MonMap: separate rank ordering from entity_addr_t We currently define the mon rank ordering based on the sort order of the mon addresses. Change that so that the rank order is explicitly encoded in the ranks field of the MonMap. If we load a legacy MonMap, calculate the legacy ordering. If the monmap does not require nautilus features yet, force the legacy ordering. Once all mons are >= nautilus, we can reorder ranks. Note that the daemons and clients (MonClients) may see a different rank ordering. That should be okay. Signed-off-by: Sage Weil --- diff --git a/src/mon/MonMap.cc b/src/mon/MonMap.cc index 6e87f4df61435..ed0f9c7dc4b24 100644 --- a/src/mon/MonMap.cc +++ b/src/mon/MonMap.cc @@ -45,44 +45,6 @@ void mon_info_t::print(ostream& out) const << " priority " << priority; } -void MonMap::sanitize_mons(map<string,entity_addr_t>& o) -{ - // if mon_info is populated, it means we decoded a map encoded - // by someone who understands the new format (i.e., is able to - // encode 'mon_info'). This means they must also have provided - // a properly populated 'mon_addr' (which we have dropped with - // this patch), 'o' being the contents of said map. In this - // case, 'o' must have the same number of entries as 'mon_info'. - // - // Also, for each entry in 'o', there has to be a matching - // 'mon_info' entry, properly populated with a name and a matching - // 'public_addr'. - // - // OTOH, if 'mon_info' is not populated, it means the one that - // originally encoded the map does not know the new format, and - // 'o' will be our only source of info about the monitors in the - // cluster -- and we will use it to populate our 'mon_info' map. 
- - bool has_mon_info = false; - if (mon_info.size() > 0) { - assert(o.size() == mon_info.size()); - has_mon_info = true; - } - - for (auto p : o) { - if (has_mon_info) { - // make sure the info we have is accurate - assert(mon_info.count(p.first)); - assert(mon_info[p.first].name == p.first); - assert(mon_info[p.first].public_addr == p.second); - } else { - mon_info_t &m = mon_info[p.first]; - m.name = p.first; - m.public_addr = p.second; - } - } -} - namespace { struct rank_cmp { bool operator()(const mon_info_t &a, const mon_info_t &b) const { @@ -93,10 +55,9 @@ namespace { }; } -void MonMap::calc_ranks() { - +void MonMap::calc_legacy_ranks() +{ ranks.resize(mon_info.size()); - addr_mons.clear(); // Used to order entries according to public_addr, because that's // how the ranks are expected to be ordered by. We may expand this @@ -119,10 +80,6 @@ void MonMap::calc_ranks() { ++p) { mon_info_t &m = p->second; tmp.insert(m); - - // populate addr_mons - assert(addr_mons.count(m.public_addr) == 0); - addr_mons[m.public_addr] = m.name; } // map the set to the actual ranks etc @@ -151,6 +108,13 @@ void MonMap::encode(bufferlist& blist, uint64_t con_features) const return; } + map legacy_mon_addr; + for (map::const_iterator p = mon_info.begin(); + p != mon_info.end(); + ++p) { + legacy_mon_addr[p->first] = p->second.public_addr; + } + if ((con_features & CEPH_FEATURE_MONENC) == 0) { /* we keep the mon_addr map when encoding to ensure compatibility * with clients and other monitors that do not yet support the 'mons' @@ -159,13 +123,6 @@ void MonMap::encode(bufferlist& blist, uint64_t con_features) const * address -- which is obtained from the public address of each entry * in the 'mons' map. 
*/ - map<string,entity_addr_t> legacy_mon_addr; - for (map<string,mon_info_t>::const_iterator p = mon_info.begin(); - p != mon_info.end(); - ++p) { - legacy_mon_addr[p->first] = p->second.public_addr; - } - using ceph::encode; __u16 v = 2; encode(v, blist); @@ -174,24 +131,39 @@ void MonMap::encode(bufferlist& blist, uint64_t con_features) const encode(legacy_mon_addr, blist, con_features); encode(last_changed, blist); encode(created, blist); + return; } - ENCODE_START(5, 3, blist); + if (!HAVE_FEATURE(con_features, SERVER_NAUTILUS)) { + ENCODE_START(5, 3, blist); + encode_raw(fsid, blist); + encode(epoch, blist); + encode(legacy_mon_addr, blist, con_features); + encode(last_changed, blist); + encode(created, blist); + encode(persistent_features, blist); + encode(optional_features, blist); + encode(mon_info, blist, con_features); + ENCODE_FINISH(blist); + return; + } + + ENCODE_START(6, 6, blist); encode_raw(fsid, blist); encode(epoch, blist); - encode(mon_addr, blist, con_features); encode(last_changed, blist); encode(created, blist); encode(persistent_features, blist); encode(optional_features, blist); encode(mon_info, blist, con_features); + encode(ranks, blist); ENCODE_FINISH(blist); } void MonMap::decode(bufferlist::const_iterator& p) { map<string,entity_addr_t> mon_addr; - DECODE_START_LEGACY_COMPAT_LEN_16(5, 3, 3, p); + DECODE_START_LEGACY_COMPAT_LEN_16(6, 3, 3, p); decode_raw(fsid, p); decode(epoch, p); if (struct_v == 1) { @@ -204,7 +176,7 @@ void MonMap::decode(bufferlist::const_iterator& p) string name = n; mon_addr[name] = mon_inst[i].addr; } - } else { + } else if (struct_v < 6) { decode(mon_addr, p); } decode(last_changed, p); @@ -213,17 +185,23 @@ void MonMap::decode(bufferlist::const_iterator& p) decode(persistent_features, p); decode(optional_features, p); } - if (struct_v >= 5) { + if (struct_v < 5) { + // generate mon_info from legacy mon_addr + for (auto& p : mon_addr) { + mon_info_t &m = mon_info[p.first]; + m.name = p.first; + m.public_addr = p.second; + } + } else { decode(mon_info, p); + } + if 
(struct_v < 6) { + calc_legacy_ranks(); } else { - // we may be decoding to an existing monmap; if we do not - // clear the mon_info map now, we will likely incur in problems - // later on MonMap::sanitize_mons() - mon_info.clear(); + decode(ranks, p); } + calc_addr_mons(); DECODE_FINISH(p); - sanitize_mons(mon_addr); - calc_ranks(); } void MonMap::generate_test_instances(list<MonMap*>& o) diff --git a/src/mon/MonMap.h b/src/mon/MonMap.h index 4d08608670380..5d2e5def38574 100644 --- a/src/mon/MonMap.h +++ b/src/mon/MonMap.h @@ -113,8 +113,14 @@ class MonMap { } public: - void sanitize_mons(map<string,entity_addr_t>& o); - void calc_ranks(); + void calc_legacy_ranks(); + void calc_addr_mons() { + // populate addr_mons + addr_mons.clear(); + for (auto& p : mon_info) { + addr_mons[p.second.public_addr] = p.first; + } + } MonMap() : epoch(0) { @@ -147,11 +153,18 @@ public: * * @param m monitor info of the new monitor */ - void add(mon_info_t &&m) { + void add(const mon_info_t& m) { assert(mon_info.count(m.name) == 0); assert(addr_mons.count(m.public_addr) == 0); - mon_info[m.name] = std::move(m); - calc_ranks(); + mon_info[m.name] = m; + if (get_required_features().contains_all( + ceph::features::mon::FEATURE_NAUTILUS)) { + ranks.push_back(m.name); + assert(ranks.size() == mon_info.size()); + } else { + calc_legacy_ranks(); + } + calc_addr_mons(); } /** @@ -173,7 +186,14 @@ public: assert(mon_info.count(name)); mon_info.erase(name); assert(mon_info.count(name) == 0); - calc_ranks(); + if (get_required_features().contains_all( + ceph::features::mon::FEATURE_NAUTILUS)) { + ranks.erase(std::find(ranks.begin(), ranks.end(), name)); + assert(ranks.size() == mon_info.size()); + } else { + calc_legacy_ranks(); + } + calc_addr_mons(); } /** @@ -188,7 +208,14 @@ public: mon_info[newname] = mon_info[oldname]; mon_info.erase(oldname); mon_info[newname].name = newname; - calc_ranks(); + if (get_required_features().contains_all( + ceph::features::mon::FEATURE_NAUTILUS)) { + *std::find(ranks.begin(), ranks.end(), 
oldname) = newname; + assert(ranks.size() == mon_info.size()); + } else { + calc_legacy_ranks(); + } + calc_addr_mons(); } bool contains(const string& name) const { @@ -261,7 +288,6 @@ public: void set_addr(const string& n, const entity_addr_t& a) { assert(mon_info.count(n)); mon_info[n].public_addr = a; - calc_ranks(); } entity_inst_t get_inst(const string& n) { assert(mon_info.count(n));