From 39481462fb1c8f596fd42a4d5629ac5f62a3dbd9 Mon Sep 17 00:00:00 2001 From: Kamoltat Date: Wed, 2 Nov 2022 01:59:52 +0000 Subject: [PATCH] mon: change how we handle removed_ranks when a new monitor joins, there is a chance that it will recive a monmap that recently removed a monitor and ``removed_rank`` will have some content in it. A new monitor that joins should never remove rank in peer_tracker but rather call ``notify_clear_peer_state()`` to reset the `peer_report`. In the case when it is a monitor that has joined quorum before and is only 1 epoch behind the newest monmap provided by the probe_replied monitor. We can actually remove and adjust ranks in `peer_report` since we are sure that if there is any content in removed_ranks, then it has to be because in the next epoch we are removing a rank, since every update of an epoch we always clear the removed_ranks. There is no point in keeping the content of ``removed_ranks`` after monmap gets updated to the epoch. Therefore, clear ``removed_ranks`` every update. When there is discontinuity between monmaps for more 1 epoch or the new monitor never joined quorum before, we always reset `peer_tracker`. Moreover, beneficial for monitor log to also log which rank has been removed at the current time of the monmap. So add removed_ranks to `print_summary` and `dump` in MonMap.cc. Fixes: https://tracker.ceph.com/issues/58049 Signed-off-by: Kamoltat --- src/mon/MonMap.cc | 3 ++- src/mon/Monitor.cc | 28 +++++++++++++++++++--------- src/mon/Monitor.h | 2 +- src/mon/MonmapMonitor.cc | 4 +--- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/mon/MonMap.cc b/src/mon/MonMap.cc index d60da1b99725d..33b9aa8fa2880 100644 --- a/src/mon/MonMap.cc +++ b/src/mon/MonMap.cc @@ -368,7 +368,7 @@ void MonMap::print_summary(ostream& out) const out << p->first << "=" << p->second.public_addrs; has_printed = true; } - out << "}"; + out << "}" << " removed_ranks: {" << removed_ranks << "}"; } void MonMap::print(ostream& out) const @@ -412,6 +412,7 @@ void MonMap::dump(Formatter *f) const f->dump_stream("disallowed_leaders: ") << disallowed_leaders; f->dump_bool("stretch_mode", stretch_mode_enabled); f->dump_string("tiebreaker_mon", tiebreaker_mon); + f->dump_stream("removed_ranks: ") << removed_ranks; f->open_object_section("features"); persistent_features.dump(f, "persistent"); optional_features.dump(f, "optional"); diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 611d7dd12a9ed..e12b7f0102c79 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -1996,10 +1996,16 @@ void Monitor::handle_probe_reply(MonOpRequestRef op) !has_ever_joined)) { dout(10) << " got newer/committed monmap epoch " << newmap->get_epoch() << ", mine was " << monmap->get_epoch() << dendl; + int epoch_diff = newmap->get_epoch() - monmap->get_epoch(); delete newmap; monmap->decode(m->monmap_bl); - notify_new_monmap(false); - + dout(20) << "has_ever_joined: " << has_ever_joined << dendl; + if (epoch_diff == 1 && has_ever_joined) { + notify_new_monmap(false); + } else { + notify_new_monmap(false, false); + elector.notify_clear_peer_state(); + } bootstrap(); return; } @@ -6595,7 +6601,7 @@ void Monitor::set_mon_crush_location(const string& loc) need_set_crush_loc = true; } -void Monitor::notify_new_monmap(bool can_change_external_state) +void Monitor::notify_new_monmap(bool can_change_external_state, bool remove_rank_elector) { if (need_set_crush_loc) { auto my_info_i = monmap->mon_info.find(name); @@ -6605,12 +6611,16 @@ void Monitor::notify_new_monmap(bool can_change_external_state) } } elector.notify_strategy_maybe_changed(monmap->strategy); - dout(30) << __func__ << "we have " << monmap->removed_ranks.size() << " removed ranks" << dendl; - for (auto i = monmap->removed_ranks.rbegin(); - i != monmap->removed_ranks.rend(); ++i) { - unsigned rank = *i; - dout(10) << __func__ << "removing rank " << rank << dendl; - elector.notify_rank_removed(rank); + if (remove_rank_elector){ + dout(10) << __func__ << " we have " << monmap->ranks.size()<< " ranks" << dendl; + dout(10) << __func__ << " we have " << monmap->removed_ranks.size() << " removed ranks" << dendl; + for (auto i = monmap->removed_ranks.rbegin(); + i != monmap->removed_ranks.rend(); ++i) { + int rank = *i; + dout(10) << __func__ << " removing rank " << rank << dendl; + int new_rank = monmap->get_rank(messenger->get_myaddrs()); + elector.notify_rank_removed(rank, new_rank); + } } if (monmap->stretch_mode_enabled) { diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 60fa164a09dbb..998fe91eb6035 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -876,7 +876,7 @@ public: /** can_change_external_state if we can do things like * call elections as a result of the new map. */ - void notify_new_monmap(bool can_change_external_state=false); + void notify_new_monmap(bool can_change_external_state=false, bool remove_rank_elector=true); public: struct C_Command : public C_MonOp { diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 6c5a9ce5736a7..d162b8e9414cf 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -129,7 +129,7 @@ void MonmapMonitor::create_pending() pending_map = *mon.monmap; pending_map.epoch++; pending_map.last_changed = ceph_clock_now(); - dout(10) << __func__ << " monmap epoch " << pending_map.epoch << dendl; + pending_map.removed_ranks.clear(); } void MonmapMonitor::encode_pending(MonitorDBStore::TransactionRef t) @@ -755,8 +755,6 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) pending_map.remove(name); pending_map.disallowed_leaders.erase(name); pending_map.last_changed = ceph_clock_now(); - ss << "removing mon." << name << " at " << addrs - << ", there will be " << pending_map.size() << " monitors" ; propose = true; err = 0; -- 2.39.5