From 2edf4f568ddb2fdb5c702e36fec5e05450719e6b Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Tue, 23 Mar 2021 10:40:18 +0100
Subject: [PATCH] crimson/monc: discard active/pending connections when reopening

Otherwise pending_conns vector just keeps growing with redundant
connections all trying to reach the same set of monitors. When
one of the attempts finally succeeds, _finish_auth() will pick the
first connection with a matching entity_addr_t, designate it as
active and close all others. The match is very likely to be wrong
and hence the actual authenticated connection gets closed, leaving
the OSD with a bogus active_con and no monitor session.

Signed-off-by: Ilya Dryomov
---
 src/crimson/mon/MonClient.cc | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/crimson/mon/MonClient.cc b/src/crimson/mon/MonClient.cc
index 2d82aadc4fc..2628998eb4a 100644
--- a/src/crimson/mon/MonClient.cc
+++ b/src/crimson/mon/MonClient.cc
@@ -515,8 +515,6 @@ void Client::ms_handle_reset(crimson::net::ConnectionRef conn, bool /* is_replac
     return seastar::now();
   } else if (active_con && active_con->is_my_peer(conn->get_peer_addr())) {
     logger().warn("active conn reset {}", conn->get_peer_addr());
-    active_con->close();
-    active_con.reset();
     return reopen_session(-1).then([this](bool opened) {
       if (opened) {
         return on_session_opened();
@@ -929,6 +927,16 @@ static entity_addr_t choose_client_addr(
 seastar::future Client::reopen_session(int rank)
 {
   logger().info("{} to mon.{}", __func__, rank);
+  if (active_con) {
+    active_con->close();
+    active_con.reset();
+    ceph_assert(pending_conns.empty());
+  } else {
+    for (auto& pending_con : pending_conns) {
+      pending_con->close();
+    }
+    pending_conns.clear();
+  }
   vector mons;
   if (rank >= 0) {
     mons.push_back(rank);
-- 
2.39.5