From 90f44d53f39d0acb127e2c1c3533d362cba2d490 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 24 Apr 2024 14:42:38 +0200 Subject: [PATCH] rbd-mirror: shut down and remove pool replayer if peer changes The code in Mirror::update_pool_replayers() responsible for shutting down and removing stale pool replayers kicks in only in case the peer is removed, but not if the peer changes. However, the code responsible for (re)starting pool replayers in the same method _does_ create and start a new pool replayer in that case. As a result, we can end up with nearly identical pool replayers running at the same time, hogging OS resources and confusing instance_id tracking logic and mirror status reporting at the very least. The root cause is that PeerSpec is matched normally (i.e. based on all fields) when it comes to m_pool_replayers, and based only on UUID when it comes to pool_peers. This was missed in commit 5463e1a1e1b7 ("rbd-mirror: extract optional peer mon_host/key values from MON"). Signed-off-by: Ilya Dryomov (cherry picked from commit def93cb0ad93e3e27d7c4534f68c95ef922ab750) --- src/tools/rbd_mirror/Mirror.cc | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc index 4cf0524f5fa..1328bfffe06 100644 --- a/src/tools/rbd_mirror/Mirror.cc +++ b/src/tools/rbd_mirror/Mirror.cc @@ -704,15 +704,23 @@ void Mirror::update_pool_replayers(const PoolPeers &pool_peers, for (auto it = m_pool_replayers.begin(); it != m_pool_replayers.end();) { auto &peer = it->first.second; auto pool_peer_it = pool_peers.find(it->first.first); - if (pool_peer_it == pool_peers.end() || - pool_peer_it->second.find(peer) == pool_peer_it->second.end()) { - dout(20) << "removing pool replayer for " << peer << dendl; - // TODO: make async - it->second->shut_down(); - it = m_pool_replayers.erase(it); - } else { - ++it; + if (pool_peer_it != pool_peers.end()) { + // look up this pool replayer's peer by UUID + auto peer_it = pool_peer_it->second.find(peer); + if (peer_it != pool_peer_it->second.end()) { + // keep this pool replayer only if its peer is a full match + // otherwise, the pool replayer should be recreated since the + // peer was updated + if (*peer_it == peer) { + ++it; + continue; + } + } } + dout(20) << "removing pool replayer for " << peer << dendl; + // TODO: make async + it->second->shut_down(); + it = m_pool_replayers.erase(it); } for (auto &kv : pool_peers) { -- 2.39.5