From 2f55aa5e33b2fe242ebb9702ba9ff6f8d5cef96c Mon Sep 17 00:00:00 2001
From: Jason Dillaman
Date: Thu, 23 Jun 2016 20:28:33 -0400
Subject: [PATCH] rbd-mirror: gracefully restart pool replayer when blacklisted

Fixes: http://tracker.ceph.com/issues/16349
Signed-off-by: Jason Dillaman
---
Review notes (this region, between the "---" separator and the first
"diff --git" line, is not part of the commit message or of the change):

* What the patch does, as far as the hunks below show:
  - ImageReplayer::is_blacklisted() takes m_lock and reports whether
    m_last_r == -EBLACKLISTED; m_lock becomes "mutable" so the const
    accessor can lock it.
  - PoolWatcher records m_blacklisted when refresh_images() sees
    r == -EBLACKLISTED. PoolWatcher::is_blacklisted() only asserts that
    m_lock is held, so the caller must already own that lock.
  - Replayer::run() checks the pool watcher while holding m_lock; when
    blacklisted it sets m_blacklisted and m_stopping and breaks out
    before the 30 second wait. Replayer::start_image_replayer() sets the
    same two fields when a stopped image replayer reports blacklisting.
  - Mirror::update_replayers() erases a replayer whose is_blacklisted()
    is true, in addition to the existing "peer no longer configured"
    case. Presumably the replayer is re-created later in that function,
    outside the lines shown here -- confirm against the full source.

* NOTE(review): this copy of the patch had lost its line breaks. The line
  structure was rebuilt from the hunk header counts. Indentation and the
  exact position of whitespace-only context lines are inferred, so apply
  with whitespace tolerance or regenerate the patch from the commit id in
  the first line.

* NOTE(review): angle-bracketed text appears to have been stripped from
  this copy and was NOT restored here: the From/Signed-off-by addresses,
  the template arguments in the start_image_replayer hunk heading, and
  the "std::shared_ptr m_image_deleter;" and "std::vector m_args;"
  context lines in Replayer.h. Context lines must match the target file
  to apply -- verify against the tree.

 src/tools/rbd_mirror/ImageReplayer.h |  7 ++++++-
 src/tools/rbd_mirror/Mirror.cc       |  8 ++++++--
 src/tools/rbd_mirror/PoolWatcher.cc  |  8 ++++++++
 src/tools/rbd_mirror/PoolWatcher.h   |  3 +++
 src/tools/rbd_mirror/Replayer.cc     | 21 +++++++++++++++++++--
 src/tools/rbd_mirror/Replayer.h      |  5 ++++-
 6 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h
index 0148ad00b3b8a..17196a300425f 100644
--- a/src/tools/rbd_mirror/ImageReplayer.h
+++ b/src/tools/rbd_mirror/ImageReplayer.h
@@ -83,6 +83,11 @@ public:
   std::string get_name() { Mutex::Locker l(m_lock); return m_name; };
   void set_state_description(int r, const std::string &desc);
 
+  inline bool is_blacklisted() const {
+    Mutex::Locker locker(m_lock);
+    return (m_last_r == -EBLACKLISTED);
+  }
+
   inline int64_t get_local_pool_id() const {
     return m_local_pool_id;
   }
@@ -218,7 +223,7 @@ private:
   std::string m_remote_image_id, m_local_image_id, m_global_image_id;
   std::string m_local_image_name;
   std::string m_name;
-  Mutex m_lock;
+  mutable Mutex m_lock;
   State m_state = STATE_STOPPED;
   int m_last_r = 0;
   std::string m_state_desc;
diff --git a/src/tools/rbd_mirror/Mirror.cc b/src/tools/rbd_mirror/Mirror.cc
index 4e2d9ce7f84d3..7add4d09b34ba 100644
--- a/src/tools/rbd_mirror/Mirror.cc
+++ b/src/tools/rbd_mirror/Mirror.cc
@@ -358,8 +358,12 @@ void Mirror::update_replayers(const PoolPeers &pool_peers)
   for (auto it = m_replayers.begin(); it != m_replayers.end();) {
     auto &peer = it->first.second;
     auto pool_peer_it = pool_peers.find(it->first.first);
-    if (pool_peer_it == pool_peers.end() ||
-        pool_peer_it->second.find(peer) == pool_peer_it->second.end()) {
+    if (it->second->is_blacklisted()) {
+      derr << "removing blacklisted replayer for " << peer << dendl;
+      // TODO: make async
+      it = m_replayers.erase(it);
+    } else if (pool_peer_it == pool_peers.end() ||
+               pool_peer_it->second.find(peer) == pool_peer_it->second.end()) {
       dout(20) << "removing replayer for " << peer << dendl;
       // TODO: make async
       it = m_replayers.erase(it);
diff --git a/src/tools/rbd_mirror/PoolWatcher.cc b/src/tools/rbd_mirror/PoolWatcher.cc
index 21a2633775fe3..3e431f22ee11f 100644
--- a/src/tools/rbd_mirror/PoolWatcher.cc
+++ b/src/tools/rbd_mirror/PoolWatcher.cc
@@ -48,6 +48,11 @@ PoolWatcher::~PoolWatcher()
   m_timer.shutdown();
 }
 
+bool PoolWatcher::is_blacklisted() const {
+  assert(m_lock.is_locked());
+  return m_blacklisted;
+}
+
 const PoolWatcher::ImageIds& PoolWatcher::get_images() const
 {
   assert(m_lock.is_locked());
@@ -62,6 +67,9 @@ void PoolWatcher::refresh_images(bool reschedule)
   Mutex::Locker l(m_lock);
   if (r >= 0) {
     m_images = std::move(image_ids);
+  } else if (r == -EBLACKLISTED) {
+    derr << "blacklisted during image refresh" << dendl;
+    m_blacklisted = true;
   }
 
   if (!m_stopping && reschedule) {
diff --git a/src/tools/rbd_mirror/PoolWatcher.h b/src/tools/rbd_mirror/PoolWatcher.h
index d29a6309e1ad9..4aeca3dc23cdc 100644
--- a/src/tools/rbd_mirror/PoolWatcher.h
+++ b/src/tools/rbd_mirror/PoolWatcher.h
@@ -50,6 +50,8 @@ public:
   PoolWatcher(const PoolWatcher&) = delete;
   PoolWatcher& operator=(const PoolWatcher&) = delete;
 
+  bool is_blacklisted() const;
+
   const ImageIds& get_images() const;
   void refresh_images(bool reschedule=true);
 
@@ -59,6 +61,7 @@ private:
   Cond &m_refresh_cond;
 
   bool m_stopping = false;
+  bool m_blacklisted = false;
   SafeTimer m_timer;
   double m_interval;
 
diff --git a/src/tools/rbd_mirror/Replayer.cc b/src/tools/rbd_mirror/Replayer.cc
index acb9910138c14..9cefd78edeaa0 100644
--- a/src/tools/rbd_mirror/Replayer.cc
+++ b/src/tools/rbd_mirror/Replayer.cc
@@ -259,6 +259,11 @@ Replayer::~Replayer()
   }
 }
 
+bool Replayer::is_blacklisted() const {
+  Mutex::Locker locker(m_lock);
+  return m_blacklisted;
+}
+
 int Replayer::init()
 {
   dout(20) << "replaying for " << m_peer << dendl;
@@ -440,10 +445,17 @@ void Replayer::run()
                                                 m_asok_hook_name, this);
     }
 
-    Mutex::Locker l(m_lock);
-    if (!m_manual_stop) {
+    Mutex::Locker locker(m_lock);
+    if (m_pool_watcher->is_blacklisted()) {
+      m_blacklisted = true;
+      m_stopping.set(1);
+    } else if (!m_manual_stop) {
       set_sources(m_pool_watcher->get_images());
     }
+
+    if (m_blacklisted) {
+      break;
+    }
     m_cond.WaitInterval(g_ceph_context, m_lock, seconds(30));
   }
 
@@ -698,6 +710,11 @@ void Replayer::start_image_replayer(unique_ptr > &image_replayer
 
   if (!image_replayer->is_stopped()) {
     return;
+  } else if (image_replayer->is_blacklisted()) {
+    derr << "blacklisted detected during image replay" << dendl;
+    m_blacklisted = true;
+    m_stopping.set(1);
+    return;
   }
 
   if (image_name) {
diff --git a/src/tools/rbd_mirror/Replayer.h b/src/tools/rbd_mirror/Replayer.h
index 5bbed3e347a44..c5b975e26bba4 100644
--- a/src/tools/rbd_mirror/Replayer.h
+++ b/src/tools/rbd_mirror/Replayer.h
@@ -41,6 +41,8 @@ public:
   Replayer(const Replayer&) = delete;
   Replayer& operator=(const Replayer&) = delete;
 
+  bool is_blacklisted() const;
+
   int init();
   void run();
 
@@ -71,10 +73,11 @@ private:
   Threads *m_threads;
   std::shared_ptr m_image_deleter;
   ImageSyncThrottlerRef<> m_image_sync_throttler;
-  Mutex m_lock;
+  mutable Mutex m_lock;
   Cond m_cond;
   atomic_t m_stopping;
   bool m_manual_stop = false;
+  bool m_blacklisted = false;
 
   peer_t m_peer;
   std::vector m_args;
-- 
2.39.5