From 938d3a69624adc42274b819798b6545efce06d2b Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Fri, 14 Jul 2017 13:14:25 -0400 Subject: [PATCH] rbd-mirror: prevent image health status from ping-ponging during re-attempt Signed-off-by: Jason Dillaman --- .../test_mock_BootstrapRequest.cc | 1 - .../rbd_mirror/test_mock_ImageReplayer.cc | 10 ++++ src/tools/rbd_mirror/ImageReplayer.cc | 49 +++++++++++++++++-- src/tools/rbd_mirror/ImageReplayer.h | 36 ++++++++------ .../image_replayer/BootstrapRequest.cc | 21 +++++--- .../image_replayer/BootstrapRequest.h | 4 +- src/tools/rbd_mirror/image_replayer/Types.h | 21 ++++++++ 7 files changed, 114 insertions(+), 28 deletions(-) create mode 100644 src/tools/rbd_mirror/image_replayer/Types.h diff --git a/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc b/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc index 19580620eccac..c7b995387e1d3 100644 --- a/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc +++ b/src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc @@ -245,7 +245,6 @@ OpenLocalImageRequest* // template definitions #include "tools/rbd_mirror/image_replayer/BootstrapRequest.cc" -template class rbd::mirror::image_replayer::BootstrapRequest; namespace rbd { namespace mirror { diff --git a/src/test/rbd_mirror/test_mock_ImageReplayer.cc b/src/test/rbd_mirror/test_mock_ImageReplayer.cc index 7cff043ba7ad3..aa514db923abf 100644 --- a/src/test/rbd_mirror/test_mock_ImageReplayer.cc +++ b/src/test/rbd_mirror/test_mock_ImageReplayer.cc @@ -152,6 +152,10 @@ struct BootstrapRequest { void get() { } + inline bool is_syncing() const { + return false; + } + MOCK_METHOD0(send, void()); MOCK_METHOD0(cancel, void()); }; @@ -500,6 +504,8 @@ TEST_F(TestMockImageReplayer, StartStop) { C_SaferCond start_ctx; m_image_replayer->start(&start_ctx); ASSERT_EQ(0, start_ctx.wait()); + ASSERT_EQ(image_replayer::HEALTH_STATE_OK, + m_image_replayer->get_health_state()); // STOP @@ -517,6 +523,8 @@ TEST_F(TestMockImageReplayer, StartStop) { C_SaferCond stop_ctx; m_image_replayer->stop(&stop_ctx); ASSERT_EQ(0, stop_ctx.wait()); + ASSERT_EQ(image_replayer::HEALTH_STATE_OK, + m_image_replayer->get_health_state()); } TEST_F(TestMockImageReplayer, LocalImagePrimary) { @@ -663,6 +671,8 @@ TEST_F(TestMockImageReplayer, StartExternalReplayError) { C_SaferCond start_ctx; m_image_replayer->start(&start_ctx); ASSERT_EQ(-EINVAL, start_ctx.wait()); + ASSERT_EQ(image_replayer::HEALTH_STATE_ERROR, + m_image_replayer->get_health_state()); } TEST_F(TestMockImageReplayer, StopError) { diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index fb3cb89a83ac1..37bc528cefe35 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -326,6 +326,19 @@ ImageReplayer::~ImageReplayer() delete m_asok_hook; } +template +image_replayer::HealthState ImageReplayer::get_health_state() const { + Mutex::Locker locker(m_lock); + + if (!m_mirror_image_status_state) { + return image_replayer::HEALTH_STATE_OK; + } else if (*m_mirror_image_status_state == + cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING) { + return image_replayer::HEALTH_STATE_WARNING; + } + return image_replayer::HEALTH_STATE_ERROR; +} + template void ImageReplayer::add_remote_image(const std::string &mirror_uuid, const std::string &image_id, @@ -1302,15 +1315,30 @@ void ImageReplayer::send_mirror_status_update(const OptionalState &opt_state) State state; std::string state_desc; int last_r; - bool bootstrapping; bool stopping_replay; + + OptionalMirrorImageStatusState mirror_image_status_state{ + boost::make_optional(false, cls::rbd::MirrorImageStatusState{})}; + image_replayer::BootstrapRequest* bootstrap_request = nullptr; { Mutex::Locker locker(m_lock); state = m_state; state_desc = m_state_desc; + mirror_image_status_state = m_mirror_image_status_state; last_r = m_last_r; - bootstrapping = (m_bootstrap_request != nullptr); stopping_replay = (m_local_image_ctx != nullptr); + + if (m_bootstrap_request != nullptr) { + bootstrap_request = m_bootstrap_request; + bootstrap_request->get(); + } + } + + bool syncing = false; + if (bootstrap_request != nullptr) { + syncing = bootstrap_request->is_syncing(); + bootstrap_request->put(); + bootstrap_request = nullptr; } if (opt_state) { @@ -1321,9 +1349,10 @@ void ImageReplayer::send_mirror_status_update(const OptionalState &opt_state) status.up = true; switch (state) { case STATE_STARTING: - if (bootstrapping) { + if (syncing) { status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_SYNCING; status.description = state_desc.empty() ? "syncing" : state_desc; + mirror_image_status_state = status.state; } else { status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STARTING_REPLAY; status.description = "starting replay"; @@ -1351,6 +1380,7 @@ void ImageReplayer::send_mirror_status_update(const OptionalState &opt_state) return; } status.description = "replaying, " + desc; + mirror_image_status_state = boost::none; } break; case STATE_STOPPING: @@ -1364,15 +1394,28 @@ void ImageReplayer::send_mirror_status_update(const OptionalState &opt_state) if (last_r < 0) { status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR; status.description = state_desc; + mirror_image_status_state = status.state; } else { status.state = cls::rbd::MIRROR_IMAGE_STATUS_STATE_STOPPED; status.description = state_desc.empty() ? "stopped" : state_desc; + mirror_image_status_state = boost::none; } break; default: assert(!"invalid state"); } + { + Mutex::Locker locker(m_lock); + m_mirror_image_status_state = mirror_image_status_state; + } + + // prevent the status from ping-ponging when failed replays are restarted + if (mirror_image_status_state && + *mirror_image_status_state == cls::rbd::MIRROR_IMAGE_STATUS_STATE_ERROR) { + status.state = *mirror_image_status_state; + } + dout(20) << "status=" << status << dendl; librados::ObjectWriteOperation op; librbd::cls_client::mirror_image_status_set(&op, m_global_image_id, status); diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h index 1e3eca583954b..3ea41dc5378f5 100644 --- a/src/tools/rbd_mirror/ImageReplayer.h +++ b/src/tools/rbd_mirror/ImageReplayer.h @@ -18,6 +18,7 @@ #include "ImageDeleter.h" #include "ProgressContext.h" #include "types.h" +#include "tools/rbd_mirror/image_replayer/Types.h" #include #include @@ -60,17 +61,6 @@ namespace image_replayer { template class ReplayStatusFormatter; } template class ImageReplayer { public: - typedef typename librbd::journal::TypeTraits::ReplayEntry ReplayEntry; - - enum State { - STATE_UNKNOWN, - STATE_STARTING, - STATE_REPLAYING, - STATE_REPLAY_FLUSHING, - STATE_STOPPING, - STATE_STOPPED, - }; - static ImageReplayer *create( Threads *threads, ImageDeleter* image_deleter, InstanceWatcher *instance_watcher, @@ -93,7 +83,6 @@ public: ImageReplayer(const ImageReplayer&) = delete; ImageReplayer& operator=(const ImageReplayer&) = delete; - State get_state() { Mutex::Locker l(m_lock); return get_state_(); } bool is_stopped() { Mutex::Locker l(m_lock); return is_stopped_(); } bool is_running() { Mutex::Locker l(m_lock); return is_running_(); } bool is_replaying() { Mutex::Locker l(m_lock); return is_replaying_(); } @@ -106,6 +95,8 @@ public: return (m_last_r == -EBLACKLISTED); } + image_replayer::HealthState get_health_state() const; + void add_remote_image(const std::string &remote_mirror_uuid, const std::string &remote_image_id, librados::IoCtx &remote_io_ctx); @@ -215,6 +206,17 @@ protected: bool on_replay_interrupted(); private: + typedef typename librbd::journal::TypeTraits::ReplayEntry ReplayEntry; + + enum State { + STATE_UNKNOWN, + STATE_STARTING, + STATE_REPLAYING, + STATE_REPLAY_FLUSHING, + STATE_STOPPING, + STATE_STOPPED, + }; + struct RemoteImage { std::string mirror_uuid; std::string image_id; @@ -248,6 +250,8 @@ private: typedef typename librbd::journal::TypeTraits::Journaler Journaler; typedef boost::optional OptionalState; + typedef boost::optional + OptionalMirrorImageStatusState; struct JournalListener : public librbd::journal::Listener { ImageReplayer *img_replayer; @@ -296,8 +300,11 @@ private: std::string m_name; mutable Mutex m_lock; State m_state = STATE_STOPPED; - int m_last_r = 0; std::string m_state_desc; + + OptionalMirrorImageStatusState m_mirror_image_status_state = boost::none; + int m_last_r = 0; + BootstrapProgressContext m_progress_cxt; bool m_do_resync{false}; image_replayer::EventPreprocessor *m_event_preprocessor = nullptr; @@ -364,9 +371,6 @@ private: static std::string to_string(const State state); - State get_state_() const { - return m_state; - } bool is_stopped_() const { return m_state == STATE_STOPPED; } diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc index 4dadc744f6c9e..434aa783c0505 100644 --- a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc @@ -74,6 +74,12 @@ BootstrapRequest::~BootstrapRequest() { assert(m_remote_image_ctx == nullptr); } +template +bool BootstrapRequest::is_syncing() const { + Mutex::Locker locker(m_lock); + return (m_image_sync != nullptr); +} + template void BootstrapRequest::send() { *m_do_resync = false; @@ -637,24 +643,26 @@ void BootstrapRequest::image_sync() { } dout(20) << dendl; - update_progress("IMAGE_SYNC"); - - Context *ctx = create_context_callback< - BootstrapRequest, &BootstrapRequest::handle_image_sync>( - this); - { Mutex::Locker locker(m_lock); if (m_canceled) { m_ret_val = -ECANCELED; } else { assert(m_image_sync == nullptr); + + Context *ctx = create_context_callback< + BootstrapRequest, &BootstrapRequest::handle_image_sync>(this); m_image_sync = ImageSync::create( *m_local_image_ctx, m_remote_image_ctx, m_timer, m_timer_lock, m_local_mirror_uuid, m_journaler, m_client_meta, m_work_queue, m_instance_watcher, ctx, m_progress_ctx); m_image_sync->get(); + + m_lock.Unlock(); + update_progress("IMAGE_SYNC"); + m_lock.Lock(); + m_image_sync->send(); return; } @@ -670,7 +678,6 @@ void BootstrapRequest::handle_image_sync(int r) { { Mutex::Locker locker(m_lock); - m_image_sync->put(); m_image_sync = nullptr; diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h index 5696cce61fdd1..cebd568972ba8 100644 --- a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.h @@ -80,6 +80,8 @@ public: bool *do_resync, ProgressContext *progress_ctx = nullptr); ~BootstrapRequest() override; + bool is_syncing() const; + void send() override; void cancel() override; @@ -163,7 +165,7 @@ private: ProgressContext *m_progress_ctx; bool *m_do_resync; - Mutex m_lock; + mutable Mutex m_lock; bool m_canceled = false; Tags m_remote_tags; diff --git a/src/tools/rbd_mirror/image_replayer/Types.h b/src/tools/rbd_mirror/image_replayer/Types.h new file mode 100644 index 0000000000000..6ab988a7662c0 --- /dev/null +++ b/src/tools/rbd_mirror/image_replayer/Types.h @@ -0,0 +1,21 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H +#define CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H + +namespace rbd { +namespace mirror { +namespace image_replayer { + +enum HealthState { + HEALTH_STATE_OK, + HEALTH_STATE_WARNING, + HEALTH_STATE_ERROR +}; + +} // namespace image_replayer +} // namespace mirror +} // namespace rbd + +#endif // CEPH_RBD_MIRROR_IMAGE_REPLAYER_TYPES_H -- 2.39.5