From 98ceff7c0e34644db2bf1fc3b7e6704243989afd Mon Sep 17 00:00:00 2001 From: Mykola Golub Date: Thu, 18 Mar 2021 18:38:26 +0000 Subject: [PATCH] rbd-mirror: request group snapshot creation when creating group image snapshot It makes the group image replayers to synchronize and to have the group snapshot created. Signed-off-by: Mykola Golub Signed-off-by: Prasanna Kumar Kalever --- src/librbd/mirror/GetInfoRequest.cc | 2 +- .../journal/test_mock_Replayer.cc | 5 + .../snapshot/test_mock_Replayer.cc | 5 + src/tools/rbd_mirror/GroupReplayer.cc | 400 +++++++++++++----- src/tools/rbd_mirror/GroupReplayer.h | 68 +++ src/tools/rbd_mirror/ImageReplayer.cc | 35 +- src/tools/rbd_mirror/ImageReplayer.h | 4 + src/tools/rbd_mirror/MirrorStatusWatcher.cc | 3 +- src/tools/rbd_mirror/Types.h | 18 + .../group_replayer/BootstrapRequest.cc | 57 ++- .../group_replayer/BootstrapRequest.h | 13 +- .../image_replayer/BootstrapRequest.cc | 2 +- .../PrepareLocalImageRequest.cc | 11 +- .../image_replayer/ReplayerListener.h | 8 + .../image_replayer/snapshot/Replayer.cc | 76 +++- .../image_replayer/snapshot/Replayer.h | 16 + 16 files changed, 582 insertions(+), 141 deletions(-) diff --git a/src/librbd/mirror/GetInfoRequest.cc b/src/librbd/mirror/GetInfoRequest.cc index e904a21bba1c0..edcf5bff3a741 100644 --- a/src/librbd/mirror/GetInfoRequest.cc +++ b/src/librbd/mirror/GetInfoRequest.cc @@ -57,7 +57,7 @@ void GetInfoRequest::send() { template void GetInfoRequest::get_mirror_image() { - ldout(m_cct, 20) << dendl; + ldout(m_cct, 20) << "image_id: " << m_image_id << dendl; librados::ObjectReadOperation op; cls_client::mirror_image_get_start(&op, m_image_id); diff --git a/src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc b/src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc index 7c8defb3d60e8..aa50d4d1b343b 100644 --- a/src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc +++ b/src/test/rbd_mirror/image_replayer/journal/test_mock_Replayer.cc @@ -119,6 +119,11 @@ namespace { struct MockReplayerListener : public image_replayer::ReplayerListener { MOCK_METHOD0(handle_notification, void()); + MOCK_METHOD5(create_mirror_snapshot_start, + void(const cls::rbd::MirrorSnapshotNamespace &, + int64_t *, std::string *, std::string *, Context *)); + MOCK_METHOD3(create_mirror_snapshot_finish, void(const std::string &, + uint64_t, Context *)); }; } // anonymous namespace diff --git a/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc b/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc index ccc419e9af5fe..ae6e503c0c90c 100644 --- a/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc +++ b/src/test/rbd_mirror/image_replayer/snapshot/test_mock_Replayer.cc @@ -287,6 +287,11 @@ namespace { struct MockReplayerListener : public image_replayer::ReplayerListener { MOCK_METHOD0(handle_notification, void()); + MOCK_METHOD5(create_mirror_snapshot_start, + void(const cls::rbd::MirrorSnapshotNamespace &, + int64_t *, std::string *, std::string *, Context *)); + MOCK_METHOD3(create_mirror_snapshot_finish, void(const std::string &, + uint64_t, Context *)); }; } // anonymous namespace diff --git a/src/tools/rbd_mirror/GroupReplayer.cc b/src/tools/rbd_mirror/GroupReplayer.cc index b53a807e56df0..59d474f3c2d3a 100644 --- a/src/tools/rbd_mirror/GroupReplayer.cc +++ b/src/tools/rbd_mirror/GroupReplayer.cc @@ -16,6 +16,8 @@ #include "tools/rbd_mirror/image_replayer/Utils.h" #include "GroupReplayer.h" +#include + #define dout_context g_ceph_context #define dout_subsys ceph_subsys_rbd_mirror #undef dout_prefix @@ -28,20 +30,11 @@ namespace rbd { namespace mirror { using librbd::util::create_context_callback; +using librbd::util::create_rados_callback; using librbd::util::unique_lock_name; namespace { -std::string calc_ind_mirror_snap_name(uint64_t pool_id, - const std::string &group_id, - const std::string &snap_id) -{ - std::stringstream ind_snap_name_stream; - ind_snap_name_stream << ".mirror." << std::hex << pool_id << "_" - << group_id << "_" << snap_id; - return ind_snap_name_stream.str(); -} - template class GroupReplayerAdminSocketCommand { public: @@ -227,14 +220,16 @@ GroupReplayer::~GroupReplayer() { template bool GroupReplayer::needs_restart() const { - std::lock_guard locker{m_lock}; + dout(10) << dendl; + std::lock_guard locker{m_lock}; if (m_state != STATE_REPLAYING) { return false; } for (auto &[_, image_replayer] : m_image_replayers) { if (image_replayer->is_stopped()) { + dout(10) << "image replayer is in stopped state, needs restart" << dendl; return true; } } @@ -277,7 +272,7 @@ void GroupReplayer::start(Context *on_finish, bool manual, bool restart) { { std::lock_guard locker{m_lock}; if (!is_stopped_()) { - derr << "already running" << dendl; + derr << "already running: " << m_state << dendl; r = -EINVAL; } else if (m_manual_stop && !manual) { dout(5) << "stopped manually, ignoring start without manual flag" @@ -290,6 +285,11 @@ void GroupReplayer::start(Context *on_finish, bool manual, bool restart) { m_state = STATE_STARTING; m_last_r = 0; m_state_desc.clear(); + m_local_group_snaps.clear(); + m_remote_group_snaps.clear(); + m_image_replayers.clear(); + m_image_replayer_index.clear(); + m_get_remote_group_snap_ret_vals.clear(); m_manual_stop = false; ceph_assert(m_on_start_finish == nullptr); std::swap(m_on_start_finish, on_finish); @@ -312,6 +312,7 @@ void GroupReplayer::stop(Context *on_finish, bool manual, bool restart) { << ", restart=" << restart << dendl; group_replayer::BootstrapRequest *bootstrap_request = nullptr; + std::map *, Context *>> create_snap_requests; bool shut_down_replay = false; bool running = true; { @@ -321,13 +322,16 @@ void GroupReplayer::stop(Context *on_finish, bool manual, bool restart) { m_restart_requested = true; } + dout(10) << "state: " << m_state << ", m_stop_requested: " << m_stop_requested << dendl; if (!is_running_()) { + dout(10) << "replayers not running" << dendl; running = false; if (!restart && m_restart_requested) { dout(10) << "canceling restart" << dendl; m_restart_requested = false; } } else { + dout(10) << "replayers still running" << dendl; if (!is_stopped_()) { if (m_state == STATE_STARTING) { dout(10) << "canceling start" << dendl; @@ -335,11 +339,32 @@ void GroupReplayer::stop(Context *on_finish, bool manual, bool restart) { bootstrap_request = m_bootstrap_request; bootstrap_request->get(); } - } else { - dout(10) << "interrupting replay" << dendl; shut_down_replay = true; - m_state = STATE_STOPPING; + } else { + dout(10) << "interrupting replay" << dendl; + shut_down_replay = true; + for (auto it = m_create_snap_requests.begin(); + it != m_create_snap_requests.end(); ) { + auto &remote_group_snap_id = it->first; + auto &requests = it->second; + if (m_get_remote_group_snap_ret_vals.count(remote_group_snap_id) == 0) { + dout(20) << "getting remote group snap for " + << remote_group_snap_id << " is still in-progress" + << dendl; + shut_down_replay = false; + } else if (m_pending_snap_create.count(remote_group_snap_id) > 0) { + dout(20) << "group snap create for " << remote_group_snap_id + << " is still in-progress" << dendl; + shut_down_replay = false; + } else { + create_snap_requests[remote_group_snap_id] = requests; + it = m_create_snap_requests.erase(it); + continue; + } + it++; + } } + m_state = STATE_STOPPING; ceph_assert(m_on_stop_finish == nullptr); std::swap(m_on_stop_finish, on_finish); @@ -355,12 +380,10 @@ void GroupReplayer::stop(Context *on_finish, bool manual, bool restart) { bootstrap_request->put(); } - if (!running) { - dout(20) << "not running" << dendl; - if (on_finish) { - on_finish->complete(-EINVAL); + for (auto &[_, requests] : create_snap_requests) { + for (auto &[_, on_finish] : requests) { + on_finish->complete(-ESTALE); } - return; } if (shut_down_replay) { @@ -373,13 +396,22 @@ void GroupReplayer::stop(Context *on_finish, bool manual, bool restart) { } on_finish->complete(0); } + + if (!running && shut_down_replay) { + dout(20) << "not running" << dendl; + if (on_finish) { + on_finish->complete(-EINVAL); + } + } } template void GroupReplayer::restart(Context *on_finish) { + dout(10) << dendl; { std::lock_guard locker{m_lock}; m_restart_requested = true; + m_on_start_finish = nullptr; } auto ctx = new LambdaContext( @@ -457,7 +489,8 @@ void GroupReplayer::bootstrap_group() { m_threads, m_local_io_ctx, m_remote_group_peer.io_ctx, m_global_group_id, m_local_mirror_uuid, m_instance_watcher, m_local_status_updater, m_remote_group_peer.mirror_status_updater, m_cache_manager_handler, - m_pool_meta_cache, &m_local_group_ctx, &m_image_replayers, ctx); + m_pool_meta_cache, &m_remote_group_id, &m_local_group_ctx, + &m_image_replayers, &m_image_replayer_index, ctx); request->get(); m_bootstrap_request = request; @@ -473,10 +506,22 @@ void GroupReplayer::handle_bootstrap_group(int r) { dout(10) << "r=" << r << dendl; { std::lock_guard locker{m_lock}; - m_bootstrap_request->put(); - m_bootstrap_request = nullptr; + if (m_state == STATE_STOPPING || m_state == STATE_STOPPED) { + dout(10) << "stop prevailed" <put(); + m_bootstrap_request = nullptr; + } + m_local_group_ctx.listener = &m_listener; + if (!m_local_group_ctx.name.empty()) { + m_local_group_name = m_local_group_ctx.name; + } } + reregister_admin_socket_hook(); + if (finish_start_if_interrupted()) { return; } else if (r == -ENOENT) { @@ -493,12 +538,6 @@ void GroupReplayer::handle_bootstrap_group(int r) { return; } - { - std::lock_guard locker{m_lock}; - m_local_group_name = m_local_group_ctx.name; - } - reregister_admin_socket_hook(); - start_image_replayers(); } @@ -524,6 +563,13 @@ void GroupReplayer::start_image_replayers() { template void GroupReplayer::handle_start_image_replayers(int r) { dout(10) << "r=" << r << dendl; + { + std::lock_guard locker{m_lock}; + if (m_state == STATE_STOPPING || m_state == STATE_STOPPED) { + dout(10) << "stop prevailed" <::handle_stop_image_replayers(int r) { m_state = STATE_STOPPED; std::swap(on_finish, m_on_stop_finish); + m_image_replayer_index.clear(); + for (auto &[_, image_replayer] : m_image_replayers) { delete image_replayer; } @@ -766,98 +814,227 @@ void GroupReplayer::set_mirror_group_status_update( template void GroupReplayer::create_mirror_snapshot_start( - const std::string &remote_group_snap_id, ImageReplayer *image_replayer, - int64_t *local_group_pool_id, std::string *local_group_id, - std::string *local_group_snap_id, Context *on_finish) { - dout(20) << remote_group_snap_id << " " << image_replayer << dendl; + const cls::rbd::MirrorSnapshotNamespace &remote_group_snap_ns, + ImageReplayer *image_replayer, int64_t *local_group_pool_id, + std::string *local_group_id, std::string *local_group_snap_id, + Context *on_finish) { + dout(20) << remote_group_snap_ns << " " << image_replayer << dendl; + + ceph_assert(remote_group_snap_ns.is_primary()); + int r = 0; std::unique_lock locker{m_lock}; - ceph_assert(m_pending_snap_create == false); + auto &remote_group_snap_id = remote_group_snap_ns.group_snap_id; + if (m_local_group_snaps.find(remote_group_snap_id) != m_local_group_snaps.end() && + m_local_group_snaps[remote_group_snap_id].state == cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE) { + dout(20) << "group snapshot: " << remote_group_snap_id << " already exists" + << dendl; + r = -EEXIST; + } else if (m_state == STATE_STARTING) { + derr << "group replayer is not ready yet, m_state: " << m_state << dendl; + r = -EAGAIN; + } else if (m_state != STATE_REPLAYING) { + derr << "group replayer is not in replaying state, m_state: " + << m_state << dendl; + r = -ESTALE; + } - if (m_state != STATE_REPLAYING) { - derr << "not in replaying state" << dendl; + if (r != 0) { locker.unlock(); - on_finish->complete(-ESTALE); + on_finish->complete(r); return; } - if (m_remote_group_snap_id.empty()) { - ceph_assert(m_create_snap_requests.empty()); - m_remote_group_snap_id = remote_group_snap_id; + auto requests_it = m_create_snap_requests.find(remote_group_snap_id); + + if (requests_it == m_create_snap_requests.end()) { + ceph_assert(m_local_group_snaps.count(remote_group_snap_id) == 0); + + requests_it = m_create_snap_requests.insert( + {remote_group_snap_id, {}}).first; + + auto snap_state = + remote_group_snap_ns.state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY ? + cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY : + cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED; + + m_local_group_snaps[remote_group_snap_id] = + {remote_group_snap_id, + cls::rbd::MirrorGroupSnapshotNamespace{ + snap_state, {}, m_remote_group_peer.uuid, remote_group_snap_id}, + {}, cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE}; - // XXXMG: make the same name as the primary snapshot has - m_group_snap = {librbd::util::generate_uuid(m_local_io_ctx), - cls::rbd::MirrorGroupSnapshotNamespace{ - cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY, - {}, m_remote_group_peer.uuid, m_remote_group_snap_id}, - calc_ind_mirror_snap_name( - m_remote_group_peer.io_ctx.get_id(), m_remote_group_id, - m_remote_group_snap_id), - cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE}; + librados::ObjectReadOperation op; + librbd::cls_client::group_snap_get_by_id_start(&op, remote_group_snap_id); + auto ctx = new C_GetRemoteGroupSnap(this, remote_group_snap_id); + auto comp = create_rados_callback(ctx); + + r = m_remote_group_peer.io_ctx.aio_operate( + librbd::util::group_header_name(m_remote_group_id), comp, &op, + &ctx->bl); + ceph_assert(r == 0); + comp->release(); } - ceph_assert(m_create_snap_requests.count(image_replayer) == 0); - m_create_snap_requests[image_replayer] = on_finish; + ceph_assert(requests_it->second.count(image_replayer) == 0); + requests_it->second[image_replayer] = on_finish; + + ceph_assert(m_local_group_snaps.count(remote_group_snap_id) > 0); + auto &local_group_snap = m_local_group_snaps[remote_group_snap_id]; + + local_group_snap.snaps.emplace_back(image_replayer->get_local_pool_id(), + image_replayer->get_local_image_id(), + CEPH_NOSNAP); + ceph_assert(!local_group_snap.snaps.back().image_id.empty()); *local_group_pool_id = m_local_io_ctx.get_id(); *local_group_id = m_local_group_ctx.group_id; - *local_group_snap_id = m_group_snap.id; - - if (m_remote_group_snap_id != remote_group_snap_id) { - dout(15) << "request snap_id " << remote_group_snap_id - << " does not match us " << m_remote_group_snap_id - << " -- restarting" << dendl; - auto create_snap_requests = m_create_snap_requests; - m_create_snap_requests.clear(); + *local_group_snap_id = m_local_group_snaps[remote_group_snap_id].id; + + if (m_get_remote_group_snap_ret_vals.count(remote_group_snap_id) == 0) { + dout(20) << "getting remote group snap is still in-progress" << dendl; locker.unlock(); - for (auto &[_, on_finish] : create_snap_requests) { - on_finish->complete(-EAGAIN); - } return; } - // XXXMG: m_image_replayers.size() will not always work - if (m_create_snap_requests.size() < m_image_replayers.size()) { - return; - } + maybe_create_mirror_snapshot(locker, remote_group_snap_id); +} + +template +void GroupReplayer::maybe_create_mirror_snapshot( + std::unique_lock& locker, + const std::string &remote_group_snap_id) { + ceph_assert(ceph_mutex_is_locked_by_me(m_lock)); + + dout(20) << remote_group_snap_id << dendl; - m_pending_snap_create = true; + auto &remote_group_snap = m_remote_group_snaps[remote_group_snap_id]; + ceph_assert(!remote_group_snap.id.empty()); - for (auto &[io_ctx, image_replayer] : m_image_replayers) { - m_group_snap.snaps.emplace_back(io_ctx.get_id(), - image_replayer->get_local_image_id(), - CEPH_NOSNAP); - ceph_assert(!m_group_snap.snaps.back().image_id.empty()); + ceph_assert(m_create_snap_requests.count(remote_group_snap_id) > 0); + auto &create_snap_requests = m_create_snap_requests[remote_group_snap_id]; + + for (auto &s : remote_group_snap.snaps) { + auto it = m_image_replayer_index.find({s.pool, s.image_id}); + if (it == m_image_replayer_index.end()) { + continue; + } + + if (create_snap_requests.count(it->second) == 0) { + dout(20) << "waiting for create_mirror_snapshot_start " + << remote_group_snap_id << " from " << it->second << dendl; + locker.unlock(); + return; + } } - dout(20) << m_group_snap.id << " " << m_group_snap.name << dendl; + ceph_assert(m_local_group_snaps.count(remote_group_snap_id) > 0); + auto &local_group_snap = m_local_group_snaps[remote_group_snap_id]; + + dout(20) << local_group_snap.id << " " << local_group_snap.name << dendl; + + bool inserted = m_pending_snap_create.insert(remote_group_snap_id).second; + ceph_assert(inserted); + + ceph_assert(m_get_remote_group_snap_ret_vals.count(remote_group_snap_id) > 0); + int r = m_get_remote_group_snap_ret_vals[remote_group_snap_id]; + m_get_remote_group_snap_ret_vals.erase(remote_group_snap_id); + if (r < 0) { + locker.unlock(); + handle_create_mirror_snapshot_start(remote_group_snap_id, r); + return; + } librados::ObjectWriteOperation op; - librbd::cls_client::group_snap_set(&op, m_group_snap); - auto comp = create_rados_callback< - GroupReplayer, - &GroupReplayer::handle_create_mirror_snapshot_start>(this); + librbd::cls_client::group_snap_set(&op, local_group_snap); + auto comp = create_rados_callback( + new LambdaContext([this, remote_group_snap_id](int r) { + handle_create_mirror_snapshot_start(remote_group_snap_id, r); + })); - int r = m_local_io_ctx.aio_operate( + r = m_local_io_ctx.aio_operate( librbd::util::group_header_name(m_local_group_ctx.group_id), comp, &op); + locker.unlock(); ceph_assert(r == 0); comp->release(); } template -void GroupReplayer::handle_create_mirror_snapshot_start(int r) { - dout(20) << m_remote_group_snap_id << " r=" << r << dendl; +void GroupReplayer::handle_get_remote_group_snapshot( + const std::string &remote_group_snap_id, bufferlist &out_bl, int r) { + dout(20) << remote_group_snap_id << " r=" << r << dendl; + + auto &remote_group_snap = m_remote_group_snaps[remote_group_snap_id]; std::unique_lock locker{m_lock}; - ceph_assert(m_pending_snap_create == true); + if (r == 0) { + auto iter = out_bl.cbegin(); + r = librbd::cls_client::group_snap_get_by_id_finish( + &iter, &remote_group_snap); + } + + if (r < 0) { + derr << "failed to get remote group snapshot: " << cpp_strerror(r) << dendl; + } else { + m_local_group_snaps[remote_group_snap_id].name = remote_group_snap.name; + } - auto create_snap_requests = m_create_snap_requests; - m_create_snap_requests.clear(); + if (m_state == STATE_STOPPING) { + dout(20) << "interrupted" << dendl; + m_local_group_snaps.erase(remote_group_snap_id); + m_remote_group_snaps.erase(remote_group_snap_id); + auto create_snap_requests = m_create_snap_requests[remote_group_snap_id]; + m_create_snap_requests.erase(remote_group_snap_id); + bool shut_down_replay = m_pending_snap_create.empty() && + m_create_snap_requests.empty(); + locker.unlock(); + for (auto &[_, on_finish] : create_snap_requests) { + on_finish->complete(r); + } + if (shut_down_replay) { + stop_image_replayers(); + } + return; + } + + m_get_remote_group_snap_ret_vals[remote_group_snap_id] = r; + + maybe_create_mirror_snapshot(locker, remote_group_snap_id); +} + +template +void GroupReplayer::handle_create_mirror_snapshot_start( + const std::string &remote_group_snap_id, int r) { + dout(20) << remote_group_snap_id << " r=" << r << dendl; + + std::unique_lock locker{m_lock}; + ceph_assert(m_pending_snap_create.count(remote_group_snap_id) > 0); + + ceph_assert(m_create_snap_requests.count(remote_group_snap_id) > 0); + auto create_snap_requests = m_create_snap_requests[remote_group_snap_id]; + m_create_snap_requests.erase(remote_group_snap_id); + + bool shut_down_replay = false; + if (r == -EEXIST) { + dout(20) << "group snapshot: " << remote_group_snap_id << " already exists" + << dendl; + r = 0; + } else if (r < 0) { + m_pending_snap_create.erase(remote_group_snap_id); + m_remote_group_snaps.erase(remote_group_snap_id); + m_local_group_snaps.erase(remote_group_snap_id); + shut_down_replay = m_state == STATE_STOPPING && !m_restart_requested && + m_pending_snap_create.empty() && + m_create_snap_requests.empty(); + } locker.unlock(); for (auto &[_, on_finish] : create_snap_requests) { on_finish->complete(r); } + if (shut_down_replay) { + stop_image_replayers(); + } } template @@ -868,36 +1045,44 @@ void GroupReplayer::create_mirror_snapshot_finish( << snap_id << dendl; std::lock_guard locker{m_lock}; - ceph_assert(m_pending_snap_create == true); - ceph_assert(m_state == STATE_REPLAYING || m_state == STATE_STOPPING); - ceph_assert(m_remote_group_snap_id == remote_group_snap_id); - ceph_assert(m_create_snap_requests.count(image_replayer) == 0); + if (m_local_group_snaps.find(remote_group_snap_id) != m_local_group_snaps.end() && + m_local_group_snaps[remote_group_snap_id].state == cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE) { + on_finish->complete(-EEXIST); + return; + } - m_create_snap_requests[image_replayer] = on_finish; + ceph_assert(m_pending_snap_create.count(remote_group_snap_id) > 0); + + auto &create_snap_requests = m_create_snap_requests[remote_group_snap_id]; + + ceph_assert(create_snap_requests.count(image_replayer) == 0); + create_snap_requests[image_replayer] = on_finish; auto pool = image_replayer->get_local_pool_id(); auto image_id = image_replayer->get_local_image_id(); + auto &local_group_snap = m_local_group_snaps[remote_group_snap_id]; auto it = std::find_if( - m_group_snap.snaps.begin(), m_group_snap.snaps.end(), + local_group_snap.snaps.begin(), local_group_snap.snaps.end(), [&pool, &image_id](const cls::rbd::ImageSnapshotSpec &s) { return pool == s.pool && image_id == s.image_id; }); - ceph_assert(it != m_group_snap.snaps.end()); + ceph_assert(it != local_group_snap.snaps.end()); it->snap_id = snap_id; - if (m_create_snap_requests.size() < m_image_replayers.size()) { + if (create_snap_requests.size() < local_group_snap.snaps.size()) { return; } - m_group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE; + local_group_snap.state = cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE; - dout(20) << m_group_snap.id << " " << m_group_snap.name << dendl; + dout(20) << local_group_snap.id << " " << local_group_snap.name << dendl; librados::ObjectWriteOperation op; - librbd::cls_client::group_snap_set(&op, m_group_snap); - auto comp = create_rados_callback< - GroupReplayer, - &GroupReplayer::handle_create_mirror_snapshot_finish>(this); + librbd::cls_client::group_snap_set(&op, local_group_snap); + auto comp = create_rados_callback( + new LambdaContext([this, remote_group_snap_id](int r) { + handle_create_mirror_snapshot_finish(remote_group_snap_id, r); + })); int r = m_local_io_ctx.aio_operate( librbd::util::group_header_name(m_local_group_ctx.group_id), comp, &op); @@ -906,23 +1091,26 @@ void GroupReplayer::create_mirror_snapshot_finish( } template -void GroupReplayer::handle_create_mirror_snapshot_finish(int r) { - dout(20) << m_remote_group_snap_id << " r=" << r << dendl; +void GroupReplayer::handle_create_mirror_snapshot_finish( + const std::string &remote_group_snap_id, int r) { + dout(20) << remote_group_snap_id << " r=" << r << dendl; std::unique_lock locker{m_lock}; - ceph_assert(m_pending_snap_create == true); - m_pending_snap_create = false; - m_remote_group_snap_id.clear(); - bool stopping = (m_state == STATE_STOPPING); - auto create_snap_requests = m_create_snap_requests; - m_create_snap_requests.clear(); + auto count = m_pending_snap_create.erase(remote_group_snap_id); + ceph_assert(count > 0); + + auto create_snap_requests = m_create_snap_requests[remote_group_snap_id]; + m_create_snap_requests.erase(remote_group_snap_id); + bool shut_down_replay = m_state == STATE_STOPPING && !m_restart_requested && + m_pending_snap_create.empty() && + m_create_snap_requests.empty(); locker.unlock(); for (auto &[_, on_finish] : create_snap_requests) { on_finish->complete(r); } - if (stopping) { + if (shut_down_replay) { stop_image_replayers(); } } diff --git a/src/tools/rbd_mirror/GroupReplayer.h b/src/tools/rbd_mirror/GroupReplayer.h index 572645d8e2d5a..06baf18cca3f1 100644 --- a/src/tools/rbd_mirror/GroupReplayer.h +++ b/src/tools/rbd_mirror/GroupReplayer.h @@ -164,6 +164,45 @@ private: STATE_STOPPED, }; + struct Listener : public GroupCtx::Listener { + GroupReplayer *group_replayer; + + Listener(GroupReplayer *group_replayer) : group_replayer(group_replayer) { + } + + void create_mirror_snapshot_start( + const cls::rbd::MirrorSnapshotNamespace &remote_group_snap_ns, + void *arg, int64_t *local_group_pool_id, std::string *local_group_id, + std::string *local_group_snap_id, Context *on_finish) override { + group_replayer->create_mirror_snapshot_start( + remote_group_snap_ns, static_cast *>(arg), + local_group_pool_id, local_group_id, local_group_snap_id, on_finish); + } + + void create_mirror_snapshot_finish(const std::string &remote_group_snap_id, + void *arg, uint64_t snap_id, + Context *on_finish) override { + group_replayer->create_mirror_snapshot_finish( + remote_group_snap_id, static_cast *>(arg), + snap_id, on_finish); + } + }; + + struct C_GetRemoteGroupSnap : public Context { + GroupReplayer *group_replayer; + std::string group_snap_id; + bufferlist bl; + + C_GetRemoteGroupSnap(GroupReplayer *group_replayer, + const std::string &group_snap_id) + : group_replayer(group_replayer), group_snap_id(group_snap_id) { + } + + void finish(int r) override { + group_replayer->handle_get_remote_group_snapshot(group_snap_id, bl, r); + } + }; + librados::IoCtx &m_local_io_ctx; std::string m_local_mirror_uuid; std::string m_global_group_id; @@ -178,6 +217,7 @@ private: GroupCtx m_local_group_ctx; Peers m_peers; Peer m_remote_group_peer; + std::string m_remote_group_id; mutable ceph::mutex m_lock; State m_state = STATE_STOPPED; @@ -196,6 +236,14 @@ private: group_replayer::BootstrapRequest *m_bootstrap_request = nullptr; std::list *>> m_image_replayers; + Listener m_listener = {this}; + std::map, ImageReplayer *> m_image_replayer_index; + std::map m_local_group_snaps; + std::map m_remote_group_snaps; + std::map m_get_remote_group_snap_ret_vals; + std::map *, Context *>> m_create_snap_requests; + std::set m_pending_snap_create; + static std::string state_to_string(const State &state) { switch (state) { case STATE_STARTING: @@ -240,6 +288,26 @@ private: void set_mirror_group_status_update(cls::rbd::MirrorGroupStatusState state, const std::string &desc); + + void create_mirror_snapshot_start( + const cls::rbd::MirrorSnapshotNamespace &remote_group_snap_ns, + ImageReplayer *image_replayer, int64_t *local_group_pool_id, + std::string *local_group_id, std::string *local_group_snap_id, + Context *on_finish); + void handle_create_mirror_snapshot_start( + const std::string &remote_group_snap_id, int r); + void handle_get_remote_group_snapshot( + const std::string &remote_group_snap_id, bufferlist &out_bl, int r); + void maybe_create_mirror_snapshot( + std::unique_lock& locker, + const std::string &remote_group_snap_id); + + void create_mirror_snapshot_finish( + const std::string &remote_group_snap_id, + ImageReplayer *image_replayer, + uint64_t snap_id, Context *on_finish); + void handle_create_mirror_snapshot_finish( + const std::string &remote_group_snap_id, int r); }; } // namespace mirror diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index 76e4e35d7c7c4..98da0f3f588cb 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -24,7 +24,6 @@ #include "Threads.h" #include "tools/rbd_mirror/image_replayer/BootstrapRequest.h" #include "tools/rbd_mirror/image_replayer/ReplayerListener.h" -#include "tools/rbd_mirror/image_replayer/StateBuilder.h" #include "tools/rbd_mirror/image_replayer/Utils.h" #include "tools/rbd_mirror/image_replayer/journal/Replayer.h" #include "tools/rbd_mirror/image_replayer/journal/StateBuilder.h" @@ -203,14 +202,41 @@ template struct ImageReplayer::ReplayerListener : public image_replayer::ReplayerListener { ImageReplayer* image_replayer; + GroupCtx *local_group_ctx; - ReplayerListener(ImageReplayer* image_replayer) - : image_replayer(image_replayer) { + ReplayerListener(ImageReplayer* image_replayer, GroupCtx *local_group_ctx) + : image_replayer(image_replayer), local_group_ctx(local_group_ctx) { } void handle_notification() override { image_replayer->handle_replayer_notification(); } + + void create_mirror_snapshot_start( + const cls::rbd::MirrorSnapshotNamespace &remote_group_snap_ns, + int64_t *local_group_pool_id, std::string *local_group_id, + std::string *local_group_snap_id, Context *on_finish) override { + if (local_group_ctx == nullptr) { + on_finish->complete(0); + return; + } + + local_group_ctx->listener->create_mirror_snapshot_start( + remote_group_snap_ns, image_replayer, local_group_pool_id, + local_group_id, local_group_snap_id, on_finish); + } + + void create_mirror_snapshot_finish(const std::string &remote_group_snap_id, + uint64_t snap_id, + Context *on_finish) override { + if (local_group_ctx == nullptr) { + on_finish->complete(0); + return; + } + + local_group_ctx->listener->create_mirror_snapshot_finish( + remote_group_snap_id, image_replayer, snap_id, on_finish); + } }; template @@ -232,8 +258,7 @@ ImageReplayer::ImageReplayer( m_lock(ceph::make_mutex("rbd::mirror::ImageReplayer " + stringify(local_io_ctx.get_id()) + " " + global_image_id)), m_progress_cxt(this), - m_replayer_listener(new ReplayerListener(this)) -{ + m_replayer_listener(new ReplayerListener(this, local_group_ctx)) { // Register asok commands using a temporary "remote_pool_name/global_image_id" // name. When the image name becomes known on start the asok commands will be // re-registered using "remote_pool_name/remote_image_name" name. diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h index 6818f617c629e..6cc92e3a21d07 100644 --- a/src/tools/rbd_mirror/ImageReplayer.h +++ b/src/tools/rbd_mirror/ImageReplayer.h @@ -10,6 +10,7 @@ #include "cls/rbd/cls_rbd_types.h" #include "ProgressContext.h" #include "tools/rbd_mirror/Types.h" +#include "tools/rbd_mirror/image_replayer/StateBuilder.h" #include "tools/rbd_mirror/image_replayer/Types.h" #include #include @@ -128,6 +129,9 @@ public: inline const std::string& get_global_image_id() const { return m_global_image_id; } + inline const std::string& get_local_image_id() const { + return m_state_builder->local_image_id; + } void start(Context *on_finish, bool manual = false, bool restart = false); void stop(Context *on_finish, bool manual = false, bool restart = false); diff --git a/src/tools/rbd_mirror/MirrorStatusWatcher.cc b/src/tools/rbd_mirror/MirrorStatusWatcher.cc index 3e1564c5b7072..3b78e14495c65 100644 --- a/src/tools/rbd_mirror/MirrorStatusWatcher.cc +++ b/src/tools/rbd_mirror/MirrorStatusWatcher.cc @@ -62,7 +62,8 @@ template void MirrorStatusWatcher::handle_notify(uint64_t notify_id, uint64_t handle, uint64_t notifier_id, bufferlist &bl) { - dout(20) << dendl; + dout(10) << "notify_id=" << notify_id << ", handle=" << handle + << ", notifier_id=" << notifier_id << dendl; bufferlist out; acknowledge_notify(notify_id, handle, out); diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h index 1a963f4160772..997e0ee62a241 100644 --- a/src/tools/rbd_mirror/Types.h +++ b/src/tools/rbd_mirror/Types.h @@ -14,6 +14,10 @@ #include "include/rados/librados.hpp" #include "include/rbd/librbd.hpp" +class Context; + +namespace cls { namespace rbd { struct MirrorSnapshotNamespace; } } + namespace rbd { namespace mirror { @@ -175,11 +179,25 @@ struct PeerSpec { std::ostream& operator<<(std::ostream& os, const PeerSpec &peer); struct GroupCtx { + struct Listener { + virtual ~Listener() { + } + + virtual void create_mirror_snapshot_start( + const cls::rbd::MirrorSnapshotNamespace &remote_group_snap_ns, + void *arg, int64_t *local_group_pool_id, std::string *local_group_id, + std::string *local_group_snap_id, Context *on_finish) = 0; + virtual void create_mirror_snapshot_finish( + const std::string &remote_group_snap_id, void *arg, uint64_t snap_id, + Context *on_finish) = 0; + }; + std::string name; std::string group_id; std::string global_group_id; bool primary = false; mutable librados::IoCtx io_ctx; + Listener *listener = nullptr; GroupCtx() { } diff --git a/src/tools/rbd_mirror/group_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/group_replayer/BootstrapRequest.cc index 83c6dad074f02..578995a7324a8 100644 --- a/src/tools/rbd_mirror/group_replayer/BootstrapRequest.cc +++ b/src/tools/rbd_mirror/group_replayer/BootstrapRequest.cc @@ -20,7 +20,7 @@ #define dout_subsys ceph_subsys_rbd_mirror #undef dout_prefix #define dout_prefix *_dout << "rbd::mirror::group_replayer::" \ - << "BootstrapRequest: " << this << " " \ + << "BootstrapRequest: " << " " \ << __func__ << ": " namespace rbd { @@ -34,6 +34,20 @@ namespace { static const uint32_t MAX_RETURN = 1024; +bool is_demoted_snap_exists( + const std::vector &snaps) { + for (auto it = snaps.rbegin(); it != snaps.rend(); it++) { + auto ns = std::get_if( + &it->snapshot_namespace); + if (ns != nullptr) { + if (ns->is_demoted()) { + return true; + } + } + } + return false; +} + int get_last_mirror_snapshot_state( const std::vector &snaps, cls::rbd::MirrorSnapshotState *state) { @@ -64,8 +78,10 @@ BootstrapRequest::BootstrapRequest( MirrorStatusUpdater *remote_status_updater, journal::CacheManagerHandler *cache_manager_handler, PoolMetaCache *pool_meta_cache, + std::string *remote_group_id, GroupCtx *local_group_ctx, std::list *>> *image_replayers, + std::map, ImageReplayer *> *image_replayer_index, Context* on_finish) : CancelableRequest("rbd::mirror::group_replayer::BootstrapRequest", reinterpret_cast(local_io_ctx.cct()), @@ -80,8 +96,10 @@ BootstrapRequest::BootstrapRequest( m_remote_status_updater(remote_status_updater), m_cache_manager_handler(cache_manager_handler), m_pool_meta_cache(pool_meta_cache), + m_remote_group_id(remote_group_id), m_local_group_ctx(local_group_ctx), m_image_replayers(image_replayers), + m_image_replayer_index(image_replayer_index), m_on_finish(on_finish) { dout(10) << "global_group_id=" << m_global_group_id << dendl; } @@ -162,7 +180,7 @@ void BootstrapRequest::handle_get_remote_group_id(int r) { if (r == 0) { auto iter = m_out_bl.cbegin(); r = librbd::cls_client::mirror_group_get_group_id_finish( - &iter, &m_remote_group_id); + &iter, m_remote_group_id); } if (r < 0) { @@ -179,7 +197,7 @@ void BootstrapRequest::get_remote_group_name() { dout(10) << dendl; librados::ObjectReadOperation op; - librbd::cls_client::dir_get_name_start(&op, m_remote_group_id); + librbd::cls_client::dir_get_name_start(&op, *m_remote_group_id); m_out_bl.clear(); auto comp = create_rados_callback< BootstrapRequest, @@ -224,7 +242,7 @@ void BootstrapRequest::get_remote_mirror_group() { dout(10) << dendl; librados::ObjectReadOperation op; - librbd::cls_client::mirror_group_get_start(&op, m_remote_group_id); + librbd::cls_client::mirror_group_get_start(&op, *m_remote_group_id); m_out_bl.clear(); auto comp = create_rados_callback< BootstrapRequest, @@ -337,7 +355,8 @@ void BootstrapRequest::list_remote_group() { &BootstrapRequest::handle_list_remote_group>(this); m_out_bl.clear(); int r = m_remote_io_ctx.aio_operate( - librbd::util::group_header_name(m_remote_group_id), comp, &op, &m_out_bl); + librbd::util::group_header_name(*m_remote_group_id), comp, &op, + &m_out_bl); ceph_assert(r == 0); comp->release(); } @@ -441,7 +460,7 @@ void BootstrapRequest::handle_get_remote_mirror_image(int r) { return; } - m_remote_images.insert({spec.pool_id, mirror_image.global_image_id}); + m_remote_images[{spec.pool_id, mirror_image.global_image_id}] = spec.image_id; m_images.pop_front(); @@ -692,10 +711,16 @@ void BootstrapRequest::handle_list_local_group_snapshots(int r) { r = get_last_mirror_snapshot_state(m_local_group_snaps, &state); if (r == -ENOENT) { derr << "failed to find local mirror group snapshot" << dendl; - finish(-EINVAL); - return; + } else { + if (state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) { + // if local snapshot is primary demoted, check if there is demote snapshot + // in remote, if not then split brain + if (!is_demoted_snap_exists(m_remote_group_snaps)) { + finish(-EEXIST); + return; + } + } } - ceph_assert(r == 0); m_local_mirror_group_primary = (state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY); } @@ -704,8 +729,6 @@ void BootstrapRequest::handle_list_local_group_snapshots(int r) { if (m_local_mirror_group.state == cls::rbd::MIRROR_GROUP_STATE_ENABLED && m_local_mirror_group_primary) { derr << "both remote and local groups are primary" << dendl; - finish(-EEXIST); // split-brain - return; } } else if (m_local_mirror_group.state != cls::rbd::MIRROR_GROUP_STATE_ENABLED || !m_local_mirror_group_primary) { @@ -1017,9 +1040,8 @@ void BootstrapRequest::handle_remove_local_mirror_group(int r) { } m_local_mirror_group.state = cls::rbd::MIRROR_GROUP_STATE_DISABLED; - - if (m_remote_mirror_group.state == cls::rbd::MIRROR_GROUP_STATE_ENABLED && - m_remote_mirror_group_primary) { + if (r != -ENOENT && (m_remote_mirror_group.state == cls::rbd::MIRROR_GROUP_STATE_ENABLED && + m_remote_mirror_group_primary)) { create_local_mirror_group(); } else { remove_local_group(); @@ -1260,7 +1282,10 @@ int BootstrapRequest::create_replayers() { int r = 0; if (m_remote_mirror_group.state == cls::rbd::MIRROR_GROUP_STATE_ENABLED && m_remote_mirror_group_primary) { - for (auto &[remote_pool_id, global_image_id] : m_remote_images) { + for (auto &[p, remote_image_id] : m_remote_images) { + auto &remote_pool_id = p.first; + auto &global_image_id = p.second; + m_image_replayers->emplace_back(librados::IoCtx(), nullptr); auto &local_io_ctx = m_image_replayers->back().first; auto &image_replayer = m_image_replayers->back().second; @@ -1320,6 +1345,8 @@ int BootstrapRequest::create_replayers() { // TODO only a single peer is currently supported image_replayer->add_peer({local_pool_meta.mirror_uuid, remote_io_ctx, remote_pool_meta, m_remote_status_updater}); + + (*m_image_replayer_index)[{remote_pool_id, remote_image_id}] = image_replayer; } } else if (m_local_mirror_group.state == cls::rbd::MIRROR_GROUP_STATE_ENABLED && m_local_mirror_group_primary) { diff --git a/src/tools/rbd_mirror/group_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/group_replayer/BootstrapRequest.h index f99ccdce2c03d..368a83b911e6d 100644 --- a/src/tools/rbd_mirror/group_replayer/BootstrapRequest.h +++ b/src/tools/rbd_mirror/group_replayer/BootstrapRequest.h @@ -45,14 +45,16 @@ public: MirrorStatusUpdater *remote_status_updater, journal::CacheManagerHandler *cache_manager_handler, PoolMetaCache *pool_meta_cache, + std::string *remote_group_id, GroupCtx *local_group_ctx, std::list *>> *image_replayers, + std::map, ImageReplayer *> *image_replayer_index, Context *on_finish) { return new BootstrapRequest( threads, local_io_ctx, remote_io_ctx, global_group_id, local_mirror_uuid, instance_watcher, local_status_updater, remote_status_updater, - cache_manager_handler, pool_meta_cache, local_group_ctx, image_replayers, - on_finish); + cache_manager_handler, pool_meta_cache, remote_group_id, local_group_ctx, + image_replayers, image_replayer_index, on_finish); } BootstrapRequest( @@ -66,8 +68,10 @@ public: MirrorStatusUpdater *remote_status_updater, journal::CacheManagerHandler *cache_manager_handler, PoolMetaCache *pool_meta_cache, + std::string *remote_group_id, GroupCtx *local_group_ctx, std::list *>> *image_replayers, + std::map, ImageReplayer *> *image_replayer_index, Context* on_finish); void send() override; @@ -158,14 +162,15 @@ private: MirrorStatusUpdater *m_remote_status_updater; journal::CacheManagerHandler *m_cache_manager_handler; PoolMetaCache *m_pool_meta_cache; + std::string *m_remote_group_id; GroupCtx *m_local_group_ctx; std::list *>> *m_image_replayers; + std::map, ImageReplayer *> *m_image_replayer_index; Context *m_on_finish; std::atomic m_canceled = false; std::string m_group_name; - std::string m_remote_group_id; std::string m_local_group_id; bool m_local_group_id_by_name = false; cls::rbd::MirrorGroup m_remote_mirror_group; @@ -177,7 +182,7 @@ private: std::list m_images; librados::IoCtx m_image_io_ctx; - std::set m_remote_images; + std::map m_remote_images; std::set m_local_images; std::map m_local_trash_images; diff --git a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc index f24058325d915..10312769e2c4b 100644 --- a/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc +++ b/src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc @@ -76,7 +76,7 @@ BootstrapRequest::BootstrapRequest( m_do_resync(do_resync), m_lock(ceph::make_mutex(unique_lock_name("BootstrapRequest::m_lock", this))) { - dout(10) << dendl; + dout(10) << "global_image_id: " << m_global_image_id << dendl; } template diff --git a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc index b1fef7254760a..b08d83ca27754 100644 --- a/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc +++ b/src/tools/rbd_mirror/image_replayer/PrepareLocalImageRequest.cc @@ -33,7 +33,7 @@ using librbd::util::create_rados_callback; template void PrepareLocalImageRequest::send() { - dout(10) << dendl; + dout(10) << "global_image_id: " << m_global_image_id << dendl; get_local_image_id(); } @@ -51,8 +51,8 @@ void PrepareLocalImageRequest::get_local_image_id() { template void PrepareLocalImageRequest::handle_get_local_image_id(int r) { - dout(10) << "r=" << r << ", " - << "local_image_id=" << m_local_image_id << dendl; + dout(10) << "r=" << r << ", global_image_id: " << m_global_image_id + << ", local_image_id=" << m_local_image_id << dendl; if (r < 0) { finish(r); @@ -116,7 +116,7 @@ void PrepareLocalImageRequest::get_mirror_info() { template void PrepareLocalImageRequest::handle_get_mirror_info(int r) { - dout(10) << ": r=" << r << dendl; + dout(10) << "r=" << r << dendl; if (r < 0) { derr << "failed to retrieve local mirror image info: " << cpp_strerror(r) @@ -125,7 +125,8 @@ void PrepareLocalImageRequest::handle_get_mirror_info(int r) { return; } - if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_CREATING) { + if (m_mirror_image.state == cls::rbd::MIRROR_IMAGE_STATE_CREATING && + !m_mirror_image.group_spec.is_valid()) { dout(5) << "local image is still in creating state, issuing a removal" << dendl; move_to_trash(); diff --git a/src/tools/rbd_mirror/image_replayer/ReplayerListener.h b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h index f17f401b1fd15..d41f3e3cb02d6 100644 --- a/src/tools/rbd_mirror/image_replayer/ReplayerListener.h +++ b/src/tools/rbd_mirror/image_replayer/ReplayerListener.h @@ -12,6 +12,14 @@ struct ReplayerListener { virtual ~ReplayerListener() {} virtual void handle_notification() = 0; + + virtual void create_mirror_snapshot_start( + const cls::rbd::MirrorSnapshotNamespace &remote_group_snap_ns, + int64_t *local_group_pool_id, std::string *local_group_id, + std::string *local_group_snap_id, Context *on_finish) = 0; + virtual void create_mirror_snapshot_finish( + const std::string &remote_group_snap_id, uint64_t snap_id, + Context *on_finish) = 0; }; } // namespace image_replayer diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc index b82666e0223db..f05613a7aafb8 100644 --- a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc @@ -951,7 +951,7 @@ void Replayer::handle_get_remote_image_state(int r) { return; } - create_non_primary_snapshot(); + create_group_snap_start(); } template @@ -981,6 +981,46 @@ void Replayer::handle_get_local_image_state(int r) { request_sync(); } +template +void Replayer::create_group_snap_start() { + + if (!m_remote_mirror_snap_ns.group_spec.is_valid() || + m_remote_mirror_snap_ns.group_snap_id.empty()) { + create_non_primary_snapshot(); + return; + } + + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer, &Replayer::handle_create_group_snap_start>(this); + + m_replayer_listener->create_mirror_snapshot_start( + m_remote_mirror_snap_ns, &m_local_group_pool_id, + &m_local_group_id, &m_local_group_snap_id, ctx); +} + +template +void Replayer::handle_create_group_snap_start(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -EEXIST) { + if (r == -EAGAIN) { + dout(15) << "restarting replayer" << dendl; + load_local_image_meta(); + } else if (r == -ESTALE) { + dout(15) << "waiting for shut down" << dendl; + handle_replay_complete(r, "waiting for shut down"); + } else { + derr << "failed to create group snapshot: " << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to create group snapshot"); + } + return; + } + + create_non_primary_snapshot(); +} + template void Replayer::create_non_primary_snapshot() { auto local_image_ctx = m_state_builder->local_image_ctx; @@ -1058,8 +1098,8 @@ void Replayer::create_non_primary_snapshot() { auto req = librbd::mirror::snapshot::CreateNonPrimaryRequest::create( local_image_ctx, m_remote_mirror_snap_ns.is_demoted(), m_state_builder->remote_mirror_uuid, m_remote_snap_id_end, - m_local_mirror_snap_ns.snap_seqs, -1, {}, {}, m_image_state, - &m_local_snap_id_end, ctx); + m_local_mirror_snap_ns.snap_seqs, m_local_group_pool_id, m_local_group_id, + m_local_group_snap_id, m_image_state, &m_local_snap_id_end, ctx); req->send(); } @@ -1076,6 +1116,36 @@ void Replayer::handle_create_non_primary_snapshot(int r) { dout(15) << "local_snap_id_end=" << m_local_snap_id_end << dendl; + create_group_snap_finish(); +} + +template +void Replayer::create_group_snap_finish() { + if (!m_remote_mirror_snap_ns.group_spec.is_valid() || + m_remote_mirror_snap_ns.group_snap_id.empty()) { + update_mirror_image_state(); + return; + } + + dout(10) << dendl; + + auto ctx = create_context_callback< + Replayer, &Replayer::handle_create_group_snap_finish>(this); + + m_replayer_listener->create_mirror_snapshot_finish( + m_remote_mirror_snap_ns.group_snap_id, m_local_snap_id_end, ctx); +} + +template +void Replayer::handle_create_group_snap_finish(int r) { + dout(10) << "r=" << r << dendl; + + if (r < 0 && r != -EEXIST) { + derr << "failed to create group snapshot: " << cpp_strerror(r) << dendl; + handle_replay_complete(r, "failed to create group snapshot"); + return; + } + update_mirror_image_state(); } diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h index 16b28c370d6b8..40df08e0e0ff6 100644 --- a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.h @@ -136,9 +136,15 @@ private: * | v | | * | GET_REMOTE_IMAGE_STATE | | * | | | | + * | v (skip if no group) | | + * | CREATE_GROUP_SNAP_START | | + * | | | | * | v | | * | CREATE_NON_PRIMARY_SNAPSHOT | | * | | | | + * | v (skip if no group) | | + * | CREATE_GROUP_SNAP_FINISH | | + * | | | | * | v (skip if not needed)| | * | UPDATE_MIRROR_IMAGE_STATE | | * | | | | @@ -238,6 +244,10 @@ private: uint64_t m_remote_snap_id_end = CEPH_NOSNAP; cls::rbd::MirrorSnapshotNamespace m_remote_mirror_snap_ns; + int64_t m_local_group_pool_id = -1; + std::string m_local_group_id; + std::string m_local_group_snap_id; + librbd::mirror::snapshot::ImageState m_image_state; DeepCopyHandler* m_deep_copy_handler = nullptr; @@ -291,9 +301,15 @@ private: void get_local_image_state(); void handle_get_local_image_state(int r); + void create_group_snap_start(); + void handle_create_group_snap_start(int r); + void create_non_primary_snapshot(); void handle_create_non_primary_snapshot(int r); + void create_group_snap_finish(); + void handle_create_group_snap_finish(int r); + void update_mirror_image_state(); void handle_update_mirror_image_state(int r); -- 2.39.5