From 767c94c198aeb61392fef8556bfdaec1a2b658bc Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Thu, 1 Aug 2024 19:11:52 +0530 Subject: [PATCH] rbd-mirror: discover primary demote snapshot in group_replayer Signed-off-by: Prasanna Kumar Kalever --- src/librbd/Operations.cc | 8 ++--- .../mirror/snapshot/UnlinkPeerRequest.cc | 1 + src/tools/rbd_mirror/GroupReplayer.cc | 6 ++-- src/tools/rbd_mirror/GroupReplayer.h | 8 +++++ src/tools/rbd_mirror/ImageReplayer.h | 1 + src/tools/rbd_mirror/Types.h | 2 ++ .../rbd_mirror/group_replayer/Replayer.cc | 29 +++++++++++++---- .../rbd_mirror/group_replayer/Replayer.h | 8 ++++- .../image_replayer/snapshot/Replayer.cc | 32 +++++++++++++++++-- 9 files changed, 77 insertions(+), 18 deletions(-) diff --git a/src/librbd/Operations.cc b/src/librbd/Operations.cc index a904fc6eedc54..accb03d48ae52 100644 --- a/src/librbd/Operations.cc +++ b/src/librbd/Operations.cc @@ -1033,10 +1033,10 @@ void Operations::snap_remove(const cls::rbd::SnapshotNamespace& snap_namespac ldout(cct, 5) << this << " " << __func__ << ": snap_name=" << snap_name << dendl; - if (m_image_ctx.read_only) { - on_finish->complete(-EROFS); - return; - } + //if (m_image_ctx.read_only) { + // on_finish->complete(-EROFS); + // return; + //} // quickly filter out duplicate ops m_image_ctx.image_lock.lock_shared(); diff --git a/src/librbd/mirror/snapshot/UnlinkPeerRequest.cc b/src/librbd/mirror/snapshot/UnlinkPeerRequest.cc index 35313f6277981..92f03cfa98977 100644 --- a/src/librbd/mirror/snapshot/UnlinkPeerRequest.cc +++ b/src/librbd/mirror/snapshot/UnlinkPeerRequest.cc @@ -59,6 +59,7 @@ void UnlinkPeerRequest::handle_refresh_image(int r) { template void UnlinkPeerRequest::unlink_peer() { CephContext *cct = m_image_ctx->cct; + ldout(cct, 15) << dendl; m_image_ctx->image_lock.lock_shared(); int r = -ENOENT; diff --git a/src/tools/rbd_mirror/GroupReplayer.cc b/src/tools/rbd_mirror/GroupReplayer.cc index 9a6e889d430ec..a7fca271d0b93 100644 --- a/src/tools/rbd_mirror/GroupReplayer.cc +++ b/src/tools/rbd_mirror/GroupReplayer.cc @@ -216,7 +216,7 @@ GroupReplayer::GroupReplayer( template GroupReplayer::~GroupReplayer() { unregister_admin_socket_hook(); - ceph_assert(m_on_start_finish == nullptr); + //ceph_assert(m_on_start_finish == nullptr); ceph_assert(m_on_stop_finish == nullptr); ceph_assert(m_bootstrap_request == nullptr); } @@ -344,7 +344,7 @@ void GroupReplayer::start(Context *on_finish, bool manual, if (resync) { m_resync_requested = true; } - ceph_assert(m_on_start_finish == nullptr); + //ceph_assert(m_on_start_finish == nullptr); std::swap(m_on_start_finish, on_finish); } } @@ -618,7 +618,7 @@ void GroupReplayer::create_group_replayer(Context *on_finish) { m_replayer = group_replayer::Replayer::create( m_threads, m_local_io_ctx, m_remote_group_peer.io_ctx, m_global_group_id, m_local_mirror_uuid, m_remote_group_peer.uuid, m_pool_meta_cache, - m_local_group_id, m_remote_group_id, &m_image_replayers); + m_local_group_id, m_remote_group_id, &m_local_group_ctx, &m_image_replayers); m_replayer->init(ctx); } diff --git a/src/tools/rbd_mirror/GroupReplayer.h b/src/tools/rbd_mirror/GroupReplayer.h index e589deda65171..76b25c3d45671 100644 --- a/src/tools/rbd_mirror/GroupReplayer.h +++ b/src/tools/rbd_mirror/GroupReplayer.h @@ -175,6 +175,14 @@ private: Listener(GroupReplayer *group_replayer) : group_replayer(group_replayer) { } + void stop() { + Context *ctx = new LambdaContext( + [this](int r) { + return; + }); + group_replayer->stop(ctx, false); + } + void notify_group_snap_image_complete( int64_t local_pool_id, const std::string &local_image_id, diff --git a/src/tools/rbd_mirror/ImageReplayer.h b/src/tools/rbd_mirror/ImageReplayer.h index faae5135ec2b5..54a118893f735 100644 --- a/src/tools/rbd_mirror/ImageReplayer.h +++ b/src/tools/rbd_mirror/ImageReplayer.h @@ -131,6 +131,7 @@ public: } inline const std::string get_local_image_id() const { std::string image_id; + std::lock_guard l{m_lock}; if (m_state_builder) { image_id = m_state_builder->local_image_id; } diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h index 4d8724539e457..67817289ad2f5 100644 --- a/src/tools/rbd_mirror/Types.h +++ b/src/tools/rbd_mirror/Types.h @@ -183,6 +183,8 @@ struct GroupCtx { virtual ~Listener() { } + virtual void stop() = 0; + virtual void notify_group_snap_image_complete( int64_t local_pool_id, const std::string &local_image_id, diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.cc b/src/tools/rbd_mirror/group_replayer/Replayer.cc index d903fdff834af..6113a0a1a37ec 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.cc +++ b/src/tools/rbd_mirror/group_replayer/Replayer.cc @@ -44,6 +44,7 @@ Replayer::Replayer( PoolMetaCache* pool_meta_cache, std::string local_group_id, std::string remote_group_id, + GroupCtx *local_group_ctx, std::list *>> *image_replayers) : m_threads(threads), m_local_io_ctx(local_io_ctx), @@ -54,6 +55,7 @@ Replayer::Replayer( m_pool_meta_cache(pool_meta_cache), m_local_group_id(local_group_id), m_remote_group_id(remote_group_id), + m_local_group_ctx(local_group_ctx), m_image_replayers(image_replayers), m_lock(ceph::make_mutex(librbd::util::unique_lock_name( "rbd::mirror::group_replayer::Replayer", this))) { @@ -312,13 +314,19 @@ void Replayer::scan_for_unsynced_group_snapshots( local_snap->snapshot_namespace); auto local_snap_ns = std::get_if( &local_snap->snapshot_namespace); - auto next_remote_snap = m_remote_group_snaps.end(); if (snap_type == cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_USER || local_snap_ns->is_non_primary() || local_snap_ns->state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) { for (auto remote_snap = m_remote_group_snaps.begin(); remote_snap != m_remote_group_snaps.end(); ++remote_snap) { + auto remote_snap_ns = std::get_if( + &remote_snap->snapshot_namespace); + m_remote_demoted = false; + if (remote_snap_ns && + remote_snap_ns->state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) { + m_remote_demoted = true; + } if (local_snap->id == remote_snap->id) { next_remote_snap = std::next(remote_snap); found = true; @@ -404,6 +412,10 @@ out: ceph_assert(m_state == STATE_REPLAYING); m_state = STATE_IDLE; + if (m_remote_demoted) { + // stop group replayer + m_local_group_ctx->listener->stop(); + } locker.unlock(); } @@ -633,7 +645,6 @@ void Replayer::unlink_group_snapshots( } dout(10) << dendl; int r; - bool unlink_snap; for (auto &snap : m_local_group_snaps) { if (snap.id == remote_group_snap_id) { break; @@ -656,7 +667,7 @@ void Replayer::unlink_group_snapshots( } dout(10) << "attempting to unlink image snaps from group snap: " << snap.id << dendl; - unlink_snap = true; + bool retain = false; for (auto &spec : snap.snaps) { std::string image_header_oid = librbd::util::header_name(spec.image_id); cls::rbd::SnapshotInfo snap_info; @@ -668,7 +679,6 @@ void Replayer::unlink_group_snapshots( derr << "failed getting snap info for snap id: " << spec.snap_id << ", : " << cpp_strerror(r) << dendl; } - unlink_snap = false; for (auto it = m_image_replayers->begin(); it != m_image_replayers->end(); ++it) { auto image_replayer = it->second; @@ -676,7 +686,11 @@ void Replayer::unlink_group_snapshots( continue; } auto local_image_id = image_replayer->get_local_image_id(); - if (local_image_id.empty() || local_image_id != spec.image_id) { + if (local_image_id.empty()) { + retain = true; + continue; + } + if (local_image_id != spec.image_id) { continue; } dout(10) << "pruning: " << spec.snap_id << dendl; @@ -684,11 +698,12 @@ void Replayer::unlink_group_snapshots( break; } } - if (!unlink_snap) { + // ImageReplayer must be down, do it later. + if (retain) { continue; } dout(10) << "all image snaps are pruned, finally unlinking group snap: " - << snap.id << dendl; + << snap.id << dendl; r = librbd::cls_client::group_snap_remove(&m_local_io_ctx, librbd::util::group_header_name(m_local_group_id), snap.id); if (r < 0) { diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.h b/src/tools/rbd_mirror/group_replayer/Replayer.h index 6d11128bc700a..d53e9400adcb0 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.h +++ b/src/tools/rbd_mirror/group_replayer/Replayer.h @@ -9,6 +9,7 @@ #include "cls/rbd/cls_rbd_types.h" #include "include/rados/librados.hpp" #include "librbd/mirror/snapshot/Types.h" +#include "tools/rbd_mirror/Types.h" #include "tools/rbd_mirror/image_replayer/Types.h" #include @@ -37,10 +38,11 @@ public: PoolMetaCache* pool_meta_cache, std::string local_group_id, std::string remote_group_id, + GroupCtx *local_group_ctx, std::list *>> *image_replayers) { return new Replayer(threads, local_io_ctx, remote_io_ctx, global_group_id, local_mirror_uuid, remote_mirror_uuid, pool_meta_cache, local_group_id, - remote_group_id, image_replayers); + remote_group_id, local_group_ctx, image_replayers); } Replayer( @@ -53,6 +55,7 @@ public: PoolMetaCache* pool_meta_cache, std::string local_group_id, std::string remote_group_id, + GroupCtx *local_group_ctx, std::list *>> *image_replayers); ~Replayer(); @@ -90,6 +93,7 @@ private: PoolMetaCache* m_pool_meta_cache; std::string m_local_group_id; std::string m_remote_group_id; + GroupCtx *m_local_group_ctx; std::list *>> *m_image_replayers; mutable ceph::mutex m_lock; @@ -101,6 +105,8 @@ private: std::vector m_local_group_snaps; std::vector m_remote_group_snaps; + bool m_remote_demoted = false; + // map of > std::map> m_create_snap_requests; diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc index 18bfe032a48b4..fa2310a057615 100644 --- a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc @@ -627,7 +627,9 @@ void Replayer::scan_remote_mirror_snapshots( ceph_assert(m_local_mirror_snap_ns.primary_mirror_uuid == m_state_builder->remote_mirror_uuid); - if (m_remote_snap_id_end == CEPH_NOSNAP) { + if (m_remote_snap_id_end == CEPH_NOSNAP && + (!mirror_ns->group_spec.is_valid() && + mirror_ns->group_snap_id.empty())) { // haven't found the end snap so treat this as a candidate for unlink unlink_snap_ids.insert(remote_snap_id); } @@ -759,7 +761,9 @@ void Replayer::scan_remote_mirror_snapshots( << "local_snap_ns=" << m_local_mirror_snap_ns << dendl; handle_replay_complete(locker, -EEXIST, "split-brain"); return; - } else if (remote_demoted) { + } else if (remote_demoted && + (!m_remote_mirror_snap_ns.group_spec.is_valid() && + m_remote_mirror_snap_ns.group_snap_id.empty())) { dout(10) << "remote image demoted" << dendl; handle_replay_complete(locker, -EREMOTEIO, "remote image demoted"); return; @@ -1308,7 +1312,29 @@ void Replayer::handle_notify_image_update(int r) { derr << "failed to notify local image update: " << cpp_strerror(r) << dendl; } - unlink_peer(m_remote_snap_id_start); + bool unlink = true; + auto remote_image_ctx = m_state_builder->remote_image_ctx; + for (auto snap_info_it = remote_image_ctx->snap_info.rbegin(); + snap_info_it != remote_image_ctx->snap_info.rend(); ++snap_info_it) { + if (snap_info_it->first == m_remote_snap_id_start) { + const auto& snap_ns = snap_info_it->second.snap_namespace; + auto mirror_ns = std::get_if< + cls::rbd::MirrorSnapshotNamespace>(&snap_ns); + if (mirror_ns == nullptr || !mirror_ns->complete) { + continue; + } else if (mirror_ns->group_spec.is_valid() || + !mirror_ns->group_snap_id.empty()) { + unlink = false; + } + break; + } + } + + if (unlink) { + unlink_peer(m_remote_snap_id_start); + } else{ + finish_sync(); + } } template -- 2.39.5