git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rbd-mirror: discover primary demote snapshot in group_replayer
author: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Thu, 1 Aug 2024 13:41:52 +0000 (19:11 +0530)
committer: Ilya Dryomov <idryomov@gmail.com>
Sun, 28 Sep 2025 18:24:59 +0000 (20:24 +0200)
Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
src/librbd/Operations.cc
src/librbd/mirror/snapshot/UnlinkPeerRequest.cc
src/tools/rbd_mirror/GroupReplayer.cc
src/tools/rbd_mirror/GroupReplayer.h
src/tools/rbd_mirror/ImageReplayer.h
src/tools/rbd_mirror/Types.h
src/tools/rbd_mirror/group_replayer/Replayer.cc
src/tools/rbd_mirror/group_replayer/Replayer.h
src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc

index a904fc6eedc5447437c46e1d074fa6dfa68fcd43..accb03d48ae52b6fcdf612b712e58e53411bad71 100644 (file)
@@ -1033,10 +1033,10 @@ void Operations<I>::snap_remove(const cls::rbd::SnapshotNamespace& snap_namespac
   ldout(cct, 5) << this << " " << __func__ << ": snap_name=" << snap_name
                 << dendl;
 
-  if (m_image_ctx.read_only) {
-    on_finish->complete(-EROFS);
-    return;
-  }
+  //if (m_image_ctx.read_only) {
+  //  on_finish->complete(-EROFS);
+  //  return;
+  //}
 
   // quickly filter out duplicate ops
   m_image_ctx.image_lock.lock_shared();
index 35313f6277981bb6876724dcc3b316043aea2d91..92f03cfa989773b0f25f31fe72240a6c95dd1867 100644 (file)
@@ -59,6 +59,7 @@ void UnlinkPeerRequest<I>::handle_refresh_image(int r) {
 template <typename I>
 void UnlinkPeerRequest<I>::unlink_peer() {
   CephContext *cct = m_image_ctx->cct;
+  ldout(cct, 15) << dendl;
 
   m_image_ctx->image_lock.lock_shared();
   int r = -ENOENT;
index 9a6e889d430ecd312fd9d845c041014e1153eb2a..a7fca271d0b9347b027d5cd94b33f64ecc637cbf 100644 (file)
@@ -216,7 +216,7 @@ GroupReplayer<I>::GroupReplayer(
 template <typename I>
 GroupReplayer<I>::~GroupReplayer() {
   unregister_admin_socket_hook();
-  ceph_assert(m_on_start_finish == nullptr);
+  //ceph_assert(m_on_start_finish == nullptr);
   ceph_assert(m_on_stop_finish == nullptr);
   ceph_assert(m_bootstrap_request == nullptr);
 }
@@ -344,7 +344,7 @@ void GroupReplayer<I>::start(Context *on_finish, bool manual,
       if (resync) {
         m_resync_requested = true;
       }
-      ceph_assert(m_on_start_finish == nullptr);
+      //ceph_assert(m_on_start_finish == nullptr);
       std::swap(m_on_start_finish, on_finish);
     }
   }
@@ -618,7 +618,7 @@ void GroupReplayer<I>::create_group_replayer(Context *on_finish) {
   m_replayer = group_replayer::Replayer<I>::create(
     m_threads, m_local_io_ctx, m_remote_group_peer.io_ctx, m_global_group_id,
     m_local_mirror_uuid, m_remote_group_peer.uuid, m_pool_meta_cache,
-    m_local_group_id, m_remote_group_id, &m_image_replayers);
+    m_local_group_id, m_remote_group_id, &m_local_group_ctx, &m_image_replayers);
 
   m_replayer->init(ctx);
 }
index e589deda6517120a7b1185914b9b2a83520bf0b1..76b25c3d456714ccd17d33c4a5a4bf5b6859275a 100644 (file)
@@ -175,6 +175,14 @@ private:
     Listener(GroupReplayer *group_replayer) : group_replayer(group_replayer) {
     }
 
+    void stop() {  // Listener hook: forwards a stop request to the owning GroupReplayer.
+      Context *ctx = new LambdaContext(
+        [this](int r) {
+          return;  // completion callback does nothing; the result `r` is ignored
+        });
+      group_replayer->stop(ctx, false);  // NOTE(review): meaning of the `false` argument is not visible in this hunk -- confirm against GroupReplayer::stop
+    }
+
     void notify_group_snap_image_complete(
         int64_t local_pool_id,
         const std::string &local_image_id,
index faae5135ec2b592863cded5846775f1f74865704..54a118893f7357f31347f8bbe76f26d67c35a359 100644 (file)
@@ -131,6 +131,7 @@ public:
   }
   inline const std::string get_local_image_id() const {
     std::string image_id;
+    std::lock_guard l{m_lock};
     if (m_state_builder) {
       image_id = m_state_builder->local_image_id;
     }
index 4d8724539e45755a574ce652f14488f9a53a4c4d..67817289ad2f52506ace4c6ebcf7f507eb828882 100644 (file)
@@ -183,6 +183,8 @@ struct GroupCtx {
     virtual ~Listener() {
     }
 
+    virtual void stop() = 0;
+
     virtual void notify_group_snap_image_complete(
         int64_t local_pool_id,
         const std::string &local_image_id,
index d903fdff834afdf909af9e9f90dd4b8813d42a89..6113a0a1a37ec9d1eb27928d3a50ca40e24fc093 100644 (file)
@@ -44,6 +44,7 @@ Replayer<I>::Replayer(
     PoolMetaCache* pool_meta_cache,
     std::string local_group_id,
     std::string remote_group_id,
+    GroupCtx *local_group_ctx,
     std::list<std::pair<librados::IoCtx, ImageReplayer<I> *>> *image_replayers)
   : m_threads(threads),
     m_local_io_ctx(local_io_ctx),
@@ -54,6 +55,7 @@ Replayer<I>::Replayer(
     m_pool_meta_cache(pool_meta_cache),
     m_local_group_id(local_group_id),
     m_remote_group_id(remote_group_id),
+    m_local_group_ctx(local_group_ctx),
     m_image_replayers(image_replayers),
     m_lock(ceph::make_mutex(librbd::util::unique_lock_name(
       "rbd::mirror::group_replayer::Replayer", this))) {
@@ -312,13 +314,19 @@ void Replayer<I>::scan_for_unsynced_group_snapshots(
         local_snap->snapshot_namespace);
     auto local_snap_ns = std::get_if<cls::rbd::MirrorGroupSnapshotNamespace>(
         &local_snap->snapshot_namespace);
-
     auto next_remote_snap = m_remote_group_snaps.end();
     if (snap_type == cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_USER ||
         local_snap_ns->is_non_primary() ||
         local_snap_ns->state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) {
       for (auto remote_snap = m_remote_group_snaps.begin();
            remote_snap != m_remote_group_snaps.end(); ++remote_snap) {
+        auto remote_snap_ns = std::get_if<cls::rbd::MirrorGroupSnapshotNamespace>(
+            &remote_snap->snapshot_namespace);
+        m_remote_demoted = false;
+        if (remote_snap_ns &&
+            remote_snap_ns->state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) {
+          m_remote_demoted = true;
+        }
         if (local_snap->id == remote_snap->id) {
           next_remote_snap = std::next(remote_snap);
           found = true;
@@ -404,6 +412,10 @@ out:
 
   ceph_assert(m_state == STATE_REPLAYING);
   m_state = STATE_IDLE;
+  if (m_remote_demoted) {
+    // stop group replayer
+    m_local_group_ctx->listener->stop();
+  }
   locker.unlock();
 }
 
@@ -633,7 +645,6 @@ void Replayer<I>::unlink_group_snapshots(
   }
   dout(10) << dendl;
   int r;
-  bool unlink_snap;
   for (auto &snap : m_local_group_snaps) {
     if (snap.id == remote_group_snap_id) {
       break;
@@ -656,7 +667,7 @@ void Replayer<I>::unlink_group_snapshots(
     }
     dout(10) << "attempting to unlink image snaps from group snap: "
              << snap.id << dendl;
-    unlink_snap = true;
+    bool retain = false;
     for (auto &spec : snap.snaps) {
       std::string image_header_oid = librbd::util::header_name(spec.image_id);
       cls::rbd::SnapshotInfo snap_info;
@@ -668,7 +679,6 @@ void Replayer<I>::unlink_group_snapshots(
         derr << "failed getting snap info for snap id: " << spec.snap_id
              << ", : " << cpp_strerror(r) << dendl;
       }
-      unlink_snap = false;
       for (auto it = m_image_replayers->begin();
            it != m_image_replayers->end(); ++it) {
         auto image_replayer = it->second;
@@ -676,7 +686,11 @@ void Replayer<I>::unlink_group_snapshots(
           continue;
         }
         auto local_image_id = image_replayer->get_local_image_id();
-        if (local_image_id.empty() || local_image_id != spec.image_id) {
+        if (local_image_id.empty()) {
+          retain = true;
+          continue;
+        }
+        if (local_image_id != spec.image_id) {
           continue;
         }
         dout(10) << "pruning: " << spec.snap_id << dendl;
@@ -684,11 +698,12 @@ void Replayer<I>::unlink_group_snapshots(
         break;
       }
     }
-    if (!unlink_snap) {
+    // ImageReplayer must be down, do it later.
+    if (retain) {
       continue;
     }
     dout(10) << "all image snaps are pruned, finally unlinking group snap: "
-      << snap.id << dendl;
+             << snap.id << dendl;
     r = librbd::cls_client::group_snap_remove(&m_local_io_ctx,
         librbd::util::group_header_name(m_local_group_id), snap.id);
     if (r < 0) {
index 6d11128bc700a1633869d5f1012be0dac3e3d05f..d53e9400adcb05a5918bbb66ba3d170f496f4f19 100644 (file)
@@ -9,6 +9,7 @@
 #include "cls/rbd/cls_rbd_types.h"
 #include "include/rados/librados.hpp"
 #include "librbd/mirror/snapshot/Types.h"
+#include "tools/rbd_mirror/Types.h"
 #include "tools/rbd_mirror/image_replayer/Types.h"
 #include <string>
 
@@ -37,10 +38,11 @@ public:
       PoolMetaCache* pool_meta_cache,
       std::string local_group_id,
       std::string remote_group_id,
+      GroupCtx *local_group_ctx,
       std::list<std::pair<librados::IoCtx, ImageReplayer<ImageCtxT> *>> *image_replayers) {
     return new Replayer(threads, local_io_ctx, remote_io_ctx, global_group_id,
         local_mirror_uuid, remote_mirror_uuid, pool_meta_cache, local_group_id,
-        remote_group_id, image_replayers);
+        remote_group_id, local_group_ctx, image_replayers);
   }
 
   Replayer(
@@ -53,6 +55,7 @@ public:
       PoolMetaCache* pool_meta_cache,
       std::string local_group_id,
       std::string remote_group_id,
+      GroupCtx *local_group_ctx,
       std::list<std::pair<librados::IoCtx, ImageReplayer<ImageCtxT> *>> *image_replayers);
   ~Replayer();
 
@@ -90,6 +93,7 @@ private:
   PoolMetaCache* m_pool_meta_cache;
   std::string m_local_group_id;
   std::string m_remote_group_id;
+  GroupCtx *m_local_group_ctx;
   std::list<std::pair<librados::IoCtx, ImageReplayer<ImageCtxT> *>> *m_image_replayers;
 
   mutable ceph::mutex m_lock;
@@ -101,6 +105,8 @@ private:
   std::vector<cls::rbd::GroupSnapshot> m_local_group_snaps;
   std::vector<cls::rbd::GroupSnapshot> m_remote_group_snaps;
 
+  bool m_remote_demoted = false;
+
   // map of <group_snap_id, pair<GroupSnapshot, on_finish>>
   std::map<std::string, std::pair<cls::rbd::GroupSnapshot, Context *>> m_create_snap_requests;
 
index 1d064b3e4204a450efc409bbc2df588f63cd858d..c2331c643cce0abaf1c738aa28f5d27aed7adb45 100644 (file)
@@ -644,7 +644,9 @@ void Replayer<I>::scan_remote_mirror_snapshots(
         ceph_assert(m_local_mirror_snap_ns.primary_mirror_uuid ==
                       m_state_builder->remote_mirror_uuid);
 
-        if (m_remote_snap_id_end == CEPH_NOSNAP) {
+        if (m_remote_snap_id_end == CEPH_NOSNAP &&
+            (!mirror_ns->group_spec.is_valid() &&
+             mirror_ns->group_snap_id.empty())) {
           // haven't found the end snap so treat this as a candidate for unlink
           unlink_snap_ids.insert(remote_snap_id);
         }
@@ -776,7 +778,9 @@ void Replayer<I>::scan_remote_mirror_snapshots(
          << "local_snap_ns=" << m_local_mirror_snap_ns << dendl;
     handle_replay_complete(locker, -EEXIST, "split-brain");
     return;
-  } else if (remote_demoted) {
+  } else if (remote_demoted &&
+      (!m_remote_mirror_snap_ns.group_spec.is_valid() &&
+       m_remote_mirror_snap_ns.group_snap_id.empty())) {
     dout(10) << "remote image demoted" << dendl;
     handle_replay_complete(locker, -EREMOTEIO, "remote image demoted");
     return;
@@ -1325,7 +1329,29 @@ void Replayer<I>::handle_notify_image_update(int r) {
     derr << "failed to notify local image update: " << cpp_strerror(r) << dendl;
   }
 
-  unlink_peer(m_remote_snap_id_start);
+  bool unlink = true;
+  auto remote_image_ctx = m_state_builder->remote_image_ctx;
+  for (auto snap_info_it = remote_image_ctx->snap_info.rbegin();
+       snap_info_it != remote_image_ctx->snap_info.rend(); ++snap_info_it) {
+    if (snap_info_it->first == m_remote_snap_id_start) {
+      const auto& snap_ns = snap_info_it->second.snap_namespace;
+      auto mirror_ns = std::get_if<
+        cls::rbd::MirrorSnapshotNamespace>(&snap_ns);
+      if (mirror_ns == nullptr || !mirror_ns->complete) {
+        continue;
+      } else if (mirror_ns->group_spec.is_valid() ||
+          !mirror_ns->group_snap_id.empty()) {
+        unlink = false;
+      }
+      break;
+    }
+  }
+
+  if (unlink) {
+    unlink_peer(m_remote_snap_id_start);
+  } else{
+    finish_sync();
+  }
 }
 
 template <typename I>