git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
rbd-mirror: use group_header object for resync flagging
author: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
Tue, 27 Aug 2024 07:03:21 +0000 (12:33 +0530)
committer: Ilya Dryomov <idryomov@gmail.com>
Sun, 28 Sep 2025 18:25:00 +0000 (20:25 +0200)
Also move the resync checks to snapshot GroupReplayer

Signed-off-by: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
src/cls/rbd/cls_rbd.cc
src/cls/rbd/cls_rbd_client.cc
src/cls/rbd/cls_rbd_client.h
src/librbd/api/Mirror.cc
src/tools/rbd_mirror/GroupReplayer.cc
src/tools/rbd_mirror/GroupReplayer.h
src/tools/rbd_mirror/InstanceReplayer.cc
src/tools/rbd_mirror/group_replayer/BootstrapRequest.cc
src/tools/rbd_mirror/group_replayer/BootstrapRequest.h
src/tools/rbd_mirror/group_replayer/Replayer.cc
src/tools/rbd_mirror/group_replayer/Replayer.h

index 5324e4a53b9de5922c9a26b2923c20bff027d6b8..e5fc9ba93c751787f561b41a2bf48c1ef49e3102 100644 (file)
@@ -7752,37 +7752,6 @@ int mirror_group_resync_set(cls_method_context_t hctx, bufferlist *in,
   return 0;
 }
 
-/**
- * Input:
- * @param global_group_id (std::string)
- * @param global_name (std::string)
- *
- * Output:
- * @returns 0 on success, negative error code on failure
- */
-int mirror_group_resync_remove(cls_method_context_t hctx, bufferlist *in,
-                              bufferlist *out) {
-  std::string global_group_id;
-  std::string group_name;
-  try {
-    auto it = in->cbegin();
-    decode(global_group_id, it);
-    decode(group_name, it);
-  } catch (const ceph::buffer::error &err) {
-    return -EINVAL;
-  }
-
-  std::string key = mirror::group_resync_key(global_group_id, group_name);
-  int r = cls_cxx_map_remove_key(hctx, key);
-  if (r < 0) {
-    CLS_ERR("error removing key %s from mirror group resync object: %s",
-            key.c_str(), cpp_strerror(r).c_str());
-    return r;
-  }
-
-  return 0;
-}
-
 /**
  * Input:
  * @param global_id (std::string)
@@ -9702,7 +9671,6 @@ CLS_INIT(rbd)
   cls_method_handle_t h_mirror_group_list;
   cls_method_handle_t h_mirror_group_resync_get;
   cls_method_handle_t h_mirror_group_resync_set;
-  cls_method_handle_t h_mirror_group_resync_remove;
   cls_method_handle_t h_mirror_group_get_group_id;
   cls_method_handle_t h_mirror_group_get;
   cls_method_handle_t h_mirror_group_set;
@@ -10085,10 +10053,6 @@ CLS_INIT(rbd)
   cls_register_cxx_method(h_class, "mirror_group_resync_set",
                           CLS_METHOD_RD | CLS_METHOD_WR,
                           mirror_group_resync_set, &h_mirror_group_resync_set);
-  cls_register_cxx_method(h_class, "mirror_group_resync_remove",
-                          CLS_METHOD_RD | CLS_METHOD_WR,
-                          mirror_group_resync_remove,
-                          &h_mirror_group_resync_remove);
   cls_register_cxx_method(h_class, "mirror_group_get_group_id", CLS_METHOD_RD,
                           mirror_group_get_group_id,
                           &h_mirror_group_get_group_id);
index 1c30b5626def2d7b9c443ef32818aa224f4b86f1..7d142574a1fa73a818bc3ff06e55876b6f6f3755 100644 (file)
@@ -2663,6 +2663,7 @@ int mirror_group_resync_get_finish(bufferlist::const_iterator *it,
 }
 
 int mirror_group_resync_get(librados::IoCtx *ioctx,
+                            const std::string &oid,
                             const std::string &global_group_id,
                             const std::string &group_name,
                             std::string *group_id)
@@ -2671,7 +2672,7 @@ int mirror_group_resync_get(librados::IoCtx *ioctx,
   mirror_group_resync_get_start(&op, global_group_id, group_name);
 
   bufferlist out_bl;
-  int r = ioctx->operate(RBD_GROUP_RESYNC, &op, &out_bl);
+  int r = ioctx->operate(oid, &op, &out_bl);
   if (r < 0) {
     return r;
   }
@@ -2693,36 +2694,14 @@ void mirror_group_resync_set(librados::ObjectWriteOperation *op,
 }
 
 int mirror_group_resync_set(librados::IoCtx *ioctx,
+                             const std::string &oid,
                              const std::string &global_group_id,
                              const std::string &group_name,
                              const std::string &group_id) {
   librados::ObjectWriteOperation op;
   mirror_group_resync_set(&op, global_group_id, group_name, group_id);
 
-  int r = ioctx->operate(RBD_GROUP_RESYNC, &op);
-  if (r < 0) {
-    return r;
-  }
-  return 0;
-}
-
-void mirror_group_resync_remove(librados::ObjectWriteOperation *op,
-                                const std::string &global_group_id,
-                                const std::string &group_name) {
-  bufferlist bl;
-  encode(global_group_id, bl);
-  encode(group_name, bl);
-
-  op->exec("rbd", "mirror_group_resync_remove", bl);
-}
-
-int mirror_group_resync_remove(librados::IoCtx *ioctx,
-                               const std::string &global_group_id,
-                               const std::string &group_name) {
-  librados::ObjectWriteOperation op;
-  mirror_group_resync_remove(&op, global_group_id, group_name);
-
-  int r = ioctx->operate(RBD_GROUP_RESYNC, &op);
+  int r = ioctx->operate(oid, &op);
   if (r < 0) {
     return r;
   }
index 2744e80811594d4e442884e6801cd65fc6c8fc42..ef89a28e006af07d96e6b10fdbdc6eb63e64db32 100644 (file)
@@ -568,6 +568,7 @@ void mirror_group_resync_get_start(librados::ObjectReadOperation *op,
 int mirror_group_resync_get_finish(bufferlist::const_iterator *it,
                                    std::string *group_id);
 int mirror_group_resync_get(librados::IoCtx *ioctx,
+                            const std::string &oid,
                             const std::string &global_group_id,
                             const std::string &group_name,
                             std::string *group_id);
@@ -576,15 +577,10 @@ void mirror_group_resync_set(librados::ObjectWriteOperation *op,
                              const std::string &group_name,
                              const std::string &group_id);
 int mirror_group_resync_set(librados::IoCtx *ioctx,
+                             const std::string &oid,
                              const std::string &global_group_id,
                              const std::string &group_name,
                              const std::string &group_id);
-void mirror_group_resync_remove(librados::ObjectWriteOperation *op,
-                                const std::string &global_group_id,
-                                const std::string &group_name);
-int mirror_group_resync_remove(librados::IoCtx *ioctx,
-                               const std::string &global_group_id,
-                               const std::string &group_name);
 void mirror_group_get_group_id_start(librados::ObjectReadOperation *op,
                                      const std::string &global_group_id);
 int mirror_group_get_group_id_finish(ceph::buffer::list::const_iterator *it,
index e943004077746f7d38a3fc154e1a093fab02467d..f0b37c4910792258f395e4345481074e93155568 100644 (file)
@@ -3397,6 +3397,7 @@ int Mirror<I>::group_resync(IoCtx& group_ioctx, const char *group_name) {
   }
 
   r = cls_client::mirror_group_resync_set(&group_ioctx,
+                                          librbd::util::group_header_name(group_id),
                                           mirror_group.global_group_id,
                                           group_name, group_id);
   if (r < 0) {
index a7fca271d0b9347b027d5cd94b33f64ecc637cbf..f2a32f1e9cf3f1700e32ae293eecf45ead23b767 100644 (file)
@@ -313,10 +313,9 @@ void GroupReplayer<I>::set_state_description(int r, const std::string &desc) {
 }
 
 template <typename I>
-void GroupReplayer<I>::start(Context *on_finish, bool manual,
-                             bool restart, bool resync) {
+void GroupReplayer<I>::start(Context *on_finish, bool manual, bool restart) {
   dout(10) << "on_finish=" << on_finish << ", manual=" << manual
-           << ", restart=" << restart << ", resync=" << resync << dendl;
+           << ", restart=" << restart << dendl;
 
   int r = 0;
   {
@@ -341,9 +340,6 @@ void GroupReplayer<I>::start(Context *on_finish, bool manual,
       m_get_remote_group_snap_ret_vals.clear();
       m_manual_stop = false;
       m_finished = false;
-      if (resync) {
-        m_resync_requested = true;
-      }
       //ceph_assert(m_on_start_finish == nullptr);
       std::swap(m_on_start_finish, on_finish);
     }
@@ -459,28 +455,20 @@ void GroupReplayer<I>::stop(Context *on_finish, bool manual, bool restart) {
 }
 
 template <typename I>
-void GroupReplayer<I>::restart(Context *on_finish, bool resync) {
-  dout(10) << "resync=" << resync << dendl;
+void GroupReplayer<I>::restart(Context *on_finish) {
+  dout(10) << dendl;
   {
     std::lock_guard locker{m_lock};
-    if (m_resync_requested) {
-      dout(10) << "resync is already in progress, cancelling restart" << dendl;
-      on_finish->complete(-ECANCELED);
-      return;
-    }
     m_restart_requested = true;
     m_on_start_finish = nullptr;
-    if (resync) {
-      m_resync_requested = true;
-    }
   }
 
   auto ctx = new LambdaContext(
-    [this, on_finish, resync](int r) {
+    [this, on_finish](int r) {
       if (r < 0) {
        // Try start anyway.
       }
-      start(on_finish, true, true, resync);
+      start(on_finish, true, true);
     });
   stop(ctx, false, true);
 }
@@ -550,7 +538,7 @@ void GroupReplayer<I>::bootstrap_group() {
     m_threads, m_local_io_ctx, m_remote_group_peer.io_ctx, m_global_group_id,
     m_local_mirror_uuid, m_instance_watcher, m_local_status_updater,
     m_remote_group_peer.mirror_status_updater, m_cache_manager_handler,
-    m_pool_meta_cache, m_resync_requested, &m_local_group_id,
+    m_pool_meta_cache, &m_resync_requested, &m_local_group_id,
     &m_remote_group_id, &m_local_group_snaps, &m_local_group_ctx,
     &m_image_replayers, &m_image_replayer_index, ctx);
 
@@ -568,7 +556,6 @@ void GroupReplayer<I>::handle_bootstrap_group(int r) {
   dout(10) << "r=" << r << dendl;
   {
     std::lock_guard locker{m_lock};
-    m_resync_requested = false;
     if (m_state == STATE_STOPPING || m_state == STATE_STOPPED) {
       dout(10) << "stop prevailed" <<dendl;
       return;
@@ -794,7 +781,7 @@ void GroupReplayer<I>::finish_start(int r, const std::string &desc) {
               [this, r, state, desc, on_finish](int) {
                 set_mirror_group_status_update(state, desc);
 
-                if (r == -ENOENT) {
+                if (r == -ENOENT && !m_resync_requested) {
                   set_finished(true);
                 }
                 if (on_finish != nullptr) {
index 76b25c3d456714ccd17d33c4a5a4bf5b6859275a..37558f7b2cea04b0ecf1456f00965ac4ab6f0b21 100644 (file)
@@ -120,10 +120,10 @@ public:
   }
 
   void start(Context *on_finish = nullptr, bool manual = false,
-             bool restart = false, bool resync = false);
+             bool restart = false);
   void stop(Context *on_finish = nullptr, bool manual = false,
             bool restart = false);
-  void restart(Context *on_finish = nullptr, bool resync = false);
+  void restart(Context *on_finish = nullptr);
   void flush();
 
   void print_status(Formatter *f);
index e0821c8d02ae327d0e90d8bb58c90c5b22c9b91f..3696bd92f63f3fa42ca60fc0fdaca447048f89f2 100644 (file)
@@ -655,25 +655,10 @@ void InstanceReplayer<I>::start_group_replayer(
   ceph_assert(ceph_mutex_is_locked(m_lock));
 
   std::string global_group_id = group_replayer->get_global_group_id();
-  std::string group_name = group_replayer->get_name();
-  std::string group_id;
-  bool resync_requested = false;
-  int r = librbd::cls_client::mirror_group_resync_get(&m_local_io_ctx,
-                                                      global_group_id,
-                                                      group_name,
-                                                      &group_id);
-  if (r < 0) {
-    derr << "getting mirror group resync for global_group_id="
-         << global_group_id << " failed: " << cpp_strerror(r) << dendl;
-  } else if (r == 0) {
-    if (group_id == group_replayer->get_local_group_id()) {
-      resync_requested = true;
-    }
-  }
   if (!group_replayer->is_stopped()) {
-    if (group_replayer->needs_restart() || resync_requested) {
-      group_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr),
-                              resync_requested);
+    if (group_replayer->needs_restart()) {
+      stop_group_replayer(group_replayer, new C_TrackedOp(m_async_op_tracker,
+                                                          nullptr));
     } else {
       group_replayer->sync_group_names();
     }
@@ -685,28 +670,17 @@ void InstanceReplayer<I>::start_group_replayer(
     return;
   } else if (group_replayer->is_finished()) {
     // TODO temporary until policy integrated
-    if (resync_requested) {
-      resync_requested = false;
-      r = librbd::cls_client::mirror_group_resync_remove(&m_local_io_ctx,
-                                                         global_group_id,
-                                                         group_name);
-      if (r < 0) {
-        derr << "removing mirror group resync for global_group_id="
-             << global_group_id << " failed: " << cpp_strerror(r) << dendl;
-      }
-    } else {
-      dout(5) << "removing group replayer for global_group_id="
-              << global_group_id << dendl;
-      m_group_replayers.erase(group_replayer->get_global_group_id());
-      group_replayer->destroy();
-      return;
-    }
+    dout(5) << "removing group replayer for global_group_id="
+      << global_group_id << dendl;
+    m_group_replayers.erase(group_replayer->get_global_group_id());
+    group_replayer->destroy();
+    return;
   } else if (m_manual_stop) {
     return;
   }
   dout(10) << "global_group_id=" << global_group_id << dendl;
   group_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr),
-                        false, false, resync_requested);
+                        false, false);
 }
 
 template <typename I>
index 659d2d7a32f1a9f86df1db7d390ba58276e6c706..e710fcf90afe7b0305c533502d718f7c659028ba 100644 (file)
@@ -78,7 +78,7 @@ BootstrapRequest<I>::BootstrapRequest(
     MirrorStatusUpdater<I> *remote_status_updater,
     journal::CacheManagerHandler *cache_manager_handler,
     PoolMetaCache *pool_meta_cache,
-    bool resync_requested,
+    bool *resync_requested,
     std::string *local_group_id,
     std::string *remote_group_id,
     std::map<std::string, cls::rbd::GroupSnapshot> *local_group_snaps,
@@ -112,7 +112,24 @@ BootstrapRequest<I>::BootstrapRequest(
 
 template <typename I>
 void BootstrapRequest<I>::send() {
-  if (m_resync_requested) {
+  *m_resync_requested = false;
+
+  std::string group_id;
+  std::string group_header_oid = librbd::util::group_header_name(
+      *m_local_group_id);
+  int r = librbd::cls_client::mirror_group_resync_get(&m_local_io_ctx,
+                                                      group_header_oid,
+                                                      m_global_group_id,
+                                                      m_local_group_ctx->name,
+                                                      &group_id);
+  if (r < 0) {
+    derr << "getting mirror group resync for global_group_id="
+         << m_global_group_id << " failed: " << cpp_strerror(r) << dendl;
+  } else if (r == 0 && group_id == *m_local_group_id) {
+    *m_resync_requested = true;
+  }
+
+  if (*m_resync_requested) {
     get_local_group_id();
   } else {
     get_remote_group_id();
@@ -730,7 +747,8 @@ void BootstrapRequest<I>::handle_list_local_group_snapshots(int r) {
           state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) {
         // if local snapshot is primary demoted, check if there is demote snapshot
         // in remote, if not then split brain
-        if (!is_demoted_snap_exists(remote_group_snaps) && !m_resync_requested) {
+        if (!is_demoted_snap_exists(remote_group_snaps)
+            && *m_resync_requested == false) {
           finish(-EEXIST);
           return;
         }
@@ -961,7 +979,7 @@ void BootstrapRequest<I>::move_local_image_to_trash() {
       &BootstrapRequest<I>::handle_move_local_image_to_trash>(this);
 
   auto req = image_deleter::TrashMoveRequest<I>::create(
-      m_image_io_ctx, global_image_id, m_resync_requested,
+      m_image_io_ctx, global_image_id, *m_resync_requested,
       m_threads->work_queue, ctx);
   req->send();
 }
index a3c834c926b5e84c6d46615a46956387777c53d2..514ecb38084f4169aaecc612b5605a57b24ee71f 100644 (file)
@@ -45,7 +45,7 @@ public:
       MirrorStatusUpdater<ImageCtxT> *remote_status_updater,
       journal::CacheManagerHandler *cache_manager_handler,
       PoolMetaCache *pool_meta_cache,
-      bool resync_requested,
+      bool *resync_requested,
       std::string *local_group_id,
       std::string *remote_group_id,
       std::map<std::string, cls::rbd::GroupSnapshot> *local_group_snaps,
@@ -72,7 +72,7 @@ public:
       MirrorStatusUpdater<ImageCtxT> *remote_status_updater,
       journal::CacheManagerHandler *cache_manager_handler,
       PoolMetaCache *pool_meta_cache,
-      bool resync_requested,
+      bool *resync_requested,
       std::string *local_group_id,
       std::string *remote_group_id,
       std::map<std::string, cls::rbd::GroupSnapshot> *local_group_snaps,
@@ -169,7 +169,7 @@ private:
   MirrorStatusUpdater<ImageCtxT> *m_remote_status_updater;
   journal::CacheManagerHandler *m_cache_manager_handler;
   PoolMetaCache *m_pool_meta_cache;
-  bool m_resync_requested = false;
+  bool *m_resync_requested;
   std::string *m_local_group_id;
   std::string *m_remote_group_id;
   std::map<std::string, cls::rbd::GroupSnapshot> *m_local_group_snaps;
index adae79034801dc0eb25f1e9ee952f3a1f4a4d646..95684447d0ebeaca6f2ef495320559f148c8380f 100644 (file)
@@ -81,6 +81,16 @@ void Replayer<I>::schedule_load_group_snapshots() {
   m_threads->timer->add_event_after(1, ctx);
 }
 
+template <typename I>
+void Replayer<I>::notify_group_listener_stop() {
+  dout(10) << dendl;
+
+  Context *ctx = new LambdaContext([this](int) {
+      m_local_group_ctx->listener->stop();
+      });
+  m_threads->work_queue->queue(ctx, 0);
+}
+
 template <typename I>
 void Replayer<I>::notify_group_snap_image_complete(
     int64_t local_pool_id,
@@ -149,6 +159,29 @@ int Replayer<I>::local_group_image_list_by_id(
   return 0;
 }
 
+
+template <typename I>
+bool Replayer<I>::is_resync_requested() {
+  dout(10) << "m_local_group_id=" << m_local_group_id << dendl;
+
+  std::string group_id;
+  std::string group_header_oid = librbd::util::group_header_name(
+      m_local_group_id);
+  int r = librbd::cls_client::mirror_group_resync_get(&m_local_io_ctx,
+                                                      group_header_oid,
+                                                      m_global_group_id,
+                                                      m_local_group_ctx->name,
+                                                      &group_id);
+  if (r < 0) {
+    derr << "getting mirror group resync for global_group_id="
+         << m_global_group_id << " failed: " << cpp_strerror(r) << dendl;
+  } else if (r == 0 && group_id == m_local_group_id) {
+    return true;
+  }
+
+  return false;
+}
+
 template <typename I>
 void Replayer<I>::init(Context* on_finish) {
   dout(10) << m_global_group_id << dendl;
@@ -180,6 +213,17 @@ void Replayer<I>::load_local_group_snapshots() {
     m_state = STATE_REPLAYING;
   }
 
+  if (m_resync_requested) {
+    return;
+  } else if (is_resync_requested()) {
+    m_resync_requested = true; // do nothing from here, anything is simply
+                               // of no use as the group is going to get
+                               // deleted soon.
+    dout(10) << "local group resync requested" << dendl;
+    // send stop for Group Replayer
+    notify_group_listener_stop();
+  }
+
   m_local_group_snaps.clear();
   auto ctx = create_context_callback<
       Replayer<I>,
@@ -407,16 +451,14 @@ out:
     return;
   }
 
-  dout(10) << "all remote snapshots synced, idling waiting for new snapshot"
-           << dendl;
-
+  // At this point all group snapshots have been synced, but we keep polling
   ceph_assert(m_state == STATE_REPLAYING);
-  m_state = STATE_IDLE;
+  locker.unlock();
   if (m_remote_demoted) {
     // stop group replayer
-    m_local_group_ctx->listener->stop();
+    notify_group_listener_stop();
   }
-  locker.unlock();
+  schedule_load_group_snapshots();
 }
 
 template <typename I>
@@ -780,6 +822,10 @@ void Replayer<I>::mirror_regular_snapshot(
       remote_group_snap_name,
       cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE};
 
+  // needed for generating the order key, the group_snap_set generates one
+  // only when the state is INCOMPLETE
+  librbd::cls_client::group_snap_set(&op, group_snap);
+
   auto itr = std::find_if(
       m_remote_group_snaps.begin(), m_remote_group_snaps.end(),
       [remote_group_snap_id](const cls::rbd::GroupSnapshot &s) {
index d53e9400adcb05a5918bbb66ba3d170f496f4f19..2af6a43e9dc048f7b699ab9b5c682813421c0fe6 100644 (file)
@@ -106,6 +106,7 @@ private:
   std::vector<cls::rbd::GroupSnapshot> m_remote_group_snaps;
 
   bool m_remote_demoted = false;
+  bool m_resync_requested = false;
 
   // map of <group_snap_id, pair<GroupSnapshot, on_finish>>
   std::map<std::string, std::pair<cls::rbd::GroupSnapshot, Context *>> m_create_snap_requests;
@@ -117,6 +118,8 @@ private:
       std::vector<cls::rbd::GroupImageStatus> *image_ids);
 
   void schedule_load_group_snapshots();
+  void notify_group_listener_stop();
+  bool is_resync_requested();
 
   void load_local_group_snapshots();
   void handle_load_local_group_snapshots(int r);