From: Prasanna Kumar Kalever Date: Tue, 27 Aug 2024 07:03:21 +0000 (+0530) Subject: rbd-mirror: use group_header object for resync flagging X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=6f96a92a3d82e808d57df7aa925fad16f1e64c34;p=ceph-ci.git rbd-mirror: use group_header object for resync flagging Also move the resync checks to snapshot GroupReplayer Signed-off-by: Prasanna Kumar Kalever --- diff --git a/src/cls/rbd/cls_rbd.cc b/src/cls/rbd/cls_rbd.cc index 5324e4a53b9..e5fc9ba93c7 100644 --- a/src/cls/rbd/cls_rbd.cc +++ b/src/cls/rbd/cls_rbd.cc @@ -7752,37 +7752,6 @@ int mirror_group_resync_set(cls_method_context_t hctx, bufferlist *in, return 0; } -/** - * Input: - * @param global_group_id (std::string) - * @param global_name (std::string) - * - * Output: - * @returns 0 on success, negative error code on failure - */ -int mirror_group_resync_remove(cls_method_context_t hctx, bufferlist *in, - bufferlist *out) { - std::string global_group_id; - std::string group_name; - try { - auto it = in->cbegin(); - decode(global_group_id, it); - decode(group_name, it); - } catch (const ceph::buffer::error &err) { - return -EINVAL; - } - - std::string key = mirror::group_resync_key(global_group_id, group_name); - int r = cls_cxx_map_remove_key(hctx, key); - if (r < 0) { - CLS_ERR("error removing key %s from mirror group resync object: %s", - key.c_str(), cpp_strerror(r).c_str()); - return r; - } - - return 0; -} - /** * Input: * @param global_id (std::string) @@ -9702,7 +9671,6 @@ CLS_INIT(rbd) cls_method_handle_t h_mirror_group_list; cls_method_handle_t h_mirror_group_resync_get; cls_method_handle_t h_mirror_group_resync_set; - cls_method_handle_t h_mirror_group_resync_remove; cls_method_handle_t h_mirror_group_get_group_id; cls_method_handle_t h_mirror_group_get; cls_method_handle_t h_mirror_group_set; @@ -10085,10 +10053,6 @@ CLS_INIT(rbd) cls_register_cxx_method(h_class, "mirror_group_resync_set", CLS_METHOD_RD | CLS_METHOD_WR, mirror_group_resync_set, 
&h_mirror_group_resync_set); - cls_register_cxx_method(h_class, "mirror_group_resync_remove", - CLS_METHOD_RD | CLS_METHOD_WR, - mirror_group_resync_remove, - &h_mirror_group_resync_remove); cls_register_cxx_method(h_class, "mirror_group_get_group_id", CLS_METHOD_RD, mirror_group_get_group_id, &h_mirror_group_get_group_id); diff --git a/src/cls/rbd/cls_rbd_client.cc b/src/cls/rbd/cls_rbd_client.cc index 1c30b5626de..7d142574a1f 100644 --- a/src/cls/rbd/cls_rbd_client.cc +++ b/src/cls/rbd/cls_rbd_client.cc @@ -2663,6 +2663,7 @@ int mirror_group_resync_get_finish(bufferlist::const_iterator *it, } int mirror_group_resync_get(librados::IoCtx *ioctx, + const std::string &oid, const std::string &global_group_id, const std::string &group_name, std::string *group_id) @@ -2671,7 +2672,7 @@ int mirror_group_resync_get(librados::IoCtx *ioctx, mirror_group_resync_get_start(&op, global_group_id, group_name); bufferlist out_bl; - int r = ioctx->operate(RBD_GROUP_RESYNC, &op, &out_bl); + int r = ioctx->operate(oid, &op, &out_bl); if (r < 0) { return r; } @@ -2693,36 +2694,14 @@ void mirror_group_resync_set(librados::ObjectWriteOperation *op, } int mirror_group_resync_set(librados::IoCtx *ioctx, + const std::string &oid, const std::string &global_group_id, const std::string &group_name, const std::string &group_id) { librados::ObjectWriteOperation op; mirror_group_resync_set(&op, global_group_id, group_name, group_id); - int r = ioctx->operate(RBD_GROUP_RESYNC, &op); - if (r < 0) { - return r; - } - return 0; -} - -void mirror_group_resync_remove(librados::ObjectWriteOperation *op, - const std::string &global_group_id, - const std::string &group_name) { - bufferlist bl; - encode(global_group_id, bl); - encode(group_name, bl); - - op->exec("rbd", "mirror_group_resync_remove", bl); -} - -int mirror_group_resync_remove(librados::IoCtx *ioctx, - const std::string &global_group_id, - const std::string &group_name) { - librados::ObjectWriteOperation op; - mirror_group_resync_remove(&op, 
global_group_id, group_name); - - int r = ioctx->operate(RBD_GROUP_RESYNC, &op); + int r = ioctx->operate(oid, &op); if (r < 0) { return r; } diff --git a/src/cls/rbd/cls_rbd_client.h b/src/cls/rbd/cls_rbd_client.h index 2744e808115..ef89a28e006 100644 --- a/src/cls/rbd/cls_rbd_client.h +++ b/src/cls/rbd/cls_rbd_client.h @@ -568,6 +568,7 @@ void mirror_group_resync_get_start(librados::ObjectReadOperation *op, int mirror_group_resync_get_finish(bufferlist::const_iterator *it, std::string *group_id); int mirror_group_resync_get(librados::IoCtx *ioctx, + const std::string &oid, const std::string &global_group_id, const std::string &group_name, std::string *group_id); @@ -576,15 +577,10 @@ void mirror_group_resync_set(librados::ObjectWriteOperation *op, const std::string &group_name, const std::string &group_id); int mirror_group_resync_set(librados::IoCtx *ioctx, + const std::string &oid, const std::string &global_group_id, const std::string &group_name, const std::string &group_id); -void mirror_group_resync_remove(librados::ObjectWriteOperation *op, - const std::string &global_group_id, - const std::string &group_name); -int mirror_group_resync_remove(librados::IoCtx *ioctx, - const std::string &global_group_id, - const std::string &group_name); void mirror_group_get_group_id_start(librados::ObjectReadOperation *op, const std::string &global_group_id); int mirror_group_get_group_id_finish(ceph::buffer::list::const_iterator *it, diff --git a/src/librbd/api/Mirror.cc b/src/librbd/api/Mirror.cc index e9430040777..f0b37c49107 100644 --- a/src/librbd/api/Mirror.cc +++ b/src/librbd/api/Mirror.cc @@ -3397,6 +3397,7 @@ int Mirror::group_resync(IoCtx& group_ioctx, const char *group_name) { } r = cls_client::mirror_group_resync_set(&group_ioctx, + librbd::util::group_header_name(group_id), mirror_group.global_group_id, group_name, group_id); if (r < 0) { diff --git a/src/tools/rbd_mirror/GroupReplayer.cc b/src/tools/rbd_mirror/GroupReplayer.cc index a7fca271d0b..f2a32f1e9cf 
100644 --- a/src/tools/rbd_mirror/GroupReplayer.cc +++ b/src/tools/rbd_mirror/GroupReplayer.cc @@ -313,10 +313,9 @@ void GroupReplayer::set_state_description(int r, const std::string &desc) { } template -void GroupReplayer::start(Context *on_finish, bool manual, - bool restart, bool resync) { +void GroupReplayer::start(Context *on_finish, bool manual, bool restart) { dout(10) << "on_finish=" << on_finish << ", manual=" << manual - << ", restart=" << restart << ", resync=" << resync << dendl; + << ", restart=" << restart << dendl; int r = 0; { @@ -341,9 +340,6 @@ void GroupReplayer::start(Context *on_finish, bool manual, m_get_remote_group_snap_ret_vals.clear(); m_manual_stop = false; m_finished = false; - if (resync) { - m_resync_requested = true; - } //ceph_assert(m_on_start_finish == nullptr); std::swap(m_on_start_finish, on_finish); } @@ -459,28 +455,20 @@ void GroupReplayer::stop(Context *on_finish, bool manual, bool restart) { } template -void GroupReplayer::restart(Context *on_finish, bool resync) { - dout(10) << "resync=" << resync << dendl; +void GroupReplayer::restart(Context *on_finish) { + dout(10) << dendl; { std::lock_guard locker{m_lock}; - if (m_resync_requested) { - dout(10) << "resync is already in progress, cancelling restart" << dendl; - on_finish->complete(-ECANCELED); - return; - } m_restart_requested = true; m_on_start_finish = nullptr; - if (resync) { - m_resync_requested = true; - } } auto ctx = new LambdaContext( - [this, on_finish, resync](int r) { + [this, on_finish](int r) { if (r < 0) { // Try start anyway. 
} - start(on_finish, true, true, resync); + start(on_finish, true, true); }); stop(ctx, false, true); } @@ -550,7 +538,7 @@ void GroupReplayer::bootstrap_group() { m_threads, m_local_io_ctx, m_remote_group_peer.io_ctx, m_global_group_id, m_local_mirror_uuid, m_instance_watcher, m_local_status_updater, m_remote_group_peer.mirror_status_updater, m_cache_manager_handler, - m_pool_meta_cache, m_resync_requested, &m_local_group_id, + m_pool_meta_cache, &m_resync_requested, &m_local_group_id, &m_remote_group_id, &m_local_group_snaps, &m_local_group_ctx, &m_image_replayers, &m_image_replayer_index, ctx); @@ -568,7 +556,6 @@ void GroupReplayer::handle_bootstrap_group(int r) { dout(10) << "r=" << r << dendl; { std::lock_guard locker{m_lock}; - m_resync_requested = false; if (m_state == STATE_STOPPING || m_state == STATE_STOPPED) { dout(10) << "stop prevailed" <::finish_start(int r, const std::string &desc) { [this, r, state, desc, on_finish](int) { set_mirror_group_status_update(state, desc); - if (r == -ENOENT) { + if (r == -ENOENT && !m_resync_requested) { set_finished(true); } if (on_finish != nullptr) { diff --git a/src/tools/rbd_mirror/GroupReplayer.h b/src/tools/rbd_mirror/GroupReplayer.h index 76b25c3d456..37558f7b2ce 100644 --- a/src/tools/rbd_mirror/GroupReplayer.h +++ b/src/tools/rbd_mirror/GroupReplayer.h @@ -120,10 +120,10 @@ public: } void start(Context *on_finish = nullptr, bool manual = false, - bool restart = false, bool resync = false); + bool restart = false); void stop(Context *on_finish = nullptr, bool manual = false, bool restart = false); - void restart(Context *on_finish = nullptr, bool resync = false); + void restart(Context *on_finish = nullptr); void flush(); void print_status(Formatter *f); diff --git a/src/tools/rbd_mirror/InstanceReplayer.cc b/src/tools/rbd_mirror/InstanceReplayer.cc index e0821c8d02a..3696bd92f63 100644 --- a/src/tools/rbd_mirror/InstanceReplayer.cc +++ b/src/tools/rbd_mirror/InstanceReplayer.cc @@ -655,25 +655,10 @@ void 
InstanceReplayer::start_group_replayer( ceph_assert(ceph_mutex_is_locked(m_lock)); std::string global_group_id = group_replayer->get_global_group_id(); - std::string group_name = group_replayer->get_name(); - std::string group_id; - bool resync_requested = false; - int r = librbd::cls_client::mirror_group_resync_get(&m_local_io_ctx, - global_group_id, - group_name, - &group_id); - if (r < 0) { - derr << "getting mirror group resync for global_group_id=" - << global_group_id << " failed: " << cpp_strerror(r) << dendl; - } else if (r == 0) { - if (group_id == group_replayer->get_local_group_id()) { - resync_requested = true; - } - } if (!group_replayer->is_stopped()) { - if (group_replayer->needs_restart() || resync_requested) { - group_replayer->restart(new C_TrackedOp(m_async_op_tracker, nullptr), - resync_requested); + if (group_replayer->needs_restart()) { + stop_group_replayer(group_replayer, new C_TrackedOp(m_async_op_tracker, + nullptr)); } else { group_replayer->sync_group_names(); } @@ -685,28 +670,17 @@ void InstanceReplayer::start_group_replayer( return; } else if (group_replayer->is_finished()) { // TODO temporary until policy integrated - if (resync_requested) { - resync_requested = false; - r = librbd::cls_client::mirror_group_resync_remove(&m_local_io_ctx, - global_group_id, - group_name); - if (r < 0) { - derr << "removing mirror group resync for global_group_id=" - << global_group_id << " failed: " << cpp_strerror(r) << dendl; - } - } else { - dout(5) << "removing group replayer for global_group_id=" - << global_group_id << dendl; - m_group_replayers.erase(group_replayer->get_global_group_id()); - group_replayer->destroy(); - return; - } + dout(5) << "removing group replayer for global_group_id=" + << global_group_id << dendl; + m_group_replayers.erase(group_replayer->get_global_group_id()); + group_replayer->destroy(); + return; } else if (m_manual_stop) { return; } dout(10) << "global_group_id=" << global_group_id << dendl; 
group_replayer->start(new C_TrackedOp(m_async_op_tracker, nullptr), - false, false, resync_requested); + false, false); } template diff --git a/src/tools/rbd_mirror/group_replayer/BootstrapRequest.cc b/src/tools/rbd_mirror/group_replayer/BootstrapRequest.cc index 659d2d7a32f..e710fcf90af 100644 --- a/src/tools/rbd_mirror/group_replayer/BootstrapRequest.cc +++ b/src/tools/rbd_mirror/group_replayer/BootstrapRequest.cc @@ -78,7 +78,7 @@ BootstrapRequest::BootstrapRequest( MirrorStatusUpdater *remote_status_updater, journal::CacheManagerHandler *cache_manager_handler, PoolMetaCache *pool_meta_cache, - bool resync_requested, + bool *resync_requested, std::string *local_group_id, std::string *remote_group_id, std::map *local_group_snaps, @@ -112,7 +112,24 @@ BootstrapRequest::BootstrapRequest( template void BootstrapRequest::send() { - if (m_resync_requested) { + *m_resync_requested = false; + + std::string group_id; + std::string group_header_oid = librbd::util::group_header_name( + *m_local_group_id); + int r = librbd::cls_client::mirror_group_resync_get(&m_local_io_ctx, + group_header_oid, + m_global_group_id, + m_local_group_ctx->name, + &group_id); + if (r < 0) { + derr << "getting mirror group resync for global_group_id=" + << m_global_group_id << " failed: " << cpp_strerror(r) << dendl; + } else if (r == 0 && group_id == *m_local_group_id) { + *m_resync_requested = true; + } + + if (*m_resync_requested) { get_local_group_id(); } else { get_remote_group_id(); @@ -730,7 +747,8 @@ void BootstrapRequest::handle_list_local_group_snapshots(int r) { state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY_DEMOTED) { // if local snapshot is primary demoted, check if there is demote snapshot // in remote, if not then split brain - if (!is_demoted_snap_exists(remote_group_snaps) && !m_resync_requested) { + if (!is_demoted_snap_exists(remote_group_snaps) + && *m_resync_requested == false) { finish(-EEXIST); return; } @@ -961,7 +979,7 @@ void 
BootstrapRequest::move_local_image_to_trash() { &BootstrapRequest::handle_move_local_image_to_trash>(this); auto req = image_deleter::TrashMoveRequest::create( - m_image_io_ctx, global_image_id, m_resync_requested, + m_image_io_ctx, global_image_id, *m_resync_requested, m_threads->work_queue, ctx); req->send(); } diff --git a/src/tools/rbd_mirror/group_replayer/BootstrapRequest.h b/src/tools/rbd_mirror/group_replayer/BootstrapRequest.h index a3c834c926b..514ecb38084 100644 --- a/src/tools/rbd_mirror/group_replayer/BootstrapRequest.h +++ b/src/tools/rbd_mirror/group_replayer/BootstrapRequest.h @@ -45,7 +45,7 @@ public: MirrorStatusUpdater *remote_status_updater, journal::CacheManagerHandler *cache_manager_handler, PoolMetaCache *pool_meta_cache, - bool resync_requested, + bool *resync_requested, std::string *local_group_id, std::string *remote_group_id, std::map *local_group_snaps, @@ -72,7 +72,7 @@ public: MirrorStatusUpdater *remote_status_updater, journal::CacheManagerHandler *cache_manager_handler, PoolMetaCache *pool_meta_cache, - bool resync_requested, + bool *resync_requested, std::string *local_group_id, std::string *remote_group_id, std::map *local_group_snaps, @@ -169,7 +169,7 @@ private: MirrorStatusUpdater *m_remote_status_updater; journal::CacheManagerHandler *m_cache_manager_handler; PoolMetaCache *m_pool_meta_cache; - bool m_resync_requested = false; + bool *m_resync_requested; std::string *m_local_group_id; std::string *m_remote_group_id; std::map *m_local_group_snaps; diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.cc b/src/tools/rbd_mirror/group_replayer/Replayer.cc index adae7903480..95684447d0e 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.cc +++ b/src/tools/rbd_mirror/group_replayer/Replayer.cc @@ -81,6 +81,16 @@ void Replayer::schedule_load_group_snapshots() { m_threads->timer->add_event_after(1, ctx); } +template +void Replayer::notify_group_listener_stop() { + dout(10) << dendl; + + Context *ctx = new 
LambdaContext([this](int) { + m_local_group_ctx->listener->stop(); + }); + m_threads->work_queue->queue(ctx, 0); +} + template void Replayer::notify_group_snap_image_complete( int64_t local_pool_id, @@ -149,6 +159,29 @@ int Replayer::local_group_image_list_by_id( return 0; } + +template +bool Replayer::is_resync_requested() { + dout(10) << "m_local_group_id=" << m_local_group_id << dendl; + + std::string group_id; + std::string group_header_oid = librbd::util::group_header_name( + m_local_group_id); + int r = librbd::cls_client::mirror_group_resync_get(&m_local_io_ctx, + group_header_oid, + m_global_group_id, + m_local_group_ctx->name, + &group_id); + if (r < 0) { + derr << "getting mirror group resync for global_group_id=" + << m_global_group_id << " failed: " << cpp_strerror(r) << dendl; + } else if (r == 0 && group_id == m_local_group_id) { + return true; + } + + return false; +} + template void Replayer::init(Context* on_finish) { dout(10) << m_global_group_id << dendl; @@ -180,6 +213,17 @@ void Replayer::load_local_group_snapshots() { m_state = STATE_REPLAYING; } + if (m_resync_requested) { + return; + } else if (is_resync_requested()) { + m_resync_requested = true; // do nothing from here, anything is simply + // of no use as the group is going to get + // deleted soon. 
+ dout(10) << "local group resync requested" << dendl; + // send stop for Group Replayer + notify_group_listener_stop(); + } + m_local_group_snaps.clear(); auto ctx = create_context_callback< Replayer, @@ -407,16 +451,14 @@ out: return; } - dout(10) << "all remote snapshots synced, idling waiting for new snapshot" - << dendl; - + // At this point all group snapshots have been synced, but we keep polling ceph_assert(m_state == STATE_REPLAYING); - m_state = STATE_IDLE; + locker.unlock(); if (m_remote_demoted) { // stop group replayer - m_local_group_ctx->listener->stop(); + notify_group_listener_stop(); } - locker.unlock(); + schedule_load_group_snapshots(); } template @@ -780,6 +822,10 @@ void Replayer::mirror_regular_snapshot( remote_group_snap_name, cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE}; + // needed for generating the order key, the group_snap_set generates one + // only when the state is INCOMPLETE + librbd::cls_client::group_snap_set(&op, group_snap); + auto itr = std::find_if( m_remote_group_snaps.begin(), m_remote_group_snaps.end(), [remote_group_snap_id](const cls::rbd::GroupSnapshot &s) { diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.h b/src/tools/rbd_mirror/group_replayer/Replayer.h index d53e9400adc..2af6a43e9dc 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.h +++ b/src/tools/rbd_mirror/group_replayer/Replayer.h @@ -106,6 +106,7 @@ private: std::vector m_remote_group_snaps; bool m_remote_demoted = false; + bool m_resync_requested = false; // map of > std::map> m_create_snap_requests; @@ -117,6 +118,8 @@ private: std::vector *image_ids); void schedule_load_group_snapshots(); + void notify_group_listener_stop(); + bool is_resync_requested(); void load_local_group_snapshots(); void handle_load_local_group_snapshots(int r);