From f5eadfff807a84ff6561da3239ee804e735d64b0 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Tue, 11 Feb 2025 17:44:51 +0100
Subject: [PATCH] librbd: fix mirror image status summary in a namespace

For the purposes of the summary with image counts, "rbd mirror pool
status" command is supposed to count each image only once. To this
end, for unidirectional mirroring the status of the receiving site
should be taken while for bidirectional mirroring the statuses should
be combined/reduced. For example, if mirroring is enabled on a single
image and everything is in order, the summary is expected to be

  image health: OK
  images: 1 total
      1 replaying

on both clusters even though on the primary the local status is
MIRROR_IMAGE_STATUS_STATE_STOPPED and only on the secondary it's
MIRROR_IMAGE_STATUS_STATE_REPLAYING.

Currently this isn't the case for custom namespaces. In the same
scenario the primary ends up reporting

  image health: OK
  images: 1 total
      1 stopped

based solely on the local status in a namespace.

Fixes: https://tracker.ceph.com/issues/69911 Signed-off-by: Ilya Dryomov --- qa/workunits/rbd/rbd_mirror_bootstrap.sh | 39 ++++++++++++++++++++++++ qa/workunits/rbd/rbd_mirror_helpers.sh | 2 ++ src/librbd/api/Mirror.cc | 5 ++- 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/qa/workunits/rbd/rbd_mirror_bootstrap.sh b/qa/workunits/rbd/rbd_mirror_bootstrap.sh index 3ddb0aa219b..681e714ac8c 100755 --- a/qa/workunits/rbd/rbd_mirror_bootstrap.sh +++ b/qa/workunits/rbd/rbd_mirror_bootstrap.sh @@ -35,12 +35,28 @@ done rbd --cluster ${CLUSTER1} --pool ${POOL} mirror pool info --format json | jq -e '.peers[0].direction == "tx-only"' create_image_and_enable_mirror ${CLUSTER1} ${POOL} image1 +create_image_and_enable_mirror ${CLUSTER1} ${POOL}/${NS1} image1 wait_for_image_replay_started ${CLUSTER2} ${POOL} image1 write_image ${CLUSTER1} ${POOL} image1 100 wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL} ${POOL} image1 wait_for_replaying_status_in_pool_dir ${CLUSTER2} ${POOL} image1 +POOL_STATUS=$(get_pool_status_json ${CLUSTER1} ${POOL}) +jq -e '.summary.states == {"replaying": 1}' <<< ${POOL_STATUS} +POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${POOL}) +jq -e '.summary.states == {"replaying": 1}' <<< ${POOL_STATUS} + +wait_for_image_replay_started ${CLUSTER2} ${POOL}/${NS1} image1 +write_image ${CLUSTER1} ${POOL}/${NS1} image1 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${POOL}/${NS1} ${POOL}/${NS1} image1 +wait_for_replaying_status_in_pool_dir ${CLUSTER2} ${POOL}/${NS1} image1 + +POOL_STATUS=$(get_pool_status_json ${CLUSTER1} ${POOL}/${NS1}) +jq -e '.summary.states == {"replaying": 1}' <<< ${POOL_STATUS} +POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${POOL}/${NS1}) +jq -e '.summary.states == {"replaying": 1}' <<< ${POOL_STATUS} + testlog "TEST: verify rx-tx direction" # both rx-tx peers are added immediately by "rbd mirror pool peer bootstrap import" rbd --cluster ${CLUSTER1} --pool ${PARENT_POOL} mirror pool info --format json | jq -e 
'.peers[0].direction == "rx-tx"' @@ -52,6 +68,9 @@ create_image ${CLUSTER2} ${PARENT_POOL} image2 enable_mirror ${CLUSTER1} ${PARENT_POOL} image1 enable_mirror ${CLUSTER2} ${PARENT_POOL} image2 +create_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL}/${NS1} image1 +create_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL}/${NS1} image2 + wait_for_image_replay_started ${CLUSTER2} ${PARENT_POOL} image1 write_image ${CLUSTER1} ${PARENT_POOL} image1 100 wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL} ${PARENT_POOL} image1 @@ -62,6 +81,26 @@ write_image ${CLUSTER2} ${PARENT_POOL} image2 100 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL} ${PARENT_POOL} image2 wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL} image2 +POOL_STATUS=$(get_pool_status_json ${CLUSTER1} ${PARENT_POOL}) +jq -e '.summary.states == {"replaying": 2}' <<< ${POOL_STATUS} +POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${PARENT_POOL}) +jq -e '.summary.states == {"replaying": 2}' <<< ${POOL_STATUS} + +wait_for_image_replay_started ${CLUSTER2} ${PARENT_POOL}/${NS1} image1 +write_image ${CLUSTER1} ${PARENT_POOL}/${NS1} image1 100 +wait_for_replay_complete ${CLUSTER2} ${CLUSTER1} ${PARENT_POOL}/${NS1} ${PARENT_POOL}/${NS1} image1 +wait_for_replaying_status_in_pool_dir ${CLUSTER2} ${PARENT_POOL}/${NS1} image1 + +wait_for_image_replay_started ${CLUSTER1} ${PARENT_POOL}/${NS1} image2 +write_image ${CLUSTER2} ${PARENT_POOL}/${NS1} image2 100 +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${PARENT_POOL}/${NS1} ${PARENT_POOL}/${NS1} image2 +wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${PARENT_POOL}/${NS1} image2 + +POOL_STATUS=$(get_pool_status_json ${CLUSTER1} ${PARENT_POOL}/${NS1}) +jq -e '.summary.states == {"replaying": 2}' <<< ${POOL_STATUS} +POOL_STATUS=$(get_pool_status_json ${CLUSTER2} ${PARENT_POOL}/${NS1}) +jq -e '.summary.states == {"replaying": 2}' <<< ${POOL_STATUS} + testlog "TEST: pool replayer and callout cleanup when peer is updated" 
test_health_state ${CLUSTER1} ${PARENT_POOL} 'OK' test_health_state ${CLUSTER2} ${PARENT_POOL} 'OK' diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh index 1b1436db74d..847c93c2254 100755 --- a/qa/workunits/rbd/rbd_mirror_helpers.sh +++ b/qa/workunits/rbd/rbd_mirror_helpers.sh @@ -299,9 +299,11 @@ setup_pools() rbd --cluster ${cluster} namespace create ${POOL}/${NS1} rbd --cluster ${cluster} namespace create ${POOL}/${NS2} + rbd --cluster ${cluster} namespace create ${PARENT_POOL}/${NS1} rbd --cluster ${cluster} mirror pool enable ${POOL}/${NS1} ${MIRROR_POOL_MODE} rbd --cluster ${cluster} mirror pool enable ${POOL}/${NS2} image + rbd --cluster ${cluster} mirror pool enable ${PARENT_POOL}/${NS1} ${MIRROR_POOL_MODE} if [ -z ${RBD_MIRROR_MANUAL_PEERS} ]; then if [ -z ${RBD_MIRROR_CONFIG_KEY} ]; then diff --git a/src/librbd/api/Mirror.cc b/src/librbd/api/Mirror.cc index 06a5e836faf..d370d91d858 100644 --- a/src/librbd/api/Mirror.cc +++ b/src/librbd/api/Mirror.cc @@ -1993,8 +1993,11 @@ int Mirror::image_status_summary(librados::IoCtx& io_ctx, MirrorImageStatusStates *states) { CephContext *cct = reinterpret_cast(io_ctx.cct()); + librados::IoCtx default_ns_io_ctx; + default_ns_io_ctx.dup(io_ctx); + default_ns_io_ctx.set_namespace(""); std::vector mirror_peers; - int r = cls_client::mirror_peer_list(&io_ctx, &mirror_peers); + int r = cls_client::mirror_peer_list(&default_ns_io_ctx, &mirror_peers); if (r < 0 && r != -ENOENT) { lderr(cct) << "failed to list mirror peers: " << cpp_strerror(r) << dendl; return r; -- 2.39.5