From 5555ae2b27448d1fda817c5091729d00b488135f Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Thu, 20 Feb 2025 16:38:41 +0100
Subject: [PATCH] qa/workunits/rbd: add a test for force promote with a user snapshot

Add a reproducer for the crash on a bad variant access which was fixed
in commit 7d75161051da ("librbd: fix a crash in get_rollback_snap_id").

The reproducer deliberately works around many other issues with force
promote in snapshot-based mirroring: stopping rbd-mirror daemon
shouldn't be necessary (let alone with SIGKILL), get_rollback_snap_id()
and its caller can_create_primary_snapshot() are flawed and can pick
the wrong snapshot to roll back to or skip rollback when it's actually
required, the user snapshot in this scenario should be removed as part
of force promoting because it's incomplete and won't be usable after
the image is promoted, etc.

Signed-off-by: Ilya Dryomov
(cherry picked from commit 0f4a37dd9f28070d0d421379385a5f2912cc9627)

Conflicts:
	qa/workunits/rbd/rbd_mirror_journal.sh [ commits 3fd8a0388735
	  ("qa/workunits/rbd: merge journal and snapshot test scripts") and
	  3fdbc160bb21 ("rbd-mirror: allow mirroring to a different
	  namespace") not in reef ]
	qa/workunits/rbd/rbd_mirror_snapshot.sh [ duplicated/cloned for
	  snapshot-based mirroring ]
---
 qa/workunits/rbd/rbd_mirror_helpers.sh  |  9 ++++++++
 qa/workunits/rbd/rbd_mirror_journal.sh  | 26 ++++++++++++++++++++++
 qa/workunits/rbd/rbd_mirror_snapshot.sh | 29 +++++++++++++++++++++++++
 3 files changed, 64 insertions(+)

diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh
index 96ba1c7ba93..4df1fe2ecf7 100755
--- a/qa/workunits/rbd/rbd_mirror_helpers.sh
+++ b/qa/workunits/rbd/rbd_mirror_helpers.sh
@@ -1213,6 +1213,15 @@ count_mirror_snaps()
         grep -c -F " mirror ("
 }
 
+get_snaps_json()
+{
+    local cluster=$1
+    local pool=$2
+    local image=$3
+
+    rbd --cluster ${cluster} snap ls ${pool}/${image} --all --format json
+}
+
 write_image()
 {
     local cluster=$1
diff --git a/qa/workunits/rbd/rbd_mirror_journal.sh b/qa/workunits/rbd/rbd_mirror_journal.sh
index 20a3b87db77..9e1b44e0f3b 100755
--- a/qa/workunits/rbd/rbd_mirror_journal.sh
+++ b/qa/workunits/rbd/rbd_mirror_journal.sh
@@ -614,3 +614,29 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
 fi
+
+testlog "TEST: force promote with a user snapshot"
+force_promote_image=test_force_promote_user
+create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image} ${MIRROR_IMAGE_MODE} 10G
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying' 'primary_position'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+create_snapshot ${CLUSTER2} ${POOL} ${force_promote_image} 'snap1'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 2560 4194304
+wait_for_snap_present ${CLUSTER1} ${POOL} ${force_promote_image} 'snap1'
+sleep $((1 + RANDOM % 5))
+stop_mirrors ${CLUSTER1} -KILL
+promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force'
+start_mirrors ${CLUSTER1}
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image}
diff --git a/qa/workunits/rbd/rbd_mirror_snapshot.sh b/qa/workunits/rbd/rbd_mirror_snapshot.sh
index 17164c4d544..2cab9d70457 100755
--- a/qa/workunits/rbd/rbd_mirror_snapshot.sh
+++ b/qa/workunits/rbd/rbd_mirror_snapshot.sh
@@ -517,3 +517,32 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
 fi
+
+testlog "TEST: force promote with a user snapshot"
+force_promote_image=test_force_promote_user
+create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image} ${MIRROR_IMAGE_MODE} 10G
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+create_snapshot ${CLUSTER2} ${POOL} ${force_promote_image} 'snap1'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 2560 4194304
+mirror_image_snapshot ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_snap_present ${CLUSTER1} ${POOL} ${force_promote_image} 'snap1'
+sleep $((1 + RANDOM % 5))
+stop_mirrors ${CLUSTER1} -KILL
+SNAPS=$(get_snaps_json ${CLUSTER1} ${POOL} ${force_promote_image})
+jq -e '.[-1].namespace["type"] == "mirror" and .[-1].namespace["state"] == "non-primary" and .[-1].namespace["complete"] == false' <<< ${SNAPS}
+promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force'
+start_mirrors ${CLUSTER1}
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image}
-- 
2.39.5