]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/workunits/rbd: add a test for force promote with a user snapshot 61955/head
authorIlya Dryomov <idryomov@gmail.com>
Thu, 20 Feb 2025 15:38:41 +0000 (16:38 +0100)
committerIlya Dryomov <idryomov@gmail.com>
Mon, 24 Feb 2025 12:46:15 +0000 (13:46 +0100)
Add a reproducer for the crash on a bad variant access which was fixed
in commit 7d75161051da ("librbd: fix a crash in get_rollback_snap_id").

The reproducer deliberately works around many other issues with force
promote in snapshot-based mirroring: stopping rbd-mirror daemon
shouldn't be necessary (let alone with SIGKILL), get_rollback_snap_id()
and its caller can_create_primary_snapshot() are flawed and can pick
the wrong snapshot to roll back to or skip rollback when it's actually
required, the user snapshot in this scenario should be removed as part
of force promoting because it's incomplete and won't be usable after
the image is promoted, etc.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
qa/workunits/rbd/rbd_mirror.sh
qa/workunits/rbd/rbd_mirror_helpers.sh

index 90d5204b92feb52375f1924aadff702e2d0affd8..0c2c1905172a66e22a57e4c3ee27c9f5f4e1271d 100755 (executable)
@@ -713,3 +713,36 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
 fi
+
+testlog "TEST: force promote with a user snapshot"
+force_promote_image=test_force_promote_user
+create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image} ${RBD_MIRROR_MODE} 10G
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${POOL} ${force_promote_image}
+wait_for_replaying_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+create_snapshot ${CLUSTER2} ${POOL} ${force_promote_image} 'snap1'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 2560 4194304
+if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then
+  mirror_image_snapshot ${CLUSTER2} ${POOL} ${force_promote_image}
+fi
+wait_for_snap_present ${CLUSTER1} ${POOL} ${force_promote_image} 'snap1'
+sleep $((1 + RANDOM % 5))
+stop_mirrors ${CLUSTER1} -KILL
+if [ "${RBD_MIRROR_MODE}" = "snapshot" ]; then
+  SNAPS=$(get_snaps_json ${CLUSTER1} ${POOL} ${force_promote_image})
+  jq -e '.[-1].namespace["type"] == "mirror" and .[-1].namespace["state"] == "non-primary" and .[-1].namespace["complete"] == false' <<< ${SNAPS}
+fi
+promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force'
+start_mirrors ${CLUSTER1}
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image}
index 847c93c22549c5dfb6d54bfd1e1a79c41e357442..b94317d75d3a538b1eb130ab8bff9664f741e6eb 100755 (executable)
@@ -1254,6 +1254,15 @@ count_mirror_snaps()
         grep -c -F " mirror ("
 }
 
+get_snaps_json()
+{
+    local cluster=$1
+    local pool=$2
+    local image=$3
+
+    rbd --cluster ${cluster} snap ls ${pool}/${image} --all --format json
+}
+
 write_image()
 {
     local cluster=$1