]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/workunits/rbd: add a test for force promote with a user snapshot 62045/head
authorIlya Dryomov <idryomov@gmail.com>
Thu, 20 Feb 2025 15:38:41 +0000 (16:38 +0100)
committerIlya Dryomov <idryomov@gmail.com>
Fri, 28 Feb 2025 19:47:57 +0000 (20:47 +0100)
Add a reproducer for the crash on a bad variant access which was fixed
in commit 7d75161051da ("librbd: fix a crash in get_rollback_snap_id").

The reproducer deliberately works around many other issues with force
promote in snapshot-based mirroring: stopping rbd-mirror daemon
shouldn't be necessary (let alone with SIGKILL), get_rollback_snap_id()
and its caller can_create_primary_snapshot() are flawed and can pick
the wrong snapshot to roll back to or skip rollback when it's actually
required, the user snapshot in this scenario should be removed as part
of force promoting because it's incomplete and won't be usable after
the image is promoted, etc.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
(cherry picked from commit 0f4a37dd9f28070d0d421379385a5f2912cc9627)

Conflicts:
qa/workunits/rbd/rbd_mirror_journal.sh [ commits 3fd8a0388735
  ("qa/workunits/rbd: merge journal and snapshot test scripts")
  and 3fdbc160bb21 ("rbd-mirror: allow mirroring to a different
  namespace") not in reef ]
qa/workunits/rbd/rbd_mirror_snapshot.sh [ duplicated/cloned for
  snapshot-based mirroring ]

qa/workunits/rbd/rbd_mirror_helpers.sh
qa/workunits/rbd/rbd_mirror_journal.sh
qa/workunits/rbd/rbd_mirror_snapshot.sh

index 96ba1c7ba93015f9448e3cab695ba53bd4cdf31f..4df1fe2ecf790fff6d895cb2d395b286b18f5011 100755 (executable)
@@ -1213,6 +1213,15 @@ count_mirror_snaps()
         grep -c -F " mirror ("
 }
 
+get_snaps_json()
+{
+    local cluster=$1
+    local pool=$2
+    local image=$3
+
+    rbd --cluster ${cluster} snap ls ${pool}/${image} --all --format json
+}
+
 write_image()
 {
     local cluster=$1
index 20a3b87db77f5942ddfd31f7c10ae5edfeb3b373..9e1b44e0f3b43f874ac34a7482a1e259abf03ec6 100755 (executable)
@@ -614,3 +614,29 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
 fi
+
+testlog "TEST: force promote with a user snapshot"
+force_promote_image=test_force_promote_user
+create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image} ${MIRROR_IMAGE_MODE} 10G
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying' 'primary_position'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+create_snapshot ${CLUSTER2} ${POOL} ${force_promote_image} 'snap1'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 2560 4194304
+wait_for_snap_present ${CLUSTER1} ${POOL} ${force_promote_image} 'snap1'
+sleep $((1 + RANDOM % 5))
+stop_mirrors ${CLUSTER1} -KILL
+promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force'
+start_mirrors ${CLUSTER1}
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image}
index 17164c4d544a25d4fb95d24375816423cbc69135..2cab9d70457b43ecc2ce3c9371438dc965307691 100755 (executable)
@@ -517,3 +517,32 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER1} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
   CEPH_ARGS='--id admin' ceph --cluster ${CLUSTER2} osd blocklist ls 2>&1 | grep -q "listed 0 entries"
 fi
+
+testlog "TEST: force promote with a user snapshot"
+force_promote_image=test_force_promote_user
+create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${force_promote_image} ${MIRROR_IMAGE_MODE} 10G
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+replaying'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+create_snapshot ${CLUSTER2} ${POOL} ${force_promote_image} 'snap1'
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 2560 4194304
+mirror_image_snapshot ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_snap_present ${CLUSTER1} ${POOL} ${force_promote_image} 'snap1'
+sleep $((1 + RANDOM % 5))
+stop_mirrors ${CLUSTER1} -KILL
+SNAPS=$(get_snaps_json ${CLUSTER1} ${POOL} ${force_promote_image})
+jq -e '.[-1].namespace["type"] == "mirror" and .[-1].namespace["state"] == "non-primary" and .[-1].namespace["complete"] == false' <<< ${SNAPS}
+promote_image ${CLUSTER1} ${POOL} ${force_promote_image} '--force'
+start_mirrors ${CLUSTER1}
+wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${force_promote_image}
+wait_for_image_replay_stopped ${CLUSTER2} ${POOL} ${force_promote_image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopped'
+wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
+write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100
+write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image}