]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/rbd-mirror: add OMAP cleanup checks
authorArthur Outhenin-Chalandre <arthur.outhenin-chalandre@cern.ch>
Fri, 11 Jun 2021 07:29:59 +0000 (09:29 +0200)
committerArthur Outhenin-Chalandre <arthur.outhenin-chalandre@cern.ch>
Wed, 12 Jan 2022 09:03:36 +0000 (10:03 +0100)
This make sure that all images are deleted in the existing qa scripts
and checks if all rbd-mirror metadata in OMAP are correctly deleted.

Signed-off-by: Arthur Outhenin-Chalandre <arthur.outhenin-chalandre@cern.ch>
(cherry picked from commit 4db66da51211504ba0a2353180ae084ba1ab3fcf)

qa/workunits/rbd/rbd_mirror_helpers.sh
qa/workunits/rbd/rbd_mirror_journal.sh
qa/workunits/rbd/rbd_mirror_snapshot.sh
qa/workunits/rbd/rbd_mirror_stress.sh

index 82299715e231e1e5d2110e5825eda8c4eafd4c80..ca715d854c17669b09c463fa83a0580ec300e04b 100755 (executable)
@@ -549,6 +549,8 @@ status()
 
                 echo "${cluster} ${image_pool} ${image_ns} rbd_mirroring omap vals"
                 rados --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" listomapvals rbd_mirroring
+                echo "${cluster} ${image_pool} ${image_ns} rbd_mirror_leader omap vals"
+                rados --cluster ${cluster} -p ${image_pool} --namespace "${image_ns}" listomapvals rbd_mirror_leader
                 echo
             done
         done
@@ -1096,6 +1098,20 @@ unprotect_snapshot()
     rbd --cluster ${cluster} snap unprotect ${pool}/${image}@${snap}
 }
 
+unprotect_snapshot_retry()
+{
+    local cluster=$1
+    local pool=$2
+    local image=$3
+    local snap=$4
+
+    for s in 0 1 2 4 8 16 32; do
+        sleep ${s}
+        unprotect_snapshot ${cluster} ${pool} ${image} ${snap} && return 0
+    done
+    return 1
+}
+
 wait_for_snap_present()
 {
     local cluster=$1
@@ -1292,6 +1308,8 @@ enable_mirror()
     local mode=${4:-${MIRROR_IMAGE_MODE}}
 
     rbd --cluster=${cluster} mirror image enable ${pool}/${image} ${mode}
+    # Display image info including the global image id for debugging purpose
+    rbd --cluster=${cluster} info ${pool}/${image}
 }
 
 test_image_present()
@@ -1389,6 +1407,58 @@ get_clone_format()
              }'
 }
 
+list_omap_keys()
+{
+    local cluster=$1
+    local pool=$2
+    local obj_name=$3
+
+    rados --cluster ${cluster} -p ${pool} listomapkeys ${obj_name}
+}
+
+count_omap_keys_with_filter()
+{
+    local cluster=$1
+    local pool=$2
+    local obj_name=$3
+    local filter=$4
+
+    list_omap_keys ${cluster} ${pool} ${obj_name} | grep -c ${filter}
+}
+
+wait_for_omap_keys()
+{
+    local cluster=$1
+    local pool=$2
+    local obj_name=$3
+    local filter=$4
+
+    for s in 0 1 2 2 4 4 8 8 8 16 16 32; do
+        sleep $s
+
+        set +e
+        test "$(count_omap_keys_with_filter ${cluster} ${pool} ${obj_name} ${filter})" = 0
+        error_code=$?
+        set -e
+
+        if [ $error_code -eq 0 ]; then
+            return 0
+        fi
+    done
+
+    return 1
+}
+
+wait_for_image_in_omap()
+{
+    local cluster=$1
+    local pool=$2
+
+    wait_for_omap_keys ${cluster} ${pool} rbd_mirroring status_global
+    wait_for_omap_keys ${cluster} ${pool} rbd_mirroring image_
+    wait_for_omap_keys ${cluster} ${pool} rbd_mirror_leader image_map
+}
+
 #
 # Main
 #
index da856861b0c9587406b493d91909d22b4e0982ad..39db7bcb023e0bebe2c91bf25a896b8bde52d57a 100755 (executable)
@@ -119,6 +119,8 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   all_admin_daemons ${CLUSTER1} rbd mirror status
 fi
 
+remove_image_retry ${CLUSTER2} ${POOL} ${image1}
+
 testlog "TEST: test image rename"
 new_name="${image}_RENAMED"
 rename_image ${CLUSTER2} ${POOL} ${image} ${new_name}
@@ -138,6 +140,18 @@ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted'
 trash_restore ${CLUSTER2} ${POOL} ${image_id}
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 
+testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)"
+remove_image_retry ${CLUSTER2} ${POOL} ${image}
+
+wait_for_image_in_omap ${CLUSTER1} ${POOL}
+wait_for_image_in_omap ${CLUSTER2} ${POOL}
+
+create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+write_image ${CLUSTER2} ${POOL} ${image} 100
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
+
 testlog "TEST: failover and failback"
 start_mirrors ${CLUSTER2}
 
@@ -216,6 +230,8 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopp
 wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
 write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100
 write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image}
 
 testlog "TEST: cloned images"
 testlog " - default"
@@ -240,6 +256,7 @@ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}
 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying' 'primary_position'
 compare_images ${POOL} ${clone_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}
 
 testlog " - clone v1"
 clone_image ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} ${clone_image}1
@@ -249,6 +266,10 @@ clone_image ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap} ${POOL} \
 test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v1) = 1
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1
 test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1
+remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1
+remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1
+unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
 
 testlog " - clone v2"
 parent_snap=snap_v2
@@ -277,6 +298,7 @@ test_snap_removed_from_trash ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent
 wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap}
 remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2
 wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap}
+remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image}
 
 testlog "TEST: data pool"
 dp_image=test_data_pool
@@ -295,6 +317,7 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying' 'prim
 compare_images ${POOL} ${dp_image}@snap1
 compare_images ${POOL} ${dp_image}@snap2
 compare_images ${POOL} ${dp_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${dp_image}
 
 testlog "TEST: disable mirroring / delete non-primary image"
 image2=test2
@@ -379,7 +402,18 @@ for i in `seq 1 20`; do
 done
 wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}"
 
+unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1'
+unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2'
+for i in ${image2} ${image4}; do
+    remove_image_retry ${CLUSTER2} ${POOL} ${i}
+done
+
 testlog "TEST: disable mirror while daemon is stopped"
+# TODO: workaround for the daemon to ack the deletion, to remove when
+#       image_map cleanup is fixed
+for i in ${image2} ${image4}; do
+        wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted'
+done
 stop_mirrors ${CLUSTER1}
 stop_mirrors ${CLUSTER2}
 set_pool_mirror_mode ${CLUSTER2} ${POOL} 'image'
@@ -388,6 +422,11 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   test_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 fi
 start_mirrors ${CLUSTER1}
+start_mirrors ${CLUSTER2} # TODO: remove start/stop of cluster2 deamons when
+                          #       image_map cleanup at startup is resolved
+wait_for_image_in_omap ${CLUSTER1} ${POOL}
+wait_for_image_in_omap ${CLUSTER2} ${POOL}
+stop_mirrors ${CLUSTER2}
 wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted'
 set_pool_mirror_mode ${CLUSTER2} ${POOL} 'pool'
 enable_journaling ${CLUSTER2} ${POOL} ${image}
@@ -415,6 +454,7 @@ remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image}
 disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image}
 wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted'
 wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted'
+remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image}
 
 testlog " - data pool"
 dp_image=test_data_pool
@@ -428,6 +468,7 @@ write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100
 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying' 'primary_position'
 compare_images ${POOL}/${NS1} ${dp_image}
+remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image}
 
 testlog "TEST: simple image resync"
 request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
@@ -460,6 +501,7 @@ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position'
 compare_images ${POOL} ${image}
+remove_image_retry ${CLUSTER2} ${POOL} ${image}
 
 testlog "TEST: client disconnect"
 image=laggy
@@ -531,6 +573,7 @@ disconnect_image ${CLUSTER2} ${POOL} ${image}
 test -z "$(get_mirror_journal_position ${CLUSTER2} ${POOL} ${image})"
 wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected'
+remove_image_retry ${CLUSTER2} ${POOL} ${image}
 
 testlog "TEST: split-brain"
 image=split-brain
@@ -544,6 +587,12 @@ demote_image ${CLUSTER1} ${POOL} ${image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain'
 request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'primary_position'
+remove_image_retry ${CLUSTER2} ${POOL} ${image}
+
+testlog "TEST: check if removed images' OMAP are removed"
+start_mirrors ${CLUSTER2}
+wait_for_image_in_omap ${CLUSTER1} ${POOL}
+wait_for_image_in_omap ${CLUSTER2} ${POOL}
 
 if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   # teuthology will trash the daemon
index 7ab2239db03a33e85aa307a6cb32e248142ed12b..6d3f5846de1687e3ea3950d7c1b50f578a98fdb5 100755 (executable)
@@ -122,6 +122,8 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   all_admin_daemons ${CLUSTER1} rbd mirror status
 fi
 
+remove_image_retry ${CLUSTER2} ${POOL} ${image1}
+
 testlog "TEST: test image rename"
 new_name="${image}_RENAMED"
 rename_image ${CLUSTER2} ${POOL} ${image} ${new_name}
@@ -144,6 +146,18 @@ trash_restore ${CLUSTER2} ${POOL} ${image_id}
 enable_mirror ${CLUSTER2} ${POOL} ${image} snapshot
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 
+testlog "TEST: check if removed images' OMAP are removed (with rbd-mirror on one cluster)"
+remove_image_retry ${CLUSTER2} ${POOL} ${image}
+
+wait_for_image_in_omap ${CLUSTER1} ${POOL}
+wait_for_image_in_omap ${CLUSTER2} ${POOL}
+
+create_image_and_enable_mirror ${CLUSTER2} ${POOL} ${image}
+wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
+write_image ${CLUSTER2} ${POOL} ${image} 100
+wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image}
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
+
 testlog "TEST: failover and failback"
 start_mirrors ${CLUSTER2}
 
@@ -222,6 +236,8 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${force_promote_image} 'up+stopp
 wait_for_status_in_pool_dir ${CLUSTER2} ${POOL} ${force_promote_image} 'up+stopped'
 write_image ${CLUSTER1} ${POOL} ${force_promote_image} 100
 write_image ${CLUSTER2} ${POOL} ${force_promote_image} 100
+remove_image_retry ${CLUSTER1} ${POOL} ${force_promote_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${force_promote_image}
 
 testlog "TEST: cloned images"
 testlog " - default"
@@ -246,6 +262,7 @@ wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}
 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${clone_image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${clone_image} 'up+replaying'
 compare_images ${POOL} ${clone_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}
 
 testlog " - clone v1"
 clone_image_and_enable_mirror ${CLUSTER1} ${PARENT_POOL} ${parent_image} \
@@ -256,6 +273,10 @@ clone_image_and_enable_mirror ${CLUSTER2} ${PARENT_POOL} ${parent_image} \
 test $(get_clone_format ${CLUSTER2} ${POOL} ${clone_image}_v1) = 1
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${clone_image}_v1
 test $(get_clone_format ${CLUSTER1} ${POOL} ${clone_image}_v1) = 1
+remove_image_retry ${CLUSTER2} ${POOL} ${clone_image}_v1
+remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}1
+unprotect_snapshot_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
+remove_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image} ${parent_snap}
 
 testlog " - clone v2"
 parent_snap=snap_v2
@@ -288,6 +309,7 @@ mirror_image_snapshot ${CLUSTER2} ${PARENT_POOL} ${parent_image}
 wait_for_snap_moved_to_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap}
 remove_image_retry ${CLUSTER1} ${POOL} ${clone_image}_v2
 wait_for_snap_removed_from_trash ${CLUSTER1} ${PARENT_POOL} ${parent_image} ${parent_snap}
+remove_image_retry ${CLUSTER2} ${PARENT_POOL} ${parent_image}
 
 testlog "TEST: data pool"
 dp_image=test_data_pool
@@ -306,6 +328,7 @@ wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${dp_image} 'up+replaying'
 compare_images ${POOL} ${dp_image}@snap1
 compare_images ${POOL} ${dp_image}@snap2
 compare_images ${POOL} ${dp_image}
+remove_image_retry ${CLUSTER2} ${POOL} ${dp_image}
 
 testlog "TEST: disable mirroring / delete non-primary image"
 image2=test2
@@ -354,7 +377,18 @@ done
 mirror_image_snapshot ${CLUSTER2} ${POOL} ${image2}
 wait_for_snap_present ${CLUSTER1} ${POOL} ${image2} "${snap_name}_${i}"
 
+unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap1'
+unprotect_snapshot ${CLUSTER2} ${POOL} ${image4} 'snap2'
+for i in ${image2} ${image4}; do
+    remove_image_retry ${CLUSTER2} ${POOL} ${i}
+done
+
 testlog "TEST: disable mirror while daemon is stopped"
+# TODO: workaround for the daemon to ack the deletion, to remove when
+#       image_map cleanup is fixed
+for i in ${image2} ${image4}; do
+        wait_for_image_present ${CLUSTER1} ${POOL} ${i} 'deleted'
+done
 stop_mirrors ${CLUSTER1}
 stop_mirrors ${CLUSTER2}
 disable_mirror ${CLUSTER2} ${POOL} ${image}
@@ -362,6 +396,11 @@ if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   test_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 fi
 start_mirrors ${CLUSTER1}
+start_mirrors ${CLUSTER2} # TODO: remove start/stop of cluster2 deamons when
+                          #       image_map cleanup at startup is resolved
+wait_for_image_in_omap ${CLUSTER1} ${POOL}
+wait_for_image_in_omap ${CLUSTER2} ${POOL}
+stop_mirrors ${CLUSTER2}
 wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted'
 enable_mirror ${CLUSTER2} ${POOL} ${image}
 wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
@@ -387,6 +426,7 @@ remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${image}
 disable_mirror ${CLUSTER2} ${POOL}/${NS2} ${image}
 wait_for_image_present ${CLUSTER1} ${POOL}/${NS1} ${image} 'deleted'
 wait_for_image_present ${CLUSTER1} ${POOL}/${NS2} ${image} 'deleted'
+remove_image_retry ${CLUSTER2} ${POOL}/${NS2} ${image}
 
 testlog " - data pool"
 dp_image=test_data_pool
@@ -400,6 +440,7 @@ write_image ${CLUSTER2} ${POOL}/${NS1} ${dp_image} 100
 wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL}/${NS1} ${dp_image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL}/${NS1} ${dp_image} 'up+replaying'
 compare_images ${POOL}/${NS1} ${dp_image}
+remove_image_retry ${CLUSTER2} ${POOL}/${NS1} ${dp_image}
 
 testlog "TEST: simple image resync"
 request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
@@ -432,6 +473,7 @@ wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'present'
 wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
 compare_images ${POOL} ${image}
+remove_image_retry ${CLUSTER2} ${POOL} ${image}
 
 testlog "TEST: split-brain"
 image=split-brain
@@ -445,6 +487,12 @@ demote_image ${CLUSTER1} ${POOL} ${image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain'
 request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying'
+remove_image_retry ${CLUSTER2} ${POOL} ${image}
+
+testlog "TEST: check if removed images' OMAP are removed"
+start_mirrors ${CLUSTER2}
+wait_for_image_in_omap ${CLUSTER1} ${POOL}
+wait_for_image_in_omap ${CLUSTER2} ${POOL}
 
 if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
   # teuthology will trash the daemon
index a17ad75e1b58e98f301fbfbd9944319b03ce84fe..cb79aba7ebc9c79126e10bf3188904914df2c51f 100755 (executable)
@@ -214,3 +214,8 @@ do
   purge_snapshots ${CLUSTER2} ${POOL} ${image}
   remove_image_retry ${CLUSTER2} ${POOL} ${image}
 done
+
+testlog "TEST: check if removed images' OMAP are removed"
+
+wait_for_image_in_omap ${CLUSTER1} ${POOL}
+wait_for_image_in_omap ${CLUSTER2} ${POOL}