qa/workunits/rbd: update to mirror group snapshot tests
author    John Agombar <agombar@uk.ibm.com>
          Thu, 10 Apr 2025 18:29:48 +0000 (19:29 +0100)
committer Ilya Dryomov <idryomov@gmail.com>
          Sun, 28 Sep 2025 18:25:04 +0000 (20:25 +0200)
Added a -d <filename> option to rbd_mirror_group_simple.sh.  This can be used to
save the stdout and stderr from commands run during a test+scenario into $TEMPDIR/<filename>.
After a test completes successfully, the contents of the file are deleted before the next
test runs, to prevent the file from becoming too large.
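
For example, an illustrative invocation (the filename and the -r repeat count here are
arbitrary choices, not defaults):

    ../qa/workunits/rbd/rbd_mirror_group_simple.sh -d cli_output.log -r 2

With this, the stdout and stderr of every command run by the test helpers is appended to
$TEMPDIR/cli_output.log, and the file is overwritten with a fresh "Test:... Scenario:..."
header when the next test starts (see run_test() and run_cmd_internal() in the diff below).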

Updated tests:
• various - Added a step after each resync request to wait for the group id to change before
  continuing (see the example after this list).  This ensures that the group delete/recreate
  step has completed and prevents later commands from failing with "group doesn't exist"
  type errors.
• test_enable_mirroring_when_duplicate_group_exists - Added checks of the "state" and
  "description" fields for the peer site in the group status output.  The test is disabled
  as it currently fails.
• test_enable_mirroring_when_duplicate_image_exists_scenarios - Simplified the test to use
  a single duplicate-named image.  The test still fails and remains disabled.
• test_remote_namespace - Added steps to take a new snapshot on the primary after failover
  and check that it syncs successfully.
• test_group_and_standalone_images_do_io - Merged two scenarios to remove duplication.
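
The resync wait pattern referenced in the first bullet above is (taken verbatim from the
updated tests in the diff below; CLUSTER1/POOL/group come from the test environment):

    get_id_from_group_info ${CLUSTER1} ${POOL}/${group} group_id_before
    mirror_group_resync ${CLUSTER1} ${POOL}/${group}
    wait_for_group_id_changed ${CLUSTER1} ${POOL}/${group} ${group_id_before}

wait_for_group_id_changed() is a new helper in rbd_mirror_helpers.sh that polls
get_id_from_group_info() until the group id differs from the recorded value.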

New tests:
• test_demote_snap_sync - Checks that a demote snapshot is correctly synced to the secondary
  after the daemon is restarted.

Signed-off-by: John Agombar <agombar@uk.ibm.com>
qa/workunits/rbd/rbd_mirror_group.sh
qa/workunits/rbd/rbd_mirror_group_simple.sh
qa/workunits/rbd/rbd_mirror_helpers.sh

index c1e5a560a6142b194f5facdcdaae322e37950cf1..c6f04ed0e8a6a3db1f43a78a93b9ed1ed3b4836a 100755 (executable)
@@ -483,7 +483,10 @@ mirror_group_demote ${CLUSTER1} ${POOL}/${group}
 test_fields_in_group_info ${CLUSTER1} ${POOL}/${group} 'snapshot' 'enabled' 'false'
 test_fields_in_group_info ${CLUSTER2} ${POOL}/${group} 'snapshot' 'enabled' 'true'
 wait_for_group_status_in_pool_dir ${CLUSTER1} ${POOL}/${group} 'up+error' 0 'split-brain detected'
+
+get_id_from_group_info ${CLUSTER1} ${POOL}/${group} group_id_before
 mirror_group_resync ${CLUSTER1} ${POOL}/${group}
+wait_for_group_id_changed ${CLUSTER1} ${POOL}/${group} ${group_id_before}
 wait_for_group_status_in_pool_dir ${CLUSTER1} ${POOL}/${group} 'up+replaying' 1
 
 #TODO: Fix blocklisting IP's which are consequence of "TEST: stop mirror, create group, start mirror and test replay"
index 369dd2e43cdf43b08ed872d5cf85245c3da7cabf..9eb1e08ac06174e2203ab380cedab7740341b6cc 100755 (executable)
@@ -18,6 +18,8 @@
 #    ../qa/workunits/rbd/rbd_mirror_group_simple.sh 
 #
 # Alternatively the script takes a number of optional arguments:
+#  -d <filename>  Save debug info for a test+scenario to $TEMPDIR/filename.  After a successful test completion the contents of this file
+#                 are deleted prior to running the next test to free up disk space.
 #  -m <multiplier> Some tests can be run with a variable number of images.  The multiplier value can be specified
 #                  to increase the default number of images. (image_count = default_count * multiplier)                          
 #  -r <repeat_count> repeat_count is a number that sets the number of times each test should be run. 
@@ -51,8 +53,11 @@ image_multiplier=1
 repeat_count=1
 feature=0
 
-while getopts ":f:m:r:s:t:" opt; do
+while getopts "d:f:m:r:s:t:" opt; do
   case $opt in
+    d)
+      RBD_MIRROR_SAVE_CLI_OUTPUT=$OPTARG
+      ;;
     f)
       feature=$OPTARG
       ;;
@@ -319,6 +324,8 @@ test_enable_mirroring_when_duplicate_group_exists()
   local group=$1 ; shift
   local scenario=$1 ; shift
 
+  start_mirrors "${primary_cluster}"
+
   group_create "${primary_cluster}" "${pool}/${group}"
   group_create "${secondary_cluster}" "${pool}/${group}"
   
@@ -326,40 +333,28 @@ test_enable_mirroring_when_duplicate_group_exists()
 
   # group will be present on secondary, but won't be mirrored
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" 0
-  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'down+unknown'
-  test_fields_in_group_info ${primary_cluster} ${pool}/${group} 'snapshot' 'enabled' 'true'
-
-#TODO need to finish this test so that it looks at the "state" and "description" fields for the peer site in the
-# group status output.
-
-# Example output is:
-#test-group0:
-  #global_id:   2abbba6c-4d06-46b5-ad9a-3e833b4555f1
-  #state:       down+unknown
-  #description: status not found
-  #last_update: 
-  #images:
-  #peer_sites:
-    #name: cluster1
-    #state: up+error
-    #description: bootstrap failed
-
-  # TODO write a helper function - wait_for_peer_group_status_in_pool_dir() that checks the state and description 
-  # see get_fields_from_mirror_group_status() function to help with this
+  # TODO - fails on next line with blank description
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' 'local group is primary'
+  test_fields_in_group_info "${primary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'true'
+
+  # Look at the "state" and "description" fields for the peer site in the group status output.
+  # Can't look at the state directly on the secondary because mirroring should have failed to be enabled
+
+  # TODO - fails with incorrect description in peer status
   if [ "${scenario}" = 'remove' ]; then
-    wait_for_peer_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain'
+    wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain detected'
     # remove the non-mirrored group on the secondary
     group_remove "${secondary_cluster}" "${pool}/${group}"
   elif  [ "${scenario}" = 'rename_secondary' ]; then
-    wait_for_peer_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain'
+    wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain detected'
     group_rename "${secondary_cluster}" "${pool}/${group}" "${pool}/${group}_renamed"
   elif  [ "${scenario}" = 'rename_primary' ]; then
-    wait_for_peer_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain'
+    wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain detected'
     group_rename "${primary_cluster}" "${pool}/${group}" "${pool}/${group}_renamed"
     group_orig="${group}"
     group="${group}_renamed"
   elif  [ "${scenario}" = 'disable_then_rename_primary' ]; then
-    wait_for_peer_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain'
+    wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain detected'
     mirror_group_disable "${primary_cluster}" "${pool}/${group}"
     group_rename "${primary_cluster}" "${pool}/${group}" "${pool}/${group}_renamed"
     group_orig="${group}"
@@ -368,17 +363,17 @@ test_enable_mirroring_when_duplicate_group_exists()
   fi
 
   if [ "${scenario}" = 'remove' ]; then
-    wait_for_peer_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying'
-    test_fields_in_group_info ${secondary_cluster} ${pool}/${group} 'snapshot' 'enabled' 'false'
+    wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+replaying'
+    test_fields_in_group_info "${secondary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'false'
   elif  [ "${scenario}" = 'rename_secondary' ]; then
-    wait_for_peer_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying'
-    test_fields_in_group_info ${secondary_cluster} ${pool}/${group} 'snapshot' 'enabled' 'false'
+    wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+replaying'
+    test_fields_in_group_info "${secondary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'false'
   elif  [ "${scenario}" = 'rename_primary' ]; then
-    # Group should still not be mirrored in this case
-    wait_for_peer_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain'
+    # Group should still not be mirrored in this case - need to disable, rename and re-enable to fix
+    wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain detected'
   elif  [ "${scenario}" = 'disable_then_rename_primary' ]; then
-    wait_for_peer_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying'
-    test_fields_in_group_info ${secondary_cluster} ${pool}/${group} 'snapshot' 'enabled' 'false'
+    wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+replaying'
+    test_fields_in_group_info "${secondary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'false'
   fi
 
   group_remove "${primary_cluster}" "${pool}/${group}"
@@ -390,122 +385,99 @@ test_enable_mirroring_when_duplicate_group_exists()
   elif  [ "${scenario}" = 'rename_primary' ]; then
     group_remove "${secondary_cluster}" "${pool}/${group_orig}"
   fi
+
+  wait_for_no_keys "${primary_cluster}"
+  stop_mirrors "${primary_cluster}"
 }
 
-declare -a test_enable_mirroring_when_duplicate_group_and_images_exists_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}" 2 'remove')
-declare -a test_enable_mirroring_when_duplicate_group_and_images_exists_2=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}" 2 'rename_secondary')
-declare -a test_enable_mirroring_when_duplicate_group_and_images_exists_3=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}" 2 'rename_primary')
-declare -a test_enable_mirroring_when_duplicate_group_and_images_exists_4=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}" 2 'disable_then_rename_primary')
+declare -a test_enable_mirroring_when_duplicate_image_exists_1=("${CLUSTER2}" "${CLUSTER1}" 'remove')
+declare -a test_enable_mirroring_when_duplicate_image_exists_2=("${CLUSTER2}" "${CLUSTER1}" 'rename_secondary')
+declare -a test_enable_mirroring_when_duplicate_image_exists_3=("${CLUSTER2}" "${CLUSTER1}" 'rename_primary')
+declare -a test_enable_mirroring_when_duplicate_image_exists_4=("${CLUSTER2}" "${CLUSTER1}" 'disable_then_rename_primary')
 
-test_enable_mirroring_when_duplicate_group_and_images_exists_scenarios=4
+test_enable_mirroring_when_duplicate_image_exists_scenarios=4
 
-# This test does the following
-# 1. create a group with images on primary site
-# 2. create a group with the same name with images with the same name on the secondary site
-# 3. enable mirroring on the primary site 
+# This test does the following 
+# 1. create a group with an image on primary site
+# 2. create an image with the same name on the secondary site
+# 3. enable mirroring for the group on the primary site 
 # 4. take different actions to allow mirroring to proceed
-#    scenario 1 - delete the duplicate named group and images on the secondary
-#    scenario 2 - rename the duplicate named group and images on the secondary
-#    scenario 3 - rename the duplicate named group and images on the primary without disabling mirroring
-#    scenario 4 - disable mirroing then rename the duplicate named group and images on the primary and re-enable mirroring
+#    scenario 1 - delete the duplicate image on the secondary
+#    scenario 2 - rename the duplicate image on the secondary
+#    scenario 3 - rename the duplicate image on the primary without disabling mirroring
+#    scenario 4 - disable mirroring then rename the duplicate named image on the primary and re-enable mirroring
 # 5. check that group and all images are successfully mirrored to secondary - apart from scenario 3
-test_enable_mirroring_when_duplicate_group_and_images_exists()
+test_enable_mirroring_when_duplicate_image_exists()
 {
   local primary_cluster=$1 ; shift
   local secondary_cluster=$1 ; shift
-  local pool=$1 ; shift
-  local group=$1 ; shift
-  local image_prefix=$1 ; shift
-  local image_count=$(($1*"${image_multiplier}")) ; shift
   local scenario=$1 ; shift
+  local image_count=1
+  local pool="${pool0}"
+  local group="${group0}"
+
+  start_mirrors "${primary_cluster}"
 
   group_create "${primary_cluster}" "${pool}/${group}"
   images_create "${primary_cluster}" "${pool}/${image_prefix}" "${image_count}"
   group_images_add "${primary_cluster}" "${pool}/${group}" "${pool}/${image_prefix}" "${image_count}"
 
-  group_create "${secondary_cluster}" "${pool}/${group}"
   images_create "${secondary_cluster}" "${pool}/${image_prefix}" "${image_count}"
-  group_images_add "${secondary_cluster}" "${pool}/${group}" "${pool}/${image_prefix}" "${image_count}"
   
   mirror_group_enable "${primary_cluster}" "${pool}/${group}"
 
-  # group will be present on secondary, but won't be mirrored
-  wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" "${image_count}"
-  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'down+unknown'
-  test_fields_in_group_info ${primary_cluster} ${pool}/${group} 'snapshot' 'enabled' 'true'
-  check_daemon_running "${secondary_cluster}"
-
-  if [ "${scenario}" = 'remove' ]; then
-    # remove the non-mirrored group on the secondary
-    group_remove "${secondary_cluster}" "${pool}/${group}"
-  elif  [ "${scenario}" = 'rename_secondary' ]; then
-    group_rename "${secondary_cluster}" "${pool}/${group}" "${pool}/${group}_renamed"
-  elif  [ "${scenario}" = 'rename_primary' ]; then
-    group_rename "${primary_cluster}" "${pool}/${group}" "${pool}/${group}_renamed"
-    group_orig="${group}"
-    group="${group}_renamed"
-  elif  [ "${scenario}" = 'disable_then_rename_primary' ]; then
-    mirror_group_disable "${primary_cluster}" "${pool}/${group}"
-    group_rename "${primary_cluster}" "${pool}/${group}" "${pool}/${group}_renamed"
-    group_orig="${group}"
-    group="${group}_renamed"
-    mirror_group_enable "${primary_cluster}" "${pool}/${group}"
-  fi
+  # group will be present on secondary, but image won't be mirrored
+  wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" 0
+  # TODO fails on next line with description 'bootstrap failed'
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' 'local group is primary'
+  test_fields_in_group_info "${primary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'true'
 
-  # group should now be mirrored, but images can't be
-  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+error'
-  test_fields_in_group_info ${secondary_cluster} ${pool}/${group} 'snapshot' 'enabled' 'false'
+  # group should be mirrored, but image can't be
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+error' 'failed to start image replayers'
+  test_fields_in_group_info "${secondary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'false'
 
   if [ "${scenario}" = 'remove' ]; then
-    # remove the non-mirrored images on the secondary
+    # remove the non-mirrored image on the secondary
     images_remove "${secondary_cluster}" "${pool}/${image_prefix}" "${image_count}"
   elif  [ "${scenario}" = 'rename_secondary' ]; then
-    local i
-    for i in $(seq 0 $((image_count-1))); do
-      image_rename "${secondary_cluster}" "${pool}/${image_prefix}${i}" "${pool}/${image_prefix}_renamed${i}"
-    done
+    image_rename "${secondary_cluster}" "${pool}/${image_prefix}0" "${pool}/${image_prefix}_renamed0"
   elif  [ "${scenario}" = 'rename_primary' ]; then
-    local i
-    for i in $(seq 0 $((image_count-1))); do
-      image_rename "${primary_cluster}" "${pool}/${image_prefix}${i}" "${pool}/${image_prefix}_renamed${i}"
-    done
+    image_rename "${primary_cluster}" "${pool}/${image_prefix}0" "${pool}/${image_prefix}_renamed0"
     image_prefix_orig="${image_prefix}"
     image_prefix="${image_prefix}_renamed"
   elif  [ "${scenario}" = 'disable_then_rename_primary' ]; then
     mirror_group_disable "${primary_cluster}" "${pool}/${group}"
-    local i
-    for i in $(seq 0 $((image_count-1))); do
-      image_rename "${primary_cluster}" "${pool}/${image_prefix}${i}" "${pool}/${image_prefix}_renamed${i}"
-    done
+    image_rename "${primary_cluster}" "${pool}/${image_prefix}0" "${pool}/${image_prefix}_renamed0"
     image_prefix_orig="${image_prefix}"
     image_prefix="${image_prefix}_renamed"
     mirror_group_enable "${primary_cluster}" "${pool}/${group}"
+    wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" "${image_count}"
   fi
 
-  # TODO scenario 3 fails on the next line - no images are listed in the group 
-  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying' "${image_count}"
-  wait_for_group_synced "${primary_cluster}" "${pool}/${group}" "${secondary_cluster}" "${pool}"/"${group}"
-
-  wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group}" "${image_count}"
-  check_daemon_running "${secondary_cluster}"
-
-  if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
-    wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'down+unknown' 0
+  if [ "${scenario}" = 'rename_primary' ]; then
+    # Group should still not be mirrored in this case - need to disable, rename and re-enable to fix
+    # TODO scenario 3 fails on the next line - description is blank
+    wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' 'local group is primary'
+  else
+    wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying' "${image_count}"
+    wait_for_group_synced "${primary_cluster}" "${pool}/${group}" "${secondary_cluster}" "${pool}"/"${group}"
+    wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group}" "${image_count}"
+    wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' "${image_count}"
   fi
 
   group_remove "${primary_cluster}" "${pool}/${group}"
   wait_for_group_not_present "${primary_cluster}" "${pool}" "${group}"
   wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group}"
-  check_daemon_running "${secondary_cluster}"
   images_remove "${primary_cluster}" "${pool}/${image_prefix}" "${image_count}"
 
   if [ "${scenario}" = 'rename_secondary' ]; then
-    group_remove "${secondary_cluster}" "${pool}/${group}_renamed"
     images_remove "${secondary_cluster}" "${pool}/${image_prefix}_renamed" "${image_count}"
   elif  [ "${scenario}" = 'rename_primary' ]; then
-    group_remove "${secondary_cluster}" "${pool}/${group_orig}"
     images_remove "${secondary_cluster}" "${pool}/${image_prefix_orig}" "${image_count}"
   fi
+
+  wait_for_no_keys "${primary_cluster}"
+  stop_mirrors "${primary_cluster}"
 }
 
 # record the time taken to enable and sync for a group with increasing number of images.
@@ -686,7 +658,7 @@ test_create_group_stop_daemon_then_recreate()
 
   # Wait for rbd_mirror_leader to be empty
   for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16; do
-    sleep ${s}
+    sleep "${s}"
     count_omap_keys_with_filter "${secondary_cluster}" "${pool}" "rbd_mirror_leader" "image_map" key_count
     test "${key_count}" = 0 && break
   done
@@ -1138,7 +1110,7 @@ test_remote_namespace()
     wait_for_status_in_pool_dir "${secondary_cluster}" "${secondary_pool_spec}" "${standalone_image}" 'up+unknown'
   fi
 
-  # try promote, modify image and check new snapshot syncs 
+  # try promote (of original secondary), modify image and check new snapshot syncs 
   if [ "${test_group}" = 'true' ]; then
     mirror_group_promote "${secondary_cluster}" "${secondary_pool_spec}/${group}"
     test_fields_in_group_info "${secondary_cluster}" "${secondary_pool_spec}/${group}" 'snapshot' 'enabled' 'true'
@@ -1157,7 +1129,7 @@ test_remote_namespace()
     wait_for_snapshot_sync_complete "${primary_cluster}" "${secondary_cluster}" "${primary_pool_spec}" "${secondary_pool_spec}" "${standalone_image}"
   fi  
 
-  # try demote, promote and resync
+  # try demote, promote (of original primary) and resync
   if [ "${test_group}" = 'true' ]; then
     mirror_group_demote "${secondary_cluster}" "${secondary_pool_spec}/${group}"
     test_fields_in_group_info "${secondary_cluster}" "${secondary_pool_spec}/${group}" 'snapshot' 'enabled' 'false'
@@ -1168,7 +1140,10 @@ test_remote_namespace()
     test_fields_in_group_info "${primary_cluster}" "${primary_pool_spec}/${group}" 'snapshot' 'enabled' 'true'
     wait_for_group_status_in_pool_dir "${secondary_cluster}" "${secondary_pool_spec}"/"${group}" 'up+replaying' "${image_count}"
 
+    local group_id_before
+    get_id_from_group_info "${secondary_cluster}" "${secondary_pool_spec}/${group}" group_id_before
     mirror_group_resync "${secondary_cluster}" "${secondary_pool_spec}/${group}"
+    wait_for_group_id_changed "${secondary_cluster}" "${secondary_pool_spec}/${group}" "${group_id_before}"
 
     wait_for_group_synced "${primary_cluster}" "${primary_pool_spec}/${group}" "${secondary_cluster}" "${secondary_pool_spec}/${group}"
 
@@ -1190,6 +1165,22 @@ test_remote_namespace()
     wait_for_snapshot_sync_complete "${secondary_cluster}" "${primary_cluster}" "${secondary_pool_spec}" "${primary_pool_spec}" "${standalone_image}"
   fi    
 
+  # try another manual snapshot and check that it still syncs OK
+  if [ "${test_group}" = 'true' ]; then
+    write_image "${primary_cluster}" "${primary_pool_spec}" "${image_prefix}0" 10 4096
+    local group_snap_id
+    mirror_group_snapshot "${primary_cluster}" "${primary_pool_spec}/${group}" group_snap_id
+    wait_for_group_snap_present "${secondary_cluster}" "${secondary_pool_spec}/${group}" "${group_snap_id}"
+    wait_for_group_snap_sync_complete "${secondary_cluster}" "${secondary_pool_spec}/${group}" "${group_snap_id}"
+
+    # Check all images in the group and confirms that they are synced
+    test_group_synced_image_status "${secondary_cluster}" "${secondary_pool_spec}/${group}" "${group_snap_id}" "${image_count}"
+  else
+    # try a standalone image
+    mirror_image_snapshot "${primary_cluster}" "${primary_pool_spec}" "${standalone_image}"
+    wait_for_snapshot_sync_complete "${secondary_cluster}" "${primary_cluster}" "${secondary_pool_spec}" "${primary_pool_spec}" "${standalone_image}"
+  fi  
+
   if [ "${test_group}" = 'true' ]; then
     group_remove "${primary_cluster}" "${primary_pool_spec}/${group}"
     wait_for_group_not_present "${primary_cluster}" "${primary_pool_spec}" "${group}"
@@ -1273,7 +1264,11 @@ test_empty_group()
   test_fields_in_group_info "${primary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'true'
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying'
 
+  local group_id_before
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group}" group_id_before
   mirror_group_resync "${secondary_cluster}" "${pool}/${group}"
+  wait_for_group_id_changed "${secondary_cluster}" "${pool}/${group}" "${group_id_before}"
+
   wait_for_group_synced "${primary_cluster}" "${pool}/${group}" "${secondary_cluster}" "${pool}/${group}"
 
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying'
@@ -1434,7 +1429,11 @@ test_empty_groups()
   test_fields_in_group_info "${primary_cluster}" "${pool}/${group1}" 'snapshot' 'enabled' 'true'
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group1}" 'up+replaying'
 
+  local group_id_before
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group1}" group_id_before
   mirror_group_resync "${secondary_cluster}" "${pool}/${group1}"
+  wait_for_group_id_changed "${secondary_cluster}" "${pool}/${group1}" "${group_id_before}"
+
   wait_for_group_synced "${primary_cluster}" "${pool}/${group1}" "${secondary_cluster}" "${pool}/${group1}"
 
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group1}" 'up+replaying'
@@ -1963,13 +1962,22 @@ test_stopped_daemon()
   get_newest_group_snapshot_id "${secondary_cluster}" "${pool}"/"${group}" secondary_group_snap_id
   test "${primary_group_snap_id}" = "${secondary_group_snap_id}" ||  { fail "mismatched ids"; return 1; }
 
-  # TODO test more actions whilst daemon is stopped
-  # add image, take snapshot, remove image, take snapshot, restart
-  # disable mirroring 
-  
+  # TODO When dynamic groups are supported this test could be extended with more actions whilst the daemon is stopped.
+  # eg add image, take snapshot, remove image, take snapshot, restart
+
+  # Disable mirroring for synced group (whilst daemon is stopped)
+  echo "stopping daemon"
+  stop_mirrors "${secondary_cluster}"
+
   mirror_group_disable "${primary_cluster}" "${pool}/${group}"
   group_remove "${primary_cluster}" "${pool}/${group}"
   wait_for_group_not_present "${primary_cluster}" "${pool}" "${group}"
+  test_group_present "${secondary_cluster}" "${pool}" "${group}" 1 "${group_image_count}"
+
+  # restart daemon and confirm that group is removed from secondary
+  echo "starting daemon"
+  start_mirrors "${secondary_cluster}"
+
   wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group}"
 
   images_remove "${primary_cluster}" "${pool}/${image_prefix}" "${group_image_count}"
@@ -1977,10 +1985,8 @@ test_stopped_daemon()
 }
 
 # multiple images in group and standalone images too with io
-declare -a test_group_and_standalone_images_do_io_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}" 'false')
-declare -a test_group_and_standalone_images_do_io_2=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}" 'true')
+declare -a test_group_and_standalone_images_do_io_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" "${image_prefix}")
 
-# TODO scenario 2 fails currently - it is waiting for the groups to be listed in the pool status
 test_group_and_standalone_images_do_io_scenarios=1 
 
 test_group_and_standalone_images_do_io()
@@ -1990,7 +1996,6 @@ test_group_and_standalone_images_do_io()
   local pool=$3
   local group=$4
   local image_prefix=$5
-  local test_pool_status=$6
 
   local standalone_image_prefix=standalone-image
   local standalone_image_count=4
@@ -2002,25 +2007,20 @@ test_group_and_standalone_images_do_io()
   images_create "${primary_cluster}" "${pool}/${image_prefix}" "${group_image_count}"
   group_images_add "${primary_cluster}" "${pool}/${group}" "${pool}/${image_prefix}" "${group_image_count}"
 
-  if [ 'true' = "${test_pool_status}" ]; then
-    local fields=(//status/images/image/name //status/groups/group/name)
-    local pool_fields_count_arr=()
-    count_fields_in_mirror_pool_status "${primary_cluster}" "${pool}" pool_fields_count_arr "${fields[@]}"
-    # Check count of images and groups in the command output
-    test 0 = "${pool_fields_count_arr[0]}" || fail "unexpected count of images : ${pool_fields_count_arr[0]}"
-    test 0 = "${pool_fields_count_arr[1]}" || fail "unexpected count of groups : ${pool_fields_count_arr[1]}"
-  fi
+  local fields=(//status/images/image/name //status/groups/group/name)
+  local pool_fields_count_arr=()
+  count_fields_in_mirror_pool_status "${primary_cluster}" "${pool}" pool_fields_count_arr "${fields[@]}"
+  # Check count of images and groups in the command output
+  test 0 = "${pool_fields_count_arr[0]}" || fail "unexpected count of images : ${pool_fields_count_arr[0]}"
+  test 0 = "${pool_fields_count_arr[1]}" || fail "unexpected count of groups : ${pool_fields_count_arr[1]}"
 
   mirror_group_enable "${primary_cluster}" "${pool}/${group}"
 
-  if [ 'true' = "${test_pool_status}" ]; then
-    local fields=(//status/images/image/name //status/groups/group/name)
-    pool_fields_count_arr=()
-    count_fields_in_mirror_pool_status "${primary_cluster}" "${pool}" pool_fields_count_arr "${fields[@]}"
-    # Check count of images and groups in the command output
-    test $((${group_image_count})) = "${pool_fields_count_arr[0]}" || fail "unexpected count of images : ${pool_fields_count_arr[0]}"
-    test 1 = "${pool_fields_count_arr[1]}" || fail "unexpected count of groups : ${pool_fields_count_arr[1]}"
-  fi
+  pool_fields_count_arr=()
+  count_fields_in_mirror_pool_status "${primary_cluster}" "${pool}" pool_fields_count_arr "${fields[@]}"
+  # Check count of images and groups in the command output
+  test $((${group_image_count})) = "${pool_fields_count_arr[0]}" || fail "unexpected count of images : ${pool_fields_count_arr[0]}"
+  test 1 = "${pool_fields_count_arr[1]}" || fail "unexpected count of groups : ${pool_fields_count_arr[1]}"
 
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" "${group_image_count}"
 
@@ -2041,14 +2041,11 @@ test_group_and_standalone_images_do_io()
     compare_images "${secondary_cluster}" "${primary_cluster}" "${pool}" "${pool}" "${standalone_image_prefix}${loop_instance}"
   done
 
-  if [ 'true' = "${test_pool_status}" ]; then
-    local fields=(//status/images/image/name //status/groups/group/name)
-    pool_fields_count_arr=()
-    count_fields_in_mirror_pool_status "${primary_cluster}" "${pool}" pool_fields_count_arr "${fields[@]}"
-    # Check count of images and groups in the command output
-    test $((${standalone_image_count}+${group_image_count})) = "${pool_fields_count_arr[0]}" || fail "unexpected count of images : ${pool_fields_count_arr[0]}"
-    test 1 = "${pool_fields_count_arr[1]}" || fail "unexpected count of groups : ${pool_fields_count_arr[1]}"
-  fi
+  pool_fields_count_arr=()
+  count_fields_in_mirror_pool_status "${primary_cluster}" "${pool}" pool_fields_count_arr "${fields[@]}"
+  # Check count of images and groups in the command output
+  test $((${standalone_image_count}+${group_image_count})) = "${pool_fields_count_arr[0]}" || fail "unexpected count of images : ${pool_fields_count_arr[0]}"
+  test 1 = "${pool_fields_count_arr[1]}" || fail "unexpected count of groups : ${pool_fields_count_arr[1]}"
 
   local io_count=1024
   local io_size=4096
@@ -2186,7 +2183,7 @@ test_create_multiple_groups_do_io()
   #        - also maybe change image count in groups and check rebalancing
   local group_arr
   local image_arr
-  for instance in $(seq 0 ${LAST_MIRROR_INSTANCE}); do
+  for instance in $(seq 0 "${LAST_MIRROR_INSTANCE}"); do
     local result
     query_replayer_assignment "${secondary_cluster}" "${instance}" result
     group_arr+=("${result[0]}")
@@ -2194,7 +2191,7 @@ test_create_multiple_groups_do_io()
     group_count_arr+=("${result[2]}")
     image_count_arr+=("${result[3]}")
   done
-  for instance in $(seq 0 ${LAST_MIRROR_INSTANCE}); do
+  for instance in $(seq 0 "${LAST_MIRROR_INSTANCE}"); do
       echo -e "${RED}MIRROR DAEMON INSTANCE:${instance}${NO_COLOUR}";
       echo -e "${RED}GROUP_REPLAYERS:${group_count_arr[$instance]}${NO_COLOUR}";
       echo -e "${RED}${group_arr[$instance]}${NO_COLOUR}";
@@ -2460,7 +2457,8 @@ test_image_move_group()
   wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group0}" $(("${image_count}"-4))
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' $(("${image_count}"-4))
 
-  # set up a chain of moves TODO
+  # TODO when dynamic groups are supported, this test could be extended to set up a chain of moves 
+  # ie stop daemon, move image from group A->B, move image from B->C then restart daemon
 
   mirror_group_disable "${primary_cluster}" "${pool}/${group0}"
   group_remove "${primary_cluster}" "${pool}/${group0}"
@@ -2665,10 +2663,9 @@ test_force_promote()
     sleep 5
   fi  
 
+  wait_for_group_id_changed "${secondary_cluster}" "${pool}/${group0}" "${group_id_before}"
+
   wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}/${group0}"
-  local group_id_after
-  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_after
-  test "${group_id_before}" != "${group_id_after}" || fail "group was not recreated by resync"
 
   compare_image_with_snapshot "${secondary_cluster}" "${pool}/${image_prefix}0" "${secondary_cluster}" "${pool}/${image_prefix}0@${snap0}"
   compare_image_with_snapshot "${secondary_cluster}" "${pool}/${big_image}" "${secondary_cluster}" "${pool}/${big_image}@${snap0}"
@@ -2735,10 +2732,10 @@ test_force_promote_delete_group()
   stop_mirrors "${secondary_cluster}" '-9'
   mirror_group_promote "${secondary_cluster}" "${pool}/${group0}" '--force'
   start_mirrors "${secondary_cluster}"
-  wait_for_group_replay_stopped ${secondary_cluster} ${pool}/${group0}
-  wait_for_group_replay_stopped ${primary_cluster} ${pool}/${group0}
-  wait_for_group_status_in_pool_dir ${secondary_cluster} ${pool}/${group0} 'up+stopped' "${image_count}"
-  wait_for_group_status_in_pool_dir ${primary_cluster} ${pool}/${group0} 'up+stopped' "${image_count}"
+  wait_for_group_replay_stopped "${secondary_cluster}" "${pool}/${group0}"
+  wait_for_group_replay_stopped "${primary_cluster}" "${pool}/${group0}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}/${group0}" 'up+stopped' "${image_count}"
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}/${group0}" 'up+stopped' "${image_count}"
 
   wait_for_group_present "${primary_cluster}" "${pool}" "${group0}" "${image_count}"
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" "${image_count}"
@@ -2749,21 +2746,21 @@ test_force_promote_delete_group()
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" "${image_count}"
 
   # group still exists on original primary
-  wait_for_group_status_in_pool_dir ${primary_cluster} ${pool}/${group0} 'up+stopped' "${image_count}"
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}/${group0}" 'up+stopped' "${image_count}"
 
-  group_image_remove ${secondary_cluster} ${pool}/${group0} ${pool}/${image_prefix}0
+  group_image_remove "${secondary_cluster}" "${pool}/${group0}" "${pool}/${image_prefix}0"
 
   wait_for_group_present "${primary_cluster}" "${pool}" "${group0}" "${image_count}"
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" $(("${image_count}"-1))
-  test_fields_in_group_info ${primary_cluster} ${pool}/${group0} 'snapshot' 'enabled' 'true'
-  wait_for_group_status_in_pool_dir ${primary_cluster} ${pool}/${group0} 'up+stopped' "${image_count}"
+  test_fields_in_group_info "${primary_cluster}" "${pool}/${group0}" 'snapshot' 'enabled' 'true'
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}/${group0}" 'up+stopped' "${image_count}"
 
   mirror_group_enable "${secondary_cluster}" "${pool}/${group0}"
-  test_fields_in_group_info ${primary_cluster} ${pool}/${group0} 'snapshot' 'enabled' 'true'
-  test_fields_in_group_info ${secondary_cluster} ${pool}/${group0} 'snapshot' 'enabled' 'true'
+  test_fields_in_group_info "${primary_cluster}" "${pool}/${group0}" 'snapshot' 'enabled' 'true'
+  test_fields_in_group_info "${secondary_cluster}" "${pool}/${group0}" 'snapshot' 'enabled' 'true'
 
-  wait_for_group_status_in_pool_dir ${primary_cluster} ${pool}/${group0} 'up+stopped' "${image_count}"
-  wait_for_group_status_in_pool_dir ${secondary_cluster} ${pool}/${group0} 'up+stopped' $(("${image_count}"-1))
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}/${group0}" 'up+stopped' "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}/${group0}" 'up+stopped' $(("${image_count}"-1))
 
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" $(("${image_count}"-1))
   wait_for_group_present "${primary_cluster}" "${pool}" "${group0}" "${image_count}"
@@ -2841,9 +2838,11 @@ test_force_promote_before_initial_sync()
   # force promote the group on the secondary - this should fail with a sensible error message
   expect_failure "no initial group snapshot available" rbd --cluster=${secondary_cluster} mirror group promote ${pool}/${group0} --force
 
+  local group_id_before
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_before
   mirror_group_resync "${secondary_cluster}" "${pool}/${group0}"
   start_mirrors "${secondary_cluster}"
-  sleep 5
+  wait_for_group_id_changed "${secondary_cluster}" "${pool}/${group0}" "${group_id_before}"
 
   wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}/${group0}"
 
@@ -2854,9 +2853,10 @@ test_force_promote_before_initial_sync()
   # demote and try to resync again
   mirror_group_demote "${secondary_cluster}" "${pool}/${group0}"
 
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_before
   mirror_group_resync "${secondary_cluster}" "${pool}/${group0}"
   start_mirrors "${secondary_cluster}"
-  sleep 5
+  wait_for_group_id_changed "${secondary_cluster}" "${pool}/${group0}" "${group_id_before}"
 
   wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}/${group0}"
 
@@ -2978,7 +2978,7 @@ test_multiple_mirror_group_snapshot_whilst_stopped()
 
   wait_for_group_synced "${primary_cluster}" "${pool}/${group0}" "${secondary_cluster}" "${pool}"/"${group0}" 
   max_image=$((image_count-1))
-  for i in $(seq 0 ${max_image}); do  
+  for i in $(seq 0 "${max_image}"); do  
       wait_for_status_in_pool_dir "${secondary_cluster}" "${pool}" "${image_prefix}$i" 'up+replaying'
   done;
 
@@ -3051,15 +3051,12 @@ test_odf_failover_failback()
   # original site comes alive again
   mirror_group_demote "${primary_cluster}" "${pool}/${group0}"
 
-  local group_id_before group_id_after
-  get_id_from_group_info ${primary_cluster} ${pool}/${group0} group_id_before
+  local group_id_before
+  get_id_from_group_info "${primary_cluster}" "${pool}/${group0}" group_id_before
   mirror_group_resync "${primary_cluster}" "${pool}/${group0}" 
-
+  wait_for_group_id_changed "${primary_cluster}" "${pool}/${group0}" "${group_id_before}"
   wait_for_group_synced "${secondary_cluster}" "${pool}"/"${group0}" "${primary_cluster}" "${pool}/${group0}" 
 
-  get_id_from_group_info ${primary_cluster} ${pool}/${group0} group_id_after
-  test "${group_id_before}" != "${group_id_after}" || fail "group was not recreated"
-
   compare_image_with_snapshot "${primary_cluster}" "${pool}/${image_prefix}0" "${primary_cluster}" "${pool}/${image_prefix}0@${snap0}"
 
   write_image "${secondary_cluster}" "${pool}" "${image_prefix}0" 10 4096
@@ -3085,18 +3082,18 @@ test_odf_failover_failback()
   test "${group_snap_id_a}" != "${group_snap_id_c}" || fail "new snap not taken by demote"
 
   local group_id_before group_id_after
-  get_id_from_group_info ${secondary_cluster} ${pool}/${group0} group_id_before
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_before
   local image_id_before image_id_after
-  get_image_id2 ${secondary_cluster} ${pool}/${image_prefix}0 image_id_before
+  get_image_id2 "${secondary_cluster}" "${pool}/${image_prefix}0" image_id_before
   
   if [ "${scenario}" = 'resync_on_failback' ]; then
     # request resync - won't happen until other site is marked as primary
     mirror_group_resync "${secondary_cluster}" "${pool}/${group0}" 
   fi  
 
-  get_id_from_group_info ${secondary_cluster} ${pool}/${group0} group_id_after
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_after
   test "${group_id_before}" = "${group_id_after}" || fail "group recreated with no primary"
-  get_image_id2 ${secondary_cluster} ${pool}/${image_prefix}0 image_id_after
+  get_image_id2 "${secondary_cluster}" "${pool}/${image_prefix}0" image_id_after
   test "${image_id_before}" = "${image_id_after}" || fail "image recreated with no primary"
 
   if [ "${scenario}" != 'retry_promote' ]; then
@@ -3110,6 +3107,7 @@ test_odf_failover_failback()
     test "${group_snap_id_c}" = "${group_snap_id_f}" || fail "group not synced"
   fi
 
+  # promote original primary again
   if [ "${scenario}" = 'retry_promote' ]; then
     while true; do
       { mirror_group_promote_try "${primary_cluster}" "${pool}/${group0}" && break; } || :
@@ -3118,20 +3116,19 @@ test_odf_failover_failback()
     mirror_group_promote "${primary_cluster}" "${pool}/${group0}"
   fi  
 
+  if [ "${scenario}" = 'resync_on_failback' ]; then
+    # check that group and images were deleted and recreated on secondary cluster (as a result of the resync request)
+    wait_for_group_id_changed "${secondary_cluster}" "${pool}/${group0}" "${group_id_before}"
+    get_image_id2 "${secondary_cluster}" "${pool}/${image_prefix}0" image_id_after
+    test "${image_id_before}" != "${image_id_after}" || fail "image not recreated by resync"
+  fi  
+
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
   wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+stopped'
 
   # Write some data, take a regular mirror snapshot, wait for it to sync on secondary cluster
   write_image "${primary_cluster}" "${pool}" "${image_prefix}0" 10 4096
   mirror_group_snapshot_and_wait_for_sync_complete "${secondary_cluster}" "${primary_cluster}" "${pool}"/"${group0}"
-    
-  if [ "${scenario}" = 'resync_on_failback' ]; then
-    # check that group and images were deleted and recreated on secondary cluster (as a result of the resync request)
-    get_id_from_group_info ${secondary_cluster} ${pool}/${group0} group_id_after
-    test "${group_id_before}" != "${group_id_after}" || fail "group not recreated by resync"
-    get_image_id2 ${secondary_cluster} ${pool}/${image_prefix}0 image_id_after
-    test "${image_id_before}" != "${image_id_after}" || fail "image not recreated by resync"
-  fi  
 
   group_remove "${primary_cluster}" "${pool}/${group0}"
   wait_for_group_not_present "${primary_cluster}" "${pool}" "${group0}"
@@ -3189,25 +3186,25 @@ test_resync_marker()
   wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}"/"${group0}"
 
   local group_id_before group_id_after
-  get_id_from_group_info ${secondary_cluster} ${pool}/${group0} group_id_before
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_before
   local image_id_before image_id_after
-  get_image_id2 ${secondary_cluster} ${pool}/${image_prefix}0 image_id_before
+  get_image_id2 "${secondary_cluster}" "${pool}/${image_prefix}0" image_id_before
 
   # demote primary and request resync on secondary - check that group does not get deleted (due to resync request flag)
   mirror_group_demote "${primary_cluster}" "${pool}/${group0}" 
   mirror_group_resync "${secondary_cluster}" "${pool}/${group0}" 
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+unknown'
 
-  get_id_from_group_info ${secondary_cluster} ${pool}/${group0} group_id_after
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_after
   test "${group_id_before}" = "${group_id_after}" || fail "group recreated with no primary"
-  get_image_id2 ${secondary_cluster} ${pool}/${image_prefix}0 image_id_after
+  get_image_id2 "${secondary_cluster}" "${pool}/${image_prefix}0" image_id_after
   test "${image_id_before}" = "${image_id_after}" || fail "image recreated with no primary"
 
   mirror_group_promote "${secondary_cluster}" "${pool}/${group0}"
 
-  get_id_from_group_info ${secondary_cluster} ${pool}/${group0} group_id_after
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_after
   test "${group_id_before}" = "${group_id_after}" || fail "group recreated"
-  get_image_id2 ${secondary_cluster} ${pool}/${image_prefix}0 image_id_after
+  get_image_id2 "${secondary_cluster}" "${pool}/${image_prefix}0" image_id_after
   test "${image_id_before}" = "${image_id_after}" || fail "image recreated"
 
   write_image "${secondary_cluster}" "${pool}" "${image_prefix}0" 10 4096
@@ -3225,9 +3222,9 @@ test_resync_marker()
   mirror_group_promote "${primary_cluster}" "${pool}/${group0}"
 
   # confirm that group and image are not recreated - resync flag was cleared
-  get_id_from_group_info ${secondary_cluster} ${pool}/${group0} group_id_after
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_after
   test "${group_id_before}" = "${group_id_after}" || fail "group recreated"
-  get_image_id2 ${secondary_cluster} ${pool}/${image_prefix}0 image_id_after
+  get_image_id2 "${secondary_cluster}" "${pool}/${image_prefix}0" image_id_after
   test "${image_id_before}" = "${image_id_after}" || fail "image recreated"
 
   # write some data, take a snapshot and wait for sync to complete
@@ -3235,9 +3232,9 @@ test_resync_marker()
   mirror_group_snapshot_and_wait_for_sync_complete "${secondary_cluster}" "${primary_cluster}" "${pool}"/"${group0}"
   
   # check that group and image ids still not changed on secondary
-  get_id_from_group_info ${secondary_cluster} ${pool}/${group0} group_id_after
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_after
   test "${group_id_before}" = "${group_id_after}" || fail "group recreated"
-  get_image_id2 ${secondary_cluster} ${pool}/${image_prefix}0 image_id_after
+  get_image_id2 "${secondary_cluster}" "${pool}/${image_prefix}0" image_id_after
   test "${image_id_before}" = "${image_id_after}" || fail "image recreated"
 
   group_remove "${primary_cluster}" "${pool}/${group0}"
@@ -3305,7 +3302,10 @@ test_resync()
   
   # restart daemon and request a resync from primary
   start_mirrors "${secondary_cluster}"
+  local group_id_before
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_before
   mirror_group_resync "${secondary_cluster}" "${pool}"/"${group0}"
+  wait_for_group_id_changed  "${secondary_cluster}" "${pool}/${group0}" "${group_id_before}"
 
   # confirm that data on secondary again matches initial snapshot on primary
   wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}"/"${group0}"
@@ -3329,7 +3329,9 @@ test_resync()
   
   # restart daemon and request a resync from primary
   start_mirrors "${secondary_cluster}"
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_before
   mirror_group_resync "${secondary_cluster}" "${pool}/${group0}"
+  wait_for_group_id_changed  "${secondary_cluster}" "${pool}/${group0}" "${group_id_before}"
 
   # confirm that data on secondary again matches latest snapshot on primary
   wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}"  "${secondary_cluster}" "${pool}"/"${group0}"
@@ -3354,7 +3356,10 @@ test_resync()
 
   # restart daemon and request a resync from primary
   start_mirrors "${primary_cluster}"
+  
+  get_id_from_group_info "${primary_cluster}" "${pool}/${group0}" group_id_before
   mirror_group_resync "${primary_cluster}" "${pool}/${group0}"
+  wait_for_group_id_changed  "${primary_cluster}" "${pool}/${group0}" "${group_id_before}"
 
   # confirm that data on secondary again matches latest snapshot on primary
   wait_for_group_synced "${secondary_cluster}" "${pool}"/"${group0}" "${primary_cluster}" "${pool}"/"${group0}"
@@ -3375,6 +3380,80 @@ test_resync()
   start_mirrors "${secondary_cluster}"
 }
 
+# test that a demote snapshot is synced to the secondary after the daemon is restarted
+declare -a test_demote_snap_sync_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${image_prefix}" 'wait_before_promote' 3)
+
+test_demote_snap_sync_scenarios=1
+
+# 1. Create and mirror enable a group. Wait until it syncs to the secondary
+# 2. Kill the rbd-mirror daemon on the secondary
+# 3. Demote the primary group
+# 4. Start the rbd-mirror daemon on the secondary
+# 5. Check that the demote snap is synced to the secondary
+# 6. promote the group on the secondary
+
+test_demote_snap_sync()
+{
+  local primary_cluster=$1 ; shift
+  local secondary_cluster=$1 ; shift
+  local pool=$1 ; shift
+  local image_prefix=$1 ; shift
+  local scenario=$1 ; shift
+  local image_count=$(($1*"${image_multiplier}")) ; shift
+
+  start_mirrors "${primary_cluster}"
+
+  group_create "${primary_cluster}" "${pool}/${group0}"
+  images_create "${primary_cluster}" "${pool}/${image_prefix}" "${image_count}"
+  write_image "${primary_cluster}" "${pool}" "${image_prefix}0" 10 4096
+  group_images_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${image_prefix}" "${image_count}"
+
+  mirror_group_enable "${primary_cluster}" "${pool}/${group0}"
+  wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" "${image_count}"
+  wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group0}" "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+stopped' 
+
+  wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}"/"${group0}" 
+
+  # stop daemon on secondary (cluster1), demote original primary (cluster2) and restart daemon on secondary
+  stop_mirrors "${secondary_cluster}" 
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'down+stopped'
+  local group_snap_id
+  get_newest_group_snapshot_id "${primary_cluster}" "${pool}/${group0}" group_snap_id
+
+  mirror_group_demote "${primary_cluster}" "${pool}/${group0}"
+
+  local primary_demote_snap_id
+  get_newest_group_snapshot_id "${primary_cluster}" "${pool}/${group0}" primary_demote_snap_id
+
+  test "${group_snap_id}" != "${primary_demote_snap_id}" ||  { fail "no new snapshot after demote"; return 1; }
+
+  start_mirrors "${secondary_cluster}"
+
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}/${group0}" 'up+unknown'
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}/${group0}" 'up+unknown'
+
+  local secondary_snap_id
+  get_newest_group_snapshot_id "${secondary_cluster}" "${pool}/${group0}" secondary_snap_id
+
+  # TODO this test currently fails on the next line. Waiting for fix to issue 39
+  test "${primary_demote_snap_id}" = "${secondary_snap_id}" ||  { fail "demote snapshot ${primary_demote_snap_id} not synced"; return 1; }
+
+  mirror_group_promote "${secondary_cluster}" "${pool}/${group0}" 
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+stopped' 
+
+  group_remove "${secondary_cluster}" "${pool}/${group0}"
+  wait_for_group_not_present "${primary_cluster}" "${pool}" "${group0}"
+  wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group0}"
+
+  images_remove "${secondary_cluster}" "${pool}/${image_prefix}" "${image_count}"
+  wait_for_no_keys "${secondary_cluster}"
+  stop_mirrors "${primary_cluster}"
+  check_daemon_running "${secondary_cluster}"
+}
+
 check_for_no_keys()
 {
   local primary_cluster=$1
@@ -3383,14 +3462,14 @@ check_for_no_keys()
 
   for cluster in ${primary_cluster} ${secondary_cluster}; do
     local pools
-    pools=$(CEPH_ARGS='' ceph --cluster ${cluster} osd pool ls  | grep -v "^\." | xargs)
+    pools=$(CEPH_ARGS='' ceph --cluster "${cluster}" osd pool ls  | grep -v "^\." | xargs)
 
     for pool in ${pools}; do
       # see if the rbd_mirror_leader object exists in the pool
       get_pool_obj_count "${cluster}" "${pool}" "rbd_mirror_leader" obj_count
 
       # if it does then check that there are no entries left in it
-      if [ $obj_count -gt 0 ]; then
+      if [ "$obj_count" -gt 0 ]; then
         count_omap_keys_with_filter "${cluster}" "${pool}" "rbd_mirror_leader" "image_map" key_count
         test "${key_count}" = 0 || testlog "last test left keys" 
       fi
@@ -3415,7 +3494,7 @@ wait_for_no_keys()
       count_omap_keys_with_filter "${cluster}" "${pool}" "rbd_mirror_leader" "image_map" key_count
       if [ "${key_count}" -gt 0 ]; then
         for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16; do
-          sleep ${s}
+          sleep "${s}"
           count_omap_keys_with_filter "${cluster}" "${pool}" "rbd_mirror_leader" "image_map" key_count
           test "${key_count}" = 0 && break
         done
@@ -3448,10 +3527,15 @@ run_test()
     # look at every pool on both clusters and check that there are no entries leftover in rbd_image_leader
     check_for_no_keys "${primary_cluster}" "${secondary_cluster}"
 
+    if [ -n "${RBD_MIRROR_SAVE_CLI_OUTPUT}" ]; then 
+      # Record the test name and scenario and clear any old output in the file
+      echo "Test:${test_name} Scenario:${test_scenario}" > "${TEMPDIR}/${RBD_MIRROR_SAVE_CLI_OUTPUT}"
+    fi
+
      # if the "mirror" pool doesn't exist then call setup to recreate all the required pools
     local pool_count
     get_pool_count "${primary_cluster}" 'mirror' pool_count
-    if [ 0 = ${pool_count} ]; then
+    if [ 0 = "${pool_count}" ]; then
       setup
     fi
   else
@@ -3486,7 +3570,7 @@ run_test_all_scenarios()
   local working_test_scenarios 
   if [[ $test_scenarios =~ ^[0-9]+$ ]]
   then
-    working_test_scenarios=$(seq 1 $test_scenarios)
+    working_test_scenarios=$(seq 1 "$test_scenarios")
   else
     working_test_scenarios=$test_scenarios
   fi
@@ -3494,7 +3578,7 @@ run_test_all_scenarios()
 
   local loop
   for loop in $working_test_scenarios; do
-    run_test $test_name $loop
+    run_test "$test_name" "$loop"
   done
 }
 
@@ -3527,11 +3611,12 @@ run_all_tests()
   run_test_all_scenarios test_create_group_with_image_remove_then_repeat
   run_test_all_scenarios test_enable_disable_repeat
   run_test_all_scenarios test_empty_group_omap_keys
+  # TODO next test is disabled waiting for fix to issue 28
   #run_test_all_scenarios test_group_with_clone_image
   run_test_all_scenarios test_multiple_mirror_group_snapshot_unlink_time
   run_test_all_scenarios test_force_promote_delete_group
   run_test_all_scenarios test_create_group_stop_daemon_then_recreate
-  # TODO these next 2 tests are disabled as they need a bit more work
+  # TODO these next 2 tests are disabled as they fail with incorrect state/description in mirror group status - issue 50
   #run_test_all_scenarios test_enable_mirroring_when_duplicate_group_exists
   #run_test_all_scenarios test_enable_mirroring_when_duplicate_group_and_images_exists
   run_test_all_scenarios test_odf_failover_failback
@@ -3539,6 +3624,8 @@ run_all_tests()
   run_test_all_scenarios test_force_promote_before_initial_sync
   run_test_all_scenarios test_image_snapshots_with_group
   run_test_all_scenarios test_group_rename
+  # TODO this test is disabled until Nithya delivers her bootstrap changes
+  #run_test_all_scenarios test_demote_snap_sync
 }
 
 if [ -n "${RBD_MIRROR_HIDE_BASH_DEBUGGING}" ]; then
index 8ba61191eddf1817bef4e5d2dddf7b5e1477b3bb..9c1f0fec88bf610f6378fc87a68ebbc7eda76720 100755 (executable)
@@ -189,6 +189,16 @@ run_cmd_internal() {
         cat "$CMD_STDERR" 1>&2
     fi
 
+    if [ -n "${RBD_MIRROR_SAVE_CLI_OUTPUT}" ]; then 
+        if [ 'true' = "${as_admin}" ]; then
+            echo "CEPH_ARGS=''" "$@" >> "${TEMPDIR}/${RBD_MIRROR_SAVE_CLI_OUTPUT}"
+        else
+            echo "CEPH_ARGS='--id ${CEPH_ID}'" "$@" >> "${TEMPDIR}/${RBD_MIRROR_SAVE_CLI_OUTPUT}"
+        fi
+        cat "$CMD_STDOUT" >> "${TEMPDIR}/${RBD_MIRROR_SAVE_CLI_OUTPUT}"
+        cat "$CMD_STDERR" >> "${TEMPDIR}/${RBD_MIRROR_SAVE_CLI_OUTPUT}"
+    fi
+
     if [ 0 = $rc ] ; then
         return 0
     fi
@@ -1145,10 +1155,11 @@ get_fields_from_group_info()
 {
     local cluster=$1 ; shift
     local group_spec=$1 ; shift
+    local runner=$1 ; shift
     local -n _group_info_result_arr=$1 ; shift
     local fields=("$@")
 
-    run_cmd "rbd --cluster ${cluster} group info ${group_spec} --format xml --pretty-format" || { fail; return 1; }
+    "$runner" "rbd --cluster ${cluster} group info ${group_spec} --format xml --pretty-format" || { fail; return 1; }
 
     local field result
     for field in "${fields[@]}"; do
@@ -1157,7 +1168,6 @@ get_fields_from_group_info()
     done
 }
 
-# TODO need to verify the new mirroring fields in the group info once they are available
 test_fields_in_group_info()
 {
     local cluster=$1 ; shift
@@ -1168,7 +1178,7 @@ test_fields_in_group_info()
 
     local fields=(//group/group_name //group/group_id //group/mirroring/mode //group/mirroring/state //group/mirroring/global_id //group/mirroring/primary)
     local fields_arr
-    get_fields_from_group_info "${cluster}" "${group_spec}" fields_arr "${fields[@]}"
+    get_fields_from_group_info "${cluster}" "${group_spec}" "run_cmd" fields_arr "${fields[@]}"
     test "${fields_arr[2]}" = "${expected_mode}" || { fail "mode = ${fields_arr[2]}"; return 1; }
     test "${fields_arr[3]}" = "${expected_state}" || { fail "state = ${fields_arr[3]}"; return 1; }
     test "${fields_arr[5]}" = "${expected_is_primary}" || { fail "primary = ${fields_arr[5]}"; return 1; }
@@ -1179,10 +1189,11 @@ get_id_from_group_info()
     local cluster=$1 ; shift
     local group_spec=$1 ; shift
     local -n _result=$1 ; shift
+    local runner=${1:-"run_cmd"}
 
     local fields=(//group/group_id)
     local fields_arr
-    get_fields_from_group_info "${cluster}" "${group_spec}" fields_arr "${fields[@]}"
+    get_fields_from_group_info "${cluster}" "${group_spec}" "${runner}" fields_arr "${fields[@]}" || { fail; return 1; }
     _result="${fields_arr[0]}"
 }
 
@@ -1218,15 +1229,12 @@ check_fields_in_group_and_image_status()
         get_fields_from_mirror_image_status "${cluster}" "${image_spec}" image_fields_arr "${fields[@]}"
 
         # check that the image "state" matches the group "state"
-# TODO. The imaage status doesn not always get updated before the group status - see slack thread.   Fail and allow retry for now
-#        test "${image_fields_arr[0]}" = "${group_fields_arr[0]}" || { fail "image:${image_spec} ${image_fields_arr[0]} != ${group_fields_arr[0]}"; return 1; } 
-        test "${image_fields_arr[0]}" = "${group_fields_arr[0]}" || { fail; return 1; } 
+        test "${image_fields_arr[0]}" = "${group_fields_arr[0]}" || { fail "image:${image_spec} ${image_fields_arr[0]} != ${group_fields_arr[0]}"; return 1; } 
 
         # check that the image "description" matches the group "description".  Need to remove the extra information from the image description first
         local image_description
         image_description=$(cut -d ',' -f 1 <<< "${image_fields_arr[1]}")
-#        test "${image_description}" = "${group_fields_arr[1]}" || { fail "image:${image_spec} ${image_description} != ${group_fields_arr[1]}"; return 1; } 
-         test "${image_description}" = "${group_fields_arr[1]}" || { fail;  return 1; } 
+        test "${image_description}" = "${group_fields_arr[1]}" || { fail "image:${image_spec} ${image_description} != ${group_fields_arr[1]}"; return 1; } 
     done
 }
 
@@ -2579,6 +2587,32 @@ wait_for_group_not_present()
     wait_for_test_group_present "${cluster}" "${pool}" "${group}" 0 0
 }
 
+test_group_id_changed()
+{
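+    # succeed once the group id reported by "rbd group info" differs from the original id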
+    local cluster=$1
+    local group_spec=$2
+    local orig_group_id=$3    
+    local current_group_id
+
+    get_id_from_group_info "${cluster}" "${group_spec}" current_group_id "try_cmd" || { fail; return 1; }
+    test "${orig_group_id}" != "${current_group_id}" || { fail; return 1; }
+}
+
+wait_for_group_id_changed()
+{
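+    # retry with a backoff until the id reported for the group no longer matches orig_group_id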
+    local cluster=$1
+    local group_spec=$2
+    local orig_group_id=$3
+    local s
+
+    for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16 32 32; do
+        sleep ${s}
+        test_group_id_changed "${cluster}" "${group_spec}" "${orig_group_id}" && return 0
+    done
+    fail "wait for group with name ${group} to change id from ${orig_group_id} failed on ${cluster}"
+    return 1
+}
+
 test_group_snap_present()
 {
     local cluster=$1
@@ -2586,9 +2620,7 @@ test_group_snap_present()
     local group_snap_id=$3
     local expected_snap_count=$4
 
-    # TODO - have seen this next cmd fail with rc=2 and an empty list
-    # this should not happen, but if it does then retry as a temp workaround
-    try_cmd "rbd --cluster ${cluster} group snap list ${group_spec} --format xml --pretty-format" 
+    run_cmd "rbd --cluster ${cluster} group snap list ${group_spec} --format xml --pretty-format" 
 
     test "${expected_snap_count}" = "$(xmlstarlet sel -t -v "count(//group_snaps/group_snap[id='${group_snap_id}'])" < "$CMD_STDOUT")" || { fail; return 1; }
 }
@@ -2635,9 +2667,7 @@ test_group_snap_sync_state()
     local group_snap_id=$3
     local expected_state=$4
 
-    # TODO - have seen this next cmd fail with rc=2 and an empty list
-    # this should not happen, but if it does then retry as a temp workaround
-    try_cmd "rbd --cluster ${cluster} group snap list ${group_spec} --format xml --pretty-format" 
+    run_cmd "rbd --cluster ${cluster} group snap list ${group_spec} --format xml --pretty-format" 
 
     test "${expected_state}" = "$(xmlstarlet sel -t -v "//group_snaps/group_snap[id='${group_snap_id}']/state" < "$CMD_STDOUT")" || { fail; return 1; }
 }
@@ -2660,6 +2690,17 @@ test_group_snap_sync_incomplete()
     test_group_snap_sync_state "${cluster}" "${group_spec}" "${group_snap_id}" 'incomplete'
 }
 
+list_image_snaps_for_group()
+{
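+    # list the snapshots of every image in the group - debug output used to check that a group snap sync is progressing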
+    local cluster=$1
+    local group_spec=$2
+    local image_spec
+
+    try_cmd "rbd --cluster ${cluster} group image list ${group_spec}"
+    for image_spec in $(xargs < "$CMD_STDOUT"); do
+        try_cmd "rbd --cluster ${cluster} snap list -a ${image_spec}" || :
+    done
+}
+
 wait_for_test_group_snap_sync_complete()
 {
     local cluster=$1
@@ -2667,9 +2708,15 @@ wait_for_test_group_snap_sync_complete()
     local group_snap_id=$3
     local s
 
-    for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16 32 32 32 32 64; do
+    for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16 32 32 32 32 64 64 64 64 64; do
         sleep ${s}
         test_group_snap_sync_complete "${cluster}" "${group_spec}" "${group_snap_id}" && return 0
+
+        if [ "$s" -gt 32 ]; then
+            # query the snap progress for each image in the group - debug info to check that sync is progressing
+            list_image_snaps_for_group "${cluster}" "${group_spec}"
+        fi
+
     done
 
     fail "wait for group snap with id ${group_snap_id} to be synced failed on ${cluster}"
@@ -2807,17 +2854,10 @@ get_newest_group_snapshot_id()
     local group_spec=$2
     local -n _group_snap_id=$3
 
-    # TODO - have seen this next cmd fail with rc=2 and an empty list
-    # this should not happen, but if it does then retry as a temp workaround
-    try_cmd "rbd --cluster ${cluster} group snap list ${group_spec} --format xml --pretty-format" &&
-      { _group_snap_id=$(xmlstarlet sel -t -v "(//group_snaps/group_snap[state='complete']/id)[last()]" "$CMD_STDOUT" ); return; }
-    for s in 0.1 1 2 4 8 8 8 8 8 8 8 8 16 16; do
-        echo -e "${RED}RETRYING COMMAND${NO_COLOUR}";
-        sleep ${s}
-        try_cmd "rbd --cluster ${cluster} group snap list ${group_spec} --format xml --pretty-format" && {
-          _group_snap_id=$(xmlstarlet sel -t -v "(//group_snaps/group_snap[state='complete']/id)[last()]" "$CMD_STDOUT" ); return; }
-    done
-    fail "Failed to execute command"
+    run_cmd "rbd --cluster ${cluster} group snap list ${group_spec} --format xml --pretty-format" 
+    _group_snap_id=$(xmlstarlet sel -t -v "(//group_snaps/group_snap[state='complete']/id)[last()]" "$CMD_STDOUT" ) && return 0
+
+    fail "Failed to get snapshot id"
     return 1
 }
 
@@ -2888,10 +2928,7 @@ test_group_status_in_pool_dir()
     local description_pattern=$5
     local current_state=stopped
 
-    # When running the split-brain test in rbd_mirror_group.sh this next command sometimes fails with a message
-    # "rbd: mirroring not enabled on the group" (even though it clearly is).  To stop the test from failing, treat this as a non-fatal
-    # error for now and the caller will retry the command.  TODO change back to run_admin_cmd.
-    try_admin_cmd "rbd --cluster ${cluster} mirror group status ${group_spec} --format xml --pretty-format" || { fail; return 1; }
+    run_admin_cmd "rbd --cluster ${cluster} mirror group status ${group_spec} --format xml --pretty-format" || { fail; return 1; }
 
     test -n "${state_pattern}" && { test "${state_pattern}" = $(xmlstarlet sel -t -v "//group/state" < "${CMD_STDOUT}" ) || { fail; return 1; } }
     test -n "${description_pattern}" && { test "${description_pattern}" = "$(xmlstarlet sel -t -v "//group/description" "${CMD_STDOUT}" )" || { fail; return 1; } }
@@ -2915,7 +2952,7 @@ test_group_status_in_pool_dir()
         fi
     fi
 
-    # TODO enable this once tests are more reliable
+    # TODO enable this once the group and image replayers coordinate their status updates so that the states stay in sync
     #check_fields_in_group_and_image_status "${cluster}" "${group_spec}" ||  { fail; return 1; }
     
     return 0
@@ -2939,6 +2976,38 @@ wait_for_group_status_in_pool_dir()
     return 1
 }
 
+test_peer_group_status_in_pool_dir()
+{
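+    # check the "state" and (optionally) "description" fields reported for the peer site in the mirror group status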
+    local cluster=$1
+    local group_spec=$2
+    local state_pattern=$3
+    local description_pattern=$4
+
+    local fields=(//group/peer_sites/peer_site/state //group/peer_sites/peer_site/description)
+    local group_fields_arr
+    get_fields_from_mirror_group_status "${cluster}" "${group_spec}" group_fields_arr "${fields[@]}"
+
+    test "${state_pattern}" = "${group_fields_arr[0]}" || { fail; return 1; } 
+    if [ -n "${description_pattern}" ]; then
+        test "${description_pattern}" = "${group_fields_arr[1]}" || { fail; return 1; } 
+    fi
+}
+
+wait_for_peer_group_status_in_pool_dir()
+{
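+    # retry with a backoff until the peer site state (and optional description) matches the expected patterns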
+    local cluster=$1
+    local group_spec=$2
+    local state_pattern=$3
+    local description_pattern=$4
+    local s
+
+    for s in 1 2 4 8 8 8 8 8 8 8 8 16 16; do
+        sleep ${s}
+        test_peer_group_status_in_pool_dir "${cluster}" "${group_spec}" "${state_pattern}" "${description_pattern}" && return 0
+    done
+    fail 1 "failed to reach expected peer status"
+    return 1
+}
+
 stop_daemons_on_clusters()
 {
     local cluster_list=$1